[PYTHON] Download all photos from a Tumblr blog


SUBMITTED BY: Guest

DATE: Oct. 20, 2013, 3:33 p.m.

FORMAT: Text only

SIZE: 1.6 kB

HITS: 1097

  1. import os, sys
  2. from shutil import copyfileobj
  3. from urllib import urlopen
  4. from xml.etree import ElementTree as ET
  5. if len(sys.argv) != 2:
  6. print >> sys.stderr, "Pass tumblr name as argument"
  7. sys.exit()
  8. tumblr_name = sys.argv[1]
  9. api_endpoint = 'http://%s.tumblr.com/api/read' % tumblr_name
  10. start = 0
  11. num = 50
  12. post_count = 1
  13. while post_count:
  14. resp = urlopen("%s?type=photo&start=%s&num=%s" % (api_endpoint, start, num))
  15. content = resp.read()
  16. tree = ET.fromstring(content)
  17. post_tags = tree.findall(".//post")
  18. post_count = len(post_tags)
  19. for post_tag in post_tags:
  20. post_id = post_tag.attrib['id']
  21. post_date = post_tag.attrib['date-gmt'].split(" ")[0]
  22. outname = "%s-%s-%s.jpeg" % (tumblr_name, post_date, post_id)
  23. if os.path.exists(outname):
  24. print "%s already downloaded" % outname
  25. continue
  26. for photo_tag in post_tag.findall(".//photo-url"):
  27. if photo_tag.attrib['max-width'] == "1280":
  28. photo_url = photo_tag.text
  29. resp = urlopen(photo_url)
  30. outfile = open(outname, 'w')
  31. copyfileobj(resp, outfile)
  32. outfile.close()
  33. print "Downloaded %s to %s" % (photo_url, outname)
  34. start += num

comments powered by Disqus