[PYTHON] Download all photos from a Tumblr blog

SUBMITTED BY: Guest

DATE: Oct. 20, 2013, 3:33 p.m.

FORMAT: Text only

SIZE: 1.6 kB

Raw Download

HITS: 1097

Report

	

    import os, sys
    from shutil import copyfileobj
    from urllib import urlopen
    from xml.etree import ElementTree as ET
     
    # Exactly one command-line argument is required: the Tumblr blog name.
    if len(sys.argv) != 2:
        # sys.exit() with a string writes it to stderr and exits with status 1,
        # so a usage error is not reported to the calling shell as success.
        sys.exit("Pass tumblr name as argument")

    tumblr_name = sys.argv[1]
    api_endpoint = 'http://%s.tumblr.com/api/read' % tumblr_name
    start = 0       # offset of the first post requested from the API
    num = 50        # number of posts requested per API call
    post_count = 1  # non-zero so the `while post_count` loop runs at least once
     
    # Page through the blog's photo posts, `num` at a time, until the API
    # returns a page that contains no <post> elements.
    while post_count:
        page_url = "%s?type=photo&start=%s&num=%s" % (api_endpoint, start, num)
        page = urlopen(page_url)
        try:
            content = page.read()
        finally:
            # Release the connection as soon as the body has been read.
            page.close()
        tree = ET.fromstring(content)
        post_tags = tree.findall(".//post")
        post_count = len(post_tags)
        for post_tag in post_tags:
            post_id = post_tag.attrib['id']
            # Keep only the part of 'date-gmt' before the first space.
            post_date = post_tag.attrib['date-gmt'].split(" ")[0]
            basename = "%s-%s-%s" % (tumblr_name, post_date, post_id)

            # ".//photo-url" matches every descendant, so one post can yield
            # several 1280-wide URLs. Collect each distinct URL once, in
            # document order, so they do not all overwrite a single file.
            # .get() tolerates a <photo-url> that has no max-width attribute.
            photo_urls = []
            for photo_tag in post_tag.findall(".//photo-url"):
                if photo_tag.attrib.get('max-width') != "1280":
                    continue
                photo_url = photo_tag.text
                if photo_url and photo_url not in photo_urls:
                    photo_urls.append(photo_url)

            for index, photo_url in enumerate(photo_urls):
                # The first photo keeps the historical file name; additional
                # photos of the same post get a numeric suffix (-2, -3, ...).
                if index == 0:
                    outname = "%s.jpeg" % basename
                else:
                    outname = "%s-%s.jpeg" % (basename, index + 1)
                if os.path.exists(outname):
                    print("%s already downloaded" % outname)
                    continue

                # Download to a temporary name and rename only on success, so
                # an interrupted transfer is never mistaken for a finished
                # file by the os.path.exists() check on a later run.
                partname = outname + ".part"
                photo = urlopen(photo_url)
                try:
                    # Binary mode: image bytes must be written untranslated.
                    with open(partname, 'wb') as outfile:
                        copyfileobj(photo, outfile)
                finally:
                    photo.close()
                os.rename(partname, outname)
                print("Downloaded %s to %s" % (photo_url, outname))
        start += num