Scrape all XKCD comics

A quick script I threw together… I wanted to be prepared (in accordance with http://xkcd.com/512/)

import urllib2,urllib,os

# Download every XKCD comic image (comics 1 through 920) into ./comics/.
# For each comic number the script fetches the comic's page, extracts the
# hotlinking image URL from the page HTML, and saves the image under its
# original file name. Images that already exist on disk are skipped, so the
# script can be re-run to resume an interrupted scrape.

# The destination directory must exist before urlretrieve can write into it;
# otherwise every download raises and is reported as "failed".
comics_root = os.path.join(os.getcwd(), 'comics')
if not os.path.isdir(comics_root):
    os.makedirs(comics_root)

num = 1
while num < 921:
    url = "http://xkcd.com/"+str(num)+"/"
    try:
        request = urllib2.Request(url, None)
        f = urllib2.urlopen(request)
        try:
            html = f.read()
        finally:
            # Release the connection even if reading the response fails.
            f.close()

        # The image URL sits between this label and the closing </h3> tag.
        # A page without the label raises IndexError, handled below.
        pi1 = html.split("Image URL (for hotlinking/embedding): ")
        pi2 = pi1[1].split("</h3>")
        img_url = pi2[0].strip()

        comicdir = os.path.join(comics_root, os.path.basename(img_url))
        # Single-argument print(...) behaves the same as the Python 2
        # print statement, so the output is unchanged.
        if not os.path.exists(comicdir):
            urllib.urlretrieve(img_url, comicdir)
            print(str(num) + " downloaded.")
        else:
            print(str(num) + " skipped.")
    except Exception:
        # Best effort: report the failure and move on to the next comic.
        # Catching Exception (rather than a bare except) lets Ctrl-C and
        # SystemExit stop the script instead of being counted as failures.
        print(str(num) + " failed.")
    num = num + 1


Leave a Comment