Scrape all XKCD comics

A quick script I threw together… I wanted to be prepared (in accordance with http://xkcd.com/512/)

import urllib2,urllib,os

# Download XKCD comic images, one comic number at a time, into ./comics.
#
# For each comic number the script fetches http://xkcd.com/<num>/, pulls the
# image URL out of the "Image URL (for hotlinking/embedding)" line of the page
# HTML, and saves the image under its original file name.  Images whose file
# already exists on disk are skipped, so re-running does not download them
# again.  A per-comic status line ("downloaded.", "skipped." or "failed.") is
# printed for every number.

FIRST_COMIC = 1
LAST_COMIC = 920  # inclusive upper bound; raise it to fetch newer comics

# Markers that surround the image URL in the comic page HTML.
IMAGE_URL_PREFIX = "Image URL (for hotlinking/embedding): "
IMAGE_URL_SUFFIX = "</h3>"

# The output directory does not change between comics, so compute it once.
# It must exist before urlretrieve() can write into it; without this every
# download on a fresh checkout would be reported as "failed."
comics_dir = os.path.join(os.getcwd(), 'comics')
if not os.path.isdir(comics_dir):
    os.makedirs(comics_dir)

for num in range(FIRST_COMIC, LAST_COMIC + 1):
    url = "http://xkcd.com/" + str(num) + "/"
    try:
        page = urllib2.urlopen(urllib2.Request(url, None))
        try:
            html = page.read()
        finally:
            # Release the connection even if reading the body fails.
            page.close()

        # An IndexError here means the page has no image-URL line; it is
        # handled below like any other per-comic failure.
        img_url = html.split(IMAGE_URL_PREFIX)[1].split(IMAGE_URL_SUFFIX)[0]
        img_url = img_url.strip()

        target = os.path.join(comics_dir, os.path.basename(img_url))
        if not os.path.exists(target):
            urllib.urlretrieve(img_url, target)
            print(str(num) + " downloaded.")
        else:
            print(str(num) + " skipped.")
    except Exception:
        # Best effort: one bad comic must not abort the whole run.  Catching
        # Exception (not a bare except) lets Ctrl-C / SystemExit stop the script.
        print(str(num) + " failed.")
 
 
 

Leave a Comment