added basic image scraping functionality

nateberman · nateberman · commit 0823065fa96c · 2014-04-18T12:18:42.000-05:00
diff --git a/image_scraper.py b/image_scraper.py
@@ -0,0 +1,26 @@
+from bs4 import BeautifulSoup
+from urllib2 import urlopen
+import urllib
+
+# Run this image scraper from the directory where
+# you want the scraped images to be saved.
+
+def make_soup(url):
+	html = urlopen(url).read()
+	return BeautifulSoup(html)
+
+def get_images(url):
+	soup = make_soup(url)
+	#this makes a list of bs4 element tags
+	images = [img for img in soup.findAll('img')]
+	print (str(len(images)) + "images found.")
+	print 'Downloading images to current working directory.'
+	#compile our unicode list of image links
+	image_links = [each.get('src') for each in images]
+	for each in image_links:
+		filename=each.split('/')[-1]
+		urllib.urlretrieve(each, filename)
+	return image_links
+
+# A standard call looks like this:
+#imgs = get_images('http://www.wookmark.com')