Skip to content

Commit 0823065

Browse files
committed
added basic image scraping functionality
0 parents  commit 0823065

File tree

1 file changed

+26
-0
lines changed

1 file changed

+26
-0
lines changed

image_scraper.py

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,26 @@
1+
from bs4 import BeautifulSoup
2+
from urllib2 import urlopen
3+
import urllib
4+
5+
# run this image scraper from the directory that
6+
#you want to save scraped images to
7+
8+
def make_soup(url):
    """Fetch *url* and return its HTML parsed into a BeautifulSoup tree.

    Args:
        url: Address of the page to download.

    Returns:
        A BeautifulSoup object built from the response body.
    """
    from contextlib import closing

    # urllib2 responses are not context managers; closing() guarantees the
    # connection is released even if read() raises.
    with closing(urlopen(url)) as response:
        html = response.read()
    # Name the parser explicitly: otherwise bs4 picks whichever parser happens
    # to be installed (and warns), so results could differ between machines.
    return BeautifulSoup(html, 'html.parser')
11+
12+
def get_images(url):
    """Download every image referenced by an <img> tag on the page at *url*.

    Each image is saved in the current working directory under the text that
    follows the last '/' of its ``src`` value.

    Args:
        url: Address of the page to scrape.

    Returns:
        The list of ``src`` values (as written in the markup) of the <img>
        tags that have one.
    """
    # Function-scope import keeps this block self-contained (Python 2 stdlib).
    from urlparse import urljoin

    soup = make_soup(url)
    # findAll already returns a list of bs4 element tags.
    images = soup.findAll('img')
    print(str(len(images)) + " images found.")
    print('Downloading images to current working directory.')
    # <img> tags without a src attribute yield None and cannot be downloaded.
    image_links = [img.get('src') for img in images if img.get('src')]
    for link in image_links:
        filename = link.split('/')[-1]
        if not filename:
            # A src ending in '/' gives no usable file name; skip it.
            continue
        # Resolve relative and protocol-relative links against the page URL
        # so urlretrieve always receives a fetchable absolute address.
        urllib.urlretrieve(urljoin(url, link), filename)
    return image_links
24+
25+
#a standard call looks like this
26+
#imgs = get_images('http://www.wookmark.com')

0 commit comments

Comments
 (0)