Crawl image URLs from the National Geographic website and download the images.
import os
import re
from urllib.request import urlopen

import requests
from bs4 import BeautifulSoup
## Starting request
# Fetch the animals index page. The context manager closes the HTTP response
# as soon as the body has been read, instead of leaving it to the garbage
# collector.
with urlopen("http://www.nationalgeographic.com.cn/animals/") as response:
    html = response.read().decode('utf-8')
soup = BeautifulSoup(html, features='lxml')

# Collect every <img> tag whose src attribute matches the image-host pattern.
# The pattern is a raw string so that `\.` reaches the regex engine unchanged
# and does not trigger Python's invalid-escape-sequence warning.
img_links = soup.find_all("img", {"src": re.compile(r'http://image..*?\.jpg')})
for link in img_links:
    print(link['src'])  # picture location
## Adding the code below downloads the images
## Create the target folder first: mkdir img  # creates a folder named "img"
# Make sure the download directory exists so that open() below does not fail
# with FileNotFoundError when the folder has not been created by hand.
os.makedirs('./img', exist_ok=True)

for link in img_links:
    print(link['src'])
    # Only sources that start with 'http' are downloaded; anything else is
    # printed above and then skipped.
    if link['src'].startswith('http'):
        url = link['src']
        # The file name is the last path segment of the URL.
        image_name = url.split('/')[-1]
        # stream=True avoids loading the whole image into memory; using the
        # response as a context manager releases the connection afterwards.
        with requests.get(url, stream=True) as r:
            with open('./img/%s' % image_name, 'wb') as f:
                for chunk in r.iter_content(chunk_size=128):
                    f.write(chunk)
        print('Saved %s' % image_name)