利用Python批量爬取XKCD動漫圖片,並批量儲存
阿新 • • 發佈:2018-11-11
import os
from urllib.parse import urljoin

import bs4
import requests

# Walk XKCD backwards from the front page, saving each comic image into
# ./xkcd, following the "prev" link on every page until the URL ends in '#'.
url = 'https://xkcd.com'
os.makedirs('xkcd', exist_ok=True)  # no error if the folder already exists

while not url.endswith('#'):
    # Download the page.
    print('downloading the %s...' % (url))
    res = requests.get(url)
    res.raise_for_status()  # raise an exception on an HTTP error status
    soup = bs4.BeautifulSoup(res.text, 'lxml')

    # Find the URL of the comic image.
    comicele = soup.select('#comic img')
    print(comicele)
    if comicele == []:
        print('could not find comic image')
    else:
        # Resolve the src against the page URL so that a scheme-relative
        # ("//host/path") or path-relative value becomes a full URL that
        # requests can fetch.
        # NOTE(review): assumes the <img> carries a src attribute -- confirm.
        comicurl = urljoin(url, comicele[0].get('src'))
        print(comicurl)

        # Download the image.
        res = requests.get(comicurl)
        res.raise_for_status()

        # Save the image to ./xkcd; the with-block closes the file even if
        # a write fails part-way through.
        image_path = os.path.join('xkcd', os.path.basename(comicurl))
        with open(image_path, 'wb') as imagefile:
            for chunk in res.iter_content(100000):
                imagefile.write(chunk)

    # Get the prev button's URL.
    prevlinks = soup.select('a[rel = "prev"]')
    if not prevlinks:
        # Without a prev link there is nowhere left to go; stop cleanly
        # instead of failing with an IndexError.
        print('could not find prev link')
        break
    prevlink = prevlinks[0]
    # Plain concatenation is deliberate: an href of '#' must yield a URL
    # ending in '#' so the loop condition can terminate the crawl.
    url = 'https://xkcd.com' + prevlink.get('href')

print('DONE')