Python 獲取京東暢銷書目錄和網址!贊一個
阿新 • • 發佈:2018-11-29
從事 Python 開發有一年多時間,起初是學習全棧的,工作中主要是在做後端開發,現在練練手,瞭解一下最新的爬蟲思路和爬蟲方法。
#!/usr/bin/env python
# encoding=utf-8
"""Crawl the JD.com best-selling books ranking and print each book's link tag.

Starting from the first ranking page, the script downloads each page,
prints the ``<a class="p-name">`` tag found in every ``<div class="p-detail">``
entry, and follows the ``pn-next`` link until no further page is available.
"""
import requests
from bs4 import BeautifulSoup
from requests import HTTPError

# Page links are handled in scheme-relative form ("//book.jd.com/..."), so
# this scheme is prepended before a link is requested.
HTTP_ = 'http:'

# Seconds to wait for the server before giving up on a request; without a
# timeout requests.get() may block indefinitely.
REQUEST_TIMEOUT = 10

# The crawl stops once the next-page anchor carries this href.
LAST_PAGE_HREF = 'javascript:void(0);'


def download_page(url):
    """Download *url* and return the raw response body.

    Args:
        url: Absolute URL of the page to fetch.

    Returns:
        The response body as bytes, or ``None`` when the request failed
        (the failure is reported on stdout).
    """
    print(url)
    try:
        response = requests.get(url, timeout=REQUEST_TIMEOUT)
        # requests does not raise on 4xx/5xx responses unless asked to.
        response.raise_for_status()
    except HTTPError as err:
        print('HTTP error while fetching {}: {}'.format(url, err))
        return None
    except requests.RequestException as err:
        # Base class of the errors requests raises itself (connection
        # failures, timeouts, ...); the builtin ConnectionError/TimeoutError
        # would not match them.
        print('Request for {} failed: {}'.format(url, err))
        return None
    return response.content


def parse_html(html):
    """Print the book link tags of one ranking page and locate the next page.

    Args:
        html: Markup of a ranking page, as returned by ``download_page``.

    Returns:
        The ``href`` of the ``pn-next`` anchor, or ``None`` when the page has
        no such anchor or the anchor has no ``href``.
    """
    soup = BeautifulSoup(html, "html.parser")

    for book_li in soup.find_all('div', attrs={'class': 'p-detail'}):
        # The earlier (disabled) output variants read a_tag['title'] as the
        # book title and a_tag['href'] as its scheme-relative link.
        a_tag = book_li.find('a', attrs={'class': 'p-name'})
        print(a_tag)

    next_button = soup.find('a', attrs={'class': 'pn-next'})
    if next_button is None:
        return None
    return next_button.get('href')


def main():
    """Walk the ranking page by page until the last page or a failure."""
    download_url = '//book.jd.com/booktop/0-0-0.html?category=3287-0-0-0-5-1#comfort'
    while download_url and download_url != LAST_PAGE_HREF:
        html = download_page(HTTP_ + download_url)
        if html is None:
            # Download failed and was already reported; nothing to parse.
            break
        download_url = parse_html(html)


if __name__ == '__main__':
    main()
執行結果:
請使用手機"掃一掃"x