1. 程式人生 > Python 獲取京東暢銷書目錄和網址!贊一個

python 獲取京東暢銷書目錄和網址!贊一個

 

從事 Python 開發已有一年多時間,起初學習的是全棧,工作中主要做後端開發。現在練練手,瞭解一下最新的爬蟲思路和爬蟲方法。

#!/usr/bin/env python

# encoding=utf-8


import requests
from bs4 import BeautifulSoup
from requests import HTTPError


# Scheme prepended to the protocol-relative URLs ('//host/path') that
# main() hands to download_page().
HTTP_ = 'http:'




def download_page(url, timeout=10):
    """Fetch *url* and return the raw response body.

    Args:
        url: Absolute URL to request.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        The response body as bytes, or ``b''`` when the request fails.
        Failures are reported on stdout instead of being raised.
    """
    print(url)
    try:
        response = requests.get(url, timeout=timeout)
        # requests only raises HTTPError for 4xx/5xx responses on request.
        response.raise_for_status()
    except (
        # Base class of requests' own HTTPError, ConnectionError and Timeout.
        requests.exceptions.RequestException,
        # Builtin exceptions the original handlers named; kept for parity.
        ConnectionError,
        TimeoutError,
    ) as err:
        print(err)
        # Always return bytes so the caller never sees an unbound result.
        return b''
    return response.content




def parse_html(html):
    """Print the book links found in *html* and return the next-page link.

    Every ``div.p-detail`` element is searched for an ``a.p-name`` tag,
    and each tag found is printed.

    Args:
        html: HTML markup (str or bytes) to parse.

    Returns:
        The ``href`` of the ``a.pn-next`` element. When the page has no
        such element, or it carries no ``href``, the string
        ``'javascript:void(0);'`` is returned; that is the value main()
        treats as the end of the listing.
    """
    last_page_href = 'javascript:void(0);'
    soup = BeautifulSoup(html, "html.parser")
    for book_li in soup.find_all('div', attrs={'class': 'p-detail'}):
        a_tag = book_li.find('a', attrs={'class': 'p-name'})
        # find() returns None when an entry has no matching link.
        if a_tag is not None:
            print(a_tag)
    next_button = soup.find('a', attrs={'class': 'pn-next'})
    if next_button is None:
        # An empty or unexpected page has no pager; stop instead of crashing.
        return last_page_href
    return next_button.get('href', last_page_href)




def main():
    """Download and parse listing pages until the end marker is returned.

    Starts from a fixed protocol-relative URL and keeps following the link
    returned by parse_html() until it equals ``'javascript:void(0);'``.
    """
    end_marker = 'javascript:void(0);'
    next_url = '//book.jd.com/booktop/0-0-0.html?category=3287-0-0-0-5-1#comfort'
    while True:
        if next_url == end_marker:
            break
        page = download_page(HTTP_ + next_url)
        next_url = parse_html(page)




# Start the crawl only when this file is executed as a script.
if __name__ == '__main__':
    main()

執行結果:

請使用手機"掃一掃"x