爬蟲入門學習 貼吧小案例
阿新 • • 發佈:2019-02-23
爬蟲入門 code 請求 color baidu bsp 客戶 d+ 編碼
1 import urllib.request
2 import urllib.parse
3 import random
4
# Target address: base URL that the keyword and page offset are appended to.
url = "http://tieba.baidu.com/f"

# Candidate User-Agent header VALUES used to make requests look like they come
# from a regular browser. Only the value is stored here: the header name is
# supplied separately when the header is added to the request, so keeping a
# "User-Agent: " prefix inside these strings would send a duplicated prefix.
ua_list = [
    "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.6; rv:2.0.1) Gecko/20100101 Firefox/4.0.1",
    "Mozilla/5.0 (Windows NT 6.1; rv:2.0.1) Gecko/20100101 Firefox/4.0.1",
    "Opera/9.80 (Macintosh; Intel Mac OS X 10.6.8; U; en) Presto/2.8.131 Version/11.11",
    "Opera/9.80 (Windows NT 6.1; U; en) Presto/2.8.131 Version/11.11",
    "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_0) AppleWebKit/535.11 (KHTML, like Gecko) Chrome/17.0.963.56 Safari/535.11",
    "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/72.0.3626.109 Safari/537.36",
]

# Pick one entry at random; the same value is reused for every request made
# during this run of the script.
user_agent = random.choice(ua_list)
def doWrite(html, f_name):
    """Write the downloaded page text to a local file.

    :param html: text of the response received for the request
    :param f_name: name (path) of the file the text is written to;
        an existing file is overwritten
    :return: None
    """
    # The encoding is given explicitly so the output does not depend on the
    # platform's default encoding.
    with open(f_name, "w", encoding="utf8") as f:
        f.write(html)
    # Visual separator printed once the file has been written.
    print(">" * 30)
28
def loadPage(f_url, f_name):
    """Request one page and save its body to a file.

    :param f_url: full URL of the page to request
    :param f_name: name of the file the page text is saved to
    :return: None
    :raises urllib.error.URLError: if the request fails
    :raises UnicodeDecodeError: if the response body is not valid UTF-8
    """
    # Build the request for the target address.
    request = urllib.request.Request(f_url)
    # Set the HTTP request header that identifies the client.
    request.add_header("User-Agent", user_agent)
    # Read the response inside a context manager so the underlying connection
    # is closed even when reading or decoding raises.
    with urllib.request.urlopen(request) as response:
        html = response.read().decode("utf-8")
    # Save the downloaded text.
    print("準備寫入數據....")
    doWrite(html, f_name)
40
def doCode(url, kwd):
    """Append the URL-encoded search keyword to the base address.

    :param url: base address the query string is appended to
    :param kwd: search keyword; it is percent-encoded as the ``kw`` parameter
    :return: ``url`` followed by ``?kw=<encoded keyword>``
    """
    # urlencode percent-encodes non-ASCII text so it is safe inside a URL.
    query = urllib.parse.urlencode({"kw": kwd})
    # Join the base address and the encoded keyword.
    return url + "?" + query
51
52
def doUrl(url, star, end):
    """Build the URL of every requested page and download each one.

    :param url: address that already carries the keyword query string;
        ``&pn=<offset>`` is appended to it for each page
    :param star: number of the first page to download
    :param end: number of the last page to download (inclusive)
    :return: None
    """
    for pages in range(star, end + 1):
        # The pn parameter advances by 50 per page number
        # (presumably the number of entries on one listing page - TODO confirm).
        page = (pages - 1) * 50

        f_url = url + "&pn=" + str(page)
        f_name = "第" + str(pages) + "頁" + ".html"
        print("即將加載第{0}頁數據".format(pages))
        loadPage(f_url, f_name)
    # Reported once, after every requested page has been processed.
    print("下載完成,謝謝使用!")
65
# Script entry point: ask for the forum name and page range, then download
# every page in that range.
if __name__ == '__main__':
    tb_name = input("請輸入要訪問的貼吧名:\n")
    # int() raises ValueError when the entered text is not a whole number.
    starPage = int(input("請輸入起始頁"))
    endPage = int(input("請輸入結束頁"))

    # Encode the forum name into the query string, then fetch each page.
    full_url = doCode(url, tb_name)
    doUrl(full_url, starPage, endPage)
爬蟲入門學習 貼吧小案例