1. 程式人生 > Python爬蟲-urllib的基本用法

Python爬蟲-urllib的基本用法

quest resp lan roc 用法 rom handler baidu github

from urllib import response,request,parse,error
from http import  cookiejar



if __name__ == '__main__':
    # Tutorial walkthrough of basic urllib usage.
    #
    # Every example below is intentionally disabled: either commented out with
    # `#` or wrapped in a bare triple-quoted string. Enable one at a time to
    # try it. Most examples perform live network requests.
    #
    # The examples use the names bound by this file's imports
    # (`from urllib import request, parse, error` and
    # `from http import cookiejar`), so they call `request.urlopen(...)`
    # rather than `urllib.request.urlopen(...)`.

    # --- Basic GET request ---
    # response = request.urlopen("http://www.baidu.com")
    # print(response.read().decode("utf-8"))

    # --- POST request: passing `data` sends a POST; without it the request is a GET ---
    # Request body
    # data = bytes(parse.urlencode({"word": "hello"}), encoding="utf-8")
    # response = request.urlopen("http://httpbin.org/post", data=data)
    # print(response.read())

    # --- Timeout ---
    # response = request.urlopen("http://www.baidu.com", timeout=0.01)
    # print(response.read().decode("utf-8"))

    # --- Inspecting the response ---
    # response = request.urlopen("http://www.python.org")
    # print(type(response))
    # Status code
    # print(response.status)
    # Response headers
    # print(response.getheaders())
    # print(response.getheader("Server"))

    # --- More complex requests: build a Request object first ---
    # req = request.Request("http://python.org")
    # response = request.urlopen(req)
    # print(response.read().decode("utf-8"))

    # --- Request headers (Request.add_header works as well) ---
    """
    url = "http://httpbin.org/post"
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64)",
        "Host": "httpbin.org",
    }
    form = {
        "name": "Germey",
    }
    data = bytes(parse.urlencode(form), encoding="utf8")
    req = request.Request(url, data, headers, method="POST")
    response = request.urlopen(req)
    print(response.read())
    """

    # --- Proxy ---
    """
    proxy_header = request.ProxyHandler({
        # proxy IP goes here
    })
    opener = request.build_opener(proxy_header)
    response = opener.open("http://httpbin.org/get")

    # Cookies (keep a logged-in session)
    cookie = cookiejar.CookieJar()
    handler = request.HTTPCookieProcessor(cookie)
    opener = request.build_opener(handler)
    response = opener.open("http://www.baidu.com")
    """

    # --- Saving cookies ---
    # MozillaCookieJar, LWPCookieJar

    # --- Catching exceptions: basically HTTPError or URLError ---
    """
    try:
        response = request.urlopen("http://amojury.github.io")
    except error.URLError as e:
        print(e.reason)
    """

    # --- URL parsing helpers ---
    # urlparse, urlunparse (the reverse of urlparse),
    # urlencode (dict -> query parameters)
    # result = parse.urlparse("https://www.baidu.com/s?ie=utf-8&f=3&rsv_bp=0&rsv_idx=1&tn=baidu&wd=python%20%E6%89%B9%E9%87%8F%E6%B3%A8%E9%87%8A&rsv_pq=f9b1a8b300011700&rsv_t=1252nVpaBhdm%2FEdlsdrPgUxIHLfk4QNB443eSTUKoRcHFx9G09YZi9N9Dvo&rqlang=cn&rsv_enter=1&rsv_sug3=9&rsv_sug1=8&rsv_sug7=101&rsv_sug2=1&prefixsug=python%2520%25E6%2589%25B9%25E9%2587%258F&rsp=0&inputT=10498&rsv_sug4=14994")
    # print(result)

Python爬蟲-urllib的基本用法