簡單的爬蟲知識(2)
阿新 • • 發佈:2018-11-10
cookie的使用
- 方法1:
# Method 1: replay a cookie string copied from a logged-in browser session by
# sending it verbatim in the "Cookie" request header.
# NOTE(review): the cookie below embeds session/user tokens; treat it as a
# secret and confirm it is expired before publishing.
headers = {
    "User-Agent": (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/69.0.3497.100 Safari/537.36"
    ),
    # One "name=value" pair per entry, joined with the "; " separator the
    # Cookie header uses.
    "Cookie": "; ".join((
        "uuid_tt_dd=10_19718537550-1536577558921-234799",
        "ARK_ID=JS0fe5ebf9d5a07322390daed32f9d99670fe5",
        "Hm_ct_6bcd52f51e9b3dce32bec4a3997715ac=1788*1*PC_VC",
        "UserName=qq_41386300",
        "UserInfo=bns5MLc7KeLvDk%2BkfGg1Pxo7iNJkoYBWkM3E5J3mvMsklRJlZpj1RNC%2B2Zlg8Uq6q6xo1W9RbKR6bJfh8S9IAg%3D%3D",
        "UserNick=qq_41386300",
        "UN=qq_41386300",
        "AU=F14",
        "BT=1539156078977",
        "UserToken=bns5MLc7KeLvDk%2BkfGg1Pxo7iNJkoYBWkM3E5J3mvMsklRJlZpj1RNC%2B2Zlg8Uq6iO%2Fl5Ordr5xQ%2Bxp3grKvR6oGjzZbR6k9HLhESqpENvZ1Lj5ybXvdMzheCjHTkooh",
        "smidV2=20180917142312d29d0f3df9d232398a0800db7b0b05df00eded7a513fd7df0",
        "Hm_lvt_6bcd52f51e9b3dce32bec4a3997715ac=1539077338,1539086159,1539151528,1539155154",
        "Hm_lpvt_6bcd52f51e9b3dce32bec4a3997715ac=1539156156",
        "dc_tos=pgdo7v",
        "dc_session_id=10_1539155152744.565800",
    )),
}
- 方法2:完整程式碼
from urllib.request import build_opener,Request,ProxyHandler,HTTPCookieProcessor
from urllib.parse import urlencode
# Form fields posted to the login endpoint.
# NOTE(review): credentials are hard-coded for the demo; real code should read
# them from the environment or a config file kept out of version control.
datas = {
    "user": "17703181473",
    "password": "123456",
}

# Log in. Supplying `data` makes this a POST request.
login_url = "http://www.sxt.cn/index/login/login.html"
headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/69.0.3497.100 Safari/537.36",
}
request = Request(login_url, headers=headers, data=urlencode(datas).encode())

# The cookie processor stores cookies set by responses and sends them back on
# later requests made through the same opener, so the login carries over.
handler = HTTPCookieProcessor()
opener = build_opener(handler)

# Use the response as a context manager so the connection is always closed.
with opener.open(request) as response:
    info = response.read().decode()
print(info)

# Visit a page that requires the logged-in session, reusing the same opener.
info_url = "http://www.sxt.cn/index/user.html"
request = Request(info_url, headers=headers)
with opener.open(request) as response:
    info = response.read().decode()
print(info)
- 將cookie儲存在檔案中,以及從檔案中取出cookie
# Partial code: persist cookies to a file and restore them later.
# MozillaCookieJar lives in http.cookiejar (not urllib.request), so it needs
# its own import.
from http.cookiejar import MozillaCookieJar

# Save to a file
cookie_jar = MozillaCookieJar()
handler = HTTPCookieProcessor(cookie_jar)
# ... build an opener with build_opener(handler) and perform the login request
# here. The jar only holds cookies after a response has been processed through
# the handler, so saving before any request writes a file with no cookies.
# ignore_discard / ignore_expires also keep session-only and expired cookies.
cookie_jar.save('cookie.txt', ignore_expires=True, ignore_discard=True)

# Load from the file
cookie_jar = MozillaCookieJar()
cookie_jar.load('cookie.txt', ignore_discard=True, ignore_expires=True)
# Requests made through an opener built from this handler send the loaded cookies.
handler = HTTPCookieProcessor(cookie_jar)