python3.7 爬取QQ空間好友
阿新 • • 發佈:2018-01-30
cgi urlencode version == tab pan ont 訪問權限 host
使用selenium庫自動登錄,記錄登錄的Cookie。以下URL分別代表不同的動作,雖然沒有全用。
留言:
https://user.qzone.qq.com/proxy/domain/m.qzone.qq.com/cgi-bin/new/get_msgb?
uin=1612893772&hostUin=1148639090&start=0&s=0.8883444517176473&format=jsonp&num=10
&inCharset=utf-8&outCharset=utf-8&g_tk=2208268
&qzonetoken=bede67d5ca4dc0944791e45f795beeb346e50a23b20df9b4152a142232a7f7cd40e26b929798e3b74bab&g_tk=2208268
好友:
https://user.qzone.qq.com/proxy/domain/r.qzone.qq.com/cgi-bin/tfriend/friend_ship_manager.cgi?
uin=1612893772&do=1&rd=0.19169828437926406&fupdate=1&clean=1&g_tk=108064521
&qzonetoken=77bdd3f44636c7b403a6462f493a2e6e02e6b8cd1772fe928bf511442e491315df84454ad4455093f2&g_tk=108064521
說說:
https://h5.qzone.qq.com/proxy/domain/ic2.qzone.qq.com/cgi-bin/feeds/feeds_html_module?i_uin=1148639090&i_login_uin=1612893772
&mode=4&previewV8=1&style=25&version=8&needDelOpr=true&transparence=true&hideExtend=false
&showcount=5&MORE_FEEDS_CGI=http://ic2.qzone.qq.com/cgi-bin/feeds/feeds_html_act_all&refer=2&paramstring=os-winxp|100
興趣愛好:
https://h5.qzone.qq.com/proxy/domain/page.qq.com/cgi-bin/profile/interest_get?
uin=851676467&vuin=1612893772&flag=1&rd=0.7835457101159748&fupdate=1&
g_tk=896484925&qzonetoken=38bcb8fb59e772a31ff4ca2358781258d1c7f4e2c8f640e537d6bf52ccc4ab48c7614fa3a57a5cabf0
以下是具體代碼:
1 from urllib import parse
2 from selenium import webdriver
3 import requests
4 import json
5 from json import loads
6 import time
7 import pymssql
8 import datetime
9
def get_key_values(body, key, end=';'):
    """Return the text that follows ``key`` in ``body``, up to ``end``.

    Used to pull a single value out of a ``name=value;`` cookie string.

    :param body: string to search
    :param key: marker whose following text is wanted (e.g. ``'p_skey='``)
    :param end: terminator that closes the value, ``';'`` by default
    :return: the text between ``key`` and the next ``end``; the remainder of
        ``body`` when no terminator follows; ``''`` when ``key`` is absent
    """
    key_pos = body.find(key)
    if key_pos == -1:
        # find() signals "absent" with -1; using that as a slice offset would
        # return an unrelated fragment of body instead of nothing.
        return ''
    start = key_pos + len(key)
    # Honour the caller-supplied terminator and search only after the key.
    stop = body.find(end, start)
    if stop == -1:
        # Last value in the string with no trailing terminator.
        return body[start:]
    return body[start:stop]
19
def get_key(cookies):
    """Derive the numeric request token from the ``p_skey`` cookie.

    The value of ``p_skey`` is folded character by character into a hash
    seeded with 5381 (``h += (h << 5) + ord(char)``) and the result is
    reduced to 31 bits. ``main`` sends the result as the ``g_tk`` parameter.

    :param cookies: cookie string in ``name=value;`` form
    :return: the hash as a non-negative integer below 2**31
    """
    p_skey = get_key_values(cookies, 'p_skey=')
    hashed = 5381
    for char in p_skey:
        hashed += (hashed << 5) + ord(char)
    # Python integers do not overflow, so the 31-bit mask is applied once at
    # the end.
    return hashed & 0x7FFFFFFF
32
def web_login_cookie(qq_account='1612893772', qq_password='13974162858x'):
    """Log in to Qzone through Chrome and return the browser's cookies.

    Opens a Chrome window, submits the login form via ``login``, waits, loads
    the account's Qzone home page and collects every cookie the browser
    holds. The browser is always closed before returning.

    :param qq_account: QQ number to log in with
    :param qq_password: password for that account
    :return: all cookies serialised as consecutive ``name=value;`` pairs
    """
    # NOTE(review): the default credentials are kept only so existing
    # no-argument callers keep working. Real credentials should be passed in
    # (or read from the environment) rather than committed to source.
    driver = webdriver.Chrome()
    try:
        login(driver, qq_account, qq_password)
        time.sleep(10)  # fixed pause after the login form is submitted
        driver.get('https://user.qzone.qq.com/{}'.format(qq_account))
        # Each entry from get_cookies() is a dict; only name and value are
        # kept.
        return ''.join(
            '{}={};'.format(item['name'], item['value'])
            for item in driver.get_cookies()
        )
    finally:
        # Without this every call leaves a Chrome process behind, and main
        # calls this function again on every retry.
        driver.quit()
51
def login(driver, qq_account, qq_password):
    """Fill in and submit the Qzone account/password login form.

    :param driver: Selenium browser object
    :param qq_account: QQ account number typed into the ``u`` field
    :param qq_password: password typed into the ``p`` field
    :return: None
    """
    driver.maximize_window()
    driver.get('http://user.qzone.qq.com')
    # The form lives inside the 'login_frame' iframe.
    driver.switch_to.frame('login_frame')
    time.sleep(1)
    # find_element(by, value) is used instead of the find_element_by_id
    # helpers, which were removed in Selenium 4.3; 'id' is the value of
    # By.ID, so this form works on Selenium 3 and 4 alike.
    driver.find_element('id', 'switcher_plogin').click()
    driver.find_element('id', 'u').send_keys(qq_account)
    time.sleep(2)
    driver.find_element('id', 'p').send_keys(qq_password)
    time.sleep(2)
    driver.find_element('id', 'login_button').click()
70
def send_requests(req, headers, url, params=None):
    """Issue a GET request and return the response body as text.

    When ``params`` is given it is URL-encoded and appended directly to
    ``url``, so ``url`` is expected to already end with ``?`` (or ``&``).

    :param req: session-like object exposing ``get(url=..., headers=...)``
    :param headers: request headers
    :param url: target URL, including the trailing ``?`` when ``params`` is
        used
    :param params: optional mapping of query parameters
    :return: the ``text`` attribute of the response (JSONP for the Qzone
        endpoints called from ``main``)
    """
    if params is not None:
        url = url + parse.urlencode(params)
    page = req.get(url=url, headers=headers)
    return page.text
87
def get_each_str(req, uin, headers):
    """Fetch the Qzone home page of ``uin`` and return its body as text.

    :param req: session-like object exposing ``get(url=..., headers=...)``
    :param uin: QQ number whose page is requested
    :param headers: request headers
    :return: the ``text`` attribute of the response
    """
    each_url = 'https://user.qzone.qq.com/{}'.format(uin)
    page = req.get(url=each_url, headers=headers)
    # The response used to be fetched and then discarded, so the function
    # always returned None.
    return page.text
91
def _birth_date(birthyear, birthday):
    """Build a datetime from a year and a ``MM-DD`` string, with fallbacks."""
    year = str(birthyear)
    if len(year) < 4:
        year = '1900'
    # Try the real birthday first. Anything that is not a valid calendar day
    # (e.g. '00-00') falls back to 1 January of the same year. The previous
    # check, birthday[1:2] == '0', also matched every October date ('10-xx').
    for month_day in (str(birthday), '01-01'):
        try:
            return datetime.datetime.strptime(year + '-' + month_day, '%Y-%m-%d')
        except ValueError:
            continue
    return datetime.datetime(1900, 1, 1)


def friend_db(dicts, name=''):
    """Store one friend's profile in the ``friendDB`` database.

    Inserts one row each into ``friendInfo``, ``friendPlace`` and
    ``friendNet``. The three inserts are committed together and rolled back
    together if any of them fails. ``dicts`` is not modified.

    :param dicts: profile dictionary; must contain the keys read below
        (``uin``, ``age``, ``sex``, ``birthyear``, ``birthday``, ...)
    :param name: remark name stored alongside the friend
    :return: None
    :raises KeyError: if ``dicts`` lacks one of the expected keys
    """
    # Signatures longer than 70 characters are stored as empty, as before.
    # NOTE(review): presumably the column width is 70 - confirm against schema.
    signature = dicts['signature']
    if len(signature) > 70:
        signature = ''

    uin = dicts['uin']
    statements = (
        ('insert into friendInfo values(%s,%s,%s,%s,%s)',
         (uin, name, dicts['age'], '男' if dicts['sex'] == 1 else '女',
          _birth_date(dicts['birthyear'], dicts['birthday']))),
        ('insert into friendPlace values(%s,%s,%s,%s,%s,%s,%s)',
         (uin, dicts['company'], dicts['career'],
          dicts['hco'] + dicts['hp'] + dicts['hc'],
          dicts['country'] + dicts['province'] + dicts['city'],
          dicts['cco'] + dicts['cp'] + dicts['cc'], dicts['cb'])),
        ('insert into friendNet values(%s,%s,%s,%s,%s)',
         (uin, dicts['nickname'], dicts['spacename'], dicts['desc'], signature)),
    )

    # NOTE(review): connection credentials are hard-coded; move to config.
    conn = pymssql.connect(host='localhost', user='sa', password='123456',
                           database='friendDB', charset='utf8')
    try:
        cur = conn.cursor()
        try:
            for statement, row in statements:
                print('sql: ', statement, row)
                # Values come from scraped, user-written profile text, so
                # they are passed as parameters and never formatted into the
                # SQL string.
                cur.execute(statement, row)
            conn.commit()
        except Exception:
            conn.rollback()
            raise
        finally:
            cur.close()
    finally:
        conn.close()
133
def _parse_jsonp(text):
    """Decode the JSON payload wrapped in a ``_Callback(...);`` response."""
    # rfind: the closing ');' is the last one in the response; a ');' inside
    # a JSON string value must not cut the payload short.
    return loads(text[len('_Callback('):text.rfind(');')])


def _log_leak(message):
    """Append one line to ``leak.txt``, the persistent record of skipped friends."""
    with open('leak.txt', 'a', encoding='utf8') as file:
        file.write(message + "\n")


def main():
    """Log in, download the friend list and store every readable profile.

    Logs in through the browser, extracts the request parameters from the
    cookies, and requests the friend list. While the list response reports a
    non-zero ``code`` the whole login is retried after 10 seconds. Each
    friend's profile is then fetched (one request per second) and written to
    the database with ``friend_db``; friends whose response cannot be decoded
    or whose ``code`` is non-zero are recorded in ``leak.txt``.

    :return: None
    """
    url_friend = 'https://user.qzone.qq.com/proxy/domain/r.qzone.qq.com/cgi-bin/tfriend/friend_ship_manager.cgi?'
    each_url = 'https://h5.qzone.qq.com/proxy/domain/base.qzone.qq.com/cgi-bin/user/cgi_userinfo_get_all?'

    # Retry in a loop rather than by calling main() recursively, so repeated
    # login failures cannot exhaust the call stack.
    while True:
        req = requests.session()
        headers = {'host': 'h5.qzone.qq.com',
                   'accept-encoding': 'gzip, deflate, br',
                   'accept-language': 'zh-CN,zh;q=0.8',
                   'accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8',
                   'user-agent': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/'
                                 '59.0.3071.115 Safari/537.36',
                   'connection': 'keep-alive'}
        cookie = web_login_cookie()
        print('cookie', cookie)
        g_tk = get_key(cookie)
        # The request parameters are taken from these cookie values.
        qzonetoken_friend = get_key_values(cookie, 'ptcz=')
        uin_friend = get_key_values(cookie, 'ptui_loginuin=')
        rd_friend = get_key_values(cookie, '_qpsvr_localtk=')
        print('friend_data', 'qzontoken:%s;uin:%s;rd:%s' % (qzonetoken_friend, uin_friend, rd_friend))
        headers['Cookie'] = cookie
        params_friend = {"uin": uin_friend, "fupdate": 1, "action": 1, "do": 1, "g_tk": g_tk, "rd": rd_friend,
                         'qzonetoken': qzonetoken_friend}
        data_friend_str = send_requests(req, headers, url_friend, params=params_friend)
        data_friend_dict = _parse_jsonp(data_friend_str)
        print('data_friend_dict: ', data_friend_dict)
        if data_friend_dict['code'] == 0:
            break
        # Per the original author's note, code -3000 means "please log in
        # first".
        time.sleep(10)

    for friend in data_friend_dict['data']['items_list']:
        each_uin = friend['uin']
        params_each = {"uin": each_uin, "fupdate": 1, "vuin": uin_friend, "g_tk": g_tk, "rd": rd_friend,
                       'qzonetoken': qzonetoken_friend}
        time.sleep(1)  # pause before each profile request
        data_each_str = send_requests(req, headers, each_url, params_each)
        try:
            data_each_dict = _parse_jsonp(data_each_str)
        except json.decoder.JSONDecodeError as e:
            # Persist the failure and move on to the next friend.
            _log_leak('except: ' + str(each_uin) + " " + friend['name'] + " " + e.msg)
            continue
        print('data_each_dict: ', data_each_dict)
        if data_each_dict['code'] == 0:
            friend_db(data_each_dict['data'], name=friend['name'])
        else:
            # Per the original author's note, code -4009 means "no access
            # permission".
            _log_leak('沒有訪問權限: ' + str(each_uin) + " " + friend['name'])
# Run the scraper only when executed as a script, so importing this module
# does not open a browser or touch the database.
if __name__ == '__main__':
    main()
python3.7 爬取QQ空間好友