1. 程式人生 > 實用技巧 > 通過 pyppeteer 庫獲取請求攜帶的相關引數

通過 pyppeteer 庫獲取請求攜帶的相關引數

#!/usr/bin/env python
# -*- coding:utf-8 -*-
#--author: Baozi

import asyncio
from pyppeteer import launch
import time
import re


url_params = ''  # first matching request URL recorded by intercept_response
doc_id = ''  # not referenced anywhere else in this section

# Query parameters copied verbatim (still percent-encoded) from the captured
# URL, listed in the order they appear in the dict returned by get_url.
_CAPTURED_PARAM_NAMES = (
    '__dyn', '__csr', '__req', '__beoa', '__pc', 'dpr', '__ccg', '__rev',
    '__s', '__hsi', '__comet_req', 'lsd', 'jazoest',
    '__spin_r', '__spin_b', '__spin_t',
)


async def intercept_response(res):
    """Record the first matching request URL in the ``url_params`` global.

    ``main`` registers this coroutine as a ``'request'`` listener, so despite
    its name it only reads the ``url`` attribute of the object it is handed.
    A URL matches when it contains both ``'__dyn'`` and
    ``'https://www.facebook.com/ajax/bz'``. Once a URL has been recorded,
    later matches are ignored.
    """
    global url_params
    if '__dyn' in res.url and 'https://www.facebook.com/ajax/bz' in res.url and not url_params:
        url_params = res.url
        print(url_params)


async def request_check(req):
    """Filter requests: abort heavy resource types and continue the rest.

    NOTE(review): nothing in this section registers this handler or enables
    request interception, so it is currently unused.
    """
    if req.resourceType in ['image', 'media', 'eventsource', 'websocket']:
        await req.abort()
    else:
        await req.continue_()


async def main(url, proxy, ua):
    """Open ``url`` in a proxied browser until a matching request is captured.

    Args:
        url: Page to load.
        proxy: Value passed to Chromium's ``--proxy-server`` switch.
        ua: User-Agent string applied to the page.

    The captured URL is not returned; ``intercept_response`` stores it in the
    ``url_params`` global.
    """
    # Launch the browser through pyppeteer.
    browser = await launch({
        'headless': False,
        'args': ['--proxy-server={}'.format(proxy), '--disable-infobars'],
    })
    try:
        # Waits use asyncio.sleep rather than time.sleep: a blocking sleep
        # would freeze the event loop, and the request listener could not run
        # during the very pauses that exist to let requests arrive.
        await asyncio.sleep(10)
        page = await browser.newPage()
        page.on('request', intercept_response)
        # Set the User-Agent request header.
        await page.setUserAgent(ua)
        await page.goto(url, {'timeout': 1000 * 20})

        # NOTE(review): the original indentation of this retry loop was lost;
        # reload + click + wait are all treated as part of one retry attempt.
        for _ in range(3):
            if url_params:
                break
            await asyncio.sleep(10)
            await page.goto(url, {'timeout': 1000 * 20})
            comment_click = await page.xpath(
                '//form[@rel="async"]//div[@class="_4vn1"]/span[@class="_4vn2"]/a')
            # The link may be absent; skip the click instead of raising.
            if comment_click:
                await comment_click[0].click()
            await asyncio.sleep(2.5)
    finally:
        # Always shut the browser down, even when navigation or the click fails.
        await browser.close()


def _query_value(captured_url, name):
    """Return the raw value of query parameter ``name`` in ``captured_url``.

    The name is anchored on ``?``/``&`` so that, for example, ``dpr`` cannot
    match inside a longer parameter name.

    Raises:
        IndexError: If the parameter is not present.
    """
    match = re.search(r'(?:^|[?&])' + re.escape(name) + r'=([^&]*)', captured_url)
    if match is None:
        raise IndexError(
            'parameter {!r} not found in captured URL'.format(name))
    return match.group(1)


def get_url(url, proxy, user_agent):
    """Run the browser session and return the captured request parameters.

    Args:
        url: Page to load.
        proxy: Proxy server passed to the browser.
        user_agent: User-Agent string applied to the page.

    Returns:
        dict: ``'__user'`` and ``'__a'`` set to the fixed values ``'0'`` and
        ``'1'``, plus every name in ``_CAPTURED_PARAM_NAMES`` mapped to its
        raw value from the captured URL.

    Raises:
        IndexError: If no URL was captured or a parameter is missing from it.

    NOTE: ``url_params`` is never reset, so once a URL has been captured,
    later calls in the same process return values from that same URL.
    """
    try:
        asyncio.get_event_loop().run_until_complete(main(url, proxy, user_agent))
    except Exception as e:
        # Best effort: the URL may already have been captured before the
        # session failed, so report the error and try to parse anyway.
        print('pyppeteer session failed: {!r}'.format(e))

    if not url_params:
        raise IndexError('no matching request URL was captured')

    pyputeer_params = {'__user': '0', '__a': '1'}
    for name in _CAPTURED_PARAM_NAMES:
        pyputeer_params[name] = _query_value(url_params, name)
    return pyputeer_params


if __name__ == '__main__':
    url = 'https://www.facebook.com/news.hkcd/posts/2966706433454938'
    proxy = 'http://172.16.7.14:13512'
    user_agent = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3965.0 Safari/537.36'
    print(get_url(url, proxy, user_agent))