1. 程式人生 > >[原創] Demo: Python crawler use chrome headless

[原創] Demo: Python crawler use chrome headless

A Python crawler that uses headless Chrome. Only Python 3.5+ is supported.

  • Download Chrome or Chromium
  • Install pyppeteer: `$ python3 -m pip install pyppeteer`
  • Demo
import asyncio
from pyppeteer.launcher import launch
# More launch options can be passed here. On a MacBook the accepted options
# are listed in
# /usr/local/lib/python3.6/site-packages/pyppeteer/launcher.py (Launcher.options).
browser = launch({"executablePath": "/Applications/Google Chrome.app/Contents/MacOS/Google Chrome"})


async def main(browser):
    """Open Baidu in a new page and print its API surface, title and footer text.

    Args:
        browser: An already-launched pyppeteer browser object.
    """
    page = await browser.newPage()
    await page.goto('https://www.baidu.com/')
    print("fun: " + str(dir(page)))

    title = await page.title()
    print("title: " + title)

    element = await page.querySelector('#ftConw')
    if element is None:
        # querySelector yields None when nothing matches; report that instead
        # of failing with an AttributeError on the next line.
        print("text: <no element matches '#ftConw'>")
        return
    text = await element.evaluate('(element) => element.textContent')
    print("text: " + text)


loop = asyncio.get_event_loop()

# NOTE(review): the pyppeteer version this demo was written against returned
# the browser directly from launch(); current releases document launch() as a
# coroutine function. Accept both so the demo runs on either.
if asyncio.iscoroutine(browser):
    browser = loop.run_until_complete(browser)

try:
    loop.run_until_complete(main(browser))
finally:
    # Always shut the browser down, even if main() raised, so no Chrome
    # process is left behind. close() may likewise be a coroutine.
    closing = browser.close()
    if asyncio.iscoroutine(closing):
        loop.run_until_complete(closing)

# Sample output:
# fun: ['Events', 'J', 'JJ', 'Jeval', 'PaperFormats', '__annotations__', '__class__', '__delattr__', '__dict__', '__dir__', '__doc__', '__eq__', '__format__', '__ge__', '__getattribute__', '__gt__', '__hash__', '__init__', '__init_subclass__', '__le__', '__lt__', '__module__', '__ne__', '__new__', '__reduce__', '__reduce_ex__', '__repr__', '__setattr__', '__sizeof__', '__str__', '__subclasshook__', '__weakref__', '_add_event_handler', '_client', '_emulationManager', '_events', '_frameManager', '_go', '_handleException', '_ignoreHTTPSErrors', '_keyboard', '_loop', '_mouse', '_networkManager', '_onCertificateError', '_onConsoleAPI', '_onDialog', '_onTargetCrashed', '_pageBindings', '_schedule', '_screenshotTask', '_screenshotTaskQueue', '_touchscreen', '_tracing', '_viewport', 'addScriptTag', 'click', 'close', 'content', 'cookies', 'deleteCookie', 'emit', 'emulate', 'emulateMedia', 'evaluate', 'evaluateOnNewDocument', 'exposeFunction', 'focus', 'frames', 'goBack', 'goForward', 'goto', 'hover', 'injectFile', 'keyboard', 'listeners', 'mainFrame', 'mouse', 'on', 'once', 'pdf', 'plainText', 'press', 'querySelector', 'querySelectorAll', 'querySelectorEval', 'reload', 'remove_all_listeners', 'remove_listener', 'screenshot', 'setContent', 'setCookie', 'setExtraHTTPHeaders', 'setJavaScriptEnabled', 'setRequestInterceptionEnabled', 'setUserAgent', 'setViewport', 'tap', 'title', 'touchscreen', 'tracing', 'type', 'url', 'viewport', 'waitFor', 'waitForFunction', 'waitForNavigation', 'waitForSelector']
# title: 百度一下,你就知道
# text: 把百度設為主頁關於百度About Baidu百度推廣©2017 Baidu 使用百度前必讀 意見反饋 京ICP證030173號 京公網安備11000002000001號