Python Asynchronous Requests: Limiting Concurrency
阿新 • Published: 2021-08-12
Limiting concurrency to a fixed level
import asyncio
import aiohttp

CONCURRENCY = 5  # maximum number of requests in flight at once
URL = 'https://www.baidu.com'

semaphore = asyncio.Semaphore(CONCURRENCY)
session = None
index = 0

async def scrape_api():
    # Acquire the semaphore before making a request, so at most
    # CONCURRENCY coroutines can be inside this block at the same time.
    async with semaphore:
        global index
        index += 1
        print('scraping', str(index), URL)
        async with session.get(URL) as response:
            await asyncio.sleep(1)
            return await response.text()

async def main():
    global session
    # TCPConnector(limit=64) caps the connection pool; the semaphore above
    # is what actually limits concurrency to 5.
    session = aiohttp.ClientSession(connector=aiohttp.TCPConnector(limit=64, ssl=False))
    scrape_index_tasks = [asyncio.ensure_future(scrape_api()) for _ in range(10000)]
    await asyncio.gather(*scrape_index_tasks)
    await session.close()  # release the connection pool once all tasks are done

if __name__ == '__main__':
    asyncio.get_event_loop().run_until_complete(main())
The output looks like this:
scraping 1 https://www.baidu.com
scraping 2 https://www.baidu.com
scraping 3 https://www.baidu.com
scraping 4 https://www.baidu.com
scraping 5 https://www.baidu.com
scraping 6 https://www.baidu.com
scraping 7 https://www.baidu.com
scraping 8 https://www.baidu.com
scraping 9 https://www.baidu.com
scraping 10 https://www.baidu.com
Source: Lagou Education (拉勾教育), 52講輕鬆搞定網路爬蟲 (52 Lessons to Easily Master Web Scraping)
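For reference, here is a minimal sketch of the same semaphore pattern that avoids the global session and index by passing them as arguments, and uses asyncio.run together with async with aiohttp.ClientSession so the session is closed automatically. The URL, concurrency value, and task count mirror the example above; the function and variable names (fetch, i, tasks) are illustrative, not part of the original code.

import asyncio
import aiohttp

CONCURRENCY = 5
URL = 'https://www.baidu.com'

async def fetch(session, semaphore, i):
    # The semaphore admits at most CONCURRENCY coroutines at a time.
    async with semaphore:
        print('scraping', i, URL)
        async with session.get(URL) as response:
            await asyncio.sleep(1)
            return await response.text()

async def main():
    semaphore = asyncio.Semaphore(CONCURRENCY)
    # async with closes the session (and its connection pool) automatically.
    async with aiohttp.ClientSession(
            connector=aiohttp.TCPConnector(limit=64, ssl=False)) as session:
        tasks = [fetch(session, semaphore, i) for i in range(10000)]
        await asyncio.gather(*tasks)

if __name__ == '__main__':
    asyncio.run(main())

Creating the semaphore inside main() also avoids binding it to an event loop before one is running, which matters on some Python versions when switching to asyncio.run.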