1. 程式人生 > 實用技巧 > 修改requests_html.AsyncHTMLSession使得支援url引數

修改requests_html.AsyncHTMLSession使得支援url引數

一、修改原始碼

#重寫AsyncHTMLSession中的run()方法

    def run(self, *coros, urls=None):
        """Run coroutine functions on the session loop and collect results.

        Every callable in ``coros`` is invoked to create a coroutine, each
        coroutine is wrapped in a task on ``self.loop``, and the call blocks
        until all tasks have finished.

        Args:
            *coros: Coroutine functions (callables returning awaitables).
            urls: Optional argument(s) for the coroutine functions. ``None``
                (the default) calls each one with no arguments; a ``list``
                calls each one once per element; any other value is passed
                to each one as its single argument.

        Returns:
            list: Task results in scheduling order, i.e. in ``coros`` order
            and, for a list of ``urls``, in URL order within each coroutine
            function. Empty when nothing was scheduled.

        Raises:
            Exception: Whatever the first failed task (in scheduling order)
                raised; it is re-raised after all tasks have finished.
        """
        # Normalise the three calling conventions into argument tuples so the
        # scheduling code below exists only once.
        if urls is None:
            arg_sets = [()]
        elif isinstance(urls, list):
            arg_sets = [(url,) for url in urls]
        else:
            arg_sets = [(urls,)]

        # Bind every task to the session's own loop rather than to whatever
        # loop happens to be current in the calling thread.
        tasks = [
            asyncio.ensure_future(coro(*args), loop=self.loop)
            for coro in coros
            for args in arg_sets
        ]

        # asyncio.wait() raises ValueError on an empty collection.
        if not tasks:
            return []

        self.loop.run_until_complete(asyncio.wait(tasks))

        # asyncio.wait() returns an unordered set of finished tasks; read the
        # results from the ordered task list so output order matches input.
        return [task.result() for task in tasks]

二、測試

from requests_html import AsyncHTMLSession

# Module-level session; the get_link coroutine below awaits its get() method.
asession = AsyncHTMLSession()

async def get_link(link):
    """Request ``link`` via the module-level ``asession``.

    Returns the ``html.absolute_links`` attribute of the awaited response.
    """
    response = await asession.get(link)
    page = response.html
    return page.absolute_links

# Case 1: a single URL string is handed to get_link unchanged.
url = "https://www.cnblogs.com/"
results = asession.run(get_link, urls=url)
print(results)

# Case 2: a list of URLs schedules get_link once per element.
url = [
    "https://www.cnblogs.com/",
    "https://www.jd.com",
]
results = asession.run(get_link, urls=url)
print(results)

三、或者新建一個繼承AsyncHTMLSession的NewAsyncHTMLSession類別

from requests_html import AsyncHTMLSession
import asyncio

class NewAsyncHTMLSession(AsyncHTMLSession):
    """``AsyncHTMLSession`` whose ``run()`` can pass URLs to the coroutines."""

    def run(self, *coros, urls=None):
        """Run coroutine functions on the session loop and collect results.

        Every callable in ``coros`` is invoked to create a coroutine, each
        coroutine is wrapped in a task on ``self.loop``, and the call blocks
        until all tasks have finished.

        Args:
            *coros: Coroutine functions (callables returning awaitables).
            urls: Optional argument(s) for the coroutine functions. ``None``
                (the default) calls each one with no arguments; a ``list``
                calls each one once per element; any other value is passed
                to each one as its single argument.

        Returns:
            list: Task results in scheduling order, i.e. in ``coros`` order
            and, for a list of ``urls``, in URL order within each coroutine
            function. Empty when nothing was scheduled.

        Raises:
            Exception: Whatever the first failed task (in scheduling order)
                raised; it is re-raised after all tasks have finished.
        """
        # Normalise the three calling conventions into argument tuples so the
        # scheduling code below exists only once.
        if urls is None:
            arg_sets = [()]
        elif isinstance(urls, list):
            arg_sets = [(url,) for url in urls]
        else:
            arg_sets = [(urls,)]

        # Bind every task to the session's own loop rather than to whatever
        # loop happens to be current in the calling thread.
        tasks = [
            asyncio.ensure_future(coro(*args), loop=self.loop)
            for coro in coros
            for args in arg_sets
        ]

        # asyncio.wait() raises ValueError on an empty collection.
        if not tasks:
            return []

        self.loop.run_until_complete(asyncio.wait(tasks))

        # asyncio.wait() returns an unordered set of finished tasks; read the
        # results from the ordered task list so output order matches input.
        return [task.result() for task in tasks]

# Module-level session; the get_link coroutine below awaits its get() method.
asession = NewAsyncHTMLSession()

async def get_link(link):
    """Request ``link`` via the module-level ``asession``.

    Returns the ``html.absolute_links`` attribute of the awaited response.
    """
    response = await asession.get(link)
    page = response.html
    return page.absolute_links

# Case 1: a single URL string is handed to get_link unchanged.
url = "https://www.cnblogs.com/"
results = asession.run(get_link, urls=url)
print(results)

# Case 2: a list of URLs schedules get_link once per element.
url = [
    "https://www.cnblogs.com/",
    "https://www.jd.com",
]
results = asession.run(get_link, urls=url)
print(results)