1. 程式人生 > 非同步 協程 網路小爬蟲

非同步 協程 網路小爬蟲


# gevent's monkey patching must run before any other module imports
# socket/ssl; urllib.request pulls both in (via http.client), so the patch
# is applied first and the remaining imports follow it.
from gevent import monkey
monkey.patch_all()  # swap the blocking stdlib I/O primitives for gevent-cooperative ones

import time
from urllib import request

import gevent

def f(url):
    """Fetch *url* and print how many bytes its response body contains.

    A ``GET: <url>`` line is printed before the request is made and a
    ``<n> bytes received from <url>.`` line after the body has been read.

    Args:
        url: Address handed unchanged to ``urllib.request.urlopen``.

    Returns:
        None. The result is reported on standard output only.

    Raises:
        Nothing is caught here, so any exception raised by ``urlopen`` or
        ``read`` propagates to the caller.
    """
    print('GET: %s' % url)
    # The context manager closes the response (and its underlying
    # connection) as soon as the body is read, even if read() raises.
    with request.urlopen(url) as resp:
        data = resp.read()
    print('%d bytes received from %s.' % (len(data), url))

# Pages fetched by both timing runs below; keeping a single list guarantees
# the sequential and the greenlet run download exactly the same URLs.
urls = [
    'https://www.python.org/',
    'https://www.yahoo.com/',
    'https://github.com/',
]

# Run 1: fetch the pages one after another.
# perf_counter() is a monotonic clock meant for measuring intervals, so the
# result cannot be skewed by wall-clock adjustments the way time.time() can.
time_start = time.perf_counter()
for url in urls:
    f(url)
print("同步cost", time.perf_counter() - time_start)

# Run 2: start one greenlet per URL and wait until all of them finish.
async_time_start = time.perf_counter()
gevent.joinall([gevent.spawn(f, url) for url in urls])
print("非同步cost", time.perf_counter() - async_time_start)


GET: https://www.python.org/
50114 bytes received from https://www.python.org/.
GET: https://www.yahoo.com/
505000 bytes received from https://www.yahoo.com/.
GET: https://github.com/
65396 bytes received from https://github.com/.
同步cost 3.5022003650665283
GET: https://www.python.org/
GET: https://www.yahoo.com/
GET: https://github.com/
65396 bytes received from https://github.com/.
50114 bytes received from https://www.python.org/.
504996 bytes received from https://www.yahoo.com/.
非同步cost 1.332076072692871