[python]select+回撥+事件迴圈獲取html
阿新 • • 發佈:2018-12-11
# Fetch HTML over plain HTTP with non-blocking sockets.
# Technique: select + callbacks + an event loop.
# Many requests are in flight at once, all driven from a single thread.
import os
import socket
import sys
import time
from selectors import DefaultSelector, EVENT_READ, EVENT_WRITE
from urllib.parse import urlparse

# Shared selector used by every Fetcher and by loop().
selector = DefaultSelector()
# URLs that are still in flight; a Fetcher removes its URL when it is done.
urls = []
# Set to True once the last in-flight URL has finished; this ends loop().
stop = False


class Fetcher:
    """Download one URL over HTTP/1.1 using selector-driven callbacks.

    Call get_url() to start the request; loop() then drives connected()
    and readable() as the socket becomes writable / readable.  When the
    server closes the connection the response body is printed to stdout.
    """

    @staticmethod
    def _split_url(url):
        """Return (host_header, hostname, port, request_target) for *url*.

        host_header is the raw netloc (sent in the Host header), hostname
        and port are what the socket connects to (port defaults to 80), and
        request_target is the path plus query string ("/" if the path is
        empty).
        """
        parts = urlparse(url)
        target = parts.path or "/"
        if parts.query:
            target = "{}?{}".format(target, parts.query)
        # netloc may carry ":port"; connect() needs the bare host name.
        hostname = parts.hostname or parts.netloc
        return parts.netloc, hostname, parts.port or 80, target

    @staticmethod
    def _build_request(path, host):
        """Return the encoded GET request for *path* on *host*."""
        request = "GET {} HTTP/1.1\r\nHost:{}\r\nConnection:close\r\n\r\n".format(
            path, host
        )
        return request.encode("utf8")

    @staticmethod
    def _extract_body(raw):
        """Return everything after the first blank line of the response *raw*.

        Undecodable bytes are replaced rather than raising, and a response
        without a header/body separator yields an empty string.
        """
        text = raw.decode("utf8", errors="replace")
        # partition() splits on the FIRST separator only, so blank lines
        # inside the body are preserved.
        _headers, _sep, body = text.partition("\r\n\r\n")
        return body

    def _finish(self, key):
        """Unregister and close the socket, and mark this URL as done."""
        global stop
        selector.unregister(key.fd)
        self.client.close()
        try:
            urls.remove(self.spider_url)
        except ValueError:
            # The caller never added this URL to `urls`; nothing to untrack.
            pass
        if not urls:
            stop = True

    def _fail(self, key, exc):
        """Report a failed fetch on stderr and release its resources."""
        print(
            "fetch failed for {}: {}".format(self.spider_url, exc),
            file=sys.stderr,
        )
        self._finish(key)

    def connected(self, key):
        """Callback for EVENT_WRITE: the connect attempt has completed.

        Sends the GET request and switches the registration to EVENT_READ
        so that readable() receives the response.
        """
        # A failed non-blocking connect also makes the socket writable, so
        # the outcome has to be read from SO_ERROR.
        err = self.client.getsockopt(socket.SOL_SOCKET, socket.SO_ERROR)
        if err:
            self._fail(key, OSError(err, os.strerror(err)))
            return
        try:
            # NOTE(review): send() on a non-blocking socket may write only
            # part of the data; this assumes the short request fits in the
            # socket's send buffer.
            self.client.send(self._build_request(self.path, self.host))
        except OSError as exc:
            self._fail(key, exc)
            return
        selector.modify(key.fd, EVENT_READ, self.readable)

    def readable(self, key):
        """Callback for EVENT_READ: read a chunk, or finish on end of stream."""
        try:
            chunk = self.client.recv(1024)
        except BlockingIOError:
            # Nothing to read after all; wait for the next readiness event.
            return
        except OSError as exc:
            self._fail(key, exc)
            return
        if chunk:
            self.data += chunk
            return
        # Empty read: the server closed the connection (Connection: close).
        self._finish(key)
        print(self._extract_body(self.data))

    def get_url(self, url):
        """Start fetching *url*: begin a non-blocking connect and register it.

        Raises OSError if the connect attempt fails immediately (for
        example when the host name cannot be resolved).
        """
        self.spider_url = url
        self.host, hostname, port, self.path = self._split_url(url)
        self.data = b""

        # Create the socket connection.
        self.client = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
        self.client.setblocking(False)
        try:
            self.client.connect((hostname, port))
        except BlockingIOError:
            # Expected: the connect is in progress and completes later.
            pass
        except OSError:
            # Do not leak the socket when the connect fails outright.
            self.client.close()
            raise

        # Register: wake up in connected() once the socket is writable.
        selector.register(self.client.fileno(), EVENT_WRITE, self.connected)


def loop():
    """Run the event loop until every tracked URL has finished.

    Repeatedly asks the selector which sockets are ready and invokes the
    callback stored as each key's data.  select() itself only reports
    readiness; dispatching to the callback is done here.  The loop also
    ends when no socket is registered, since select() would otherwise wait
    forever.
    """
    while not stop and selector.get_map():
        for key, _mask in selector.select():
            call_back = key.data
            call_back(key)


def main():
    """Fetch 20 goods pages concurrently and print the elapsed seconds."""
    start_time = time.time()
    for goods_id in range(20):
        url = "http://shop.projectsedu.com/goods/{}/".format(goods_id)
        urls.append(url)
        Fetcher().get_url(url)
    loop()
    print(time.time() - start_time)


# callbacks + event loop + select (poll/epoll)
if __name__ == "__main__":
    main()