使用IO多路複用完成HTTP請求
阿新 • • 發佈:2021-01-11
# Fetch pages over plain HTTP using I/O multiplexing:
# selector + callbacks + a hand-written event loop.
import os
import socket
from selectors import DefaultSelector, EVENT_READ, EVENT_WRITE
from urllib.parse import urlparse

# Selector shared by every Fetcher; the registered ``data`` is the callback
# that ``loop`` invokes when the socket becomes ready.
selecter = DefaultSelector()
# URLs still to be fetched. A Fetcher drops its own URL from this list when it
# finishes, and ``stop`` is raised once the list is empty.
urls = ["http://app2.jg.eastmoney.com/m/ScienceBoardTopic/index.html#/topic"]
# Polled by ``loop``; set to True when ``urls`` has been drained.
stop = False


class Fetcher:
    """Download a single URL through a non-blocking socket.

    The instance never blocks: ``get_url`` starts the connection and registers
    ``connected`` for writability, ``connected`` sends the request and
    registers ``readable``, and ``readable`` collects the response until the
    peer closes the connection.
    """

    def connected(self, key):
        """Send the HTTP request once the connect attempt has completed.

        Args:
            key: The ``SelectorKey`` reported by the selector for this socket.

        Raises:
            OSError: If the asynchronous connect failed (for example
                ``ConnectionRefusedError``). The socket is closed first.
        """
        selecter.unregister(key.fd)
        # A non-blocking connect reports failure through SO_ERROR; writability
        # alone does not mean the connection succeeded.
        err = self.client.getsockopt(socket.SOL_SOCKET, socket.SO_ERROR)
        if err:
            self._finish()
            raise OSError(err, os.strerror(err))
        request = "GET {} HTTP/1.1\r\nHost:{}\r\nConnection:close\r\n\r\n".format(
            self.path, self.host
        )
        # NOTE(review): send() may write only part of the buffer; the request
        # is short, so a single call is presumed sufficient here.
        self.client.send(request.encode("utf8"))
        selecter.register(self.client.fileno(), EVENT_READ, self.readable)

    def readable(self, key):
        """Read one chunk of the response; on EOF print the body and clean up.

        Args:
            key: The ``SelectorKey`` reported by the selector for this socket.
        """
        chunk = self.client.recv(1024)
        if chunk:
            self.data += chunk
            return
        # Empty read: the server closed the connection ("Connection:close").
        selecter.unregister(key.fd)
        # Undecodable bytes are replaced instead of aborting the whole fetch.
        text = self.data.decode("utf8", errors="replace")
        # Everything after the first blank line is the response body.
        print(text.partition("\r\n\r\n")[2])
        self._finish()

    def get_url(self, url):
        """Start fetching ``url`` and register the socket with the selector.

        Args:
            url: An ``http://`` URL. An explicit port is honoured, otherwise
                port 80 is used. The query string is kept in the request.

        Raises:
            OSError: If the connection cannot even be started (for example a
                name-resolution failure). The socket is closed first.
        """
        self.spider_url = url
        self.data = b""
        parts = urlparse(url)
        self.host = parts.netloc  # sent verbatim in the Host header
        self.path = parts.path or "/"
        if parts.query:
            self.path += "?" + parts.query
        # netloc may carry ":port"; connect() needs host and port separately.
        address = (parts.hostname or parts.netloc, parts.port or 80)

        # Create the socket connection.
        self.client = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
        self.client.setblocking(False)
        try:
            self.client.connect(address)
        except BlockingIOError:
            # Expected for a non-blocking connect that is still in progress.
            pass
        except OSError:
            self.client.close()
            raise

        # Register: the socket turns writable when the connect has finished.
        selecter.register(self.client.fileno(), EVENT_WRITE, self.connected)

    def _finish(self):
        """Close the socket and update the shared ``urls`` / ``stop`` state."""
        global stop
        self.client.close()
        # The URL may have been fetched without ever being listed in ``urls``.
        if self.spider_url in urls:
            urls.remove(self.spider_url)
        if not urls:
            stop = True


def loop():
    """Run the event loop, dispatching each ready socket to its callback.

    Returns when ``stop`` is set or when no socket is registered any more;
    selecting with nothing registered would otherwise block forever.
    """
    while not stop and selecter.get_map():
        for key, _mask in selecter.select():
            call_back = key.data
            call_back(key)


if __name__ == "__main__":
    fetcher = Fetcher()
    fetcher.get_url("http://www.baidu.com")
    loop()