1. 程式人生 > 實用技巧 > 使用IO多路複用完成HTTP請求

使用IO多路複用完成HTTP請求

import socket
from urllib.parse import urlparse
from selectors import DefaultSelector, EVENT_READ, EVENT_WRITE


# Shared selector: Fetcher registers its socket here and loop() waits on it.
selecter = DefaultSelector()
# URLs whose responses are still outstanding. Fetcher.readable removes its own
# URL from this list once the complete response has been received.
# NOTE(review): the script's entry point fetches http://www.baidu.com, which is
# not in this list -- confirm which URL is actually intended.
urls = ["http://app2.jg.eastmoney.com/m/ScienceBoardTopic/index.html#/topic"]
# Set to True by Fetcher.readable when `urls` becomes empty; loop() polls it.
stop = False


# Request HTML over a raw socket.
# The HTTP request is driven by select-style I/O multiplexing:
# selector + callbacks + event loop.
class Fetcher:
    """Fetch one URL over a non-blocking socket, driven by selector callbacks.

    Call get_url() to start the request, then run loop().  The selector
    invokes connected() once the socket is writable and readable() each time
    response data (or end-of-stream) is available.  The response body is
    printed to stdout when the server closes the connection.
    """

    @staticmethod
    def _request_target(path, query):
        """Return the HTTP request target: the path plus an optional query."""
        target = path or "/"
        if query:
            # urlparse() splits the query off the path; it must be sent back
            # to the server, otherwise a different resource is requested.
            target = "{}?{}".format(target, query)
        return target

    @staticmethod
    def _extract_body(raw):
        """Decode raw response bytes and return the text after the headers.

        Returns an empty string when no blank line terminating the headers
        is present.  Bytes that are not valid UTF-8 are replaced rather than
        raising, since the response charset is not inspected.
        """
        text = raw.decode("utf8", errors="replace")
        _headers, _separator, body = text.partition("\r\n\r\n")
        return body

    def connected(self, key):
        """Selector callback: the socket is writable, so send the request.

        Afterwards the socket is re-registered for read events so that
        readable() receives the response.
        """
        selecter.unregister(key.fd)
        request = "GET {} HTTP/1.1\r\nHost:{}\r\nConnection:close\r\n\r\n".format(
            self._request_target(self.path, self.query), self.host
        )
        self.client.send(request.encode("utf8"))
        selecter.register(self.client.fileno(), EVENT_READ, self.readable)

    def readable(self, key):
        """Selector callback: accumulate response data until end-of-stream.

        An empty read means the server closed the connection (the request
        asks for ``Connection:close``), so the response is complete: the
        body is printed and this URL is marked as finished.
        """
        global stop

        chunk = self.client.recv(1024)
        if chunk:
            self.data += chunk
            return

        selecter.unregister(key.fd)
        # Close first so the socket is released even if printing fails.
        self.client.close()
        print(self._extract_body(self.data))

        # Guarded removal: a plain remove() raises ValueError if the list was
        # changed elsewhere and no longer contains this URL.
        if self.spider_url in urls:
            urls.remove(self.spider_url)
        if not urls:
            stop = True

    def get_url(self, url):
        """Start fetching *url*; the work continues in the selector callbacks.

        Raises:
            ValueError: if *url* contains no host (e.g. the scheme is missing).
        """
        parts = urlparse(url)
        if not parts.hostname:
            raise ValueError("URL has no host: {!r}".format(url))

        self.spider_url = url
        # Track the URL as pending so readable() can tick it off later, even
        # when the caller did not add it to the shared list beforehand.
        if url not in urls:
            urls.append(url)
        self.data = b""
        self.host = parts.netloc
        self.path = parts.path or "/"
        self.query = parts.query

        # Create the socket connection.
        self.client = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
        self.client.setblocking(False)
        try:
            # netloc may carry an explicit port ("host:8080"); connect to the
            # bare hostname and fall back to port 80 when none is given.
            self.client.connect((parts.hostname, parts.port or 80))
        except BlockingIOError:
            # Expected for a non-blocking connect that is still in progress;
            # the selector reports writability once it completes.
            pass

        # Register for write-readiness, i.e. connection established.
        selecter.register(self.client.fileno(), EVENT_WRITE, self.connected)


# Event loop.
def loop():
    """Dispatch selector events to their callbacks until the work is done.

    Each registered key stores its callback in ``key.data``.  The loop ends
    when ``stop`` is set, or when nothing is registered any more: waiting on
    an empty selector would otherwise never return.
    """
    while not stop and selecter.get_map():
        for key, _mask in selecter.select():
            callback = key.data
            callback(key)


if __name__ == "__main__":
    fetcher = Fetcher()
    fetcher.get_url("http://www.baidu.com")
    loop()