1. 程式人生 > >練習--爬取xici可用代理IP

練習--爬取xici可用代理IP

colspan lsp com pan python print app agent flag

通過爬蟲抓取 xici 頁面上的代理 IP 與端口,並逐一測試,篩選出可以使用的代理。

主要代碼:

#!/usr/bin/env python 
#coding:utf8
import re
import socket
import telnetlib
from urllib import request

class getXici():
    """Download the xicidaili.com listing page and probe the proxies found in it.

    Typical use: call ``get_page()`` to store the page HTML in ``self.page``,
    then ``get_ip_list()`` to test every proxy row matched in that HTML.
    """

    # Matches one table row. Captured groups, in order: the flag image ``src``,
    # then the three plain ``<td>`` cells that follow it, then the next
    # ``<td class="country">`` cell. The code treats the first two plain cells
    # as IP and port; the third is presumably the location -- verify against
    # the live page markup.
    _ROW_PATTERN = re.compile(
        r'<td class="country"><img src="(.*?)" alt="Cn" /></td>'
        r'.*?<td>(.*?)</td>.*?<td>(.*?)</td>.*?<td>(.*?)</td>'
        r'.*?<td class="country">(.*?)</td>',
        re.S,
    )

    def __init__(self):
        self.url = "http://www.xicidaili.com"
        # Browser-like User-Agent sent with the page request.
        self.header = {
            "User-Agent": (
                "Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 "
                "(KHTML, like Gecko) Chrome/49.0.2623.75 Safari/537.36"
            ),
        }
        self.page = ""

    def get_page(self, timeout=10):
        """Fetch ``self.url`` and store the UTF-8 decoded body in ``self.page``.

        Args:
            timeout: Seconds to wait for the HTTP request before giving up.

        Raises:
            urllib.error.URLError: If the request fails.
            UnicodeDecodeError: If the body is not valid UTF-8.
        """
        req = request.Request(url=self.url, headers=self.header)
        # The context manager closes the HTTP response once it has been read.
        with request.urlopen(req, timeout=timeout) as response:
            self.page = response.read().decode("utf-8")

    def is_available(self, ip, port, timeout=3):
        """Return 1 if a TCP connection to ``ip:port`` succeeds, otherwise 0.

        Args:
            ip: Host name or IP address to connect to.
            port: Port number, as an int or a numeric string.
            timeout: Seconds to wait for the connection attempt.

        Returns:
            1 when the connection was established, 0 for an invalid port or
            any connection failure (refused, unreachable, timed out).
        """
        try:
            port_number = int(port)
        except (TypeError, ValueError):
            return 0
        if not 0 < port_number <= 65535:
            return 0
        try:
            # Only reachability matters, so the socket is closed immediately.
            with socket.create_connection((ip, port_number), timeout=timeout):
                return 1
        except (OSError, ValueError):
            # OSError covers refused/unreachable/timeout/DNS failures;
            # ValueError covers host names that cannot be encoded.
            return 0

    def _parse_proxies(self):
        """Return one tuple of captured cell texts per row matched in ``self.page``."""
        return self._ROW_PATTERN.findall(self.page)

    def get_ip_list(self):
        """Probe every proxy parsed from ``self.page`` and print the usable ones.

        Returns:
            A list of ``(ip, port, detail)`` string tuples for the proxies that
            accepted a connection, in page order. ``detail`` is the cell that
            follows the port in the table row.
        """
        usable = []
        for row in self._parse_proxies():
            ip, port, detail = row[1], row[2], row[3]
            if self.is_available(ip, port) == 1:
                print(" {0} {1} {2} 可以使用 ".format(ip, port, detail))
                usable.append((ip, port, detail))
        return usable


if __name__ == "__main__":
    xici = getXici()
    xici.get_page()
    xici.get_ip_list()

運行結果:

技術分享圖片

隨機抽取一個驗證是否可用:

技術分享圖片

練習--爬取xici可用代理IP