python爬取手機號段(電信199號段)
阿新 • • 發佈:2019-01-01
# -*- coding: GBK -*-
"""
爬取手機號段歸屬地
"""
import time
import requests
from lxml import etree
# Lookup endpoint and base query parameters of the ip138 mobile-number search.
url = 'http://www.ip138.com:8080/search.asp?'
param = {'action': 'mobile', 'mobile': '1990012'}

# File that receives one "<7-digit prefix> <location>" line per known prefix.
OUTPUT_PATH = "C:\\Users\\yang\\Desktop\\phoneNumber.txt"
# Number segment to enumerate; four more digits are appended to it.
SEGMENT = '199'
# Cells of the result table; the second match is read as the location.
RESULT_XPATH = '/html/body/table/tr/td[@class="tdc2"]'
# Seconds to wait for the server before an attempt counts as failed.
REQUEST_TIMEOUT = 10
# Extra attempts after the first request comes back without a location.
# NOTE: the earlier version nominally capped retries at 20, but its
# `hs is not None` check was always true (xpath() returns a list), so it
# only ever retried once. That effective behaviour is kept; raise this
# value to retry harder.
MAX_RETRIES = 1
# Text the site shows for a prefix it has no location for.
UNKNOWN_LOCATION = '未知'


def iter_mobile_numbers(segment=SEGMENT):
    """Yield ``segment`` followed by 0000..9999, in ascending order."""
    for suffix in range(10000):
        yield segment + str(suffix).zfill(4)


def is_known_location(text):
    """Return True when ``text`` is a real location worth recording.

    ``None``, empty/whitespace-only text and the "unknown" marker are
    rejected. Values are compared with ``==`` semantics; identity checks
    (``is not``) against string literals never match parsed text.
    """
    if text is None:
        return False
    return text.strip() not in ('', UNKNOWN_LOCATION)


def format_record(mobile, location):
    """Return the output line for one prefix: ``"<mobile> <location>\\n"``."""
    return mobile + " " + location.strip() + "\n"


def fetch_location(mobile, max_retries=MAX_RETRIES):
    """Query the lookup page for ``mobile`` and return the location text.

    Makes up to ``max_retries + 1`` requests. An attempt fails when the
    request raises, the body is empty, or the page has no second result
    cell with text. Returns ``None`` when every attempt fails.
    """
    # Fresh dict per call so the module-level ``param`` is never mutated.
    query = dict(param, mobile=mobile)
    for _ in range(max_retries + 1):
        try:
            rq = requests.get(url, params=query, timeout=REQUEST_TIMEOUT)
        except requests.RequestException as err:
            print("request for " + mobile + " failed: " + str(err))
            continue
        rq.encoding = 'GBK'
        if not rq.text.strip():
            # Nothing to parse; treat as a failed attempt.
            continue
        page = etree.HTML(rq.text)
        if page is None:
            continue
        hs = page.xpath(RESULT_XPATH)
        # Guard the index: an error page may not contain the result table.
        if len(hs) > 1 and hs[1].text is not None:
            return hs[1].text
    return None


def main():
    """Look up every prefix of SEGMENT and append known ones to OUTPUT_PATH."""
    time_start = time.time()  # program start time
    # ``with`` guarantees the file is flushed and closed even if a lookup
    # raises part-way through the crawl.
    with open(OUTPUT_PATH, "a+", encoding='utf-8') as out:
        for mobile in iter_mobile_numbers():
            n1, n2, n3, n4 = mobile[len(SEGMENT):]
            print("!!!n1: " + n1 + " n2: " + n2 + " n3: " + n3 + " n4: " + n4)
            location = fetch_location(mobile)
            if is_known_location(location):
                out.write(format_record(mobile, location))
    time_end = time.time()  # program end time
    print('\r程式執行時間:', time_end - time_start)


if __name__ == "__main__":
    main()