Python檢測批量URL狀態,並將返回正常的URL保存文件
阿新 • • 發佈:2018-04-20
#!/usr/bin/python
# Path of the file that lists the URLs to check, taken from the first
# command-line argument (sys.argv[0] is the script itself).
urls = sys.argv[1]
# Collects the URLs that answered without an error.
result = []
def check_url_status():
    """Probe every URL listed in the file named by ``urls`` and record the outcome.

    Reads the module-level ``urls`` path one line at a time. Blank lines are
    skipped, and entries without an ``http://`` or ``https://`` prefix get
    ``http://`` prepended. Each URL is fetched with a 4-second timeout.

    Side effects:
        * Appends every URL that opens successfully to the module-level
          ``result`` list and rewrites ``url_ok.txt`` with the full list
          after each success, so partial results survive an interruption.
        * Appends ``"<url> : <status>"`` to ``url_notok.txt`` when the server
          answers with an HTTP error, and ``"<url> : error"`` for any other
          failure (DNS, timeout, refused connection, ...).
        * Prints the URL and its status code (or ``error``) to stdout.

    Returns:
        None.
    """
    # Read the whole list up front so the input file is closed before any
    # network or output-file work starts.
    with open(urls, 'r') as url_file:
        lines = url_file.readlines()

    for line in lines:
        line = line.strip()
        if not line:
            continue
        # Bare host names are assumed to be plain HTTP.
        if not line.startswith(('http://', 'https://')):
            line = 'http://' + line
        print(line)
        try:
            response = urllib2.urlopen(line, timeout=4)
            try:
                status = response.code
            finally:
                # Release the socket; the body is never read.
                response.close()
            print(status)
            result.append(line)
            with open('url_ok.txt', 'w') as ok_file:
                ok_file.write('\n'.join(result))
        except urllib2.HTTPError as e:
            print(e.code)
            # Append (not truncate) so earlier failures are not wiped out.
            with open('url_notok.txt', 'a') as notok_file:
                notok_file.write(line + ' : ' + str(e.code) + '\n')
        except Exception:
            # Anything else counts as unreachable. KeyboardInterrupt and
            # SystemExit are deliberately not caught, so Ctrl-C still works.
            print("error")
            with open('url_notok.txt', 'a') as notok_file:
                notok_file.write(line + ' : ' + 'error' + '\n')
# -*- coding: UTF-8 -*-
#author == huangyishan
import os
import sys
import urllib2
# Collects the URLs that answered without an error.
result = []
def check_url_status():
    """Probe every URL listed in the file named by ``urls`` and record the outcome.

    Reads the module-level ``urls`` path one line at a time. Blank lines are
    skipped, and entries without an ``http://`` or ``https://`` prefix get
    ``http://`` prepended. Each URL is fetched with a 4-second timeout.

    Side effects:
        * Appends every URL that opens successfully to the module-level
          ``result`` list and rewrites ``url_ok.txt`` with the full list
          after each success, so partial results survive an interruption.
        * Appends ``"<url> : <status>"`` to ``url_notok.txt`` when the server
          answers with an HTTP error, and ``"<url> : error"`` for any other
          failure (DNS, timeout, refused connection, ...).
        * Prints the status code (or ``error``) to stdout.

    Returns:
        None.
    """
    # Read the whole list up front so the input file is closed before any
    # network or output-file work starts.
    with open(urls, 'r') as url_file:
        lines = url_file.readlines()

    for line in lines:
        line = line.strip()
        if not line:
            continue
        # Bare host names are assumed to be plain HTTP.
        if not line.startswith(('http://', 'https://')):
            line = 'http://' + line
        try:
            response = urllib2.urlopen(line, timeout=4)
            try:
                status = response.code
            finally:
                # Release the socket; the body is never read.
                response.close()
            print(status)
            result.append(line)
            with open('url_ok.txt', 'w') as ok_file:
                ok_file.write('\n'.join(result))
        except urllib2.HTTPError as e:
            print(e.code)
            # Append (not truncate) so earlier failures are not wiped out.
            with open('url_notok.txt', 'a') as notok_file:
                notok_file.write(line + ' : ' + str(e.code) + '\n')
        except Exception:
            # Anything else counts as unreachable. KeyboardInterrupt and
            # SystemExit are deliberately not caught, so Ctrl-C still works.
            print("error")
            with open('url_notok.txt', 'a') as notok_file:
                notok_file.write(line + ' : ' + 'error' + '\n')
# Run the check only when this file is executed as a script, not on import.
if __name__ == '__main__':
    check_url_status()
Python檢測批量URL狀態,並將返回正常的URL保存文件