工作中用到的小指令碼2
阿新 • • 發佈:2020-08-19
# Sort scan-report HTML files into one folder per owning unit.
#
# Each report's third <h1> holds the scanned target (URL, host or IP).  The
# target is looked up in two Excel inventories to find the owning unit, and the
# report is then moved into a folder named after that unit.

import os
import shutil
from urllib.parse import *

import openpyxl
import xlrd
import xlwt
# NOTE(review): the original used `pq` without importing it; the call pattern
# pq(html)("h1").eq(n).text() matches pyquery's PyQuery -- confirm.
from pyquery import PyQuery as pq

# NOTE(review): xlrd 2.0+ only reads .xls files; the .xlsx workbooks opened
# below need xlrd < 2.0 (or a port to openpyxl) -- confirm the installed version.
SITE_INFO_XLS = 'C:\\Users\\yxb\\Downloads\\彙總高危\\網站基本資訊20200424(1).xls'
SITE_LIST_XLSX = 'C:\\Users\\yxb\\Downloads\\彙總高危\\網站群網站清單_20200312入庫(1).xlsx'
REPORT_DIR = "C:\\Users\\yxb\\Downloads\\彙總高危\\總\\"
DEST_ROOT = "F:\\scrapy\\819\\"


def _iter_files(dir):
    """Yield the full path of every regular file directly inside ``dir``."""
    for name in os.listdir(dir):
        path = os.path.join(dir, name)
        if os.path.isfile(path):
            yield path


def _third_h1_text(path):
    """Return the text of the third <h1> element of the HTML file at ``path``."""
    with open(path, encoding="utf-8") as f:
        content = f.read()
    # The file is closed before returning so that callers may move it
    # (an open file cannot be renamed on Windows).
    return pq(content)("h1").eq(2).text()


def _lookup_key(text):
    """Reduce a scan-target string to the key used for inventory lookups.

    For http/https/scheme-less input the key is the network location when one
    is present, otherwise the path.  For any other "scheme" the scheme itself
    is the key (urlparse may report the host of a bare ``host:port`` string as
    the scheme).
    """
    res = urlparse(text)
    if res.scheme in ("http", "https", ""):
        return res.netloc if res.netloc else res.path
    return res.scheme


def eq(l):
    """Print every row of ``data.xlsx`` whose second column is not in ``l``.

    Args:
        l: collection of known values to compare column 1 against.

    Prints the column-1 value and the column-0 value of each unmatched row,
    separated by a tab.
    """
    table = xlrd.open_workbook('data.xlsx').sheet_by_index(0)
    for row in range(table.nrows):
        u = table.cell(row, 1).value
        ip = table.cell(row, 0).value
        if u not in l:
            print(u, "\t", ip)


def moveFile(srcfile, dstfile):
    """Move ``srcfile`` to ``dstfile``, creating the destination folder if needed.

    Args:
        srcfile: path of the file to move.
        dstfile: destination path; a path ending in a separator names a folder
            to move the file into.

    If ``srcfile`` is not an existing file a message is printed and nothing is
    moved.
    """
    if not os.path.isfile(srcfile):
        print(f"{srcfile} 該檔案不存在!請檢查您的輸入")
        return
    fpath, _fname = os.path.split(dstfile)
    # A bare file name has no directory part; os.makedirs('') would raise.
    if fpath:
        os.makedirs(fpath, exist_ok=True)
    shutil.move(srcfile, dstfile)


def searchdata(l, dir, dst_root=DEST_ROOT):
    """Resolve each target in ``l`` to a unit and move matching reports there.

    Args:
        l: lookup keys (as returned by ``chooseInfo``).
        dir: folder containing the HTML report files.
        dst_root: folder under which one sub-folder per unit is created.

    Prints each resolved key with its unit, moves every report in ``dir`` whose
    third <h1> resolves to one of those keys, then prints a completion message.
    """
    info_sheet = xlrd.open_workbook(SITE_INFO_XLS).sheet_by_index(0)
    list_sheet = xlrd.open_workbook(SITE_LIST_XLSX).sheet_by_index(0)

    # First occurrence wins in both maps, mirroring the list.index() lookups
    # of the original parallel-list implementation.
    url_to_unit = {}
    ip_to_unit = {}
    for row in range(info_sheet.nrows):
        u = info_sheet.cell(row, 4).value
        unit = info_sheet.cell(row, -1).value
        ip = info_sheet.cell(row, 5).value
        ip_to_unit.setdefault(ip, info_sheet.cell(row, 16).value)
        # Rows without a usable URL are keyed by the value of column 5 instead.
        if u == '*' or u == '無':
            u = ip
        url_to_unit.setdefault(u, unit)
    for row in range(list_sheet.nrows):
        url_to_unit.setdefault(list_sheet.cell(row, 0).value,
                               list_sheet.cell(row, 3).value)

    targets = {}
    for key in l:
        if key in url_to_unit:
            unit = url_to_unit[key]
        elif key in ip_to_unit:
            # Store the same unit that is printed.  The original stored
            # all_data[ip.index(key)], which indexes a list that is not
            # row-aligned with the IP column.
            unit = ip_to_unit[key]
        else:
            continue
        print(key, "\t", unit)
        targets.setdefault(key, unit)

    for path in _iter_files(dir):
        text = _third_h1_text(path)
        # Try the raw heading first, then the key derived from parsing it.
        key = text if text in targets else _lookup_key(text)
        if key in targets:
            moveFile(path, dst_root + targets[key] + "\\")
    print('操作完成')


def chooseInfo(dir):
    """Collect one lookup key per report file in ``dir``.

    Args:
        dir: folder containing the HTML report files.

    Returns:
        A list with the lookup key derived from the third <h1> of each file.
        Keys for full URLs are their network location; the original appended
        those to the directory listing by mistake, so they were never returned.
    """
    headings = [_third_h1_text(path) for path in _iter_files(dir)]
    return [_lookup_key(text) for text in headings]


a = chooseInfo(REPORT_DIR)
searchdata(a, REPORT_DIR)
# Optional cross-check against data.xlsx:
# eq(a)