Scraping NSFOCUS Vulnerability Scanner Data
阿新 · Published 2020-09-10
For work I needed the vulnerability data held by a security appliance's scanner, but the data the scanner itself exports is encrypted, so the only option was to scrape it from the web interface with a crawler.
The code is as follows:
# -*- coding: utf-8 -*-
import os
import re

import requests
import urllib3
import xlsxwriter
from bs4 import BeautifulSoup
from xlrd import open_workbook
from xlutils.copy import copy

# Silence the warnings that verify=False triggers (the scanner uses a
# self-signed certificate).
urllib3.disable_warnings()

put_name = 'loudong'


def login(login_url, username, password):
    # Request headers
    my_headers = {
        'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/48.0.2564.116 Safari/537.36',
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
        'Accept-Encoding': 'gzip',
        'Accept-Language': 'zh-CN,zh;q=0.8,en;q=0.6,zh-TW;q=0.4',
        'Origin': 'https://10.10.10.10',
        'Referer': 'https://10.10.10.10/accounts/login_view/'
    }
    # Fetch the login page first to obtain the CSRF token. The page contains:
    # <input type='hidden' name='csrfmiddlewaretoken' value="mvTgwjCx1iTzAdRROOPvk8YctcbO9uXV">
    sss = requests.Session()
    r = sss.get(url='https://10.10.10.10/accounts/login/', headers=my_headers, verify=False)
    pattern = re.compile(r'<input type=\'hidden\' name=\'csrfmiddlewaretoken\' value="(.*)">')
    token = pattern.findall(r.text)[0]
    # POST data
    my_data = {
        'username': username,
        'password': password,
        'csrfmiddlewaretoken': token
    }
    # Log in; the session object keeps the authenticated cookies.
    sss.post(login_url, headers=my_headers, data=my_data, verify=False)
    return sss


def get_date(url, sss):
    my_headers = {
        'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/48.0.2564.116 Safari/537.36',
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
        'Accept-Encoding': 'gzip',
        'Accept-Language': 'zh-CN,zh;q=0.8,en;q=0.6,zh-TW;q=0.4',
        'Origin': 'https://10.10.10.10',
        'Referer': 'https://10.10.10.10/template/show_template?temp_id=12&temp_name=%E5%85%A8%E9%83%A8%E6%BC%8F%E6%B4%9E%E6%89%AB%E6%8F%8F&temp_desc=%E6%9C%AC%E6%A8%A1%E6%9D%BF%E6%89%AB%E6%8F%8F%E6%89%80%E6%9C%89%E6%8F%92%E4%BB%B6&vlun_count_allundefined'
    }
    my_data = {
        'val': 'System',
        'temp_id': '12',
        'conditions': 'is_dangerous =',
        'op_type': 'showStemp'
    }
    r = sss.get(url, headers=my_headers, data=my_data, verify=False, timeout=5)
    # Emptiness check: a detail page with content is detected as UTF-8,
    # a page for a nonexistent ID is not.
    if r.apparent_encoding == 'utf-8':
        print('page is not empty....')
    else:
        print(url, '--> !! page is empty')
        return
    # Defaults for fields that may be missing on some pages.
    leak_name = solution = CVE_id = cvss_score = ''
    leak_desc = data_discovery = CNCVE_id = risk_score = ''
    soup = BeautifulSoup(r.text, "html.parser")
    # Rows with class "odd"; the slices below strip the Chinese label
    # prefix from each row's text.
    tables = soup.find_all('tr', class_='odd')
    for i, env in enumerate(tables):
        # Vulnerability name
        if i == 0:
            leak_name = env.get_text()
        # Solution
        if i == 1:
            solution = env.get_text()[5:].replace('\n', '')
        # # Dangerous plugin
        # if i == 2:
        #     danger_plug = env.get_text()[6]
        # CVE ID
        if i == 3:
            CVE_id = env.get_text()[7:]
        # CVSS score
        if i == 5:
            cvss_score = env.get_text()[7:]
    # Rows with class "even"
    tables2 = soup.find_all('tr', class_='even')
    for i, env in enumerate(tables2):
        # Vulnerability description
        if i == 0:
            leak_desc = env.get_text()[6:].strip().replace('\n', '')
        # Risk score
        if i == 1:
            risk_score = env.get_text()[5:]
        # Discovery date
        if i == 2:
            data_discovery = env.get_text()[5:]
        # CNCVE ID
        if i == 3:
            CNCVE_id = env.get_text()[9:]
    # The CNVD / CNNVD IDs sit in fixed cells near the end of the table.
    tables3 = soup.find_all('td')
    cnvd_id = ''
    if "CNVD" in tables3[-1].get_text():
        cnvd_id = tables3[-1].get_text()
    cnnvd_id = ''
    if "CNNVD" in tables3[-6].get_text():
        cnnvd_id = tables3[-6].get_text()
    print('data returned ---> success')
    log_file(url)
    return leak_name, solution, CVE_id, cvss_score, leak_desc, data_discovery, CNCVE_id, cnvd_id, cnnvd_id, risk_score


def w_file(leak_name, solution, CVE_id, cvss_score, leak_desc, data_discovery, CNCVE_id, cnvd_id, cnnvd_id, risk_score):
    # Create the workbook on first use, then always append one row.
    # Note: xlsxwriter writes xlsx-format content regardless of the .xls
    # extension, so reading it back requires xlrd < 2.0.
    if not os.path.exists(put_name + ".xls"):
        workbook = xlsxwriter.Workbook(put_name + ".xls")
        workbook.add_worksheet('employee')
        workbook.close()
    r_xls = open_workbook(put_name + ".xls")  # read the existing Excel file
    row = r_xls.sheets()[0].nrows  # number of rows already present
    excel = copy(r_xls)  # convert the xlrd object into an xlwt object
    table = excel.get_sheet(0)  # sheet to operate on
    # Append one row to the sheet.
    table.write(row, 0, leak_name)
    table.write(row, 1, solution)
    table.write(row, 2, CVE_id)
    table.write(row, 3, cvss_score)
    table.write(row, 4, leak_desc)
    table.write(row, 5, data_discovery)
    table.write(row, 6, CNCVE_id)
    table.write(row, 7, cnvd_id)
    table.write(row, 8, cnnvd_id)
    table.write(row, 9, risk_score)
    excel.save(put_name + ".xls")


def e_file(str_f):
    # Append a failed URL plus its exception text to the error log.
    with open('error.txt', 'a+') as f:
        f.write(str(str_f) + '\n')


def log_file(str_f):
    # Append a successfully scraped URL to the progress log.
    with open('w_file.txt', 'a+') as f:
        f.write(str(str_f) + '\n')


if __name__ == '__main__':
    login_success = login("https://10.10.10.10/accounts/login_view/", "username", "password")
    # The detail-page IDs follow no obvious pattern, so enumerate a range.
    for i in range(50000, 60000):
        url = "https://10.10.10.10/template/show_vul_desc?id=%s" % i
        try:
            leak_name, solution, CVE_id, cvss_score, leak_desc, data_discovery, CNCVE_id, cnvd_id, cnnvd_id, risk_score = get_date(url, login_success)
            w_file(leak_name, solution, CVE_id, cvss_score, leak_desc, data_discovery, CNCVE_id, cnvd_id, cnnvd_id, risk_score)
        except Exception as e:
            # Empty pages make get_date() return None, which fails to
            # unpack and lands here along with real network errors.
            e_file(url + str(e))
            print(url, e)
        else:
            print(url, "scraped successfully")
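One caveat about the parsing: get_date() reads fields by row position (row 0 is the name, row 3 the CVE ID, and so on), which breaks the moment the scanner reorders or omits a row. Looking fields up by their label text is less brittle. Below is a minimal sketch; field_by_label is a hypothetical helper, and the exact label strings (e.g. 'CVE編號') are assumptions inferred from the slice offsets in the code above:

from bs4 import BeautifulSoup

def field_by_label(soup, label):
    # Find the first table row whose text starts with the given label
    # and return whatever follows it, e.g. "CVE編號:CVE-2020-1234".
    for tr in soup.find_all('tr'):
        text = tr.get_text(strip=True)
        if text.startswith(label):
            return text[len(label):].lstrip(':: ')
    return ''

# Usage inside get_date(), replacing the index-based loops (label
# strings are assumptions about the page layout):
# CVE_id = field_by_label(soup, 'CVE編號')
# cvss_score = field_by_label(soup, 'CVSS評分')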
Summary: logging in ran into the CSRF token problem, which took quite a while to solve; and since the IDs of the vulnerability detail pages follow no pattern, exhaustive enumeration of the ID range was the only option.
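On the token part: the regex in login() matches one exact quoting style, so it silently breaks if the login page ever renders the hidden field differently. Parsing the form with BeautifulSoup is sturdier. A minimal sketch, assuming the page uses the standard Django csrfmiddlewaretoken hidden input (which the field name suggests):

import requests
from bs4 import BeautifulSoup

def fetch_csrf_token(session, login_page_url, headers):
    # Pull the CSRF token out of the parsed form instead of regexing raw
    # HTML, so attribute order and quote style stop mattering.
    r = session.get(login_page_url, headers=headers, verify=False)
    soup = BeautifulSoup(r.text, 'html.parser')
    field = soup.find('input', attrs={'name': 'csrfmiddlewaretoken'})
    return field['value'] if field is not None else None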
The code itself still has a lot of room for optimization; I'll polish it when I have time.
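One cheap improvement, for instance: log_file() already records every successfully scraped URL in w_file.txt, so a restarted run could skip those instead of re-scraping (and re-appending) the whole ID range. A sketch reusing the names from the script above:

import os

def load_done_urls(log_path='w_file.txt'):
    # Each line of the progress log written by log_file() is one URL.
    if not os.path.exists(log_path):
        return set()
    with open(log_path, encoding='utf-8') as f:
        return set(line.strip() for line in f if line.strip())

# In the main loop, before requesting:
# done = load_done_urls()
# ...
# if url in done:
#     continue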