1. 程式人生 > >通過關鍵字獲取漏洞平臺最新漏洞信息

通過關鍵字獲取漏洞平臺最新漏洞信息

date pri header close bili log文件 lte text false

因為每天都要查找漏洞信息,以判斷架構和應用服務有沒有新漏洞被發現(若有,則需修復升級),所以寫了一個腳本:通過關鍵字到漏洞庫平臺爬取數據,並生成日誌文件。腳本共檢索三個平臺,其中只有美國國家信息安全漏洞庫的地址會時不時出現超時情況;若出現超時,可多試兩次。三個平臺檢索出的漏洞差不多。寫得不好,僅供參考。

python版本3.7
pip安裝requests即可

#coding=utf-8
import requests as r
import re
import time
import datetime

#爬取國家信息安全漏洞平臺
class gjxxaqpt:
    """Scraper for the national vulnerability platform at cnnvd.org.cn.

    ``write_file`` is the entry point: it searches the platform for each
    keyword, keeps the hits whose date contains ``date_time`` and returns
    one formatted log line per hit.
    """

    # Seconds to wait for a response; without a timeout a stalled server
    # would block the whole run indefinitely.
    REQUEST_TIMEOUT = 30

    # Browser-like User-Agent sent when fetching a detail page.
    _HEADERS = {
        "User-Agent": "Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.110 Safari/537.36"
    }

    # One match per search hit; groups are
    # (title, detail path, vulnerability id, severity, date).
    # Whitespace around the title and the date is matched with ``\s*`` so the
    # pattern does not depend on the exact indentation of the served HTML.
    _LIST_PATTERN = re.compile(
        r'<li style=".*?class="a_title2" >\s*(.*?)</a>.*?'
        r'<p><a href="(.*?)" target="_blank">(.*?)</a>.*?'
        r'<img title="(.*?)" src=".*?<br/ >\s*(.*?)\s*</div>.*?</li>',
        re.S,
    )

    # Captures the description paragraph of a detail page.
    _DETAIL_PATTERN = re.compile(
        '<div class="d_ldjj">.*?</p><p style="text-indent:2em">\n(.*?)\r\n\t\t\t</p>',
        re.S,
    )

    @classmethod
    def _parse_list(cls, html):
        """Return the list of 5-tuples matched by the search-result pattern."""
        return cls._LIST_PATTERN.findall(html)

    @classmethod
    def _parse_summary(cls, html):
        """Return the first description found in *html*, or "" if none."""
        found = cls._DETAIL_PATTERN.findall(html)
        # findall() returns a list (never None), so test for emptiness.
        return found[0] if found else ""

    def get_404(self, url, keyword):
        """POST a search for *keyword* to *url* and return the parsed hits.

        Only the first result page is requested (``pageno`` is 1).

        Returns:
            A list of (title, detail path, id, severity, date) tuples.

        Raises:
            requests.RequestException: if the request fails or times out.
        """
        data = {"qcvCname": keyword, "pageno": 1}
        html = r.post(url, data=data, timeout=self.REQUEST_TIMEOUT).text
        return self._parse_list(html)

    def get_404_mes(self, url):
        """Fetch the detail page at *url* and return its description text.

        Returns "" when the request fails (a message is printed) or when the
        page contains no description, so callers can always concatenate the
        result.
        """
        try:
            html = r.get(url, headers=self._HEADERS, timeout=self.REQUEST_TIMEOUT).text
        except r.RequestException:
            print("連接超時" + url)
            return ""
        return self._parse_summary(html)

    def write_file(self, keyword, date_time):
        """Collect formatted log lines for every hit matching *date_time*.

        Args:
            keyword: a single search term or an iterable of search terms.
            date_time: substring (e.g. the current year) that a hit's date
                must contain to be reported.

        Returns:
            A list of strings, one per reported vulnerability.
        """
        # URL the search form is posted to.
        url = "http://www.cnnvd.org.cn/web/vulnerability/queryLds.tag"
        # Prefix for the site-relative detail links.
        url_domain = "http://www.cnnvd.org.cn"
        keywords = [keyword] if isinstance(keyword, str) else keyword

        mes_list = []
        for word in keywords:
            try:
                hits = self.get_404(url, word)
            except r.RequestException:
                print("timeout:" + url + "," + word)
                continue
            for res in hits:
                if date_time not in res[4]:
                    continue
                mes_url = url_domain + res[1]
                message = self.get_404_mes(mes_url)
                mes_list.append(" | ".join([
                    res[0],
                    "漏洞編號:" + res[2],
                    "等級:" + res[3],
                    "時間:" + res[4],
                    "詳情地址:" + mes_url,
                    "漏洞簡介:" + message,
                ]))
        return mes_list

#cve中文漏洞信息庫 - scap中文社區
class cve_scap:
    """Scraper for the Chinese CVE database at cve.scap.org.cn.

    ``write_file`` is the entry point: it searches the site for each keyword,
    keeps the hits whose date contains ``date_time`` and returns one formatted
    log line per hit.
    """

    # Seconds to wait for a response; without a timeout a stalled server
    # would block the whole run indefinitely.
    REQUEST_TIMEOUT = 30

    # Browser-like User-Agent sent with every request.
    _HEADERS = {
        "User-Agent": "Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.110 Safari/537.36"
    }

    # One match per search hit; groups are
    # (detail href including its quotes, CVE id, date, severity).
    # Whitespace around the id is matched with ``\s*`` so the pattern does not
    # depend on the exact indentation of the served HTML.
    _LIST_PATTERN = re.compile(
        r"<td class='hidden-xs'>.*?<a href=(.*?)>\s*(.*?)\s*</a>.*?"
        r"<td class='hidden-xs hidden-sm'>(.*?)</td>.*?title='(.*?)' class='grade",
        re.S,
    )

    # Captures the description paragraph of a detail page.
    _DETAIL_PATTERN = re.compile(
        "pad30T pad30B mrg0B' style='word-wrap: break-word;'>\n                        (.*?)</p>",
        re.S,
    )

    @classmethod
    def _parse_list(cls, html):
        """Return the list of 4-tuples matched by the search-result pattern."""
        return cls._LIST_PATTERN.findall(html)

    @classmethod
    def _parse_summary(cls, html):
        """Return the first description found in *html*, or "" if none."""
        found = cls._DETAIL_PATTERN.findall(html)
        # findall() returns a list (never None), so test for emptiness.
        return found[0] if found else ""

    def get_cve_404(self, url, keyword):
        """GET the search results for *keyword* from *url* and parse them.

        Returns:
            A list of (href, CVE id, date, severity) tuples.

        Raises:
            requests.RequestException: if the request fails or times out.
        """
        data = {"search_type": "t_keyword", "keyword": keyword}
        html = r.get(
            url, params=data, headers=self._HEADERS, timeout=self.REQUEST_TIMEOUT
        ).text
        return self._parse_list(html)

    def get_cve_404_mes(self, url):
        """Fetch the detail page at *url* and return its description text.

        Returns "" when the request fails (a message is printed) or when the
        page contains no description, so callers can always use the result
        as a string.
        """
        try:
            html = r.get(url, headers=self._HEADERS, timeout=self.REQUEST_TIMEOUT).text
        except r.RequestException:
            print("timeout: " + url)
            return ""
        return self._parse_summary(html)

    def write_file(self, keylist, date_time):
        """Collect formatted log lines for every hit matching *date_time*.

        Args:
            keylist: iterable of search terms.
            date_time: substring (e.g. the current year) that a hit's date
                must contain to be reported.

        Returns:
            A list of strings, one per reported vulnerability.
        """
        # Search endpoint.
        url = "http://cve.scap.org.cn/vulns/1"
        # Prefix for the site-relative detail links.
        url_domain = "http://cve.scap.org.cn"

        mes_list = []
        for keyword in keylist:
            try:
                hits = self.get_cve_404(url, keyword)
            except r.RequestException:
                # One unreachable search must not abort the remaining keywords.
                print("timeout:" + url + "," + keyword)
                continue
            for res in hits:
                if date_time not in res[2]:
                    continue
                # The captured href still carries its surrounding quotes.
                mes_url = url_domain + res[0].strip('"')
                message = self.get_cve_404_mes(mes_url)
                mes_list.append(" | ".join([
                    "漏洞編號:" + res[1],
                    "等級:" + res[3],
                    "時間:" + res[2],
                    "詳情地址:" + mes_url,
                    "漏洞簡介:" + message.replace("\n", ""),
                ]))
        return mes_list

#美國國家信息安全漏洞庫
class nvd_nist:
    """Scraper for the US National Vulnerability Database (nvd.nist.gov).

    ``write_file`` is the entry point: it searches the site for each keyword,
    converts the English publication time to a numeric one, keeps the hits
    whose time contains ``date_time`` and returns one formatted log line per
    hit.
    """

    # Seconds to wait for a response; without a timeout a stalled server
    # would block the whole run indefinitely.
    REQUEST_TIMEOUT = 30

    # Browser-like User-Agent sent with the search request.
    _HEADERS = {
        "User-Agent": "Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.110 Safari/537.36"
    }

    # One match per result row; groups are
    # (detail path, CVE id, summary, publication time).
    _LIST_PATTERN = re.compile(
        r'<tr data-testid="vuln-row.*?<a href="(.*?)" id=".*?'
        r'data-testid="vuln-detail-link-[0-9]{1,2}">(.*?)</a></strong><br/>.*?'
        r"<p data-testid='vuln-summary-[0-9]{1,2}'>(.*?)</p>.*?"
        r"<span data-testid='vuln-published-on-[0-9]{1,2}'>(.*?)</span>",
        re.S,
    )

    @classmethod
    def _parse_list(cls, html):
        """Return the list of 4-tuples matched by the result-row pattern."""
        return cls._LIST_PATTERN.findall(html)

    @staticmethod
    def _to_numeric_time(eng_time):
        """Convert e.g. "December 10, 2018; 03:29:00 PM -05:00" to
        "2018-12-10 15:29:00".

        Everything after the AM/PM marker is discarded. Returns None when the
        text has no AM/PM marker or does not match the expected layout.
        """
        for marker in ("AM", "PM"):
            if marker in eng_time:
                stamp = eng_time.split(marker)[0].strip()
                break
        else:
            return None
        try:
            # %I/%p honour the 12-hour clock so PM times are not read as AM.
            parsed = datetime.datetime.strptime(
                stamp + " " + marker, "%B %d, %Y; %I:%M:%S %p"
            )
        except ValueError:
            return None
        return str(parsed)

    def get_nvd_404(self, url, keyword):
        """GET the search results for *keyword* from *url* and parse them.

        Returns:
            A list of (detail path, CVE id, summary, publication time) tuples.

        Raises:
            requests.RequestException: if the request fails or times out.
        """
        data = {
            "form_type": "Basic",
            "results_type": "overview",
            "query": keyword,
            "search_type": "all",
        }
        # NOTE(review): certificate verification is switched off here (and the
        # resulting warning silenced), as in the original script. Confirm this
        # is really required; verify=True is the safe setting.
        r.packages.urllib3.disable_warnings()
        html = r.get(
            url,
            params=data,
            headers=self._HEADERS,
            verify=False,
            timeout=self.REQUEST_TIMEOUT,
        ).text
        return self._parse_list(html)

    def write_file(self, keylist, date_time):
        """Collect formatted log lines for every hit matching *date_time*.

        Args:
            keylist: iterable of search terms.
            date_time: substring (e.g. the current year) that the converted
                publication time must contain for a hit to be reported.

        Returns:
            A list of strings, one per reported vulnerability.
        """
        # Search endpoint.
        url = "https://nvd.nist.gov/vuln/search/results"
        # Prefix for the site-relative detail links.
        url_dom = "https://nvd.nist.gov"

        mes_list = []
        for keyword in keylist:
            try:
                hits = self.get_nvd_404(url, keyword)
            except r.RequestException:
                print("timeout:" + url + "," + keyword)
                continue
            for res in hits:
                time_format = self._to_numeric_time(res[3])
                if time_format is None:
                    # Unrecognised time text: report it and skip the row
                    # rather than reuse a value from an earlier row.
                    print("時間判斷有誤")
                    continue
                if date_time not in time_format:
                    continue
                url_domain = url_dom + res[0]
                mes_list.append(" | ".join([
                    "漏洞編號:" + res[1],
                    "時間:" + time_format,
                    "詳情地址:" + url_domain,
                    "漏洞簡介:" + res[2],
                ]))
        return mes_list

# Script entry point: query the three platforms and write the results to
# 404_message.log, one section per platform.
if __name__ == "__main__":
    # Keywords to search for on every platform.
    keylist = ["nginx", "openssl", "openssh"]
    # Current year; only entries whose date contains it are reported.
    date_time = time.strftime("%Y", time.localtime())

    # (section heading written to the log, scraper), in output order.
    sources = (
        ("#國家信息漏洞庫:\n", gjxxaqpt()),
        ("#cve中文漏洞信息庫:\n", cve_scap()),
        ("#美國國家信息安全漏洞庫:\n", nvd_nist()),
    )

    # The context manager closes the log even if a scraper raises.
    with open("404_message.log", "w", encoding="utf-8") as files:
        for heading, source in sources:
            files.write(heading)
            for line in source.write_file(keylist, date_time):
                files.write(line + "\n")
            # Blank line between sections.
            files.write("\n")

通過關鍵字獲取漏洞平臺最新漏洞信息