
Scraping NSFOCUS Vulnerability Scanner Data

For work I needed the vulnerability data from a security appliance's scanner, but the data the scanner exports is encrypted, so the only option was to scrape it from the web UI with a crawler.

The code is as follows:

# -*- coding: utf-8 -*-
import os
import re

import requests
import xlsxwriter
from bs4 import BeautifulSoup
from xlrd import open_workbook
from xlutils.copy import copy

# Suppress the InsecureRequestWarning that verify=False would otherwise print
requests.packages.urllib3.disable_warnings()

put_name = 'loudong'  # base name of the output .xls workbook


def login(login_url, username, password):
    # Request headers
    my_headers = {
        'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/48.0.2564.116 Safari/537.36',
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
        'Accept-Encoding': 'gzip',
        'Accept-Language': 'zh-CN,zh;q=0.8,en;q=0.6,zh-TW;q=0.4',
        'Origin': 'https://10.10.10.10',
        'Referer': 'https://10.10.10.10/accounts/login_view/'
    }

    # Fetch the login page to obtain the CSRF token
    sss = requests.Session()
    r = sss.get(url='https://10.10.10.10/accounts/login/', headers=my_headers, verify=False)
    # Matches: <input type='hidden' name='csrfmiddlewaretoken' value="mvTgwjCx1iTzAdRROOPvk8YctcbO9uXV">
    pattern = re.compile(r'<input type=\'hidden\' name=\'csrfmiddlewaretoken\' value="(.*?)">')

    result = pattern.findall(r.text)
    token = result[0]  # an IndexError here means the token field was not found

    # POST form data
    my_data = {
        # 'commit': 'Login',
        'username': username,
        'password': password,
        'csrfmiddlewaretoken': token
    }

    # Log in; the session keeps the authenticated cookies for later requests
    r = sss.post(login_url, headers=my_headers, data=my_data, verify=False)
    #print(r.text)

    return sss

def get_data(url, sss):
    my_headers = {
        'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/48.0.2564.116 Safari/537.36',
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
        'Accept-Encoding': 'gzip',
        'Accept-Language': 'zh-CN,zh;q=0.8,en;q=0.6,zh-TW;q=0.4',
        'Origin': 'https://10.10.10.10',
        'Referer': 'https://10.10.10.10/template/show_template?temp_id=12&temp_name=%E5%85%A8%E9%83%A8%E6%BC%8F%E6%B4%9E%E6%89%AB%E6%8F%8F&temp_desc=%E6%9C%AC%E6%A8%A1%E6%9D%BF%E6%89%AB%E6%8F%8F%E6%89%80%E6%9C%89%E6%8F%92%E4%BB%B6&vlun_count_allundefined'
    }
    # Form fields copied from the template page; sent as a GET body below,
    # which the server most likely ignores
    my_data = {
        'val': 'System',
        'temp_id': '12',
        'conditions': 'is_dangerous =',
        'op_type': 'showStemp'
    }
    r = sss.get(url, headers=my_headers, data=my_data, verify=False, timeout=5)
    # Heuristic empty-page check: a populated detail page is detected as UTF-8
    if r.apparent_encoding == 'utf-8':
        print('page is not empty....')
    else:
        print(url, '-->   !!page is empty')
        return
    # Defaults, in case a field is missing from the page
    leak_name = solution = leak_desc = data_discovery = ''
    CVE_id = cvss_score = CNCVE_id = risk_score = ''
    soup = BeautifulSoup(r.text, "html.parser")

    # Rows whose class is 'odd'
    tables = soup.find_all('tr', class_='odd')
    for i, env in enumerate(tables):
        # Vulnerability name
        if i == 0:
            leak_name = env.get_text()
        # Solution (slice off the field label, drop newlines)
        if i == 1:
            str_env = env.get_text()
            solution = str_env[5:].replace('\n', '')
        # i == 2 is the "dangerous plugin" row, which is not collected here
        # CVE ID
        if i == 3:
            str_env = env.get_text()
            CVE_id = str_env[7:]
        # CVSS score
        if i == 5:
            str_env = env.get_text()
            cvss_score = str_env[7:]

    # Rows whose class is 'even'
    tables2 = soup.find_all('tr', class_='even')
    for i, env in enumerate(tables2):
        # Vulnerability description
        if i == 0:
            str_env = env.get_text()
            leak_desc = str_env[6:].strip().replace('\n', '')
        # Risk score
        if i == 1:
            str_env = env.get_text()
            risk_score = str_env[5:]
        # Discovery date
        if i == 2:
            str_env = env.get_text()
            data_discovery = str_env[5:]
        # CNCVE ID
        if i == 3:
            str_env = env.get_text()
            CNCVE_id = str_env[9:]

    # CNVD / CNNVD IDs sit at fixed offsets from the end of the <td> list
    tables3 = soup.find_all('td')
    cnvd_id = ''
    if "CNVD" in tables3[-1].get_text():
        cnvd_id = tables3[-1].get_text()
    cnnvd_id = ''
    if "CNNVD" in tables3[-6].get_text():
        cnnvd_id = tables3[-6].get_text()
    print('data returned ---> success')
    log_file(url)
    return leak_name, solution, CVE_id, cvss_score, leak_desc, data_discovery, CNCVE_id, cnvd_id, cnnvd_id, risk_score

def w_file(leak_name, solution, CVE_id, cvss_score, leak_desc, data_discovery, CNCVE_id, cnvd_id, cnnvd_id, risk_score):
    # Create the workbook on first use so the append below always has a file to open
    if not os.path.exists(put_name + ".xls"):
        workbook = xlsxwriter.Workbook(put_name + ".xls")
        workbook.add_worksheet('employee')
        workbook.close()
    r_xls = open_workbook(put_name + ".xls")  # open the existing workbook
    row = r_xls.sheets()[0].nrows             # number of rows already present
    excel = copy(r_xls)                       # convert the xlrd object to an xlwt one
    table = excel.get_sheet(0)                # sheet to operate on
    # Append one row per vulnerability record
    table.write(row, 0, leak_name)
    table.write(row, 1, solution)
    table.write(row, 2, CVE_id)
    table.write(row, 3, cvss_score)
    table.write(row, 4, leak_desc)
    table.write(row, 5, data_discovery)
    table.write(row, 6, CNCVE_id)
    table.write(row, 7, cnvd_id)
    table.write(row, 8, cnnvd_id)
    table.write(row, 9, risk_score)
    excel.save(put_name + ".xls")


def e_file(str_f):
    # Append a failed URL and its exception to error.txt
    with open('error.txt', 'a+') as f:
        f.write(str(str_f) + '\n')

def log_file(str_f):
    # Append a successfully scraped URL to w_file.txt
    with open('w_file.txt', 'a+') as f:
        f.write(str(str_f) + '\n')

if __name__ == '__main__':
    session = login("https://10.10.10.10/accounts/login_view/", "username", "password")

    # Detail-page IDs follow no obvious pattern, so enumerate a plausible range
    for i in range(50000, 60000):
        url = "https://10.10.10.10/template/show_vul_desc?id=%s" % (i)
        try:
            leak_name, solution, CVE_id, cvss_score, leak_desc, data_discovery, CNCVE_id, cnvd_id, cnnvd_id, risk_score = get_data(url, session)
            w_file(leak_name, solution, CVE_id, cvss_score, leak_desc, data_discovery, CNCVE_id, cnvd_id, cnnvd_id, risk_score)
        except Exception as e:
            e_file(url + str(e))
            print(url, e)
        else:
            print(url, "scrape finished")

Summary: the CSRF token during login was the tricky part and took quite a while to work out. And since the vulnerability detail pages have no predictable ID sequence, enumerating the ID range was the only option.
The code still has plenty of room for optimization; I will polish it when I have time.
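
As a side note on the token problem: the extraction could also be done with BeautifulSoup instead of a regex, which is more tolerant of changes in quoting or attribute order. A minimal sketch, assuming the login page contains an <input name="csrfmiddlewaretoken" value="..."> field as shown earlier:

# Sketch of CSRF-token extraction via BeautifulSoup rather than a regex
from bs4 import BeautifulSoup

def get_csrf_token(html):
    soup = BeautifulSoup(html, 'html.parser')
    field = soup.find('input', attrs={'name': 'csrfmiddlewaretoken'})
    if field is None:
        raise ValueError('csrfmiddlewaretoken not found on the login page')
    return field['value']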