1. 程式人生 > 其它 >python爬蟲--seebug爬取

python爬蟲--seebug爬取

爬取相關漏洞,並儲存到本地檔案

思路:

1.找相關的標籤一步一步往下查詢
2.有cookie才能查詢
3.用 import re 而不用 from re import * 是為了防止名稱衝突(例如 re 的 compile 會覆蓋內建的 compile)

#coding:utf-8
from requests import *
import re
from bs4 import BeautifulSoup as bs
# HTTP request headers sent with every page fetch; they mimic a desktop
# Edge/Chromium browser navigating within www.seebug.org.
header = {
    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9',
    # NOTE(review): 'br' is advertised here; presumably the HTTP stack can only
    # decode Brotli responses when a brotli package is installed -- confirm.
    'Accept-Encoding': 'gzip, deflate, br',
    'Accept-Language': 'zh-CN,zh;q=0.9,en;q=0.8,en-GB;q=0.7,en-US;q=0.6',
    # The accompanying notes state that queries only work when a cookie is sent.
    # NOTE(review): these values look like they were captured from one browser
    # session; presumably they need to be refreshed before reuse -- confirm.
    'Cookie': '__jsluid_s=d4e6e0a49be7d6692bb040ab291b49e3; csrftoken=kII6j1AB3N2kGfpD9eWNR5wAw2J2PQ3R; Hm_lvt_6b15558d6e6f640af728f65c4a5bf687=1646290153; __jsl_clearance_s=1646293799.561|0|uPEc3ooLbvuRvBv%2BgEWWI6SaiE8%3D; Hm_lpvt_6b15558d6e6f640af728f65c4a5bf687=1646294268',
    'Host': 'www.seebug.org',
    'Referer': 'https://www.seebug.org/',
    'sec-ch-ua': '" Not A;Brand";v="99", "Chromium";v="98", "Microsoft Edge";v="98"',
    'sec-ch-ua-platform': '"Windows"',
    'Sec-Fetch-Dest': 'document',
    'Sec-Fetch-Mode': 'navigate',
    'Sec-Fetch-Site': 'same-origin',
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/98.0.4758.102 Safari/537.36 Edg/98.0.1108.62'
}
# Fetch ten consecutive Seebug vulnerability pages (ssvid-99430 .. ssvid-99439)
# and append the title markup found on each page to a local text file.
for i in range(10):
    # i runs over 0..9, so the page id suffix runs over 30..39.
    url = 'https://www.seebug.org/vuldb/ssvid-994%d' % (30 + i)
    # A timeout keeps one unresponsive page from hanging the whole run.
    data = get(url, headers=header, timeout=30).content.decode('utf-8')
    soup = bs(data, 'lxml')
    # Only the title is needed.
    # NOTE(review): 'pull-titile' looks misspelled but presumably mirrors the
    # class name used in the page markup -- confirm before "fixing" it.
    title = soup.find_all('span', {'class': 'pull-titile'})
    print(title, type(title))
    # find_all returns a list-like result; its string form is what gets stored.
    retu = str(title)
    # Append so results from earlier iterations and runs are kept. The encoding
    # is explicit so non-ASCII titles do not depend on the platform default, and
    # the context manager closes the file even if the write raises.
    with open('retule.txt', 'a+', encoding='utf-8') as out_file:
        out_file.write('\n' + retu)

檔案: