python爬蟲--seebug爬取
阿新 • • 發佈:2022-03-03
爬取相關漏洞,並儲存到本地檔案
思路:
1.找相關的標籤一步一步往下查詢
2.有cookie才能查詢
3.用 `import re` 而不用 `from re import *`,是為了防止名稱衝突
# coding: utf-8
"""Fetch ten Seebug vulnerability pages and append their title tags to a file.

For each id in 99430..99439 the script downloads
``https://www.seebug.org/vuldb/ssvid-<id>``, collects every
``<span class="pull-titile">`` element, prints the result and appends its
string form to ``retule.txt`` in the current working directory.
"""
from requests import *
import re
from bs4 import BeautifulSoup as bs

# Request headers copied from a browser session.  The author's notes say the
# site only answers when a cookie is sent.
# NOTE(review): the cookie values look session-specific and will presumably
# need refreshing before the script works again - confirm.
# NOTE(review): 'br' is advertised in Accept-Encoding; requests can only
# decode Brotli responses when a brotli package is installed - confirm.
header = {
    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9',
    'Accept-Encoding': 'gzip, deflate, br',
    'Accept-Language': 'zh-CN,zh;q=0.9,en;q=0.8,en-GB;q=0.7,en-US;q=0.6',
    'Cookie': '__jsluid_s=d4e6e0a49be7d6692bb040ab291b49e3; csrftoken=kII6j1AB3N2kGfpD9eWNR5wAw2J2PQ3R; Hm_lvt_6b15558d6e6f640af728f65c4a5bf687=1646290153; __jsl_clearance_s=1646293799.561|0|uPEc3ooLbvuRvBv%2BgEWWI6SaiE8%3D; Hm_lpvt_6b15558d6e6f640af728f65c4a5bf687=1646294268',
    'Host': 'www.seebug.org',
    'Referer': 'https://www.seebug.org/',
    'sec-ch-ua': '" Not A;Brand";v="99", "Chromium";v="98", "Microsoft Edge";v="98"',
    'sec-ch-ua-platform': '"Windows"',
    'Sec-Fetch-Dest': 'document',
    'Sec-Fetch-Mode': 'navigate',
    'Sec-Fetch-Site': 'same-origin',
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/98.0.4758.102 Safari/537.36 Edg/98.0.1108.62',
}

# Seconds to wait for the server before giving up; without a timeout a
# stalled connection would block the script forever.
REQUEST_TIMEOUT = 10

# Results are appended here, one entry per fetched page.
OUTPUT_FILE = 'retule.txt'

for i in range(10):  # i runs 0..9, i.e. ids 99430..99439
    url = 'https://www.seebug.org/vuldb/ssvid-994%d' % (30 + i)
    data = get(url, headers=header, timeout=REQUEST_TIMEOUT).content.decode('utf-8')
    soup = bs(data, 'lxml')

    # Only the title spans are of interest.
    title = soup.find_all('span', {'class': 'pull-titile'})
    print(title, type(title))

    # Append per iteration so earlier results survive a later failure.  The
    # context manager closes the file even if the write raises, and the
    # explicit encoding keeps non-ASCII titles writable on any platform.
    with open(OUTPUT_FILE, 'a+', encoding='utf-8') as out_file:
        out_file.write('\n' + str(title))
檔案: