中國財富網資訊爬取
阿新 • • 發佈:2021-01-10
###中國財富網資訊爬取
# coding=UTF-8
from urllib.parse import urljoin
from urllib.request import urlopen, Request

import requests
from bs4 import BeautifulSoup
# Listing pages are numbered; "{}" is filled with the page index.
LIST_URL = "http://www.cfbond.com/in/cfkxlb/index_{}.shtml"
# Every scraped article is appended to this file.
OUTPUT_PATH = "浙江財富網.txt"
# Seconds to wait on any single HTTP request before giving up, so that one
# stalled connection cannot hang the whole run.
REQUEST_TIMEOUT = 30
# Browser-like User-Agent sent with every request (listing and article).
HEADERS = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/85.0.4183.102 Safari/537.36'}
# Translation table that deletes spaces, newlines, carriage returns and tabs.
_STRIP_TABLE = str.maketrans('', '', ' \n\r\t')


def _squash(text):
    """Return *text* with every space, newline, carriage return and tab removed."""
    return text.translate(_STRIP_TABLE)


def _article_links(list_url):
    """Yield the absolute http(s) URL linked from each <li> on a listing page.

    <li> elements without an <a href=...> are skipped, and so are links
    that do not resolve to an http/https URL (e.g. ``javascript:`` hrefs).
    """
    response = requests.get(list_url, headers=HEADERS, timeout=REQUEST_TIMEOUT)
    listing = BeautifulSoup(response.text, 'html.parser')
    for item in listing.find_all('li'):
        anchor = item.find('a')
        if anchor is None:
            continue
        href = anchor.get('href')
        if not href:
            continue
        # Resolve relative hrefs against the page that was actually served.
        link = urljoin(response.url, href)
        if link.startswith(('http://', 'https://')):
            yield link


def _fetch_article(url):
    """Download one article page and return ``(title, body)``.

    Both strings have spaces, newlines, carriage returns and tabs removed.
    Returns ``None`` when the page has no <title> or no
    ``div.s_xlLContCRC`` element.
    """
    request = Request(url=url, headers=HEADERS)
    # BeautifulSoup reads the whole response inside its constructor, so the
    # connection can be closed as soon as parsing is done.
    with urlopen(request, timeout=REQUEST_TIMEOUT) as page:
        article = BeautifulSoup(page, 'html.parser')
    title_tag = article.find('title')
    content_tag = article.find('div', {'class': 's_xlLContCRC'})
    if title_tag is None or content_tag is None:
        return None
    return _squash(title_tag.get_text()), _squash(content_tag.get_text())


# Walk listing pages 2..6 and append each article's title and body text to
# OUTPUT_PATH, with a blank line after every article.
for page_no in range(2, 7):
    for link in _article_links(LIST_URL.format(page_no)):
        article = _fetch_article(link)
        if article is None:
            # Not an article page (different layout); nothing to record.
            continue
        title, body = article
        with open(OUTPUT_PATH, 'a', encoding='utf-8') as f:
            f.write(title + "\n" + body + "\n" + "\n")