1. 程式人生 > 其它 >中國財富網資訊爬取

中國財富網資訊爬取

技術標籤:python

### 中國財富網資訊爬取

# coding=UTF-8
from urllib.parse import urljoin
from urllib.request import urlopen, Request

from bs4 import BeautifulSoup
import requests

# Scrape listing pages 2-6 of the cfbond.com news index, follow every link
# found inside an <li>, and append each article's title and body text
# (with spaces, tabs and line breaks removed) to a local UTF-8 text file.

LISTING_URL = "http://www.cfbond.com/in/cfkxlb/index_{}.shtml"
OUTPUT_PATH = "浙江財富網.txt"
# Seconds to wait on each HTTP call; without a timeout a stalled server
# would block the script indefinitely.
REQUEST_TIMEOUT = 10
# Browser-like User-Agent sent with the article requests only (the listing
# request goes out with the default `requests` headers, as before).
HEADERS = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/85.0.4183.102 Safari/537.36'}
# Deletes exactly the four characters the original chained .replace() calls
# removed: space, newline, carriage return and tab.
_STRIP_TABLE = str.maketrans('', '', ' \n\r\t')


def _compact(text):
    """Return *text* with every space, newline, carriage return and tab removed."""
    return text.translate(_STRIP_TABLE)


for page_number in range(2, 7):
    listing_url = LISTING_URL.format(page_number)
    response = requests.get(listing_url, timeout=REQUEST_TIMEOUT)
    listing = BeautifulSoup(response.text, 'html.parser')

    for item in listing.find_all('li'):
        link = item.find('a')
        # Not every <li> carries a link; skip those instead of crashing.
        if link is None or not link.get('href'):
            continue

        # Resolve relative hrefs against the listing page; absolute hrefs
        # pass through urljoin unchanged.
        article_url = urljoin(listing_url, link['href'])
        # urlopen cannot fetch javascript:/mailto: style links.
        if not article_url.startswith(('http://', 'https://')):
            continue

        request = Request(headers=HEADERS, url=article_url)
        # The context manager closes the HTTP response once it is parsed.
        with urlopen(request, timeout=REQUEST_TIMEOUT) as page:
            article = BeautifulSoup(page, 'html.parser')

        title_tag = article.find('title')
        body_tag = article.find('div', {'class': 's_xlLContCRC'})
        # Linked pages without a title or the expected content container
        # cannot be extracted, so skip them.
        if title_tag is None or body_tag is None:
            continue

        title = _compact(title_tag.get_text())
        body = _compact(body_tag.get_text())

        # Append per article so already-scraped entries are on disk even if
        # a later request fails.
        with open(OUTPUT_PATH, 'a', encoding='utf-8') as f:
            f.write(title + "\n" + body + "\n" + "\n")