爬取網頁資料
阿新 • • 發佈:2020-12-24
爬取必應網站資料
# Fetch the Bing home page once, print its body, status code and sizes, then
# request it a further 20 times and print each status code.
import requests
from lxml import etree  # not used by this script; kept from the original listing

BING_URL = 'https://cn.bing.com/'
BING_HEADERS = {
    'user-agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.25 Safari/537.36 Core/1.70.3823.400 QQBrowser/10.7.4307.400'
}
# Seconds to wait for the server; without a timeout requests can block forever.
BING_TIMEOUT = 10
BING_REPEAT_COUNT = 20


def fetch_bing(url=BING_URL, repeat=BING_REPEAT_COUNT):
    """Download *url*, print details of the response, then re-request it.

    Prints, in order: the decoded body, the HTTP status code, the number of
    characters in the decoded body, and the number of bytes in the raw body.
    Afterwards the page is requested ``repeat`` more times and only the
    status code of each response is printed.

    Args:
        url: Address to fetch.
        repeat: How many additional requests to issue after the first one.

    Raises:
        requests.RequestException: If a request fails or times out.
    """
    response = requests.get(url=url, headers=BING_HEADERS, timeout=BING_TIMEOUT)
    response.encoding = 'utf-8'  # force UTF-8 decoding of .text
    text = response.text
    raw = response.content

    print(text)
    print(response.status_code)
    print(len(text))
    # Count the payload bytes directly; str(raw) would measure the length of
    # the bytes repr ("b'...'" with escape sequences) instead.
    print(len(raw))

    for _ in range(repeat):
        response = requests.get(url=url, headers=BING_HEADERS, timeout=BING_TIMEOUT)
        print(response.status_code)


if __name__ == '__main__':
    fetch_bing()
爬取中國大學排行網站資料
# Scrape the university names listed on the ShanghaiRanking page at
# RANKING_URL and save them, numbered from 1 in page order, to OUTPUT_PATH.
import csv

import requests
from lxml import etree

RANKING_URL = 'https://www.shanghairanking.cn/rankings/bcur/201911'
RANKING_HEADERS = {
    'user-agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.25 Safari/537.36 Core/1.70.3823.400 QQBrowser/10.7.4307.400'
}
# Seconds to wait for the server; without a timeout requests can block forever.
RANKING_TIMEOUT = 10
# Text nodes of the links inside the left-aligned table cells.
NAME_XPATH = "//td[@class='align-left']/a/text()"
# NOTE(review): the file receives CSV text despite the .xls extension, and the
# path is specific to one machine - confirm whether both are intended.
OUTPUT_PATH = r'C:\Users\dell 7000\Desktop\text.xls'


def fetch_university_names(url=RANKING_URL):
    """Return the link texts matched by NAME_XPATH on the page at *url*.

    Args:
        url: Address of the ranking page.

    Returns:
        The list of strings selected by NAME_XPATH, in document order.

    Raises:
        requests.RequestException: If the request fails, times out, or the
            server answers with an HTTP error status.
    """
    response = requests.get(url=url, headers=RANKING_HEADERS, timeout=RANKING_TIMEOUT)
    # Fail loudly instead of parsing an error page and writing an empty file.
    response.raise_for_status()
    response.encoding = 'utf-8'
    document = etree.HTML(response.text)
    return document.xpath(NAME_XPATH)


def save_rankings(names, path=OUTPUT_PATH):
    """Write *names* to *path* as ``rank,name`` rows, echoing each row.

    Args:
        names: Iterable of names; the first one is given rank 1.
        path: Destination file; it is overwritten if it exists.
    """
    # utf-8-sig keeps the write independent of the locale encoding, which
    # could otherwise raise UnicodeEncodeError on non-ASCII names; the BOM
    # lets spreadsheet software recognise the encoding.
    with open(path, 'w', newline='', encoding='utf-8-sig') as out_file:
        writer = csv.writer(out_file, dialect='excel')
        writer.writerow(['rank', 'name'])
        for position, name in enumerate(names, start=1):
            row = [position, name]
            print(row)
            writer.writerow(row)


if __name__ == '__main__':
    save_rankings(fetch_university_names())