Python爬codeforces所有的題目資訊
阿新 • 發佈:2018-11-05
直接貼程式碼
"""Scrape every Codeforces contest problem (name + URL) into a MySQL table.

Walks the first 10 pages of http://codeforces.com/contests, follows each
contest link, reads the problem <select> options on the contest page, and
inserts one row per problem into the `cf` table of the `intelligence` DB.

NOTE(review): Codeforces has anti-crawling measures; the browser-like
User-Agent below is the only mitigation here — expect intermittent failures.
"""
from urllib.request import urlopen
from bs4 import BeautifulSoup
from urllib import request
import pymysql.cursors
import re  # kept from the original file; currently unused here

# Browser-like User-Agent, hoisted so both request sites share one copy.
USER_AGENT = ("Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 "
              "(KHTML, like Gecko) Chrome/69.0.3497.12 Safari/537.36")


def _fetch_soup(url):
    """GET *url* with a browser User-Agent and return it parsed as soup."""
    req = request.Request(url)
    req.add_header("User-Agent", USER_AGENT)
    html = urlopen(req).read().decode("UTF-8")
    return BeautifulSoup(html, "html.parser")


def main():
    """Crawl contest pages 1-10 and insert every problem into MySQL."""
    num = 0  # running count of rows inserted so far
    # Fix: the original opened (and closed) a fresh MySQL connection for
    # every single insert, inside the innermost loop; open one connection
    # for the whole run and guarantee it is closed on any exit path.
    connection = pymysql.connect(host='localhost',
                                 user='root',
                                 password='admin',
                                 db='intelligence',
                                 charset='utf8mb4')
    try:
        for i in range(10):
            print("當前正處於", (i + 1), "頁")
            soup = _fetch_soup("http://codeforces.com/contests/page/" + str(i + 1))
            # Rows 0-3 are table chrome and the last row is pagination;
            # the slice keeps only the actual contest rows.
            for row in soup.findAll("tr")[4:-1]:
                # Contest name: the first cell's text minus its trailing five
                # tokens (date/duration fields), re-joined without spaces —
                # admittedly fragile, but preserved from the original.
                tokens = row.find("td").get_text().split()
                contest_name = "".join(tokens[0:-5])
                contest_href = row.a["href"]
                contest_soup = _fetch_soup("http://codeforces.com" + contest_href)
                # The first <option> of the problem selector is a placeholder
                # ("Choose problem"), so it is discarded.
                options = contest_soup.find("select").findAll("option")[1:]
                sql = ("insert into `cf` (`problem`, `problemName`, "
                       "`problemUrl`) values (%s, %s, %s)")
                with connection.cursor() as cursor:
                    for opt in options:
                        num += 1
                        problem_url = ("http://codeforces.com" + contest_href
                                       + "/problem/" + opt["value"])
                        cursor.execute(sql, (contest_name, opt.get_text(),
                                             problem_url))
                        connection.commit()
                        # Typo fix: original printed 「以新增」 for 「已新增」.
                        print("當前已新增:", num, "條資料")
    finally:
        connection.close()
    print("完成")


if __name__ == "__main__":
    main()
codeforces有反爬蟲機制
資料庫設計: