Python爬取百度貼吧標題
阿新 • • 發佈:2018-12-19
# -*- coding: utf-8 -*-
"""
Scrape thread titles from a Baidu Tieba forum listing page.

For every thread link found on the page, the link's href and title are
printed to stdout, and the title followed by ``+++`` and a newline is
written to ``douban.txt`` (UTF-8).

Created on Sun Nov 4 10:22:07 2018

@author: wangf
"""
import codecs
from urllib.request import urlopen

from bs4 import BeautifulSoup

i = 0

# The context manager guarantees the output file is flushed and closed even
# if the download or the HTML parsing raises.
with codecs.open("douban.txt", "w", "utf-8") as f:
    # With a bound of 50 and a step of 50 the body runs exactly once.
    # NOTE(review): the URL carries no page offset, so raising the bound
    # would re-request the same listing - confirm the intended pagination
    # parameter before extending the loop.
    while i < 50:
        # ``kw`` is the percent-encoded UTF-8 forum name.
        a = "http://tieba.baidu.com/f?ie=utf-8&kw=%E4%B8%AD%E5%8C%97%E5%A4%A7%E5%AD%A6%E5%90%A7&fr=search"
        i += 50

        # Integer division keeps the page label an int ("1", not "1.0").
        z = i // 50
        print("第" + str(z) + "頁")

        # Close the HTTP response as soon as the document has been parsed.
        with urlopen(a) as html:
            bsObj = BeautifulSoup(html, "html.parser")

        # Thread title anchors are selected by the CSS class "j_th_tit".
        for links in bsObj.find_all("a", {"class": "j_th_tit"}):
            print(links.attrs["href"] + " " + links.text)
            f.write(links.text + "+++" + "\n")