如何分頁爬取資料——BeautifulSoup
阿新 • • 發佈:2018-11-05
'''本次爬取講歷史網站'''
#!usr/bin/env python
#-*- coding:utf-8 _*-
"""
@author:Hurrican
@file: 分頁爬取資料.py
@time: 2018/11/03 9:30
"""
from bs4 import BeautifulSoup
import requests
def get_urls(pages=20):
    """Build the list of listing-page URLs to crawl.

    The site's first page lives at the bare category URL with no
    ``page_N.html`` suffix, so it is inserted separately at the front;
    pages 2..pages follow the ``page_{n}.html`` pattern.

    :param pages: total number of pages to crawl, including page 1
        (default 20, matching the original hard-coded range).
    :return: list of URLs, with page 1 first.
    """
    base = 'http://www.jianglishi.cn/jiemi/'
    urls = ['{}page_{}.html'.format(base, i) for i in range(2, pages + 1)]
    urls.insert(0, base)
    return urls
def get_title():
    """Fetch every listing page and print each article title.

    Iterates over the URLs from ``get_urls()``, parses each page with
    html5lib, and prints the link text found inside every
    ``<div class="title">`` element.
    """
    for url in get_urls():
        response = requests.get(url)
        # The site serves UTF-8; set it explicitly so requests does not
        # mis-guess the encoding from the headers.
        response.encoding = 'utf-8'
        soup = BeautifulSoup(response.text, 'html5lib')
        # find_all() is the current name; findAll is a deprecated alias.
        for piece in soup.find_all('div', attrs={'class': 'title'}):
            link = piece.a
            # Guard against title divs without an <a> child, which would
            # otherwise raise AttributeError; get_text() also handles
            # links with mixed children where .string returns None.
            if link is not None:
                print(link.get_text(strip=True))


if __name__ == '__main__':
    get_title()
執行結果: