1. 程式人生 > >如何分頁爬取資料--BeautifulSoup

如何分頁爬取資料--BeautifulSoup

'''本次爬取講歷史網站'''
#!/usr/bin/env python

# -*- coding: utf-8 -*-
"""
@author:Hurrican
@file: 分頁爬取資料.py
@time: 2018/11/03 9:30

"""
from bs4 import BeautifulSoup
import requests

def get_urls():
    """Build the list of listing-page URLs to crawl.

    The first page is the section root itself; pages 2 through 20 follow
    the ``page_<n>.html`` naming pattern under that root.

    Returns:
        list: 20 URL strings, in page order (root first, then pages 2..20).
    """
    base = 'http://www.jianglishi.cn/jiemi/'
    # Start with the root page so no O(n) front-insert is needed afterwards.
    urls = [base]
    urls.extend('{}page_{}.html'.format(base, i) for i in range(2, 21))
    return urls

def get_title():
    """Print the link text of every title entry on each listing page.

    For each URL returned by ``get_urls()``, download the page, parse it
    with the ``html5lib`` parser, and print the string of the ``<a>`` tag
    found inside every ``<div class="title">``.

    Raises:
        requests.RequestException: if a page cannot be fetched (including
            when the request exceeds the timeout).
    """
    for page_url in get_urls():
        # A timeout keeps one unresponsive page from hanging the whole crawl.
        response = requests.get(page_url, timeout=10)
        # Decode the body as UTF-8 explicitly instead of using the encoding
        # that requests inferred from the response headers.
        response.encoding = 'utf-8'
        soup = BeautifulSoup(response.text, 'html5lib')

        # find_all is the current name; findAll is only a legacy alias.
        for title_div in soup.find_all(name='div', attrs={'class': 'title'}):
            link = title_div.a
            if link is None:
                # A title div without a link would otherwise raise
                # AttributeError on ``.string``; skip it.
                continue
            print(link.string)

if __name__ == '__main__':
    # Run the crawl only when executed as a script, not when imported.
    get_title()
執行結果: