1. 程式人生 > day51——爬蟲(一)

day51——爬蟲(一)

ssi odi {0} linux import post ear quest mpi

 1 #!/usr/bin/env python
 2 # -*- coding: utf-8 -*-
 3 # @Time    : 2018/1/11 22:07
 4 # @Author  : lingxiangxiang
 5 # @File    : demon1.py
 6 ‘‘‘爬蟲阿銘linux教程,保存為本地的pdf文件‘‘‘
 7 # 需要知道三招教你做人
 8 # pdfkit.from_string("hello world", "1.pdf")
 9 # pdfkit.from_url("www.baidu.com", "2.pdf")
10 # pdfkit.from_file("hello.html", "3.pdf")
11 import re 12 13 import os 14 15 import pdfkit 16 import requests 17 18 19 if not os.path.exists("aminglinux"): 20 os.mkdir("aminglinux") 21 os.chdir("aminglinux") 22 23 url = "http://www.apelearn.com/study_v2/" 24 s = requests.session() 25 text = s.get(url).text 26 print(text) 27 reg = re.compile(r
<li class="toctree-l1"><a class="reference internal" href="(.*)">.*</a></li>) 28 result = reg.findall(text) 29 res = list(set(result)) 30 pdfUrl = "http://www.apelearn.com/study_v2/" 31 for i in res: 32 url = "{0}{1}".format(pdfUrl, i) 33 pdfFileName = i.replace("html", "pdf
") 34 print(pdfFileName) 35 try: 36 pdfkit.from_url(url, pdfFileName) 37 except: 38 continue

day51——爬蟲(一)