教師結構化面試,一鍵獲取資料
阿新 • • 發佈:2018-11-09
教師結構化面試,一個程式設計師男朋友,為愛而碼
【需求】
1、教師結構化面試資料太少,而且最好能列印
2、資料儲存word檔案,方便共享
作為一名程式設計師,開始coding...
把如下程式碼儲存為download.py,雙擊執行,結果如下:
#coding=utf-8
from lxml import etree
import requests
from docx import Document
import re
class Download:
    """Scrape structured-interview articles and save each one as a .docx file.

    The crawler walks the listing pages built from ``LIST_URL``, collects the
    article links found there, downloads every page of each article and writes
    the paragraphs into ``<title>.docx`` in the current working directory.
    """

    # Listing-page URL template; ``{}`` receives the listing page number.
    LIST_URL = "http://wap.zgjsks.com/html/jszp/mianshi/jiegouhua/{}.html"
    # Listing pages to crawl (1 through 30 inclusive).
    LIST_PAGES = range(1, 31)
    # Seconds to wait for the server before giving up on a request; without a
    # timeout a stalled connection would block the script forever.
    REQUEST_TIMEOUT = 30
    # A paragraph equal to one of these marks the "related recommendations"
    # footer; everything from there on is left out of the document.
    # NOTE(review): the second entry is the Simplified-Chinese spelling of the
    # first; the target site presumably serves Simplified text - confirm.
    STOP_MARKERS = ('相關推薦:', '相关推荐:')

    def _selectNodes(self, url, xpath):
        """Fetch *url* and return the list of nodes matching *xpath*.

        Network errors raised by ``requests`` propagate to the caller.
        """
        response = requests.get(url, timeout=self.REQUEST_TIMEOUT)
        tree = etree.HTML(response.content)
        # Defensive: treat a page for which the parser produced no tree as
        # having no matches instead of failing on ``None.xpath``.
        if tree is None:
            return []
        return tree.xpath(xpath)

    @staticmethod
    def _parsePageBound(pagerText):
        """Turn pager text such as ``"(1/3)"`` into an exclusive page bound.

        Returns the number after the ``/`` plus one, or ``2`` (meaning a
        single page) when *pagerText* is ``None`` or not in that format.
        """
        try:
            total = pagerText.strip("\n\t").strip("()").split("/")[1]
            return int(total) + 1
        except (AttributeError, IndexError, ValueError):
            return 2

    @staticmethod
    def _safeFileName(title):
        """Replace characters that are not allowed in file names with ``-``.

        Titles can contain characters such as ``|`` or ``/``; the backslash
        is replaced as well so a title can never be read as a path.
        """
        return re.sub(r'[\\/:*?"<>|]', '-', title)

    def getPageUrl(self):
        """Collect article links from every listing page.

        Returns:
            dict mapping article URL (the anchor's ``href``) to its title.
        """
        paperAll = {}
        for pageNum in self.LIST_PAGES:
            anchors = self._selectNodes(
                self.LIST_URL.format(pageNum),
                "//*[contains(concat(' ', @class, ' '), 'recruit_right')]/b/a",
            )
            for anchor in anchors:
                paperUrl = anchor.get('href')
                if not paperUrl:
                    # An anchor without a target cannot be downloaded.
                    continue
                paperTitle = anchor.text
                if paperTitle is None:
                    # The visible title sits inside nested markup.
                    paperTitle = self.getNodeText(anchor)
                paperAll[paperUrl] = paperTitle
        return paperAll

    def getPagerNextMaxNum(self, paperUrl):
        """Return the exclusive upper bound of page numbers for an article.

        The value is read from the first element whose class contains
        ``fenye``; when no such element exists or its text cannot be parsed
        the result is ``2``, i.e. the article is treated as a single page.
        """
        pagers = self._selectNodes(
            paperUrl, "//*[contains(concat(' ', @class, ' '), 'fenye')]"
        )
        if not pagers:
            return 2
        return self._parsePageBound(pagers[0].text)

    def getNodeText(self, nodeP):
        """Return all text inside *nodeP*, including arbitrarily nested tags.

        The node's own ``tail`` (text following its closing tag) is not part
        of the result; the tails of its descendants are.
        """
        parts = []
        if nodeP.text is not None:
            parts.append(nodeP.text)
        for childNode in nodeP:
            # Recurse so text below the first nesting level is kept too.
            parts.append(self.getNodeText(childNode))
            if childNode.tail is not None:
                parts.append(childNode.tail)
        return "".join(parts)

    def download(self):
        """Download every listed article and write it to ``<title>.docx``.

        NOTE(review): pages are requested as ``<name>_<n>.html`` starting at
        ``n = 1``; confirm the site serves the first page under ``_1``.
        """
        for paperUrl, paperTitle in self.getPageUrl().items():
            print("[*D]{} -- {}".format(paperTitle, paperUrl))
            pagerNextMaxNum = self.getPagerNextMaxNum(paperUrl)
            paperContent = []
            for pageNextUrlNum in range(1, pagerNextMaxNum):
                pageNextUrl = paperUrl.replace(
                    ".html", "_{}.html".format(pageNextUrlNum)
                )
                paragraphs = self._selectNodes(
                    pageNextUrl,
                    "//*[contains(concat(' ', @class, ' '), 'article_box_info')]/p",
                )
                for paragraph in paragraphs:
                    paperContent.append(self.getNodeText(paragraph))
            document = Document()
            document.add_heading(paperTitle, 0)
            for paperLine in paperContent:
                if paperLine.strip() in self.STOP_MARKERS:
                    break
                document.add_paragraph(paperLine)
            document.save('{}.docx'.format(self._safeFileName(paperTitle)))
# Script entry point: when the file is run directly (not imported), crawl the
# site and write the .docx files into the current working directory.
if __name__ == "__main__":
    downloadObj = Download()
    downloadObj.download()