1. 程式人生 > 其它 > 前程無憂職位資訊爬取

前程無憂職位資訊爬取

技術標籤:python

前程無憂職位資訊爬取

# coding=UTF-8
from urllib.request import urlopen, Request
from bs4 import BeautifulSoup
import requests
import bs4
import json
import xlwt

header = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/85.0.4183.102 Safari/537.36'
}

# Search-result listing URL; the single ``{}`` placeholder is the page number.
SEARCH_URL = "https://search.51job.com/list/080200,000000,0000,00,9,99,%25E5%25A4%25A7%25E6%2595%25B0%25E6%258D%25AE,2,{}.html?lang=c&postchannel=0000&workyear=99&cotype=99&degreefrom=99&jobterm=99&companysize=99&ord_field=0&dibiaoid=0&line=&welfare="

# JavaScript assignment that precedes the JSON payload inside the listing page.
SEARCH_RESULT_MARKER = 'window.__SEARCH_RESULT__ = '

# Spreadsheet header row, in column order.
COLUMN_TITLES = ('公司名稱', '職務名稱', '薪資', '求職招聘網址', '崗位要求')

# Keys read from each search-result entry, in the order of the first four columns.
RESULT_FIELDS = ('company_name', 'job_name', 'providesalary_text', 'job_href')

# Value written to the description column when the detail page has no
# description block.
MISSING_DESCRIPTION = ' '


def extract_search_results(script_text):
    """Return the job postings embedded in one inline script's text.

    Args:
        script_text: Raw text of a ``<script>`` element (may be empty).

    Returns:
        The list stored under ``engine_search_result`` in the JSON that
        follows ``window.__SEARCH_RESULT__ = ``. An empty list is returned
        when the marker is absent or the key is missing, so unrelated inline
        scripts are skipped instead of being fed to the JSON parser.

    Raises:
        json.JSONDecodeError: The marker is present but what follows it is
            not valid JSON.
    """
    _, marker, payload = script_text.partition(SEARCH_RESULT_MARKER)
    if not marker:
        return []
    return json.loads(payload).get('engine_search_result') or []


def normalize_description(raw_text):
    """Return *raw_text* with every run of whitespace removed."""
    return "".join(raw_text.split())


def fetch_soup(url):
    """Download *url* with the shared request headers and parse it as HTML.

    The HTTP response is closed before returning.
    """
    with urlopen(Request(url, headers=header)) as response:
        return bs4.BeautifulSoup(response.read(), 'html.parser')


def fetch_job_description(job_url):
    """Return the whitespace-stripped description text of one job page.

    Falls back to a single space when the description ``<div>`` is not found,
    so the spreadsheet cell is still written.
    """
    block = fetch_soup(job_url).find('div', {"class": 'bmsg job_msg inbox'})
    if block is None:
        return MISSING_DESCRIPTION
    return normalize_description(block.get_text())


def scrape_page(page):
    """Scrape one search-result page into its own ``.xls`` workbook.

    Writes a header row, then one row per posting (company, job title,
    salary, job URL, description), and saves the workbook under a file name
    that contains the page number.
    """
    workbook = xlwt.Workbook(encoding='utf-8')
    worksheet = workbook.add_sheet('My Worksheet')
    for column, title in enumerate(COLUMN_TITLES):
        worksheet.write(0, column, label=title)

    listing = fetch_soup(SEARCH_URL.format(page))
    postings = []
    for script in listing.find_all('script', type='text/javascript'):
        # NOTE(review): ``.string`` is used rather than ``get_text()`` because
        # some Beautiful Soup releases appear to omit <script> content from
        # get_text(); confirm against the installed version.
        postings.extend(extract_search_results(script.string or ''))

    # Row 0 is the header, so postings start at row 1.
    for row, posting in enumerate(postings, start=1):
        for column, field in enumerate(RESULT_FIELDS):
            worksheet.write(row, column, label=posting[field])
        worksheet.write(
            row,
            len(RESULT_FIELDS),
            label=fetch_job_description(posting['job_href']),
        )

    workbook.save('前程無憂{}.xls'.format(page))


def main(first_page=1, last_page=5):
    """Scrape result pages *first_page* through *last_page*, inclusive."""
    for page in range(first_page, last_page + 1):
        scrape_page(page)


if __name__ == "__main__":
    main()