1. 程式人生 > 實用技巧 > 使用bs4爬蟲當前網頁,並下載網頁資料

使用bs4爬蟲當前網頁,並下載網頁資料

需求:

"""獲取所有的職業資訊,並以崗位、公司、地區、薪資格式來分別顯示資料"""

# 匯入需要的庫或包
import csv

import bs4
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys

# Launch Chrome, open the Boss Zhipin home page, and let element lookups
# retry for up to 10 s before failing (implicit wait).
browser = webdriver.Chrome()
browser.get('https://www.zhipin.com/')
browser.implicitly_wait(10)

# Type the search keyword into the search box and submit with ENTER.
# NOTE: find_element_by_css_selector() was removed in Selenium 4; the
# By-based locator API is the supported replacement.
check_ele = browser.find_element(By.CSS_SELECTOR, '.ipt-search')
check_ele.send_keys('python開發工程師')
# Press Enter to run the search.
check_ele.send_keys(Keys.ENTER)

# Accumulator for scraped rows: [job_name, job_area, salary, company_name].
info = []

def get_info(html):
    """Parse a Boss Zhipin search-result page and append one
    [job_name, job_area, salary, company_name] row per job card to the
    module-level ``info`` list.

    Cards missing any expected field are skipped instead of crashing
    (``select_one`` returns None when a selector does not match, and the
    original code would raise AttributeError on ``.text``).
    """
    soup = BeautifulSoup(html, 'lxml')
    main = soup.select_one('#main')
    if main is None:
        # Page not loaded or layout changed — nothing to extract.
        return
    for job in main.select('.job-primary'):
        if not isinstance(job, bs4.element.Tag):
            continue
        company = job.select_one('.company-text')
        fields = [
            job.select_one('.job-name'),
            job.select_one('.job-area'),
            job.select_one('.red'),  # salary is rendered in red on the site
            company.select_one('.name') if company is not None else None,
        ]
        if any(f is None for f in fields):
            continue  # incomplete card — skip rather than crash
        info.append([f.text for f in fields])
# Persist the scraped rows to disk as CSV.
def save_data(data, path='../job_info.csv'):
    """Write rows of [job, area, salary, company] to *path* as CSV.

    Args:
        data: iterable of 4-item rows (as produced by get_info).
        path: output file; defaults to the original hard-coded location
            for backward compatibility.
    """
    # newline='' stops the csv module emitting blank lines on Windows;
    # utf-8 keeps the Chinese header intact.
    with open(path, 'w', newline='', encoding='utf-8') as f:
        writer = csv.writer(f)
        writer.writerow(['崗位名稱', '工作區域', '薪資', '公司名稱'])
        for row in data:
            print(row)  # echo each row while saving, as in the original
            writer.writerow(row)
# Scrape the page currently loaded in the browser, save the rows,
# then shut the browser down.
get_info(browser.page_source)
save_data(info)
# quit() (not close()) terminates the chromedriver process as well as the
# window; close() alone can leave an orphaned driver process behind.
browser.quit()