爬蟲實戰(安居客二手房和租房資訊爬取)
阿新 • • 發佈:2022-12-05
不說廢話,直接上程式碼!
# Anjuke second-hand housing scraper.
#
# Downloads the paginated listing pages, extracts one record per listing
# card with CSS selectors and appends the records to a CSV file.
import csv
import sys
from pathlib import Path

import parsel  # HTML parsing with CSS selectors
import requests

# Destination CSV; rows are appended, so re-running the script adds to it.
SALE_OUTPUT_FILE = './txt/玉溪安居客二手房.csv'

# Seconds to wait for the server before giving up on a page. Without a
# timeout a single stalled connection would block the crawl forever.
SALE_REQUEST_TIMEOUT = 10

# NOTE(review): the cookie is a hard-coded browser session captured by the
# original author. It will expire and should not live in source control;
# load it from configuration or the environment instead.
# NOTE(review): the referer points at the rental site although this script
# requests sale pages -- kept as in the original, confirm it is intended.
SALE_HEADERS = {
    'cookie': 'sessid=E8557945-A48A-DECA-D8A1-102112E95525; aQQ_ajkguid=6005B887-989E-9E1E-EB74-C01BBCE2362D; twe=2; ajk-appVersion=; fzq_h=3cdd8dc4ff49c08b22268609df890299_1670208027764_406af778a32a4516a91f70fef3d1409d_986905475; id58=CrIclWONWihf8mLgZefRAg==; ctid=231; lps=https%3A%2F%2Fyx.zu.anjuke.com%2F%7Chttps%3A%2F%2Fyuxi.anjuke.com%2F; cmctid=2040; wmda_uuid=48c23ab83834bd513b73a85f47e86a23; wmda_new_uuid=1; wmda_session_id_6289197098934=1670208078386-18df4190-2941-5746; wmda_visited_projects=%3B6289197098934; obtain_by=1; xxzl_cid=df78ca61c6fa40b680bd980d2ff3bd01; xxzl_deviceid=out+sEolHB8HXmPxXFzGJuGNceTZiUsWOVAr25QoCxZqXuiGQDtGyv3aQwmOHRGV',
    'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/108.0.0.0 Safari/537.36',
    'referer': 'https://yx.zu.anjuke.com/fangyuan/',
}


def _join_text(fragments, sep=''):
    """Strip every text fragment, drop the empty ones and join the rest.

    ``Selector.getall()`` returns a list; writing that list to a CSV cell
    would store its Python repr (``['3', '室', ...]``) instead of the text.
    """
    stripped = (piece.strip() for piece in fragments if piece)
    return sep.join(piece for piece in stripped if piece)


def _clean(text):
    """Return *text* without surrounding whitespace, or '' when it is None."""
    return text.strip() if text else ''


def _parse_sale_listing(card):
    """Build one CSV row from a ``.property-content`` listing node.

    Returns a tuple of strings in the column order: title, layout, area,
    orientation, floor, year built, community name, community address,
    total price, unit price. Missing nodes yield empty strings.
    """
    detail = '.property-content-info:nth-child(1) .property-content-info-text'
    community = '.property-content-info:nth-child(2)'

    title = _clean(card.css('.property-content-title-name::text').get())
    layout = _join_text(card.css(f'{detail}:nth-child(1) span::text').getall())
    area = _join_text(card.css(f'{detail}:nth-child(2)::text').getall())
    orientation = _clean(card.css(f'{detail}:nth-child(3)::text').get())
    floor = _clean(card.css(f'{detail}:nth-child(4)::text').get())
    year_built = _clean(card.css(f'{detail}:nth-child(5)::text').get())
    community_name = _join_text(
        card.css(f'{community} .property-content-info-comm-name::text').getall()
    )
    # The address is split over several <span> elements; keep them apart.
    community_address = _join_text(
        card.css(
            f'{community} .property-content-info-comm-address span::text'
        ).getall(),
        sep=' ',
    )
    total_price = _join_text(
        card.css(
            '.property-price .property-price-total .property-price-total-num::text'
        ).getall()
    )
    unit_price = _join_text(
        card.css('.property-price .property-price-average::text').getall()
    )
    return (
        title, layout, area, orientation, floor, year_built,
        community_name, community_address, total_price, unit_price,
    )


def scrape_sale_listings():
    """Crawl result pages 1-50 and append every listing to the CSV file.

    A page that cannot be downloaded is reported on stderr and skipped so
    that one network error does not discard the rest of the crawl.
    """
    # open() does not create missing directories.
    Path(SALE_OUTPUT_FILE).parent.mkdir(parents=True, exist_ok=True)

    # Open the file once for the whole crawl instead of once per listing.
    with open(SALE_OUTPUT_FILE, mode='a', encoding='utf-8-sig', newline='') as f:
        writer = csv.writer(f)
        for page in range(1, 51):
            print(f"正在爬取第-----{page}------頁資料資訊!")
            # NOTE(review): the URL carries no city subdomain although the
            # output file is named after Yuxi -- confirm the intended host.
            url = f'https://anjuke.com/sale/p{page}/'
            try:
                response = requests.get(
                    url=url, headers=SALE_HEADERS, timeout=SALE_REQUEST_TIMEOUT
                )
                response.raise_for_status()
            except requests.RequestException as exc:
                print(f"page {page} skipped: {exc}", file=sys.stderr)
                continue

            cards = parsel.Selector(response.text).css('.property-content')
            if not cards:
                # Typically an anti-bot/captcha page or a changed layout.
                print(f"page {page}: no listings found", file=sys.stderr)
            for card in cards:
                row = _parse_sale_listing(card)
                print(row[0])
                writer.writerow(row)
    print("爬取完成!")


if __name__ == "__main__":
    scrape_sale_listings()
這是爬取下來的資料格式;當然,裡面的標點符號我已經替換掉了。
租房資訊:
# Anjuke rental listings scraper.
#
# Downloads the paginated rental pages, extracts one record per listing
# with CSS selectors and appends the records to a CSV file.
import csv
import sys
from pathlib import Path

import parsel  # HTML parsing with CSS selectors
import requests

# Destination CSV; rows are appended, so re-running the script adds to it.
RENTAL_OUTPUT_FILE = './txt/玉溪安居客租房.csv'

# Seconds to wait for the server before giving up on a page. Without a
# timeout a single stalled connection would block the crawl forever.
RENTAL_REQUEST_TIMEOUT = 10

# NOTE(review): the cookie is a hard-coded browser session captured by the
# original author. It will expire and should not live in source control;
# load it from configuration or the environment instead.
RENTAL_HEADERS = {
    'cookie': 'sessid=E8557945-A48A-DECA-D8A1-102112E95525; aQQ_ajkguid=6005B887-989E-9E1E-EB74-C01BBCE2362D; twe=2; ajk-appVersion=; fzq_h=3cdd8dc4ff49c08b22268609df890299_1670208027764_406af778a32a4516a91f70fef3d1409d_986905475; id58=CrIclWONWihf8mLgZefRAg==; ctid=231; lps=https%3A%2F%2Fyx.zu.anjuke.com%2F%7Chttps%3A%2F%2Fyuxi.anjuke.com%2F; cmctid=2040; wmda_uuid=48c23ab83834bd513b73a85f47e86a23; wmda_new_uuid=1; wmda_session_id_6289197098934=1670208078386-18df4190-2941-5746; wmda_visited_projects=%3B6289197098934; obtain_by=1; xxzl_cid=df78ca61c6fa40b680bd980d2ff3bd01; xxzl_deviceid=out+sEolHB8HXmPxXFzGJuGNceTZiUsWOVAr25QoCxZqXuiGQDtGyv3aQwmOHRGV',
    'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/108.0.0.0 Safari/537.36',
    'referer': 'https://yx.zu.anjuke.com/fangyuan/',
}


def _join_text(fragments, sep=''):
    """Strip every text fragment, drop the empty ones and join the rest.

    ``Selector.getall()`` returns a list; writing that list to a CSV cell
    would store its Python repr (``['3', '室', ...]``) instead of the text.
    """
    stripped = (piece.strip() for piece in fragments if piece)
    return sep.join(piece for piece in stripped if piece)


def _clean(text):
    """Return *text* without surrounding whitespace, or '' when it is None."""
    return text.strip() if text else ''


def _parse_rental_listing(item):
    """Build one CSV row from a ``.zu-itemmod`` listing node.

    Returns a tuple of strings in the column order: title, layout, area,
    price, rental type, orientation, elevator, location, community, floor,
    contact.

    Raises:
        IndexError: the listing has fewer text nodes than the positional
            lookups below expect, i.e. its markup differs from the usual one.
    """
    # Query each node list once; several fields are picked from it by index.
    info_bold = item.css('.zu-info .strongbox::text').getall()
    # NOTE(review): the original reads the hall count from the unscoped
    # '.strongbox' selector (not '.zu-info .strongbox'); kept as is because
    # the two may differ depending on the page markup.
    all_bold = item.css('.strongbox::text').getall()
    detail_texts = item.css('.zu-info .details-item::text').getall()
    paragraph_texts = item.css('.zu-info p::text').getall()

    title = _clean(info_bold[0])
    layout = _clean(info_bold[1]) + '室' + _clean(all_bold[2]) + '廳'
    area = _clean(info_bold[3]) + '㎡'
    price = _join_text(item.css('.zu-side .strongbox::text').getall())
    rental_type = _join_text(item.css('.zu-info .cls-1::text').getall())
    orientation = _join_text(item.css('.zu-info .cls-2::text').getall())
    elevator = _join_text(item.css('.zu-info .cls-3::text').getall())
    location = _clean(detail_texts[8])
    # Several links may match; keep their texts apart.
    community = _join_text(item.css('.zu-info a::text').getall(), sep=' ')
    floor = _clean(paragraph_texts[4])
    contact = _clean(paragraph_texts[5])
    return (
        title, layout, area, price, rental_type, orientation,
        elevator, location, community, floor, contact,
    )


def scrape_rental_listings():
    """Crawl result pages 1-33 and append every listing to the CSV file.

    A page that cannot be downloaded, or a listing whose markup does not
    match the expected layout, is reported on stderr and skipped so that a
    single failure does not discard the rest of the crawl.
    """
    # open() does not create missing directories.
    Path(RENTAL_OUTPUT_FILE).parent.mkdir(parents=True, exist_ok=True)

    # Open the file once for the whole crawl instead of once per listing.
    with open(RENTAL_OUTPUT_FILE, mode='a', encoding='utf-8-sig', newline='') as f:
        writer = csv.writer(f)
        for page in range(1, 34):
            print(f"正在爬取第-----{page}------頁資料資訊!")
            url = f'https://yx.zu.anjuke.com/fangyuan/p{page}/'
            try:
                response = requests.get(
                    url=url, headers=RENTAL_HEADERS, timeout=RENTAL_REQUEST_TIMEOUT
                )
                response.raise_for_status()
            except requests.RequestException as exc:
                print(f"page {page} skipped: {exc}", file=sys.stderr)
                continue

            items = parsel.Selector(response.text).css('.zu-itemmod')
            if not items:
                # Typically an anti-bot/captcha page or a changed layout.
                print(f"page {page}: no listings found", file=sys.stderr)
            for position, item in enumerate(items, start=1):
                try:
                    row = _parse_rental_listing(item)
                except IndexError:
                    print(
                        f"page {page}, listing {position} skipped: "
                        "unexpected markup",
                        file=sys.stderr,
                    )
                    continue
                print(row[7])  # location, as in the original progress output
                writer.writerow(row)
    print("爬取完成!")


if __name__ == "__main__":
    scrape_rental_listings()
這是爬取後轉換為 .xlsx 格式的結果。
懂的都懂。本篇先完成資料爬取,後續會再更新關於資料處理的文章!