1. 程式人生 > 我愛我家房源資訊爬取

我愛我家房源資訊爬取

我愛我家房源資訊獲取

  • 無特殊爬取需求
import requests
from lxml import etree
from mysql_link import mysql_connect

def _first(nodes, default=None):
    """Return the first item of an XPath result list, or *default* if it is empty."""
    return nodes[0] if nodes else default


def _sql_escape(value):
    """Escape backslashes and double quotes so *value* fits inside a "..." SQL literal."""
    return str(value).replace('\\', '\\\\').replace('"', '\\"')


def get_5i5j(count):
    """Scrape listing pages from bj.5i5j.com and insert each listing into MySQL.

    Requests ``https://bj.5i5j.com/zufang/huilongguan/n<page>/`` for pages
    ``1..count``, extracts title, area, location and price from every ``<li>``
    under ``div.list-con-box``, prints the extracted values and the generated
    SQL, and hands one INSERT statement per listing to ``mysql_.mysql_do``.

    List items that lack a title, area or price node are skipped instead of
    aborting the crawl with ``IndexError``.

    Args:
        count: Number of listing pages to fetch, starting at page 1.

    Raises:
        requests.RequestException: If a page cannot be fetched (including
            when the 10 second timeout expires).
    """
    mysql_ = mysql_connect()

    headers = {
        'Cookie': '_Jo0OQK=6B2EFBECBAB6D76BCDB834644B1B2D3BC2FFE7FE5ECC9F67E588A57175B2C4A553BB1B99580083D10FBE3107B2235A474021805425FF6DC8C7E536BB944BCFF6EB1DE8682CA7D10E3B498FB9E3C853EFEE298FB9E3C853EFEE215D8BEE34E43E5C0GJ1Z1Jw==; PHPSESSID=u518ep3lfv9sig9rt3jfdtrf2j; _ga=GA1.2.656332641.1534582894; _gid=GA1.2.1467514563.1534582894; yfx_c_g_u_id_10000001=_ck18081817013612438566341316835; yfx_f_l_v_t_10000001=f_t_1534582896243__r_t_1534582896243__v_t_1534582896243__r_c_0; Hm_lvt_94ed3d23572054a86ed341d64b267ec6=1534582902; Hm_lpvt_94ed3d23572054a86ed341d64b267ec6=1534583124',
        'Host': 'bj.5i5j.com',
        'Upgrade-Insecure-Requests': '1',
        'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.62 Safari/537.36',
    }

    url = 'https://bj.5i5j.com/zufang/huilongguan/n%d/'
    for num in range(1, count + 1):
        full_url = url % num
        # Without a timeout a stalled connection would block the crawl forever.
        response = requests.get(full_url, headers=headers, timeout=10)

        html_ele = etree.HTML(response.text)
        li_list = html_ele.xpath('//div[@class="list-con-box"]/ul/li')
        for li in li_list:
            title = _first(li.xpath('./div[2]/h3/a/text()'))
            # Area
            area = _first(li.xpath('./div[2]/div[1]/p[1]/text()'))
            # Price
            price = li.xpath('./div[2]/div[1]/div/p/strong')
            if title is None or area is None or not price:
                # Not a complete listing entry (the markup differs from the
                # expected layout); skip it rather than crash the whole run.
                continue
            print(area)

            # Location: link text(s) of the second <p> followed by its own text.
            distance = li.xpath('./div[2]/div[1]/p[2]/text()')
            distance1 = li.xpath('./div[2]/div[1]/p[2]/a/text()')
            if distance:
                # Join at most the first two pieces; a single piece is used as-is.
                dd = ''.join((distance1 + distance)[:2])
            else:
                dd = _first(distance1, '')
            print(dd)

            price_text = price[0].text
            print(price_text)

            # The statement is still built as a string because mysql_do's
            # signature is not visible here; values are escaped so quotes or
            # backslashes in scraped text cannot break out of the literal.
            # NOTE(review): assumes the server uses MySQL's default backslash
            # escaping; prefer a parameterized query if mysql_do supports one.
            sql = 'insert into 5i5j (title,area,descr,price)values("{}","{}","{}","{}")'.format(
                _sql_escape(title),
                _sql_escape(area),
                _sql_escape(dd),
                _sql_escape(price_text),
            )
            print(sql)
            mysql_.mysql_do(sql)


if __name__ == '__main__':
    get_5i5j(3)