Python + Selenium 爬取房天下新房詳情

阿新 • • 發佈：2021-06-16

新房詳情

from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from time import sleep
import json
from datetime import datetime
import re

# Build the Chrome options used for the whole scraping session.
option = webdriver.ChromeOptions()
# Exclude the 'enable-automation' and 'enable-logging' Chrome switches; the
# latter keeps Chrome from printing noisy, useless log output to the console.
# (The original literal was split across two lines, which is a syntax error.)
option.add_experimental_option("excludeSwitches", ['enable-automation', 'enable-logging'])

# Raw string so the backslashes in the Windows path are never interpreted as
# escape sequences; `options=` replaces the deprecated `chrome_options=` keyword.
b = webdriver.Chrome(executable_path=r"D:\chrome_driver_win32\chromedriver.exe", options=option)
# Page number substituted into the listing URL.
num = 1
base_urls = "https://nanjing.newhouse.fang.com/house/s/b9{}/".format(num)

# Open the listing page and grab the anchor element of every entry on it.
b.get(base_urls)
name = b.find_elements_by_xpath('//*[@class="nl_con clearfix"]/ul/li/div/div[1]/a')

# Detail-page URLs, one per anchor, in page order.
house_lst = [anchor.get_attribute('href') for anchor in name]

# Visit each detail page and collect one dict per property into data_list.
data_list = []
for url in house_lst:
    b.get(url)
    data = {}
    # Breadcrumb on the property page: the third item is the first-level district.
    quyu = b.find_element_by_xpath(
        '//div[@class="br_left"]//ul[@class="tf f12"]//li[3]/a').text

    # String slice: drop the trailing two characters of the breadcrumb text.
    data['subarea'] = quyu[:-2]
    data['area'] = b.find_element_by_xpath('//div[@class="s2"]/div/a').text  # current city

    try:
        # Follow the "樓盤詳情" (property details) tab to the attribute page.
        fangyuan_url = b.find_element_by_xpath(
            "//*[@class='main_1200 tf']//div[@class='cxfnav']//a[contains(text(),'樓盤詳情')]")
        href1 = fangyuan_url.get_attribute('href')
        b.get(href1)

        # Each "main-item" section holds a list of attributes; dump them to stdout.
        main_items = b.find_elements_by_xpath('//div[@class="main_1200 tf"]//div[@class="main_1200"]//div[@class="main-cont clearfix"]//div[@class="main-left"]//div[@class="main-item"]')
        for item in main_items:
            # './/' makes the XPath relative to the current section element.
            nodes1 = item.find_elements_by_xpath('.//ul//li')
            for node in nodes1:
                print(node.text)
                print('-'*50)

        # Location and surroundings: the map lives in an iframe, so load its
        # source URL directly and read the coordinates out of the page source.
        dingwei_url = b.find_element_by_xpath('//div[@class="mapbox_dt"]/iframe').get_attribute(
            "src")
        b.get(dingwei_url)
        sound_code = b.page_source
        # Capture the values that follow "mapx": and "mapy": in the page source.
        re_search = re.search(r'"mapx":"(.*?)","mapy":"(.*?)"', sound_code, re.DOTALL)
        if re_search is None:
            # Previously this surfaced as a silently swallowed AttributeError.
            print('no map coordinates found for {}'.format(url))
        else:
            # Stored as "mapy,mapx".
            data['housecoord'] = re_search.group(2) + "," + re_search.group(1)

    except Exception as e:
        # Best-effort scraping: keep whatever was collected for this property,
        # but report the failure instead of hiding it.
        print('failed to scrape details for {}: {!r}'.format(url, e))

    data_list.append(data)
    # NOTE(review): this stops after the first property, so only one record is
    # ever collected. It looks like a debugging leftover; confirm before removing.
    break

# Report the scraped records and append them to the output file, one JSON
# object per line. The browser is closed in `finally` so a failure while
# printing or writing does not leave the browser/driver process running.
try:
    print(data_list)

    with open('詳情(南京).jsonlines', 'a', encoding='utf8') as f:
        for data in data_list:
            # ensure_ascii=False keeps the Chinese text readable in the file.
            json.dump(data, f, ensure_ascii=False)
            f.write('\n')
finally:
    b.quit()

python+ selenium爬取房天下新房詳情

新房詳情 from selenium import webdriver from selenium.webdriver.chrome.options import Options from time import sleep

Python爬取房天下二手房資訊

一、相關知識 BeautifulSoup4使用 python將資訊寫入csv import csv with open("11.csv","w") as csvfile:

Python selenium爬取微博資料程式碼例項

爬取某人的微博資料，把某人所有時間段的微博資料都爬下來。具體思路：建立driver-----get網頁----找到並提取資訊-----儲存csv----翻頁----get網頁（開始迴圈）----...----沒有“下一頁”就結束，

Python selenium爬取微信公眾號文章程式碼詳解

參照資料：selenium webdriver新增cookie: https://www.jb51.net/article/193102.html 需求：想閱讀微信公眾號歷史文章，但是每次找回看得地方不方便。

Python selenium 爬取cnvd(國家資訊保安漏洞共享平臺)

# coding = utf-8 # @author :今夕 # @Time :2021.08.06 16:09 # @file :mian.py # @software :PyCharm import time from selenium import webdriver from bs4 import BeautifulSoup import re import pymysql import random # 應用漏

Python selenium 爬取cnvd(國家資訊保安漏洞共享平臺)剩餘部分

# coding = utf-8 # @author :今夕 # @Time :2021.08.10 09:22 # @file :main2.py # @software :PyCharm import time from selenium import webdriver from bs4 import BeautifulSoup import re import pymysql import random

python Selenium爬取實戰

### python Selenium爬取實戰 @[toc]目標網站： ```https://spa2.scrape.center/``` 這個網站是一個電影評分網站，採用selenium進行爬取

Python進階之使用selenium爬取淘寶商品資訊功能示例

本文例項講述了Python進階之使用selenium爬取淘寶商品資訊功能。分享給大家供大家參考，具體如下：

基於python requests selenium爬取excel vba過程解析

目的：基於辦公與網際網路隔離，自帶的office軟體沒有帶本地幫助工具，因此在寫vba程式時比較不方便(後來發現07有自帶，心中吐血，瞎折騰些什麼）。所以想到通過爬蟲在官方摘錄下來作為參考。

python 使用selenium爬取進擊的巨人漫畫

1 import requests 2 from bs4 import BeautifulSoup 3 import os 4 from selenium import webdriver 5 from selenium.webdriver.firefox.webdriver import WebDriver