Python+Selenium進行網頁多視窗切換爬蟲
阿新 • • 發佈:2019-01-24
#!/usr/bin/python
# -*- coding:utf8 -*-
"""Scrape contact details for the companies listed in an Excel workbook.

For every row of the workbook the script types the company name into the
site's search box, clicks through to a result, and switches to the browser
window that the click opened.  It reads phone / e-mail / website / address
from that window into the DataFrame, closes it, and returns to the search
window for the next row.

The workbook path and the start URL are redacted placeholders in this copy
and must be filled in before the script can run.  The code is written so
that it parses under both Python 2 and Python 3.
"""
import os
import random
import re
import time

import numpy as np
import pandas as pd
import requests
import xlsxwriter
import xlwt
from selenium import webdriver
from selenium.webdriver.common.by import By

# Raw string: the backslashes are path separators, not escape sequences.
COMPANY_FILE = r'E:\***\company.xlsx'
START_URL = '**********'

# Column of the workbook that holds the text typed into the search box.
NAME_COLUMN = u"公司名稱"

# Element clicked right after typing the name (presumably the search
# button or first suggestion -- confirm against the live page).
SEARCH_SUBMIT_XPATH = (
    "/html/body/div[1]/div[1]/div[1]/div[2]/div/div[2]/div[2]/div/div[1]/div[1]/div"
)
# Element clicked on the following page; the script expects this click to
# open the company's detail page in a new window.
RESULT_LINK_XPATH = (
    "/html/body/div[2]/div[1]/div/div/div[1]/div[3]/div[1]/div[2]/div[1]/div[1]/a/span/em"
)

# (DataFrame column, XPath of the element whose text is stored there).
DETAIL_FIELDS = (
    # phone
    (u"電話",
     "/html/body/div[2]/div[1]/div/div/div/div[1]/div[2]/div[2]/div/div[2]/div[1]/span[2]"),
    # e-mail
    (u"郵箱",
     "/html/body/div[2]/div[1]/div/div/div/div[1]/div[2]/div[2]/div/div[2]/div[2]/span[2]"),
    # website
    (u"網址",
     "/html/body/div[2]/div[1]/div/div/div/div[1]/div[2]/div[2]/div/div[3]/div[1]/a"),
    # address
    (u"地址",
     "/html/body/div[2]/div[1]/div/div/div/div[1]/div[2]/div[2]/div/div[3]/div[2]/span[2]"),
)

print(os.getcwd())

company = pd.read_excel(COMPANY_FILE)
num1 = company.shape[0]  # number of companies (rows)
num2 = company.shape[1]  # number of columns; not used below

# The result columns receive text.  Create them when missing and make
# existing ones object-typed, so that an empty (all-NaN, float) column read
# from Excel can accept strings.
for column, _xpath in DETAIL_FIELDS:
    if column in company.columns:
        company[column] = company[column].astype(object)
    else:
        company[column] = None

driver = webdriver.Chrome()
driver.get(START_URL)

# read_excel without index_col yields the default 0..n-1 index, so the loop
# counter doubles as the row label used with .loc.
for i in range(num1):
    time.sleep(1)
    driver.find_element(By.ID, "live-search").send_keys(company.loc[i, NAME_COLUMN])
    time.sleep(1)
    driver.find_element(By.XPATH, SEARCH_SUBMIT_XPATH).click()
    time.sleep(2)
    driver.find_element(By.XPATH, RESULT_LINK_XPATH).click()
    print(driver.current_url)

    # Remember the window we searched from, then visit every other open
    # window (the detail page opened by the click above).
    search_handle = driver.current_window_handle
    for handle in driver.window_handles:
        if handle == search_handle:
            continue
        driver.switch_to.window(handle)
        print('now register window!')
        print(driver.current_url)

        # Write with .loc: chained indexing (company[col][i] = ...) may
        # assign into a temporary copy and silently lose the value.
        # All fields are stored as text; the original UTF-8-encoded only
        # the phone value, mixing byte strings with text in the frame.
        for column, xpath in DETAIL_FIELDS:
            company.loc[i, column] = driver.find_element(By.XPATH, xpath).text

        # Close the detail window only -- never the search window, which
        # would end the session if no new window had been opened.
        driver.close()
        print('now register window!')

    # Return to the search window and step back for the next company.
    driver.switch_to.window(search_handle)
    print('register window!')
    driver.back()
    time.sleep(3)

print(company)