1. 程式人生 > >Python+Selenium進行網頁多視窗切換爬蟲

Python+Selenium進行網頁多視窗切換爬蟲

#!/usr/bin/python
# -*- coding:utf8 -*-
import time
import random
import os
import re
import xlwt
import requests
import numpy as np
import xlsxwriter
import pandas as pd
from selenium import webdriver
from selenium.webdriver.common.by import By

# Script purpose: for every company listed in the spreadsheet, search for it
# on the target site, open its detail page (which appears in a separate
# browser window), copy phone / e-mail / website / address into the DataFrame,
# then close the detail window and return to the search page.

# Element clicked right after the company name has been typed into the search
# box (presumably a search suggestion or button -- the target site is redacted
# in this script, so confirm against the real page).
SEARCH_CLICK_XPATH = "/html/body/div[1]/div[1]/div[1]/div[2]/div/div[2]/div[2]/div/div[1]/div[1]/div"

# Element clicked on the following page; the script expects this click to
# open the company detail page in a new window.
DETAIL_LINK_XPATH = "/html/body/div[2]/div[1]/div/div/div[1]/div[3]/div[1]/div[2]/div[1]/div[1]/a/span/em"

# (output column, XPath on the detail page) pairs.  The columns are, in
# order: phone, e-mail, website, address.
DETAIL_FIELDS = (
    (u"電話", "/html/body/div[2]/div[1]/div/div/div/div[1]/div[2]/div[2]/div/div[2]/div[1]/span[2]"),
    (u"郵箱", "/html/body/div[2]/div[1]/div/div/div/div[1]/div[2]/div[2]/div/div[2]/div[2]/span[2]"),
    (u"網址", "/html/body/div[2]/div[1]/div/div/div/div[1]/div[2]/div[2]/div/div[3]/div[1]/a"),
    (u"地址", "/html/body/div[2]/div[1]/div/div/div/div[1]/div[2]/div[2]/div/div[3]/div[2]/span[2]"),
)

# Show the working directory so path problems are easy to diagnose.
print(os.getcwd())

# Raw string: the Windows path contains backslashes that must stay literal.
company = pd.read_excel(r'E:\***\company.xlsx')
num1 = company.shape[0]
num2 = company.shape[1]

# Make sure every output column exists and can hold text.  An empty Excel
# column is typically loaded as float NaN, and a missing column would
# otherwise raise KeyError on the first write.
for column, _ in DETAIL_FIELDS:
    if column not in company.columns:
        company[column] = None
    company[column] = company[column].astype(object)

driver = webdriver.Chrome()
try:
    driver.get('**********')
    for i in range(num1):
        time.sleep(1)
        search_box = driver.find_element(By.ID, "live-search")
        # send_keys() appends to whatever is already in the field, so clear
        # it first; otherwise text from the previous company could remain.
        search_box.clear()
        search_box.send_keys(company.loc[i, u"公司名稱"])
        time.sleep(1)
        driver.find_element(By.XPATH, SEARCH_CLICK_XPATH).click()
        time.sleep(2)
        driver.find_element(By.XPATH, DETAIL_LINK_XPATH).click()
        print(driver.current_url)

        # Selenium keeps driving the original window after a click opens a
        # new one, so remember it and visit every other window explicitly.
        main_handle = driver.current_window_handle
        for handle in driver.window_handles:
            if handle == main_handle:
                continue
            driver.switch_to.window(handle)
            print('now register window!')
            print(driver.current_url)
            for column, xpath in DETAIL_FIELDS:
                # .loc writes into the DataFrame itself; chained indexing
                # (company[column][i] = ...) may write to a temporary copy.
                company.loc[i, column] = driver.find_element(By.XPATH, xpath).text
            # Close only the extra window.  Closing unconditionally would
            # kill the session whenever no new window had been opened.
            driver.close()
            print('now register window!')

        # Return to the search window and go back for the next company.
        driver.switch_to.window(main_handle)
        print('register window!')
        driver.back()
        time.sleep(3)
finally:
    # Always release the browser, even when an element lookup fails.
    driver.quit()

print(company)