使用Selenium爬取資訊
阿新 • • 發佈:2022-03-04
1.使用Selenium爬取資訊
import com.oasis.mdata.entities.GameInfo
import org.jsoup.Jsoup
import org.jsoup.nodes.Document
import org.jsoup.select.Elements
import org.openqa.selenium.By
import org.openqa.selenium.firefox.FirefoxDriver
import org.openqa.selenium.firefox.FirefoxOptions
import org.openqa.selenium.firefox.FirefoxProfile

/**
 * Scrapes the qimai.cn top-grossing iPhone ranking page with a headless Firefox
 * driven by Selenium, and converts the rendered ranking entries into [GameInfo] rows.
 *
 * @author 沒有夢想的java菜鳥
 * @date 2022/03/02 11:48 AM
 */
class Selenium {

    /** Base ranking URL; the `keyword` passed to [gameInformation] is appended verbatim. */
    var url = "https://www.qimai.cn/rank/index/brand/grossing/device/iphone/country/us/genre/6014/date/"

    /**
     * Opens the ranking page for [keyword], logs in first if the page asks for it,
     * and returns the ranking entries found in the rendered HTML.
     *
     * The browser session is always shut down before this function returns or throws.
     *
     * @param keyword suffix appended to [url]; it is also stored as `dateTime` on every returned row.
     * @return one [GameInfo] per element with CSS class `info`, ranked from 1 in page order.
     */
    fun gameInformation(keyword: String): MutableList<GameInfo> {
        System.setProperty("webdriver.gecko.driver", "/usr/local/bin/geckodriver")
        // Chrome alternative:
        // System.setProperty("webdriver.chrome.driver", "/usr/local/bin/chromedriver")

        val driver = FirefoxDriver(buildOptions())
        try {
            // NOTE(review): this runs against the initial blank page, before navigation, so the
            // override is presumably discarded once driver.get() loads a new document - confirm.
            driver.executeScript("Object.defineProperty(navigator, 'webdriver', {get: () => false})")
            driver.get("$url$keyword")

            val firstDom = scrollAndParse(driver, INITIAL_LOAD_WAIT_MS)
            val firstEntries = firstDom.getElementsByClass("info")

            val loggedIn = exist(firstEntries, firstDom, driver)
            val entries = if (loggedIn) {
                // NOTE(review): after login the ranking URL is not requested again; this relies on
                // the site redirecting back to the ranking page by itself - verify.
                scrollAndParse(driver, POST_LOGIN_WAIT_MS).getElementsByClass("info")
            } else {
                firstEntries
            }
            return getGameInfo(driver, keyword, entries)
        } finally {
            // The original called driver.close() after the return statement, so it never ran and
            // each call leaked a browser. quit() ends the whole session, including geckodriver.
            driver.quit()
        }
    }

    /**
     * Logs in when the ranking page rendered no entries but shows a login prompt.
     *
     * @param div ranking entries already extracted from [dom].
     * @param dom parsed snapshot of the page currently shown by [driver].
     * @param driver live browser session used to open the login page and submit the form.
     * @return `true` if the login form was submitted; `false` if [div] is non-empty or the
     *         page contains no `login-tip` link to follow.
     */
    fun exist(div: Elements, dom: Document, driver: FirefoxDriver): Boolean {
        if (div.isNotEmpty()) return false

        // Without a login prompt there is nothing to follow; report "no login performed"
        // instead of failing with an IndexOutOfBoundsException.
        val loginPath = dom.getElementsByClass("login-tip").firstOrNull()
            ?.select("a")?.firstOrNull()
            ?.attr("href")
            ?: return false
        val loginUrl = "https://www.qimai.cn$loginPath"

        Thread.sleep(2000)
        driver.get(loginUrl)
        Thread.sleep(2000)

        // Earlier absolute-XPath variants, kept for reference:
        //   /html/body/div[2]/div[4]/div/div[2]/div[1]/ul/li[1]/input   (username)
        //   /html/body/div[2]/div[4]/div/div[2]/div[1]/ul/li[2]/input   (password)
        //   /html/body/div[2]/div[4]/div/div[2]/div[2]                  (login button)
        // NOTE(review): the placeholder texts below must match the live page exactly - confirm.
        val username = driver.findElement(By.xpath("//input[@placeholder='請輸入手機號/郵箱']"))
        val password = driver.findElement(By.xpath("//input[@placeholder='請輸入密碼']"))
        val loginButton = driver.findElement(By.xpath("//div[@class='signin-btn']"))

        username.sendKeys(LOGIN_USERNAME)
        password.sendKeys(LOGIN_PASSWORD)
        loginButton.click()
        return true
    }

    /**
     * Converts ranking entry elements into [GameInfo] rows.
     *
     * @param driver unused; kept so existing callers keep compiling.
     * @param keyword value stored as `dateTime` on every row.
     * @param div ranking entry elements; the text of each entry's first `<p>` becomes the name.
     * @return rows in document order with `sort` starting at 1.
     */
    @Suppress("UNUSED_PARAMETER")
    fun getGameInfo(driver: FirefoxDriver, keyword: String, div: Elements): MutableList<GameInfo> =
        div.mapIndexedTo(ArrayList<GameInfo>()) { position, entry ->
            val gameName = entry.select("p")[0].text()
            GameInfo(sort = position + 1, name = gameName, dateTime = keyword)
        }

    /** Builds the headless Firefox configuration (arguments, profile and preferences). */
    private fun buildOptions(): FirefoxOptions {
        val options = FirefoxOptions()
        val profile = FirefoxProfile()
        // Chrome alternative:
        // val options = ChromeOptions()

        // Disable GPU rendering.
        // NOTE(review): --disable-gpu, ignore-certificate-errors and --disable-infobars look like
        // Chrome switches; Firefox may ignore them - confirm.
        options.addArguments("--disable-gpu")
        options.addArguments("--headless")
        // Ignore certificate errors.
        options.addArguments("ignore-certificate-errors")
        // Suppress the "browser is being controlled by automation" notice.
        options.addArguments("--disable-infobars")
        // Anti-bot-detection: intended to make window.navigator.webdriver report false.
        options.addPreference("dom.webdriver.enabled", false)

        // Override the User-Agent request header with an iPhone Safari one.
        profile.setPreference(
            "general.useragent.override",
            "Mozilla/5.0(iPhone;CPU iPhone OS 11_0 like Mac OS X) AppleWebKit/604.1.38 (KHTML,like Gecko) Version/11.0 Mobile/15A372 Safari/604.1"
        )
        // Android alternative:
        // profile.setPreference(
        //     "general.useragent.override",
        //     "Mozilla/5.0 (Linux; Android 4.1.1; GT-N7100 Build/JRO03C) AppleWebKit/537.36 (KHTML, like Gecko) Version/4.0 Chrome/35.0.1916.138 Mobile Safari/537.36 T7/6.3")

        // Proxy IP:
        // val proxyStr = "202.104.45.154:64257"
        // val proxy = Proxy().setHttpProxy(proxyStr).setSslProxy(proxyStr)
        // options.setProxy(proxy)
        options.profile = profile

        // Disable caching.
        options.addPreference("network.http.use-cache", false)
        options.addPreference("browser.cache.memory.enable", false)
        options.addPreference("browser.cache.disk.enable", false)
        options.addPreference("browser.sessionhistory.max_total_viewers", 3)
        options.addPreference("network.dns.disableIPv6", true)
        // Key was "Content.notify.interval"; the capital C looks like a typo next to the
        // lower-case backoffcount key below, so it is written in lower case here.
        options.addPreference("content.notify.interval", 750000)
        options.addPreference("content.notify.backoffcount", 3)
        options.addPreference("network.http.pipelining", true)
        options.addPreference("network.http.proxy.pipelining", true)
        options.addPreference("network.http.pipelining.maxrequests", 32)
        return options
    }

    /** Scrolls far down the page, sleeps [waitMillis], then parses the rendered page source. */
    private fun scrollAndParse(driver: FirefoxDriver, waitMillis: Long): Document {
        driver.executeScript("window.scrollTo(0,100000)")
        Thread.sleep(waitMillis)
        return Jsoup.parse(driver.pageSource)
    }

    private companion object {
        /** Sleep after the first scroll of the ranking page. */
        const val INITIAL_LOAD_WAIT_MS = 4000L

        /** Sleep after the scroll that follows a login. */
        const val POST_LOGIN_WAIT_MS = 2000L

        // NOTE(review): credentials are hard-coded in source; move them to configuration or
        // environment variables and rotate this password.
        const val LOGIN_USERNAME = "13037117092"
        const val LOGIN_PASSWORD = "wl990922"
    }
}