1. 程式人生 > 其它 > 使用Selenium爬取資訊

使用Selenium爬取資訊

1.使用Selenium爬取資訊
import com.oasis.mdata.entities.GameInfo
import org.jsoup.Jsoup
import org.jsoup.nodes.Document
import org.jsoup.select.Elements
import org.openqa.selenium.By
import org.openqa.selenium.firefox.FirefoxDriver
import org.openqa.selenium.firefox.FirefoxOptions
import org.openqa.selenium.firefox.FirefoxProfile


/**
 * Scrapes a qimai.cn ranking page with a headless Firefox session driven by Selenium and
 * parses the rendered HTML with jsoup.
 *
 * The page that is scraped is [url] with a caller-supplied suffix appended (see
 * [gameInformation]). When the page shows a login prompt instead of ranking entries, the
 * class logs in with [loginUser] / [loginPassword] and reads the page again.
 *
 * SECURITY NOTE(review): the default credentials are kept only so existing `Selenium()`
 * callers behave as before. Credentials should not live in source control — pass them in
 * from configuration or a secret store instead.
 *
 * @param loginUser account name (phone number / e-mail) typed into the login form.
 * @param loginPassword password typed into the login form.
 * @author 沒有夢想的java菜鳥
 * @date 2022/03/02 11:48 AM
 */
class Selenium(
    private val loginUser: String = "13037117092",
    private val loginPassword: String = "wl990922"
) {

    /** Base URL of the ranking page; the `keyword` passed to [gameInformation] is appended to it. */
    var url = "https://www.qimai.cn/rank/index/brand/grossing/device/iphone/country/us/genre/6014/date/"

    /**
     * Opens `url + keyword` in headless Firefox, logs in if the page asks for it, and returns
     * one [GameInfo] per element with CSS class `info` found in the rendered page.
     *
     * The browser session is always terminated before this function returns or throws.
     *
     * @param keyword suffix appended to [url]; it is also stored as `dateTime` on every result.
     * @return the parsed entries in page order; empty when no `info` elements were found.
     */
    fun gameInformation(keyword: String): MutableList<GameInfo> {
        System.setProperty("webdriver.gecko.driver", "/usr/local/bin/geckodriver")
//        System.setProperty("webdriver.chrome.driver", "/usr/local/bin/chromedriver")

        val driver = FirefoxDriver(buildOptions())
        try {
            // NOTE(review): this runs before navigation, so the override presumably does not
            // survive the page load below — confirm whether it has any effect.
            driver.executeScript("Object.defineProperty(navigator, 'webdriver', {get: () => false})")
            driver.get("$url$keyword")
//            driver.get("https://www.baidu.com/")

            // Scroll far down, then give the page time to render before reading it.
            driver.executeScript("window.scrollTo(0,100000)")
            Thread.sleep(4000)
            val firstDom = Jsoup.parse(driver.pageSource)
            val firstEntries = firstDom.getElementsByClass("info")

            if (!exist(firstEntries, firstDom, driver)) {
                return getGameInfo(driver, keyword, firstEntries)
            }

            // A login was just submitted: scroll again and re-read the page.
            driver.executeScript("window.scrollTo(0,100000)")
            Thread.sleep(2000)
            val entriesAfterLogin = Jsoup.parse(driver.pageSource).getElementsByClass("info")
            return getGameInfo(driver, keyword, entriesAfterLogin)
        } finally {
            // The original called close() after the return statement, so it never ran and the
            // browser process leaked. quit() ends the whole session, not just the window.
            driver.quit()
        }
    }

    /**
     * Logs in through the page's login prompt when the parsed page contains no ranking entries.
     *
     * @param div the `info` elements already extracted from [dom].
     * @param dom the parsed page, searched for a `login-tip` element containing the login link.
     * @param driver the live browser session used to open the login page and submit the form.
     * @return `true` when the login form was submitted; `false` when [div] is non-empty or
     *   when no `login-tip` link is present (nothing to log in through).
     */
    fun exist(div: Elements, dom: Document, driver: FirefoxDriver): Boolean {
        if (div.size > 0) return false

        // Previously an absent login prompt crashed with IndexOutOfBoundsException.
        val loginPath = dom.getElementsByClass("login-tip").firstOrNull()
            ?.select("a")?.firstOrNull()
            ?.attr("href")
            ?: return false

        Thread.sleep(2000)
        driver.get("https://www.qimai.cn$loginPath")
        Thread.sleep(2000)
//            val username = driver.findElement(By.xpath("/html/body/div[2]/div[4]/div/div[2]/div[1]/ul/li[1]/input"))
//            val password = driver.findElement(By.xpath("/html/body/div[2]/div[4]/div/div[2]/div[1]/ul/li[2]/input"))
//            val loginButton = driver.findElement(By.xpath("/html/body/div[2]/div[4]/div/div[2]/div[2]"))
        // Form fields are located by their placeholder text rather than by absolute XPath.
        val userField = driver.findElement(By.xpath("//input[@placeholder='請輸入手機號/郵箱']"))
        val passwordField = driver.findElement(By.xpath("//input[@placeholder='請輸入密碼']"))
        val submitButton = driver.findElement(By.xpath("//div[@class='signin-btn']"))
        userField.sendKeys(loginUser)
        passwordField.sendKeys(loginPassword)
        submitButton.click()
        return true
    }

    /**
     * Converts the given page elements into [GameInfo] records.
     *
     * Each record gets a 1-based `sort` equal to the element's position in [div], the text of
     * the element's first `<p>` as `name`, and [keyword] as `dateTime`.
     *
     * @param driver unused; kept so existing callers keep compiling.
     * @param keyword value stored as `dateTime` on every record.
     * @param div elements to convert; each one must contain at least one `<p>`.
     * @return a new mutable list with one record per element, in the same order.
     */
    fun getGameInfo(driver: FirefoxDriver, keyword: String, div: Elements): MutableList<GameInfo> =
        div.mapIndexedTo(ArrayList<GameInfo>()) { position, entry ->
            GameInfo(sort = position + 1, name = entry.select("p")[0].text(), dateTime = keyword)
        }

    /** Builds the Firefox configuration (headless mode, user-agent override, cache preferences). */
    private fun buildOptions(): FirefoxOptions {
        val options = FirefoxOptions()
        val profile = FirefoxProfile()
//        var options = ChromeOptions()
//        val profile = ChromeProfile()
        // NOTE(review): several of the arguments below look like Chrome-style switches;
        // confirm that Firefox honours them.
        // Disable GPU rendering.
        options.addArguments("--disable-gpu")
        options.addArguments("--headless")
        // Ignore certificate errors.
        options.addArguments("ignore-certificate-errors")
        // Suppress the "browser is controlled by automation" notice.
        options.addArguments("--disable-infobars")
        // Anti-bot-detection: intended to make window.navigator.webdriver report false.
        options.addPreference("dom.webdriver.enabled", false)
        // Override the User-Agent request header with a mobile Safari string.
        profile.setPreference(
            "general.useragent.override",
            "Mozilla/5.0(iPhone;CPU iPhone OS 11_0 like Mac OS X) AppleWebKit/604.1.38 (KHTML,like Gecko) Version/11.0 Mobile/15A372 Safari/604.1"
        )
//        profile.setPreference(
//            "general.useragent.override",
//            "Mozilla/5.0 (Linux; Android 4.1.1; GT-N7100 Build/JRO03C) AppleWebKit/537.36 (KHTML, like Gecko) Version/4.0 Chrome/35.0.1916.138 Mobile Safari/537.36 T7/6.3")

        // Proxy IP (disabled).
//        val proxyStr="202.104.45.154:64257"
//        val proxy= Proxy().setHttpProxy(proxyStr).setSslProxy(proxyStr)
//        options.setProxy(proxy)

        options.profile = profile
        // Disable caching and apply network-related preferences.
        options.addPreference("network.http.use-cache", false)
        options.addPreference("browser.cache.memory.enable", false)
        options.addPreference("browser.cache.disk.enable", false)
        options.addPreference("browser.sessionhistory.max_total_viewers", 3)
        options.addPreference("network.dns.disableIPv6", true)
        options.addPreference("Content.notify.interval", 750000)
        options.addPreference("content.notify.backoffcount", 3)
        options.addPreference("network.http.pipelining", true)
        options.addPreference("network.http.proxy.pipelining", true)
        options.addPreference("network.http.pipelining.maxrequests", 32)
        return options
    }

}