1. 程式人生 > Python3-selenium\phantomjs\bs4爬取鬥魚頁面

Python3-selenium\phantomjs\bs4爬取鬥魚頁面

from selenium import webdriver
import time
from bs4 import BeautifulSoup

class douyuSelenium():
    """Print room titles and popularity counts from Douyu's directory pages.

    Usage: call ``setup()`` to start the browser, ``testDouyu()`` to walk the
    paginated listing and print one line per room, then ``shutdown()`` to
    quit the browser.
    """

    # Directory listing that testDouyu() opens.
    DIRECTORY_URL = 'https://www.douyu.com/directory/all'
    # Seconds to sleep before reading each page, giving it time to render.
    PAGE_WAIT_SECONDS = 2
    # Class name of the "next page" pager button that gets clicked.
    NEXT_PAGE_CLASS = 'shark-pager-next'
    # Marker that shows up in the page source once "next" is disabled,
    # i.e. the last page has been reached.
    LAST_PAGE_MARKER = 'shark-pager-disable-next'

    def setup(self):
        """Start the PhantomJS browser and keep it on ``self.driver``."""
        self.driver = webdriver.PhantomJS()

    @staticmethod
    def _is_last_page(page_source):
        """Return True if *page_source* contains the disabled-next marker."""
        return douyuSelenium.LAST_PAGE_MARKER in page_source

    @staticmethod
    def _format_room(title_text, num_text):
        """Build the printed line for one room.

        The title is stripped of surrounding whitespace; the popularity
        text is used as-is.
        """
        return '房間標題:' + title_text.strip() + '\t' + '人氣:' + num_text

    def testDouyu(self):
        """Walk every page of the directory and print each room's info.

        For each page: wait, parse the current page source, print one line
        per (title, popularity) pair, then click "next" until the pager
        reports that there is no next page.
        """
        self.driver.get(self.DIRECTORY_URL)

        while True:
            time.sleep(self.PAGE_WAIT_SECONDS)

            # Take one snapshot per iteration so the parsed content and the
            # last-page check below look at the same page state.
            page_source = self.driver.page_source

            # Parse with the lxml parser.
            soup = BeautifulSoup(page_source, 'lxml')

            # All room titles and viewer counts on the current page.
            titles = soup.find_all('h3', {'class': 'ellipsis'})
            nums = soup.find_all('span', {'class': 'dy-num fr'})

            # NOTE(review): the original author suspected titles and counts
            # do not always line up. Pairing is purely positional, and zip()
            # silently stops at the shorter list -- confirm against the DOM.
            for title, num in zip(titles, nums):
                print(self._format_room(title.text, num.text))

            # Stop once the pager's "next" button is disabled.
            if self._is_last_page(page_source):
                break

            # Otherwise move on to the next page.
            next_page = self.driver.find_element_by_class_name(
                self.NEXT_PAGE_CLASS)
            next_page.click()

    def shutdown(self):
        """Print the completion message and quit the browser."""
        print('載入完成。。。。')
        self.driver.quit()


if __name__ == '__main__':
    douyu = douyuSelenium()
    douyu.setup()
    try:
        douyu.testDouyu()
    finally:
        # Always quit the browser, even if scraping fails part-way, so no
        # PhantomJS process is left running.
        douyu.shutdown()