selenium的webdriver.Chrome()模擬點選鬥魚頁面
阿新 • • 發佈:2019-01-01
#!/usr/bin/env python
# coding=utf-8
"""Walk the Douyu "all rooms" directory with a Selenium-driven Chrome.

Each directory page is parsed with BeautifulSoup; every room's title and
viewer count is printed, and the number of rooms plus the summed viewer
count are reported when the test case is torn down.
"""
import time
import unittest

from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.common.by import By

# Suffixes meaning "x 10,000" on a viewer count.  The traditional form is
# what this script originally matched; the simplified form is accepted as
# well -- NOTE(review): presumably what the live site serves, confirm.
_WAN_SUFFIXES = (u'萬', u'万')


class Douyu(unittest.TestCase):
    """Scrape room titles and viewer counts from the Douyu directory."""

    # Initialisation hook; unittest requires the name ``setUp``.
    def setUp(self):
        """Start Chrome and reset the counters used by the scrape."""
        self.driver = webdriver.Chrome()
        self.num = 0      # number of rooms seen
        self.count = 0    # summed viewer count, filled in by tearDown
        self.list = []    # per-room viewer counts as ints

    @staticmethod
    def _parse_viewer_count(text):
        """Convert a displayed viewer count such as ``"1.2萬"`` to an int.

        An empty string yields 0.  A trailing ten-thousand suffix scales
        the number by 10000; the product is rounded rather than truncated
        so binary float error cannot drop a unit.  Any other text is
        handed to ``int()`` and may raise ``ValueError``.
        """
        text = text.strip()
        if not text:
            return 0
        if text.endswith(_WAN_SUFFIXES):
            return int(round(float(text[:-1]) * 10000))
        return int(text)

    @staticmethod
    def _format_total(count):
        """Render *count* in the script's ten-thousand / thousand / rest form.

        The layout is kept exactly as the script has always printed it:
        the thousands part is omitted only when it is zero and the
        remainder is non-zero.
        """
        wan, below_wan = divmod(count, 10000)
        qian, rest = divmod(below_wan, 1000)
        if rest != 0:
            if qian != 0:
                return str(wan) + '萬' + str(qian) + '千' + str(rest)
            return str(wan) + '萬' + str(rest)
        return str(wan) + '萬' + str(qian) + '千'

    # Test-case methods must start with ``test`` to be collected.
    def testDouyu(self):
        """Page through the directory, printing and recording every room."""
        self.driver.get("https://www.douyu.com/directory/all")

        while True:
            # Take one snapshot so parsing and the end-of-list check below
            # look at the same DOM.
            page_source = self.driver.page_source
            soup = BeautifulSoup(page_source, 'lxml')

            # Room titles and viewer counts on the current page.
            titles = soup.find_all('h3', {'class': 'ellipsis'})
            nums = soup.find_all('span', {'class': 'dy-num fr'})

            # zip() pairs them positionally: [(title, num), ...]
            for title, num in zip(titles, nums):
                viewers = num.get_text().strip()
                print(u"觀眾人數:" + viewers, u"\t房間標題:" + title.get_text().strip())
                self.num += 1
                self.list.append(self._parse_viewer_count(viewers))

            # The marker class appears once the "next" button is disabled,
            # i.e. on the last page.
            if "shark-pager-disable-next" in page_source:
                break

            # Click "next page".  find_element(By...) is used because the
            # find_element_by_* helpers were removed in Selenium 4.3.
            self.driver.find_element(By.CLASS_NAME, "shark-pager-next").click()
            time.sleep(4)  # fixed wait for the next page to render

    def tearDown(self):
        """Print the summary and always shut the browser down."""
        try:
            print("載入完成。。。")
            self.count += sum(self.list)
            print('總直播人數:' + str(self.num))
            print('合計%s人觀看' % self._format_total(self.count))
        finally:
            # Quit even if the summary fails, so Chrome is not left running.
            self.driver.quit()


if __name__ == "__main__":
    unittest.main()