Python爬蟲小案例
阿新 • • 發佈:2019-01-04
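'''
Module docstring (placed at the top of the file): a small crawler example that
downloads https://www.panda.tv/cate/lol, extracts each entry's name and
"video-number" value, and prints the entries ranked by that number.
'''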
from urllib import request
import re
class Spider():
    '''
    Minimal regex-based crawler (class docstrings go right below the class line).

    The pipeline run by do() is: fetch the page at ``url``, cut it into
    ``video-info`` blocks, pull a name and a number out of every block,
    sort by the numeric value (descending) and print the ranking.
    '''
    # Page that is downloaded and parsed.
    url = 'https://www.panda.tv/cate/lol'
    # Raw strings so that \s / \S reach the regex engine untouched instead of
    # being treated as (invalid) string escape sequences.
    rootPattern = r'<div class="video-info">([\s\S]*?)</div>'
    namePattern = r'</i>([\s\S]*?)</span>'
    numberPattern = r'<span class="video-number">([\s\S]*?)</span>'

    def __fetchContent(self):
        '''
        Download ``Spider.url`` and return the body decoded as UTF-8 text.

        Errors raised by ``urlopen`` or by the decoding are propagated.
        '''
        # The context manager closes the HTTP response even if read() fails.
        with request.urlopen(Spider.url) as response:
            raw = response.read()  # bytes
        return str(raw, encoding='utf-8')

    def __analysis(self, htmls):
        '''
        Split the page into ``video-info`` blocks and extract the raw fields.

        Returns a list of dicts ``{'name': [...], 'number': [...]}`` where each
        value is the (possibly empty) list of regex matches for that block.
        '''
        return [
            {
                'name': re.findall(Spider.namePattern, block),
                'number': re.findall(Spider.numberPattern, block),
            }
            for block in re.findall(Spider.rootPattern, htmls)
        ]

    def __refine(self, anchors):
        '''
        Reduce every raw entry to its first match with whitespace stripped.

        Entries for which the name or the number was not found are skipped, so
        a single malformed block cannot abort the whole run with IndexError.
        Returns a list of ``{'name': str, 'number': str}`` dicts.
        '''
        refined = []
        for anchor in anchors:
            names = anchor['name']
            numbers = anchor['number']
            if not names or not numbers:
                continue
            refined.append({
                'name': names[0].strip(),
                'number': numbers[0].strip(),
            })
        return refined

    def __sort(self, anchors):
        '''
        Return a new list ordered by numeric value, largest first.
        '''
        return sorted(anchors, key=self.__sortSeed, reverse=True)

    def __sortSeed(self, anchor):
        '''
        Sort key: convert the textual number of an entry to a float.

        The first integer or decimal number found in the text is used, and it
        is multiplied by 10000 when the text carries the ten-thousand unit.
        Text without any digits sorts as 0.0.
        '''
        text = anchor['number']
        # Search (rather than match at position 0) and keep the fractional
        # part, so values such as " 1.5万" are read as 1.5 and not as 1 or ''.
        found = re.search(r'\d+(?:\.\d+)?', text)
        if found is None:
            return 0.0
        number = float(found.group())
        # Accept both the traditional and the simplified form of the unit.
        if '萬' in text or '万' in text:
            number *= 10000
        return number

    def __show(self, anchors):
        '''
        Print one line per entry: ``<rank>:<name>---------<number>``.
        '''
        for rank, anchor in enumerate(anchors, 1):
            print(str(rank) + ':' + anchor['name'] + '---------' + anchor['number'])

    def do(self):
        '''Entry point: fetch, parse, refine, sort and print the ranking.'''
        htmls = self.__fetchContent()
        anchors = self.__analysis(htmls)
        anchors = list(self.__refine(anchors))
        anchors = self.__sort(anchors)
        self.__show(anchors)
# Script entry: build a Spider and run the whole pipeline.
# NOTE: these statements execute whenever the module is loaded, and do()
# starts by calling request.urlopen on Spider.url.
spider = Spider()
spider.do()