爬蟲07-美團
阿新 • • 發佈:2018-11-01
""" __title__ = '' __author__ = 'Thompson' __mtime__ = '2018/8/28' # code is far away from bugs with the god animal protecting I love animals. They taste delicious. ┏┓ ┏┓ ┏┛┻━━━┛┻┓ ┃ ☃ ┃ ┃ ┳┛ ┗┳ ┃ ┃ ┻ ┃ ┗━┓ ┏━┛ ┃ ┗━━━┓ ┃ 神獸保佑 ┣┓ ┃ 永無BUG! ┏┛ ┗┓┓┏━┳┓┏┛ ┃┫┫ ┃┫┫ ┗┻┛ ┗┻┛ """ import requests import re if __name__ == "__main__": url = "http://hotel.meituan.com/beijing/" headers = {"User-Agent":"Mozilla/5.0 (compatible; WOW64; MSIE 10.0; Windows NT 6.2)"} response = requests.get(url,headers = headers) html = response.content.decode() print(html) pat_1 = re.compile(r'<article class="poi-item".*?>(.*?)</article>',re.S | re.M) pat_2 = re.compile(r'<h3.*?>.*?<a.*?>.*?<em.*?>.*?</em>(.*?)</a>',re.S | re.M) # 標題 pat_3 = re.compile(r'<h3.*?>.*?<a href="(.*?)".*?>', re.S | re.M) # url pat_4 = re.compile(r'<div class="poi-grade".*?([0-9.]+)', re.S | re.M) # 評分 pat_5 = re.compile(r'<div class="poi-price".*?<em data-v-5be45891>(.*?)</em>') ls = pat_1.findall(html) print(len(ls)) for item in ls: matchObj = pat_2.search(item) if matchObj: title = matchObj.group(1) print(title) matchObj = pat_3.search(item) if matchObj: url = matchObj.group(1) print(url) matchObj = pat_4.search(item) if matchObj: score = matchObj.group(1) print(score) matchObj = pat_5.search(item) if matchObj: price = matchObj.group(1) print(price)