scrapy爬蟲(2)之css
阿新 • • 發佈:2018-12-26
- CSS 選擇器的功能和上一篇介紹的 XPath 一樣，兩者擇一使用即可
# css:
# Extract the article fields from the detail page using CSS selectors.
# `response` is the Scrapy response for the page and `re` must already be
# imported by the surrounding file.
# NOTE: fields read with `.extract()[0]` raise IndexError when the selector
# matches nothing on the page.
front_image_url = response.meta.get("front_image_url", "")  # article cover image
title2 = response.css(".entry-header h1::text").extract()[0]
# Strip again after dropping the "·" separator so no whitespace that sat next
# to the separator is left on the value.
create_data2 = (
    response.css("p.entry-meta-hide-on-mobile::text")
    .extract()[0]
    .strip()
    .replace("·", "")
    .strip()
)
praise_nums2 = int(response.css(".href-style h10::text").extract()[0])

# The favorite/comment labels mix a number with text; take the first run of
# digits and fall back to 0 when there is none. `extract_first("")` makes a
# missing node take that same 0 fallback instead of raising IndexError.
favor_text = response.css(".bookmark-btn::text").extract_first("")
match_re3 = re.match(r".*?(\d+).*", favor_text)
favor_nums2 = int(match_re3.group(1)) if match_re3 else 0

comment_text = response.css("span.hide-on-480::text").extract_first("")
match_re4 = re.match(r".*?(\d+).*", comment_text)
comment_nums2 = int(match_re4.group(1)) if match_re4 else 0

# Keep the raw HTML of the article body, not only its text.
content2 = response.css("div.entry").extract()[0]

# The meta line's links include a comment-count entry ending in "評論";
# drop it so only real tags remain. The original built this filtered list but
# never assigned it, so the entry leaked into `tags2`.
tag_list2 = response.css("p.entry-meta-hide-on-mobile a::text").extract()
tag_list2 = [
    element for element in tag_list2 if not element.strip().endswith("評論")
]
tags2 = ",".join(tag_list2)