1. 程式人生 > >請教大佬,在用pycharm除錯的時候如何進入callback呼叫的函式

請教大佬,在用pycharm除錯的時候如何進入callback呼叫的函式

import scrapy
import re
from scrapy.http import Request
from urllib import parse


class JobboleSpider(scrapy.Spider):
    """Crawl the jobbole article listing and extract the details of each post.

    ``parse`` walks the paginated listing and schedules one request per
    article; ``parse_detail`` extracts the fields of a single article page.

    Debugging note: ``yield Request(..., callback=...)`` only hands the request
    to the Scrapy engine. The callback runs later, after the page has been
    downloaded, so stepping over the ``yield`` never jumps into the callback.
    Put a breakpoint inside the callback and let the crawl run.
    """

    name = 'jobbole'
    # The listing and the articles are served from python.jobbole.com. The
    # previous value 'blog.jobbole.com' did not cover that host, so the
    # offsite filter dropped every follow-up request and parse_detail was
    # never called. The parent domain covers both sub-domains.
    allowed_domains = ['jobbole.com']
    start_urls = ['http://python.jobbole.com/all-posts/']

    @staticmethod
    def _first_int(text):
        """Return the first run of digits in ``text`` as an int, or 0 if none."""
        match = re.search(r"\d+", text or "")
        return int(match.group()) if match else 0

    def parse(self, response):
        """Schedule every article on a listing page, then the next listing page.

        1. Collect the article URLs on the listing page and hand each one to
           Scrapy; the downloaded page is parsed by ``parse_detail``.
        2. Find the URL of the next listing page and hand it to Scrapy; the
           downloaded page is parsed by this method again.
        """
        # Every article thumbnail link on the current listing page.
        post_nodes = response.css("#archive .floated-thumb .post-thumb a")
        for post_node in post_nodes:
            image_url = post_node.css("img::attr(src)").extract_first("")
            post_url = post_node.css("::attr(href)").extract_first("")
            if not post_url:
                # Joining an empty href would yield the listing page itself.
                continue
            yield Request(
                url=parse.urljoin(response.url, post_url),
                meta={"front_image_url": image_url},
                callback=self.parse_detail,
            )

        # Follow the pagination link, if there is one.
        next_page = response.css(".next.page-numbers::attr(href)").extract_first()
        if next_page:
            yield Request(
                url=parse.urljoin(response.url, next_page),
                callback=self.parse,
            )

    def parse_detail(self, response):
        """Extract the fields of one article page and yield them as a dict.

        Missing elements produce empty strings / zero counts instead of
        raising, so one malformed page does not abort the callback.

        Yields:
            dict with keys ``url``, ``title``, ``create_date``, ``praise_num``,
            ``fav_num``, ``comment_num``, ``content``, ``tags`` and
            ``front_image_url`` (the thumbnail passed through ``meta``).
        """
        # Title and creation date.
        title = response.css(".entry-header > h1::text").extract_first("")
        create_date = (
            response.css(".entry-meta-hide-on-mobile::text")
            .extract_first("")
            .strip()
            .replace('·', '')
            .strip()
        )

        # Like, bookmark and comment counts. The text around the number may
        # contain labels, so only the first integer is kept.
        praise_num = self._first_int(
            response.css(".vote-post-up h10::text").extract_first("")
        )
        fav_num = self._first_int(
            response.css(".bookmark-btn::text").extract_first("")
        )
        comment_num = self._first_int(
            response.css('a[href = "#article-comment"] span::text').extract_first("")
        )

        content = response.css('div.entry').extract_first("")

        # Tags: the same block also holds the "N 評論" link, which is not a tag.
        tag_list = response.css('.entry-meta-hide-on-mobile a::text').extract()
        tag_list = [
            element for element in tag_list
            if not element.strip().endswith('評論')
        ]
        tags = ",".join(tag_list)

        yield {
            "url": response.url,
            "title": title,
            "create_date": create_date,
            "praise_num": praise_num,
            "fav_num": fav_num,
            "comment_num": comment_num,
            "content": content,
            "tags": tags,
            "front_image_url": response.meta.get("front_image_url", ""),
        }
小弟學習利用scrapy爬取jobbole所有文章的資訊,想確認具體頁面資訊爬取是否正確,進行單步除錯的時候從
yield Request(url=parse.urljoin(response.url, post_url), meta={"front_image_url": image_url},
callback=self.parse_detail)
欄位跳轉不到parse_detail函式中;在函式中也打了斷點,但就是在Request欄位反覆執行for迴圈,進入不到parse_detail中。請問各位大佬有什麼好的辦法?