scrapy 抓取拉鉤 ajax
阿新 • • 發佈:2018-12-12
# -*- coding: utf-8 -*-
import json

import scrapy

from LagouSpider.items import LagouspiderItem


class LagouSpider(scrapy.Spider):
    """Spider that queries Lagou's ``positionAjax.json`` endpoint.

    A single form request is submitted for page ``page`` of the keyword
    search ``python`` in the city ``北京``; the JSON reply is parsed and one
    item is yielded per entry of ``content.positionResult.result``.
    """

    name = 'lagou'
    # allowed_domains = ['lagou.com']
    url = 'https://www.lagou.com/jobs/positionAjax.json?'
    # Page number sent as the ``pn`` form field.
    page = 2
    # Not referenced by the code shown here.
    allpage = 0

    def start_requests(self):
        """Yield the form request that fetches one page of search results."""
        yield scrapy.FormRequest(
            self.url,
            formdata={
                'first': 'false',
                'pn': str(self.page),  # form values must be strings
                'kd': 'python',
                'city': '北京',
            },
            callback=self.parse,
        )

    def parse(self, response):
        """Yield one ``LagouspiderItem`` per position in the JSON response.

        Raises:
            KeyError: if the reply lacks the
                ``content -> positionResult -> result`` structure or an
                entry lacks ``city`` / ``salary``.
        """
        data = json.loads(response.body)
        result = data['content']['positionResult']['result']
        for each in result:
            # Build a fresh item for every position: reusing one instance
            # across yields would make all yielded items alias the same
            # object, so later assignments would overwrite earlier results.
            item = LagouspiderItem()
            item['city'] = each['city']
            item['money'] = each['salary']
            yield item
需在 settings.py 中設定 headers:
# Default headers attached to every request the project sends.
DEFAULT_REQUEST_HEADERS = {
    'Accept': 'application/json, text/javascript, */*; q=0.01',
    'Accept-Language': 'zh-CN,zh;q=0.9,en;q=0.8',
    'Referer': 'https://www.lagou.com/jobs/list_Python?labelWords=&fromSearch=true&suginput=1',
    # Split with implicit string concatenation rather than a backslash
    # continuation inside the literal, so no stray "\ " can end up in the
    # header value if the lines are ever re-joined.
    'User-Agent': (
        'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 '
        '(KHTML, like Gecko) Chrome/66.0.3359.181 Safari/537.36'
    ),
}