scrapy inline_requests 多次請求連用
阿新 • • 發佈:2019-01-24
ber ext time ges Coding ESS sel req weight
# -*- coding: utf-8 -*-
import json
import time

import scrapy
from inline_requests import inline_requests


class CoscoSpider(scrapy.Spider):
    """Crawl vessel particulars from the COSCO Shipping e-lines JSON endpoints.

    The crawl is a chain of dependent requests:
    line groups (continents) -> lines of each group -> vessels serving each
    line -> details of each vessel.  ``inline_requests`` lets the whole chain
    live in one callback: each ``yield scrapy.Request(...)`` suspends the
    generator and evaluates to the downloaded response.
    """

    name = 'cosco'
    allowed_domains = ['elines.coscoshipping.com']
    start_urls = [
        'http://elines.coscoshipping.com/ebusiness/vesselParticulars/'
        'vesselParticularsByServices'
    ]

    # (item key, API field) pairs copied from the vessel-detail response into
    # each ``vessel_info`` entry: operator, owner, port of registry,
    # classification society, gross tonnage, net tonnage, TEU capacity and
    # maximum speed.
    _VESSEL_DETAIL_FIELDS = (
        ('op_name', 'optName'),
        ('owner', 'owner'),
        ('registry_port', 'registryPort'),
        ('class_society', 'classSociety'),
        ('weight', 'grossTonnage'),
        ('net_tonnage', 'netTonnage'),
        ('to_teuCap', 'totTeuCap'),
        ('max_speed', 'maxSpeed'),
    )

    @staticmethod
    def _content(response):
        """Return ``data.content`` from a JSON response, or ``None`` if absent.

        A missing or null ``data`` object yields ``None`` instead of raising
        ``AttributeError``.
        """
        payload = json.loads(response.text)
        return (payload.get('data') or {}).get('content')

    def parse(self, response):
        """Request the list of line groups, tagged with a shared timestamp.

        The timestamp is carried in ``meta`` so every follow-up request of
        this crawl reuses the same ``timestamp`` query value.
        """
        t = int(time.time() * 10000)
        url = (
            'http://elines.coscoshipping.com/ebbase/public/general/'
            'findLineGroup?timestamp={}'.format(t)
        )
        yield scrapy.Request(
            url=url, callback=self.parse_zhou_line, meta={'t': t}
        )

    @inline_requests
    def parse_zhou_line(self, response):
        """Walk line groups -> lines -> vessels and yield one dict per vessel.

        Each yielded item holds the vessel's summary fields plus a
        ``vessel_info`` list containing a single detail dict.  Requests
        yielded inside this generator carry no callback on purpose:
        ``inline_requests`` sends the response back into the generator.
        """
        t = response.meta['t']

        # Line groups (continents).
        for line_group in self._content(response) or []:
            group_code = line_group.get('code')
            url = (
                'http://elines.coscoshipping.com/ebbase/public/general/'
                'findLines?lineCode={}&timestamp={}'.format(group_code, t)
            )
            lines_response = yield scrapy.Request(url=url, dont_filter=True)

            # Individual service lines inside the group.
            for line_detail in self._content(lines_response) or []:
                code = line_detail.get('code')
                url = (
                    'http://elines.coscoshipping.com/ebbase/public/'
                    'vesselParticulars/search?pageSize=3'
                    '&pageNum=1&state=lines&code={}&timestamp={}'.format(
                        code, t
                    )
                )
                vessels_response = yield scrapy.Request(
                    url=url, dont_filter=True
                )

                # Vessels serving the line.
                for voyage in self._content(vessels_response) or []:
                    voyage_line = {
                        'line_code': voyage.get('serviceLoopAbbrv'),
                        'voyage_code': voyage.get('vesselCode'),
                        'vessel_Name': voyage.get('vesselName'),
                        'lloyds_number': voyage.get('lloydsNumber'),
                        'flag': voyage.get('flagCountry'),
                        'built_year': voyage.get('yearBuilt'),
                        'callSign': voyage.get('callSign'),
                    }

                    url = (
                        'http://elines.coscoshipping.com/ebbase/public/'
                        'general/findVesselByCode?code={}&timestamp={}'.format(
                            voyage_line['voyage_code'], t
                        )
                    )
                    vessel_response = yield scrapy.Request(
                        url=url, dont_filter=True
                    )

                    # An empty detail payload still produces an item, with
                    # every detail field set to None.
                    content = self._content(vessel_response) or {}
                    vessel_detail = {
                        item_key: content.get(api_key)
                        for item_key, api_key in self._VESSEL_DETAIL_FIELDS
                    }
                    voyage_line['vessel_info'] = [vessel_detail]
                    yield voyage_line
scrapy inline_requests 多次請求連用