Chaining multiple requests with scrapy inline_requests
Posted by linpd
# -*- coding: utf-8 -*-
import json
import time

import scrapy
from inline_requests import inline_requests


class CoscoSpider(scrapy.Spider):
    name = 'cosco'
    allowed_domains = ['elines.coscoshipping.com']
    start_urls = ['http://elines.coscoshipping.com/ebusiness/vesselParticulars/vesselParticularsByServices']

    def parse(self, response):
        t = int(time.time() * 10000)
        url = 'http://elines.coscoshipping.com/ebbase/public/general/findLineGroup?timestamp={}'.format(t)
        yield scrapy.Request(url=url, callback=self.parse_zhou_line, meta={'t': t})

    # Parse continent-level route information
    @inline_requests
    def parse_zhou_line(self, response):
        t = response.meta['t']
        data_line = json.loads(response.text)
        # Get the list of continents
        zhou_line_all = data_line.get('data').get('content')
        for zhou_line in zhou_line_all:
            # Continent name
            zhou_name = zhou_line.get('description')
            # Continent code
            line_code = zhou_line.get('code')
            url = ('http://elines.coscoshipping.com/ebbase/public/general/findLines'
                   '?lineCode={}&timestamp={}'.format(line_code, t))
            response = yield scrapy.Request(method='GET', url=url, dont_filter=True)
            zhou_de = json.loads(response.text)
            zhou_line_detail = zhou_de.get('data').get('content')
            for line_detail in zhou_line_detail:
                # Look up the services (sub-routes) under this continent
                code = line_detail.get('code')
                url = ('http://elines.coscoshipping.com/ebbase/public/vesselParticulars/search?pageSize=3'
                       '&pageNum=1&state=lines&code={}&timestamp={}'.format(code, t))
                response = yield scrapy.Request(url=url, dont_filter=True)
                voyage_de = json.loads(response.text)
                voyage_line_detail = voyage_de.get('data').get('content')
                if voyage_line_detail:
                    for voyage in voyage_line_detail:
                        # Iterate over the service list and collect route details
                        voyage_line = dict()
                        voyage_line['line_code'] = voyage.get('serviceLoopAbbrv')
                        voyage_line['voyage_code'] = voyage.get('vesselCode')
                        voyage_line['vessel_Name'] = voyage.get('vesselName')
                        voyage_line['lloyds_number'] = voyage.get('lloydsNumber')
                        voyage_line['flag'] = voyage.get('flagCountry')
                        voyage_line['built_year'] = voyage.get('yearBuilt')
                        voyage_line['callSign'] = voyage.get('callSign')
                        url = ('http://elines.coscoshipping.com/ebbase/public/general/findVesselByCode'
                               '?code={}&timestamp={}'.format(voyage_line['voyage_code'], t))
                        response = yield scrapy.Request(method='GET', url=url, dont_filter=True)
                        # Empty list to hold the vessel information
                        voyage_line['vessel_info'] = []
                        data = json.loads(response.text)
                        content = data.get('data').get('content')
                        # Operator
                        op_name = content.get('optName')
                        # Owner
                        owner = content.get('owner')
                        # Port of registry
                        registry_port = content.get('registryPort')
                        # Classification society
                        class_society = content.get('classSociety')
                        # Gross tonnage
                        weight = content.get('grossTonnage')
                        # Net tonnage
                        net_tonnage = content.get('netTonnage')
                        # TEU capacity
                        to_teuCap = content.get('totTeuCap')
                        # Maximum speed
                        max_speed = content.get('maxSpeed')
                        vessel_detail = {
                            'op_name': op_name,
                            'owner': owner,
                            'registry_port': registry_port,
                            'class_society': class_society,
                            'weight': weight,
                            'net_tonnage': net_tonnage,
                            'to_teuCap': to_teuCap,
                            'max_speed': max_speed,
                        }
                        voyage_line['vessel_info'].append(vessel_detail)
                        yield voyage_line
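The spider above depends on the inline_requests decorator (from the scrapy-inline-requests package, typically installed with pip install scrapy-inline-requests), which lets a callback yield a Request and receive the matching Response back on the same line, so several dependent requests can be chained inside one method instead of being split across separate callbacks. Below is a minimal sketch of just that pattern, separate from the COSCO spider; the spider name and URL here are placeholders for illustration only.

# -*- coding: utf-8 -*-
# Minimal sketch of the inline_requests pattern (placeholder spider and URLs).
import scrapy
from inline_requests import inline_requests


class ExampleSpider(scrapy.Spider):
    name = 'example'
    start_urls = ['http://example.com/index']

    @inline_requests
    def parse(self, response):
        # Yielding a Request suspends this generator; inline_requests sends the
        # request and resumes the generator with the Response object, so the
        # follow-up request reads like sequential code.
        for href in response.css('a::attr(href)').getall():
            detail = yield scrapy.Request(response.urljoin(href), dont_filter=True)
            # Anything yielded that is not a Request is passed through as output.
            yield {'url': detail.url, 'title': detail.css('title::text').get()}

As in the COSCO spider, dont_filter=True is set on the inline requests so Scrapy's duplicate filter does not silently drop repeated calls to the same endpoint.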