scrapy inline_requests 多次请求连用

Posted by linpd

tags:

篇首语:本文由小常识网(cha138.com)小编为大家整理,主要介绍了scrapy inline_requests 多次请求连用相关的知识,希望对你有一定的参考价值。

# -*- coding: utf-8 -*-
import json
import time
import scrapy
from inline_requests import inline_requests


class CoscoSpider(scrapy.Spider):
    """Spider for COSCO shipping vessel particulars.

    Uses ``@inline_requests`` so several sequential JSON API calls
    (continent group -> lines per continent -> vessels per line ->
    vessel detail) can be chained inside a single callback with
    ``response = yield scrapy.Request(...)``.
    """

    name = "cosco"
    allowed_domains = ["elines.coscoshipping.com"]
    start_urls = ["http://elines.coscoshipping.com/ebusiness/vesselParticulars/vesselParticularsByServices"]

    def parse(self, response):
        """Kick off the API crawl with a timestamp cache-buster."""
        # Timestamp appended to every API URL, presumably to defeat caching.
        t = int(time.time() * 10000)
        url = "http://elines.coscoshipping.com/ebbase/public/general/findLineGroup?timestamp={}".format(t)
        yield scrapy.Request(url=url, callback=self.parse_zhou_line, meta={"t": t})

    # Parse continent ("zhou") info, then drill down line -> vessel -> detail.
    @inline_requests
    def parse_zhou_line(self, response):
        """Walk continent -> line -> vessel, yielding one dict per vessel."""
        t = response.meta["t"]
        data_line = json.loads(response.text)
        # Continent list.
        zhou_line_all = data_line.get("data").get("content")
        for zhou_line in zhou_line_all:
            # Continent code (the "description"/continent name is not used).
            line_code = zhou_line.get("code")

            url = "http://elines.coscoshipping.com/ebbase/public/general/findLines?lineCode={}&timestamp={}".format(
                line_code, t)
            response = yield scrapy.Request(method="GET", url=url, dont_filter=True)
            zhou_de = json.loads(response.text)
            zhou_line_detail = zhou_de.get("data").get("content")
            for line_detail in zhou_line_detail:
                # Fetch the vessels running on this shipping line.
                code = line_detail.get("code")
                url = ("http://elines.coscoshipping.com/ebbase/public/vesselParticulars/search"
                       "?pageSize=3&pageNum=1&state=lines&code={}&timestamp={}").format(code, t)
                response = yield scrapy.Request(url=url, dont_filter=True)
                voyage_de = json.loads(response.text)
                voyage_line_detail = voyage_de.get("data").get("content")
                if voyage_line_detail:
                    for voyage in voyage_line_detail:
                        # One output item per vessel on this line.
                        voyage_line = dict()
                        voyage_line["line_code"] = voyage.get("serviceLoopAbbrv")
                        voyage_line["voyage_code"] = voyage.get("vesselCode")
                        voyage_line["vessel_Name"] = voyage.get("vesselName")
                        voyage_line["lloyds_number"] = voyage.get("lloydsNumber")
                        voyage_line["flag"] = voyage.get("flagCountry")
                        voyage_line["built_year"] = voyage.get("yearBuilt")
                        voyage_line["callSign"] = voyage.get("callSign")
                        url = "http://elines.coscoshipping.com/ebbase/public/general/findVesselByCode?code={}&timestamp={}".format(
                            voyage_line["voyage_code"], t)
                        response = yield scrapy.Request(method="GET", url=url, dont_filter=True)
                        # Vessel detail payload; hoist the repeated
                        # data -> content lookup instead of re-walking it
                        # for every field.
                        vessel_data = json.loads(response.text)
                        content = vessel_data.get("data").get("content")
                        vessel_detail = {
                            # Operator (ship manager).
                            "op_name": content.get("optName"),
                            # Registered owner.
                            "owner": content.get("owner"),
                            # Port of registry.
                            "registry_port": content.get("registryPort"),
                            # Classification society.
                            "class_society": content.get("classSociety"),
                            # Gross tonnage.
                            "weight": content.get("grossTonnage"),
                            # Net tonnage.
                            "net_tonnage": content.get("netTonnage"),
                            # TEU capacity.
                            "to_teuCap": content.get("totTeuCap"),
                            # Maximum speed.
                            "max_speed": content.get("maxSpeed"),
                        }
                        # Kept as a one-element list to preserve the
                        # original item schema.
                        voyage_line["vessel_info"] = [vessel_detail]
                        yield voyage_line

 

以上是关于scrapy inline_requests 多次请求连用的主要内容,如果未能解决你的问题,请参考以下文章

python实现scrapy定时执行爬虫

Scrapy爬虫库使用初体验

scrapy的启动及正则表达式

scrapy爬虫天猫笔记本电脑销量前60的商品

四十二 Python分布式爬虫打造搜索引擎Scrapy精讲—elasticsearch(搜索引擎)的mget和bulk批量操作

Scrapy爬虫:scrapy架构及原理