python novel_spider.py

Posted

tags:

篇首语：本文由小常识网（cha138.com）小编为大家整理，主要介绍了 python novel_spider.py 相关的知识，希望对你有一定的参考价值。

# -*- coding: utf-8 -*-

import re
import scrapy
from scrapy.utils.response import get_base_url

from pyquery import PyQuery

from biquge.items import BiqugeItem

class BiqugeSpider(scrapy.Spider):
    """Crawl a single book on biquge.com.tw and emit one item per chapter.

    The spider requests the book's index page, follows every chapter link
    found under ``#list dl dd a``, and yields a ``BiqugeItem`` for each
    chapter page it downloads.
    """

    name = 'biquge'
    allowed_domains = ["biquge.com.tw"]
    base_url = 'http://www.biquge.com.tw/'
    # Path segment appended to ``base_url`` to form the book's index URL.
    book_id = '0_681'

    def start_requests(self):
        """Yield the request for the book's index page."""
        url = self.base_url + self.book_id.strip('/') + '/'
        yield scrapy.Request(url, callback=self.parse)

    def parse(self, response):
        """Yield one request per chapter link found on the index page.

        The position of each link in the index is passed along as
        ``meta['index']`` so that ``parse_detail`` can store it on the item.
        """
        jq = PyQuery(response.text)
        for index, anchor in enumerate(jq('#list dl dd a')):
            href = anchor.get('href')
            if not href:
                # Anchor without a target: nothing to follow. The index of
                # the remaining links is left untouched.
                continue
            # Resolve against the page URL instead of concatenating with
            # base_url: root-relative hrefs ("/0_681/1.html") would otherwise
            # produce a double slash, and absolute hrefs a malformed URL.
            url = response.urljoin(href)
            yield scrapy.Request(
                url,
                callback=self.parse_detail,
                meta={'index': index},
            )

    def parse_detail(self, response):
        """Build and yield the ``BiqugeItem`` for one chapter page.

        ``title`` is the first text node of ``.bookname h1`` (``None`` when
        absent); ``content`` is the concatenation of the direct text nodes
        of the element with id ``content``.
        """
        item = BiqugeItem()

        item['book_id'] = self.book_id
        item['index'] = response.meta['index']
        item['title'] = (
            response.css('.bookname').xpath('./h1/text()').extract_first()
        )
        item['content'] = ''.join(
            response.xpath('//*[@id="content"]/text()').extract()
        )
        yield item

以上是关于 python novel_spider.py 的主要内容，如果未能解决你的问题，请参考以下文章：

001--python全栈--基础知识--python安装

Python代写,Python作业代写,代写Python,代做Python

Python开发

Python,python,python

Python 介绍

Python学习之认识python