python novel_spider.py
Posted
tags:
篇首语:本文由小常识网(cha138.com)小编为大家整理,主要介绍了python novel_spider.py相关的知识,希望对你有一定的参考价值。
# -*- coding: utf-8 -*-
import re
import scrapy
from scrapy.utils.response import get_base_url
from pyquery import PyQuery
from biquge.items import BiqugeItem
class BiqugeSpider(scrapy.Spider):
    """Crawl one book on biquge.com.tw and yield a ``BiqugeItem`` per linked page.

    The spider requests the page at ``base_url + book_id + '/'``, follows every
    link matched by ``#list dl dd a`` (presumably the chapter list), and
    extracts a title and text content from each followed page.
    """

    name = 'biquge'
    allowed_domains = ["biquge.com.tw"]
    base_url = 'http://www.biquge.com.tw/'
    # Path segment identifying the book on the site.
    book_id = '0_681'

    def start_requests(self):
        """Yield the single request for the book's index page."""
        url = self.base_url + self.book_id.strip('/') + '/'
        yield scrapy.Request(url, callback=self.parse)

    def parse(self, response):
        """Yield one request per link found under ``#list dl dd a``.

        Each request carries the link's position in the list as
        ``meta['index']`` so the original ordering can be restored later
        (responses may arrive out of order).
        """
        doc = PyQuery(response.text)
        for index, anchor in enumerate(doc('#list dl dd a')):
            href = anchor.get('href')
            if not href:
                # An anchor without an href cannot be followed; skip it
                # instead of failing the whole callback with a TypeError.
                continue
            # Resolve against the page URL rather than concatenating with
            # base_url: this handles both root-relative ("/0_681/1.html") and
            # page-relative ("1.html") hrefs without producing "//" or a
            # wrong path.
            url = response.urljoin(href)
            yield scrapy.Request(
                url,
                callback=self.parse_detail,
                meta={'index': index},
            )

    def parse_detail(self, response):
        """Build and yield a ``BiqugeItem`` from a followed page.

        ``title`` is the first text node of ``.bookname > h1`` (``None`` when
        absent); ``content`` is the concatenation of the direct text nodes of
        the element with ``id="content"``.
        """
        item = BiqugeItem()
        item['book_id'] = self.book_id
        item['index'] = response.meta['index']
        item['title'] = (
            response.css('.bookname').xpath('./h1/text()').extract_first()
        )
        item['content'] = ''.join(
            response.xpath('//*[@id="content"]/text()').extract()
        )
        yield item
以上是关于python novel_spider.py的主要内容,如果未能解决你的问题,请参考以下文章:
001--python全栈--基础知识--python安装
Python代写,Python作业代写,代写Python,代做Python
Python开发
Python,python,python
Python 介绍
Python学习之认识python