python tci.py
Posted
tags:
篇首语：本文由小常识网（cha138.com）小编为大家整理，主要介绍了 python tci.py 相关的知识，希望对你有一定的参考价值。
#!/usr/bin/env python
# -*- encoding: utf-8 -*-
# Created on 2016-04-28 06:16:32
# Project: TCI_Test
from pyspider.libs.base_handler import *
class Handler(BaseHandler):
    """pyspider handler that walks the TCI Chemicals e-shop catalogue.

    The crawl proceeds category index -> sub-index -> product list ->
    product detail; each detail page yields one result dict containing the
    page URL, the Chinese and English product names and the CAS field.
    """

    # Browser-like request headers, sent with every fetch via crawl_config.
    headers = {
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
        'Accept-Encoding': 'gzip, deflate, sdch',
        'Accept-Language': 'zh-CN,zh;q=0.8',
        'Cache-Control': 'max-age=0',
        'Connection': 'keep-alive',
        'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2272.101 Safari/537.36',
    }

    # Project-wide crawl settings: the headers above plus a timeout of 100.
    crawl_config = {
        'headers': headers,
        'timeout': 100,
    }

    @every(minutes=24 * 60 * 30)  # 24 * 60 * 30 minutes == 30 days
    def on_start(self):
        """Queue the two top-level category index pages."""
        self.crawl('http://www.tcichemicals.com/eshop/zh/cn/category_index/00001/', callback=self.index_page)
        self.crawl('http://www.tcichemicals.com/eshop/zh/cn/category_index/00002/', callback=self.index_page)

    def index_page(self, response):
        """Follow every category link found on a category index page."""
        for link in response.doc('DIV#contents>DIV.section-box>DIV.text>table ul.mark a').items():
            self.crawl(link.attr.href, callback=self.sub_index_page)

    def sub_index_page(self, response):
        """Follow sub-category headings, or treat the page as a product list.

        When the page has no sub-title links, the same response is handed
        directly to ``list_page`` instead of being fetched again.
        """
        sub_titles = list(response.doc('DIV#contents>DIV.section-box>h3.sub-titleA>a').items())
        if not sub_titles:
            self.list_page(response)
            return
        for link in sub_titles:
            self.crawl(link.attr.href, callback=self.list_page)

    def list_page(self, response):
        """Follow every product link on a product list page."""
        for link in response.doc('dl.chem-name dd a').items():
            self.crawl(link.attr.href, callback=self.detail_page)

    def detail_page(self, response):
        """Extract the product names and CAS field from a detail page.

        Returns:
            dict with keys ``url``, ``chn_name``, ``eng_name`` and ``cas``.
            A name stays ``''`` when no matching table row supplies text.
        """
        eng_name = ''
        chn_name = ''
        for row in response.doc('table.syg-tbl tr').items():
            # Read the header cell once per row instead of once per test.
            label = row('th').text()
            # u'' literals keep the substring test unicode-to-unicode on
            # Python 2, where a non-ASCII byte string would be implicitly
            # ASCII-decoded and raise UnicodeDecodeError; on Python 3 the
            # prefix changes nothing.
            # The first non-empty value wins for each name.
            if u'中文' in label and chn_name == '':
                chn_name = row('td').text()
            elif u'英文' in label and eng_name == '':
                eng_name = row('td').text()
        return {
            "url": response.url,
            "chn_name": chn_name,
            "eng_name": eng_name,
            # NOTE(review): the CAS value is picked purely by position (the
            # fourth matching <span>); confirm against the live page layout.
            "cas": response.doc('form#cart DIV.section-box>table.base-tbl td>span').eq(3).text()
        }
以上是关于 python tci.py 的主要内容，如果未能解决你的问题，请参考以下文章：
001--python全栈--基础知识--python安装
Python代写,Python作业代写,代写Python,代做Python
Python开发
Python,python,python
Python 介绍
Python学习之认识python