世界500强
Posted yuanjia8888
tags:
篇首语:本文由小常识网(cha138.com)小编为大家整理,主要介绍了世界500强相关的知识,希望对你有一定的参考价值。
import psycopg2
import json
from news_project.config.sql_log import log
from news_project.middlewares import Deal_Content
class NewsProjectPipeline(object):
    """Item pipeline that writes news items into ``bjzs_big_data.baoji_news``.

    The hook names (``open_spider`` / ``process_item`` / ``close_spider``)
    presumably follow Scrapy's item-pipeline interface -- confirm against the
    project's settings.  A single PostgreSQL connection is opened lazily,
    reused for every item, and closed when the spider finishes.
    """

    # Column order shared by both INSERT statements; ``association_id`` is
    # appended only when the item carries a truthy value for it.
    _INSERT_SQL = (
        "INSERT INTO bjzs_big_data.baoji_news"
        "(type_cn,source,level2,level1,event_time,title,url,content,lable,type_no)"
        " VALUES (%s,%s,%s,%s,%s,%s,%s,%s,%s,%s)"
    )
    _INSERT_WITH_ASSOCIATION_SQL = (
        "INSERT INTO bjzs_big_data.baoji_news"
        "(type_cn,source,level2,level1,event_time,title,url,content,lable,type_no,association_id)"
        " VALUES (%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s)"
    )

    def _ensure_connection(self):
        """Return an open connection, (re)connecting when none is usable.

        Connection parameters are read from a fresh ``log()`` object, which
        is also kept on ``self.l`` as the original code did.
        """
        conn = getattr(self, "conn", None)
        if conn is None or conn.closed:
            settings = self.l = log()
            self.conn = psycopg2.connect(
                database=settings.database,
                user=settings.user,
                password=settings.password,
                host=settings.host,
                port=settings.port,
            )
        return self.conn

    @staticmethod
    def _fill_defaults(item):
        """Return a copy of *item* with blanks set to None and defaults applied.

        Values equal to ``""`` (or already ``None``) become ``None``; a missing
        ``type_cn`` then falls back to ``"行业新闻"`` and a missing ``type_no``
        to ``16``.  Raises ``KeyError`` if either key is absent.
        """
        cleaned = {
            key: (None if value is None or value == "" else value)
            for key, value in item.items()
        }
        if cleaned['type_cn'] is None:
            cleaned['type_cn'] = "行业新闻"
        if cleaned['type_no'] is None:
            cleaned['type_no'] = 16
        return cleaned

    def open_spider(self, spider):
        """Open the database connection when the spider starts."""
        self._ensure_connection()

    def process_item(self, item, spider):
        """Normalize *item* and insert it as one row.

        Returns the normalized item as a plain ``dict``, or ``0`` when the
        item has no content (nothing is written in that case).
        Raises ``KeyError`` when a required field is missing; database errors
        propagate after the transaction has been rolled back.
        """
        item = dict(item)
        # Normalize the time format (helper semantics live in Deal_Content).
        item['time'] = Deal_Content().handleTime(item['time'], item['title_url'])
        print("item*************************************///////////////////////", item['time'])

        item = self._fill_defaults(item)
        if item['content'] is None:
            # NOTE(review): 0 is kept for backward compatibility; if this is a
            # Scrapy pipeline, raising DropItem would be the conventional way
            # to discard the item -- confirm with downstream pipelines.
            return 0

        params = [
            item['type_cn'], item['news'], item['id'], item['pid'],
            item['time'], item['title'], item['title_url'],
            item['content'], item['tags'], item['type_no'],
        ]
        # Two storage shapes: with or without an association id.
        association_id = item.get('association_id')
        if association_id:
            sql = self._INSERT_WITH_ASSOCIATION_SQL
            params.append(association_id)
        else:
            sql = self._INSERT_SQL

        conn = self._ensure_connection()
        # ``with conn`` commits on success and rolls back on error without
        # closing the connection, so a failed insert cannot poison later items.
        with conn:
            with conn.cursor() as cur:
                cur.execute(sql, tuple(params))
        return item

    def close_spider(self, spider):
        """Close the database connection, if one was ever opened."""
        conn = getattr(self, "conn", None)
        if conn is not None:
            conn.close()
以上是关于世界500强的主要内容,如果未能解决你的问题,请参考以下文章