111111111111111
Posted jinmubaobao
tags:
篇首语:本文由小常识网(cha138.com)小编为大家整理,主要介绍了111111111111111相关的知识,希望对你有一定的参考价值。
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import requests
from lxml import etree
from selenium import webdriver
from pymongo import MongoClient
from set_order_date00 import set_data
from add_order_to_mssql01 import order_to_mssql
from add_sales_data02 import deal_eb_vendor
# Connect to the MongoDB server; scraped orders are stored in the
# ``gw_scrapy`` database.
client = MongoClient(host='192.168.6.232')
db = client.gw_scrapy

# Login page that the Selenium-driven browser opens to obtain session cookies.
LOGIN_URI = 'https://login.esgcc.com.cn/mallLogin'

# Request headers sent with the plain ``requests`` calls.
headers = {
    "User-Agent": "Mozilla/5.0 (iPod; U; CPU iPhone OS 2_1 like Mac OS X; ja-jp) AppleWebKit/525.18.1 (Khtml, like Gecko) Version/3.1.1 Mobile/5F137 Safari/525.20"
}
def get_login_cookies():
    """Log in through a Chrome browser and return the session cookies.

    Opens ``LOGIN_URI`` in a Selenium-controlled Chrome instance, submits the
    login form, and checks that exactly one ``hiddenUserIdInTopBar`` element
    is present afterwards as the sign of a successful login.

    Returns:
        dict: cookie name -> cookie value, suitable for the ``cookies=``
        argument of ``requests``.

    Raises:
        Exception: if the ``hiddenUserIdInTopBar`` element is not found
            exactly once after submitting the form.
    """
    options = webdriver.ChromeOptions()
    options.add_argument('lang=zh_CN.UTF-8')
    options.add_argument("--proxy-server=socks5://192.168.6.168:1024")
    options.add_argument('user-agent="Mozilla/5.0 (iPod; U; CPU iPhone OS 2_1 like Mac OS X; ja-jp) AppleWebKit/525.18.1 (KHTML, like Gecko) Version/3.1.1 Mobile/5F137 Safari/525.20"')

    chrome_driver = webdriver.Chrome(
        executable_path=r"C:\Users\Administrator\Desktop\国网订单抓取\chromedriver",
        chrome_options=options,
    )
    # Always shut the browser down, including when login fails or a lookup
    # raises; otherwise every failed attempt leaves a Chrome process behind.
    try:
        chrome_driver.get(LOGIN_URI)
        # Element lookups below wait up to 60 seconds for the element to appear.
        chrome_driver.implicitly_wait(60)
        # NOTE(review): credentials are hard-coded here; consider moving them
        # to configuration or environment variables.
        chrome_driver.find_element_by_id('username').send_keys('HLJ_DSCX')
        chrome_driver.find_element_by_id('password').send_keys('1234qwer')
        chrome_driver.find_element_by_class_name('login_btn').click()

        userid_els = chrome_driver.find_elements_by_id('hiddenUserIdInTopBar')
        if len(userid_els) != 1:
            raise Exception('登陆失败')
        cookies_list = chrome_driver.get_cookies()
    finally:
        chrome_driver.quit()

    return {cookie["name"]: cookie["value"] for cookie in cookies_list}
# URL template of the paginated "latest orders" listing.
_ORDERS_URL = "http://b.esgcc.com.cn/showIndex/getLastestOrdersInner.htm?pgn={page}"
# HTTP proxy used for the listing requests.
_ORDERS_PROXIES = {'http': '192.168.6.168:1025'}
# Seconds to wait for a response before giving up, so a stalled connection
# cannot hang the scraper indefinitely.
_ORDERS_TIMEOUT = 60


def _fetch_order_page(page, cookies):
    """Download one listing page and return it as a parsed lxml HTML tree."""
    ret = requests.get(
        _ORDERS_URL.format(page=page),
        cookies=cookies,
        headers=headers,
        proxies=_ORDERS_PROXIES,
        timeout=_ORDERS_TIMEOUT,
    )
    return etree.HTML(ret.text)


def get_data(cookies):
    """Scrape every page of the order listing and store new orders in MongoDB.

    The first page is fetched to read the page count from the pager, then
    every page is walked row by row. A row is inserted into ``db.order`` only
    when no document with the same ``order_id`` exists yet.

    Args:
        cookies: cookie name -> value mapping of a logged-in session.

    Raises:
        Exception: if a page contains no rows in the ``info_open_table`` table.
        IndexError: if the pager has fewer than two links or a row has fewer
            text cells than expected.
    """
    first_tree = _fetch_order_page(1, cookies)
    # Presumably the second-to-last pager link carries the last page number
    # (the last one being a "next" link) — TODO confirm against the page.
    page_num = first_tree.xpath('//div[@class="page_wrap"]//a/text()')[-2]
    print(page_num)

    for page in range(1, int(page_num) + 1):
        # Page 1 was already downloaded to read the pager; reuse it.
        tree = first_tree if page == 1 else _fetch_order_page(page, cookies)
        print(_ORDERS_URL.format(page=page))

        rows = tree.xpath('//table[@class="info_open_table"]/tbody/tr')
        if len(rows) == 0:
            raise Exception('页面未发现数据元素')

        for row in rows:
            # Query relative to the row itself instead of re-scanning the
            # whole document by index for every row.
            cells = row.xpath('./td/text()')
            order = {
                'order_id': cells[1],
                'date': '',
                'user_name': cells[3],
                'customer': cells[4],
                'supplier': cells[6],
                'goods_name': cells[5],
                'amount': cells[2],
            }
            if db.order.find_one({'order_id': order['order_id']}) is None:
                db.order.insert_one(order)
                print("存储成功")
if __name__ == "__main__":
    import time
    import traceback

    # Seconds to pause after a failed cycle so a persistent error (site down,
    # proxy unreachable, bad login) does not relaunch Chrome in a tight loop.
    RETRY_DELAY_SECONDS = 10

    # Run scrape-and-process cycles forever; one failed cycle must not stop
    # the service, so any exception is reported and the loop carries on.
    while True:
        try:
            print("开始")
            cookies = get_login_cookies()
            print(cookies)
            get_data(cookies)
            # Post-process the scraped orders.
            set_data()
            order_to_mssql()
            deal_eb_vendor()
            print("结束")
        except Exception as e:
            print(e)
            # Also emit the traceback; the message alone rarely identifies
            # which step failed.
            traceback.print_exc()
            time.sleep(RETRY_DELAY_SECONDS)
以上是关于111111111111111的主要内容,如果未能解决你的问题,请参考以下文章