Is this its limit?
Posted by rsapaper
tags:
篇首语:本文由小常识网(cha138.com)小编为大家整理,主要介绍了Is this its limit?相关的知识,希望对你有一定的参考价值。
"""Save the first Baidu Map search-result page for each target place as HTML.

Workflow (all driven at import time, as in the original script):

1. List the HTML files already saved in ``dir_html`` so they are not
   requested again.
2. Read the candidate city names from ``省会城市.txt`` and pick a slice of them.
3. Read the place rows from the Excel workbook and group them by
   city / district / type / reduced name into ``target_dic``.
4. Start one thread per city; each thread opens Baidu Map in Firefox for every
   place that has not been saved yet and writes the page source to disk.
"""
import sys
import os

curPath = os.path.abspath(os.path.dirname(__file__))
rootPath = os.path.split(curPath)[0]
# The parent directory is put on sys.path before the remaining imports run.
sys.path.append(rootPath)

from selenium import webdriver
from selenium.webdriver.chrome.options import Options
# from selenium.webdriver.firefox.options import Options
import time
from time import sleep
import math
import random
import threading
import urllib.parse
import xlrd
import sqlite3

# Time budget, in seconds, checked by chk_time().
MAX_TIME = 600


def py_stop_update_db():
    """Placeholder for marking this script as stopped in a status database.

    The implementation is disabled; the function currently does nothing.
    """
    # db = 'py_bdspider_status.db'
    # db = '%s\\%s' % (curPath, db)
    # conn = sqlite3.connect(db)
    # pyname = os.path.basename(__file__).split('.py')[0]
    # sql_ = '%s%s%s' % ('UPDATE pystatus_table SET pystatus =2 WHERE pyname="', pyname, '"')
    # print(sql_)
    # conn.execute(sql_)
    # conn.commit()
    # conn.close()
    return


def chk_time(browser, start_time):
    """Shut the browser down when more than MAX_TIME seconds have elapsed.

    Args:
        browser: Selenium WebDriver instance to clean up on timeout.
        start_time: ``time.time()`` value the elapsed time is measured from.

    Returns:
        True when the budget was exceeded and the browser has been quit
        (the caller must stop using it), False otherwise.
    """
    if time.time() - start_time <= MAX_TIME:
        return False
    py_stop_update_db()
    browser.delete_all_cookies()
    browser.quit()
    return True


dir_html = 'baidu_map_html_firstpage_pc_not_shop'
filepath = '%s\\%s' % (curPath, dir_html)

# Search inputs whose result page is already on disk. Saved files are named
# '<input>.html'; anything from the first '&' onwards is ignored.
requested_file_list = []
pathDir = os.listdir(filepath)
for allDir in pathDir:
    requested_file = allDir.split('&')[0].split('.html')[0]
    requested_file_list.append(requested_file)

# Separators after which a place name is cut off by extract_name().
# NOTE(review): the first two entries are identical here; one of them may
# originally have been a full-width parenthesis - confirm against the data.
tag_jmtool_list = ['(', '(', '-']


def extract_name(name_):
    """Return name_ truncated at the first occurrence of each separator."""
    for i in tag_jmtool_list:
        name_ = name_.split(i)[0]
    return name_


# City names are taken from every third line of the text file, with spaces and
# the newline removed and the suffix '市' appended.
pcity_list = []
pcity_file = '%s\\%s' % (curPath, '省会城市.txt')
with open(pcity_file, 'r', encoding='utf-8') as pf:
    c_ = 0
    for i in pf:
        c_ += 1
        if c_ == 3:
            c_ = 0
            pcity_list.append(i.replace(' ', '').replace('\n', '') + '市')
pcity_sorted_list = sorted(pcity_list)

# Place type to crawl in this run. Earlier runs used ['住宅小区', '写字楼'],
# ['住宅小区'] and ['专科医院'].
target_type_list = ['商场']
requested_type_counter = 0
# Figures noted by the author (presumably row counts per type):
# 商场 4705, 酒店 24915, 专科医院 2513, 商圈 334
target_dic = {}

# Cities to crawl in this run. Earlier runs used explicit lists such as
# ['北京市', '上海市', '深圳市', '广州市'] or other slices, e.g.
# pcity_sorted_list[7:14].
target_city_list = pcity_sorted_list[21:28]

file_name = 'JMTool任务_csv_py_wholeCSV'
FEXCEL = '%s\\%s%s' % (curPath, file_name, '.xlsx')
data = xlrd.open_workbook(FEXCEL)
table = data.sheets()[0]
nrows, ncols = table.nrows, table.ncols
res_dic, counter_ = {}, 0

# Build target_dic[city][district][type][reduced name] ->
#   {'name_reduction_list': [full names], 'history_list': [raw rows]}
for i in range(0, nrows):
    row = table.row_values(i)
    # Every row is expected to have exactly these ten columns; a different
    # width raises ValueError here.
    (dbid, area_code, ref_area_type_code, city, district, address,
     city_street, name_, emp_, emp_1) = row
    if city not in target_city_list:
        continue
    type_ = ref_area_type_code
    if type_ not in target_type_list:
        continue
    name_ = name_.replace('?', '')
    name_reduction = extract_name(name_)
    if len(name_reduction) < 3:
        # A very short reduced name is not used; fall back to the full name.
        name_reduction = name_
    type_bucket = (
        target_dic.setdefault(city, {})
        .setdefault(district, {})
        .setdefault(type_, {})
    )
    # The entry is created only when the reduced name is new for this type.
    # (The original tested the district-level dict instead, which reset the
    # lists on every row and kept only the last row of each group.)
    entry = type_bucket.setdefault(
        name_reduction, {'name_reduction_list': [], 'history_list': []}
    )
    entry['name_reduction_list'].append(name_)
    entry['history_list'].append(row)

write_res_html_dir = '%s\\%s\\' % (curPath, dir_html)


def write_res_html(browser, dir_=write_res_html_dir):
    """Write the browser's current page source to '<dir_><search input>.html'.

    The search input is recovered from the part of the unquoted current URL
    between '&wd=' and '/?'. The raw current URL is stored as an HTML comment
    in front of the page source.

    Args:
        browser: Selenium WebDriver instance showing the result page.
        dir_: Output directory prefix, including the trailing separator.
    """
    close_alert(browser)
    current_url_ = urllib.parse.unquote(browser.current_url)
    try:
        input_ = current_url_.split('&wd=')[1].split('/?')[0]
    except IndexError:
        # The URL does not contain '&wd=', so there is nothing to name the
        # file after.
        print('Exception-', __file__, sys._getframe().f_lineno, current_url_)
        return
    current_url_ = '%s%s%s' % ('<!--', browser.current_url, '-->')
    page_source = '%s%s' % (current_url_, browser.page_source)
    # localtime_ = time.strftime("%y%m%d%H%M%S", time.localtime())
    # file_name = '%s%s%s%s' % (dir_, input_, localtime_, '.html')
    file_name = '%s%s%s' % (dir_, input_, '.html')
    # The context manager guarantees the file is flushed and closed.
    with open(file_name, 'w', encoding='utf-8') as fo:
        fo.write(page_source)
    print(os.path.basename(__file__), 'OK-writed-', sys._getframe().f_lineno, '')


def gen_random_letter():
    """Return a random lowercase ASCII letter."""
    return chr(random.randint(97, 122))


def gen_random_num():
    """Return a random integer between 0 and 10, both inclusive."""
    # NOTE(review): 10 is a possible result, so this is not always one digit.
    return random.randint(0, 10)


def gen_sougo_pid():
    """Return a random id built from 16 pieces: letters at 1, 3, 4, 15, numbers elsewhere."""
    res_ = ''
    for i in range(1, 17, 1):
        if i in [1, 3, 4, 15]:
            res_ = '%s%s' % (res_, gen_random_letter())
        else:
            res_ = '%s%s' % (res_, gen_random_num())
    return res_


def close_alert(browser, attitude='accept'):
    """Placeholder for dismissing browser alerts; currently does nothing."""
    return


# executable_path_str = '%s\\%s' % (curPath, 'geckodriver.exe')
# browser = webdriver.Firefox(executable_path=executable_path_str)


def mobile_mobile_pages_html(browser, input_):
    """Open the Baidu Map search for input_ and save the resulting page.

    Args:
        browser: Selenium WebDriver instance to drive.
        input_: Search text appended to the Baidu Map query URL.
    """
    start_time = time.time()
    # NOTE(review): start_time is taken immediately before the check, so the
    # budget cannot be exceeded here; the guard only protects against using a
    # browser that chk_time() has already quit.
    if chk_time(browser, start_time):
        return
    sleep(3)
    url_ = '%s%s' % ('http://map.baidu.com/?s=s%26wd%3D', input_)
    browser.get(url_)
    write_res_html(browser)


class MyThread(threading.Thread):
    """Thread that calls ``func(args)`` with a single argument object."""

    def __init__(self, func, args, name):
        threading.Thread.__init__(self)
        self.name, self.func, self.args = name, func, args

    def run(self):
        # args is passed through as one positional argument, not unpacked.
        self.func(self.args)


def thread_city(city):
    """Request and save the result page for every pending place of one city.

    Places whose search input is already in requested_file_list are only
    counted. Every other place gets its own Firefox instance, which is always
    quit afterwards so browser processes do not pile up.

    Args:
        city: Key into target_dic.
    """
    global requested_type_counter
    for district in target_dic[city]:
        for type_ in target_dic[city][district]:
            for name_reduction in target_dic[city][district][type_]:
                entry = target_dic[city][district][type_][name_reduction]
                for name_ in entry['name_reduction_list']:
                    input_ = '%s%s%s' % (city, district, name_)
                    if input_ in requested_file_list:
                        requested_type_counter += 1
                        print('requested_type_counter=', requested_type_counter, input_)
                        continue
                    # executable_path_str = '%s\\%s' % (curPath, 'chromedriver.exe')
                    # browser = webdriver.Chrome(executable_path=executable_path_str)
                    executable_path_str = '%s\\%s' % (curPath, 'geckodriver.exe')
                    browser = webdriver.Firefox(executable_path=executable_path_str)
                    try:
                        mobile_mobile_pages_html(browser, input_)
                    finally:
                        browser.quit()


# One worker thread per city; wait for all of them before exiting.
threads_list = []
for city in target_dic:
    thread_instance = MyThread(thread_city, (city), thread_city.__name__)
    threads_list.append(thread_instance)
for t in threads_list:
    # Set the attribute; assigning to t.setDaemon would only overwrite the
    # method and leave the flag untouched.
    t.daemon = False
    t.start()
for t in threads_list:
    t.join()
以上是关于Is this its limit?的主要内容,如果未能解决你的问题,请参考以下文章
Celebrate it, this is my first time on this blog.
It is possible that this issue is resolved by uninstalling an existing version of the apk if it is present, and then re-installing.
(转)mac 搭建基于RTMP的本地Nginx服务器报错homebrew/nginx was deprecated. This tap is now empty as all its form(代码片
It is possible that this issue is resolved by uninstalling an existi
Installation failed with message...It is possible that this issue is resolved by uninstalling an exi