Is this its limit?

Posted by rsapaper

tags:

篇首语:本文由小常识网(cha138.com)小编为大家整理,主要介绍了Is this its limit?相关的知识,希望对你有一定的参考价值。

 

import sys
import os

# Absolute path of the directory that contains this script.
curPath = os.path.dirname(os.path.abspath(__file__))
# Its parent directory, appended to the module search path.
rootPath = os.path.dirname(curPath)
sys.path += [rootPath]

from selenium import webdriver
from selenium.webdriver.chrome.options import Options
# from selenium.webdriver.firefox.options import Options
import time
from time import sleep
import math
import random
import sys
import threading
import urllib.parse
import xlrd
import sys
import os
import sqlite3

# Time budget in seconds; chk_time() compares elapsed time.time() against it.
MAX_TIME = 10 * 60


def py_stop_update_db():
    """Placeholder for recording that this script has stopped.

    The status write-back is disabled, so the function does nothing and
    returns ``None``.

    The disabled implementation opened the SQLite file
    ``py_bdspider_status.db`` located next to this script and ran
    ``UPDATE pystatus_table SET pystatus = 2`` for the row whose
    ``pyname`` equals this script's base name. It built the statement by
    string formatting; if it is ever re-enabled, pass ``pyname`` as a
    bound parameter instead.
    """
    return None


def chk_time(browser, start_time):
    """Shut the browser down once the time budget has been used up.

    Args:
        browser: Selenium WebDriver instance to tear down on timeout.
        start_time: ``time.time()`` value taken when the work started.

    Returns:
        ``True`` when more than ``MAX_TIME`` seconds have elapsed and the
        browser was quit, ``False`` otherwise. (The function used to
        return ``None`` in both cases; callers that ignore the result are
        unaffected.)
    """
    if time.time() - start_time <= MAX_TIME:
        return False
    py_stop_update_db()
    try:
        browser.delete_all_cookies()
    finally:
        # End the driver session even when clearing cookies fails, so no
        # browser process is left running.
        browser.quit()
    return True


# Folder, next to this script, that holds the saved result pages.
dir_html = 'baidu_map_html_firstpage_pc_not_shop'
filepath = os.path.join(curPath, dir_html)
# Create the folder on a first run so that listing it does not fail.
os.makedirs(filepath, exist_ok=True)

# Search terms that already have a saved page: each saved file name cut at
# the first '&' and at the '.html' suffix.
requested_file_list = []
pathDir = os.listdir(filepath)
for allDir in pathDir:
    requested_file = allDir.split('&')[0].split('.html')[0]
    requested_file_list.append(requested_file)

# Markers at which extract_name() cuts a place name.
# NOTE(review): '(' is listed twice; one of the two was possibly a
# full-width parenthesis before the text was normalised - confirm against
# the original script.
tag_jmtool_list = ['(', '(', '-']


def extract_name(name_):
    """Return ``name_`` truncated at the first marker from ``tag_jmtool_list``.

    The markers are applied one after another, each one cutting the text
    that survived the previous cut. A name without any marker is returned
    unchanged.
    """
    shortened = name_
    for marker in tag_jmtool_list:
        shortened, _sep, _rest = shortened.partition(marker)
    return shortened


# City names read from '省会城市.txt' (stored next to this script).
# Only every third line of the file is used; spaces and the line break are
# removed and the suffix '市' is appended.
pcity_list = []
pcity_file = os.path.join(curPath, '省会城市.txt')
with open(pcity_file, 'r', encoding='utf-8') as pf:
    for line_no, line in enumerate(pf, start=1):
        if line_no % 3 == 0:
            pcity_list.append(line.replace(' ', '').replace('\n', '') + '市')
pcity_sorted_list = sorted(pcity_list)

# Place types to crawl. The original listed several assignments in a row of
# which only the last one took effect; the overwritten alternatives were
# ['住宅小区', '写字楼'] and ['专科医院'].
target_type_list = ['商场']

# Number of search terms skipped because a saved page already exists.
requested_type_counter = 0

# Figures noted by the author (presumably record counts per type - confirm):
# 商场 4705, 酒店 24915, 专科医院 2513, 商圈 334

# Filled while reading the workbook:
# city -> district -> type -> reduced name -> collected rows.
target_dic = {}

# Cities to crawl: a slice of the sorted city list. Overwritten alternatives
# in the original were explicit lists such as
# ['北京市', '上海市', '深圳市', '广州市'], ['北京市', '上海市'],
# ['深圳市', '广州市'], ['深圳市'] and ['北京市']; other choices noted were
# pcity_sorted_list[7:14] and the full pcity_sorted_list.
target_city_list = pcity_sorted_list[21:28]

# target_city_list = [\'杭州市\']
# Base name (without extension) of the task workbook stored next to this
# script.
file_name = 'JMTool任务_csv_py_wholeCSV'

FEXCEL = os.path.join(curPath, file_name + '.xlsx')
# NOTE(review): xlrd 2.x no longer reads .xlsx files, so this presumably
# requires xlrd < 2.0 - confirm against the installed version.
data = xlrd.open_workbook(FEXCEL)
table = data.sheets()[0]
nrows, ncols = table.nrows, table.ncols
res_dic, counter_ = {}, 0
for i in range(nrows):
    l = table.row_values(i)
    # Every row must hold exactly these ten columns; otherwise the
    # unpacking raises ValueError.
    dbid, area_code, ref_area_type_code, city, district, address, city_street, name_, emp_, emp_1 = l
    if city not in target_city_list:
        continue
    type_ = ref_area_type_code
    if type_ not in target_type_list:
        continue
    name_ = name_.replace('?', '')
    name_reduction = extract_name(name_)
    # Fall back to the full name when the cut leaves fewer than 3 characters.
    if len(name_reduction) < 3:
        name_reduction = name_
    by_name = target_dic.setdefault(city, {}).setdefault(district, {}).setdefault(type_, {})
    # Create the entry only once per reduced name. The earlier check looked
    # the name up one level too high (in the district dict), so it always
    # re-created the entry and discarded the rows collected before.
    if name_reduction not in by_name:
        by_name[name_reduction] = {'name_reduction_list': [], 'history_list': []}
    by_name[name_reduction]['name_reduction_list'].append(name_)
    by_name[name_reduction]['history_list'].append(l)

# Output folder for saved pages, with a trailing path separator because
# write_res_html() appends the file name to it directly.
write_res_html_dir = os.path.join(curPath, dir_html, '')


def write_res_html(browser, dir_=write_res_html_dir):
    """Save the page currently shown in ``browser`` as an HTML file.

    The file name is the search term taken from the browser's current URL:
    the URL-decoded text after ``&wd=`` up to an optional ``/?``. The saved
    content is the page source, prefixed with the (undecoded) URL inside an
    HTML comment.

    Args:
        browser: Selenium WebDriver whose current page is saved.
        dir_: Prefix put in front of the file name; expected to end with a
            path separator.

    Returns:
        None. When the URL carries no ``&wd=`` part, a diagnostic line is
        printed and nothing is written.
    """
    close_alert(browser)
    raw_url = browser.current_url
    decoded_url = urllib.parse.unquote(raw_url)
    try:
        input_ = decoded_url.split('&wd=')[1].split('/?')[0]
    except IndexError:
        # No '&wd=' in the URL, so there is no search term to name the file.
        print('Exception-', __file__, sys._getframe().f_lineno, decoded_url)
        return
    page_source = '%s%s%s%s' % ('<!--', raw_url, '-->', browser.page_source)
    file_name = '%s%s%s' % (dir_, input_, '.html')
    # The context manager closes the file; the previous `fo.closed` only
    # read an attribute and never closed it.
    with open(file_name, 'w', encoding='utf-8') as fo:
        fo.write(page_source)
    print(os.path.basename(__file__), 'OK-writed-', sys._getframe().f_lineno, '')


def gen_random_letter():
    """Return one random lowercase ASCII letter, ``'a'`` to ``'z'``."""
    code_point = random.randint(ord('a'), ord('z'))
    return chr(code_point)


def gen_random_num():
    """Return a random single decimal digit as an ``int`` (0 to 9).

    ``random.randint`` includes both end points, so the previous upper
    bound of 10 could produce the two-character value ``10`` and shift the
    fixed letter positions used by ``gen_sougo_pid``.
    """
    return random.randint(0, 9)


def gen_sougo_pid():
    """Build a random identifier from 16 generated pieces.

    Pieces 1, 3, 4 and 15 (counting from 1) come from
    ``gen_random_letter()``; every other piece is the decimal text of
    ``gen_random_num()``.

    Returns:
        The concatenated pieces as a ``str``.
    """
    letter_positions = (1, 3, 4, 15)
    pieces = []
    for position in range(1, 17):
        if position in letter_positions:
            pieces.append(gen_random_letter())
        else:
            pieces.append(str(gen_random_num()))
    return ''.join(pieces)


def close_alert(browser, attitude='accept'):
    """Placeholder for handling a browser alert; currently does nothing.

    Args:
        browser: Selenium WebDriver instance (unused).
        attitude: Intended handling of the alert, ``'accept'`` by default
            (unused).

    Returns:
        None.
    """
    return


# executable_path_str = \'%s\\\\%s\' % (curPath, \'geckodriver.exe\')
# browser = webdriver.Firefox(executable_path=executable_path_str)



def mobile_mobile_pages_html(browser, input_):
    """Open the Baidu Map search URL for ``input_`` and save the page.

    Args:
        browser: Selenium WebDriver used to load the page.
        input_: Search term appended unchanged to the query URL.

    Returns:
        None. The page is written to disk by ``write_res_html``.
    """
    start_time = time.time()
    # NOTE(review): the timer starts on the line above, so this check cannot
    # exceed MAX_TIME at this point and never shuts the browser down.
    # Moving it after the page load would need the caller to stop using the
    # browser afterwards.
    chk_time(browser, start_time)
    # Wait 3 seconds before each request (presumably to throttle the crawl).
    sleep(3)
    url_ = '%s%s' % ('http://map.baidu.com/?s=s%26wd%3D', input_)
    browser.get(url_)
    write_res_html(browser)


class MyThread(threading.Thread):
    """Thread that calls ``func`` with ``args`` as one single argument."""

    def __init__(self, func, args, name):
        """Store the callable, its argument value and the thread name."""
        super().__init__()
        self.func = func
        self.args = args
        self.name = name

    def run(self):
        """Invoke the stored callable; ``args`` is passed as-is, not unpacked."""
        target, payload = self.func, self.args
        target(payload)


def thread_city(city):
    """Fetch and save the map search page for every place name of ``city``.

    Walks ``target_dic[city]`` (district -> type -> reduced name) and builds
    one search term per collected name as city + district + name. Terms
    that already have a saved page are only counted; each of the others is
    fetched in a fresh Firefox instance.

    Args:
        city: Key of ``target_dic`` to process.
    """
    # NOTE(review): this counter is shared by all city threads and updated
    # without a lock, so the printed totals may be slightly off.
    global requested_type_counter
    executable_path_str = os.path.join(curPath, 'geckodriver.exe')
    for district, types in target_dic[city].items():
        for names in types.values():
            for entry in names.values():
                for name_ in entry['name_reduction_list']:
                    input_ = '%s%s%s' % (city, district, name_)
                    if input_ in requested_file_list:
                        requested_type_counter += 1
                        print('requested_type_counter=', requested_type_counter, input_)
                        continue
                    # NOTE(review): the `executable_path` keyword belongs to
                    # the older Selenium API; newer 4.x releases expect a
                    # Service object - confirm the installed version.
                    browser = webdriver.Firefox(executable_path=executable_path_str)
                    try:
                        mobile_mobile_pages_html(browser, input_)
                    finally:
                        # One browser is started per search term; quit it so
                        # the Firefox/geckodriver processes do not pile up.
                        browser.quit()


# One worker thread per city that has at least one matching row.
threads_list = []
for city in target_dic:
    # MyThread.run() hands its `args` value to the function as one single
    # argument, so the city string is passed as-is.
    thread_instance = MyThread(thread_city, city, thread_city.__name__)
    threads_list.append(thread_instance)
for t in threads_list:
    # Set the flag itself; assigning to `setDaemon` only replaced the
    # method with False and changed nothing.
    t.daemon = False
    t.start()
# Wait for every city to finish before the script ends.
for t in threads_list:
    t.join()

# browser.delete_all_cookies()
# browser.quit()

  

 

 

 

以上是关于Is this its limit?的主要内容,如果未能解决你的问题,请参考以下文章

this is和it is使用上有啥区别?一般怎么用?

Celebrate it, this is my first time on this blog.

It is possible that this issue is resolved by uninstalling an existing version of the apk if it is p

(转)mac 搭建基于RTMP的本地Nginx服务器报错homebrew/nginx was deprecated. This tap is now empty as all its form(代码片段)

It is possible that this issue is resolved by uninstalling an existi

Installation failed with message...It is possible that this issue is resolved by uninstalling an exi