如何利用Python进行基础自动点击爬虫

Posted 2021-12-22 Jacky0907

tags:

篇首语：本文由小常识网(cha138.com)小编为大家整理，主要介绍了如何利用Python进行基础自动点击爬虫相关的知识，希望对你有一定的参考价值。

如何利用Python进行基础自动点击爬虫

公司里有很多工作是重复而繁琐的。为了节省大家的工作时间，可以利用Python技术实现“自动化”点击操作，从而大大减少需要手动点击的工作量。下面我们以“客服系统自动点击程序”为例，给大家讲一下如何编写这样的“自动化程序”。

第一步：导入相关的工具类

#导入相关的工具类
from bs4 import BeautifulSoup
from selenium import webdriver
import selenium
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support.wait import WebDriverWait
import json
import requests
import demjson
import datetime
import time
import re
import copy
from lxml import etree
import os
import time
import pandas as pd
import openpyxl
import pymysql

import glob


from sqlalchemy import create_engine
# from docx import Document
from selenium.webdriver.common.keys import Keys 
from selenium.webdriver.common.action_chains import ActionChains
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import NoSuchElementException

第二步：利用pandas库打开需要操作的Excel文档（如果需要）

# Load the workbook at the given path with pandas.read_excel and keep the
# resulting DataFrame in `df` (the later steps read the case numbers from it).
df = pd.read_excel(r'增值服务取消审核单(1).xlsx')
# Show the loaded table (a bare expression is only displayed in a
# notebook/REPL; it has no effect in a plain script).
df

第三步：打开“自动化浏览器”

# Start a Chrome session driven by Selenium; `seleniumGoo` is the driver
# object that all later steps use.
seleniumGoo=webdriver.Chrome()
# Maximize the browser window.
seleniumGoo.maximize_window()
# Navigate to the index page of the system to be automated.
seleniumGoo.get("http://10.192.48.82/index")

第四步：网页中输入相关信息

自动化浏览器打开网页后，手动输入自己的账号信息完成登录，并进入需要操作的页面。

第五步：编写核心爬虫代码，并进行调试

       # Fragment of the per-record loop body: expects `seleniumGoo` (the
       # WebDriver), `df` (the loaded worksheet) and `i` (current row index)
       # to exist already.

       # Start from the first window; the handle list is queried fresh so the
       # step does not depend on a `handles` variable from an earlier run.
       seleniumGoo.switch_to.window(seleniumGoo.window_handles[0])
       search_result = seleniumGoo.find_element(By.XPATH, '//*[@id="workidNumber"]')
       search_result.click()
       search_result.clear()
       # str() so that numeric cells read by pandas are typed as text.
       search_result.send_keys(str(df['案件号'][i]))

       search_result = seleniumGoo.find_element(By.XPATH, '//*[@id="search"]')
       search_result.click()

       # Click the link in the result row to continue to the detail page.
       seleniumGoo.find_element(By.XPATH, '//*[@id="bootStrap"]/tbody/tr/td[12]/a[2]').click()
       # Alternative that waits until the link is clickable:
       # WebDriverWait(seleniumGoo, 40).until(EC.element_to_be_clickable((By.XPATH, '//*[@id="bootStrap"]/tbody/tr/td[12]/a[2]'))).click()

       # Wait until a second window handle exists before indexing handles[1].
       WebDriverWait(seleniumGoo, 40).until(lambda drv: len(drv.window_handles) > 1)
       handles = seleniumGoo.window_handles
       print(handles[1])
       seleniumGoo.switch_to.window(handles[1])
       time.sleep(1)
       # Each click waits up to 40 s for its target to become clickable; the
       # short sleeps give the page time to react before the next step.
       WebDriverWait(seleniumGoo, 40).until(EC.element_to_be_clickable((By.XPATH, '//*[@id="quanju"]/div[2]/div[6]/label[2]'))).click()
       time.sleep(1)
       WebDriverWait(seleniumGoo, 40).until(EC.element_to_be_clickable((By.XPATH, '//*[@id="quXiao"]/option[2]'))).click()
       time.sleep(0.5)
       WebDriverWait(seleniumGoo, 40).until(EC.element_to_be_clickable((By.XPATH, ' //*[@id="subReset1"]'))).click()
       time.sleep(1)
       WebDriverWait(seleniumGoo, 40).until(EC.element_to_be_clickable((By.XPATH, '//*[@id="subReset"]'))).click()
       time.sleep(1)
       WebDriverWait(seleniumGoo, 40).until(EC.element_to_be_clickable((By.XPATH, '/html/body/div[3]/div[2]/div[2]/button'))).click()
       time.sleep(1)
       # Return to the first window so the next record starts from the search page.
       handles = seleniumGoo.window_handles
       seleniumGoo.switch_to.window(handles[0])

第六步：将核心代码进行函数封装

以客服系统为例：

def hexin():
    """Run the click sequence for the case number in ``df['案件号'][i]``.

    Uses the module-level names ``seleniumGoo`` (the WebDriver), ``df``
    (the loaded worksheet) and ``i`` (the current row index).

    Steps, in order:
      1. switch to the first browser window and type the case number into
         the ``workidNumber`` field, then click ``search``;
      2. click the link in the result table and switch to the second
         window handle;
      3. click the five form controls listed in ``steps``;
      4. switch back to the first window.

    Raises whatever Selenium raises (e.g. ``NoSuchElementException`` or
    ``TimeoutException``) when an element cannot be found or clicked in time.
    """
    wait = WebDriverWait(seleniumGoo, 40)

    # Query the handles fresh: a local `handles` does not exist yet at this
    # point, so reading it here would raise UnboundLocalError.
    seleniumGoo.switch_to.window(seleniumGoo.window_handles[0])

    search_box = seleniumGoo.find_element(By.XPATH, '//*[@id="workidNumber"]')
    search_box.click()
    search_box.clear()
    # str() so that numeric cells read by pandas are typed as text.
    search_box.send_keys(str(df['案件号'][i]))

    seleniumGoo.find_element(By.XPATH, '//*[@id="search"]').click()

    # Click the link in the result row to continue to the detail page.
    seleniumGoo.find_element(
        By.XPATH, '//*[@id="bootStrap"]/tbody/tr/td[12]/a[2]'
    ).click()

    # Wait until a second window handle exists before indexing handles[1].
    wait.until(lambda drv: len(drv.window_handles) > 1)
    handles = seleniumGoo.window_handles
    print(handles[1])
    seleniumGoo.switch_to.window(handles[1])
    time.sleep(1)

    # (xpath, pause in seconds after the click), in the order they are clicked.
    steps = (
        ('//*[@id="quanju"]/div[2]/div[6]/label[2]', 1),
        ('//*[@id="quXiao"]/option[2]', 0.5),
        (' //*[@id="subReset1"]', 1),
        ('//*[@id="subReset"]', 1),
        ('/html/body/div[3]/div[2]/div[2]/button', 1),
    )
    for xpath, pause in steps:
        # Wait up to 40 s for the control to become clickable, click it, then
        # give the page a moment to react before the next step.
        wait.until(EC.element_to_be_clickable((By.XPATH, xpath))).click()
        time.sleep(pause)

    # Return to the first window so the next record starts from the search page.
    seleniumGoo.switch_to.window(seleniumGoo.window_handles[0])

第七步：编写自动化循环方法，并将封装好的“核心代码”放入其中

def autoLoop(df):
    """Call ``hexin()`` once for every row of *df*.

    A record that fails is reported and skipped so the remaining records are
    still processed.

    Args:
        df: the worksheet table; only its length is used here to decide how
            many times to loop.
    """
    # hexin() takes no arguments and looks up the row index as the
    # module-level name `i`, so the loop counter must be global for it to be
    # visible there.
    global i
    for i in range(len(df)):
        try:
            # Core click sequence for the current record.
            hexin()
        except Exception as exc:
            # Best effort: report the failure and move on to the next record.
            # `Exception` (not a bare except) keeps Ctrl+C able to stop the loop.
            print(f"row {i}: skipped ({exc!r})")
            continue

第八步：运行autoLoop自动化循环方法

# Run the loop over the worksheet loaded into `df`; autoLoop requires the
# table as its argument.
autoLoop(df)

这就是Python进行基础自动点击爬虫啦

以上是关于如何利用Python进行基础自动点击爬虫的主要内容，如果未能解决你的问题，请参考以下文章

零基础入门Python爬虫[1]前言

利用Python网络爬虫抓取微信好友的签名及其可视化展示

如何利用Python词云和wordart可视化工具对朋友圈数据进行可视化展示