还在为写调查问卷发愁的你赶快来看看这个自动填写问卷(问卷星版)
Posted adventure.Li
tags:
篇首语:本文由小常识网(cha138.com)小编为大家整理,主要介绍了还在为写调查问卷发愁的你赶快来看看这个自动填写问卷(问卷星版)相关的知识,希望对你有一定的参考价值。
一、背景
马克思实践报告需要做问卷调查,但并不需要严谨深入的调查研究,因此想到用脚本自动填写问卷来完成,于是打开 PyCharm 开整。
二、实现过程
- chromedriver + selenium 填写(可以完成填写,但发现问卷星有反爬虫检测,于是改用第二种办法)
import time
from selenium import webdriver
import asyncio
from pyppeteer import launch
from pyppeteer_stealth import stealth # 反爬虫第三方库
# Selenium + chromedriver attempt at filling in the questionnaire.
# Uses the Selenium 3 style API (executable_path, find_element_by_*) that the
# original snippet was written against.
chrome_driver = r"E:\\PycharmProjects\\PythonBase\\crawler\\driver\\chromedriver.exe"
# Start the browser.
browser = webdriver.Chrome(executable_path=chrome_driver)
url = "https://www.wjx.cn/vm/PG4tFZd.aspx"
# browser.get() returns None, so its result must not be kept as a "page".
# The pyppeteer_stealth.stealth() call that used to follow was dropped: it is
# an async helper for pyppeteer pages and was never awaited, so it had no
# effect on a Selenium session.
browser.get(url)
# print("val:", browser.find_element_by_class_name("field ui-field-contain"))
while True:
    # Question containers are looked up by the ids "div1" .. "div10".
    for i in range(1, 11):
        id_name = "div" + str(i)
        print(id_name)
        container = None
        try:
            container = browser.find_element_by_id(id_name)
            # Question types, per the original author's notes:
            #   jqradio: 1 gender, 2 grade, 4 choice(?), 6 heard of campus
            #            loans, 7 how it was handled(?), 8 worried or not
            #   jqcheck: 3 events (multiple choice)
            #   rate-off: 9 rating
            if i == 9:
                print("打分rate-off类型")
                stars = container.find_elements_by_class_name("rate-off")
                for j, star in enumerate(stars, start=1):
                    print("测试" + str(j), star.text)
                    # Only the fifth rating element is clicked.
                    if j == 5:
                        star.click()
            elif i == 3:
                print("check多选类型")
                boxes = container.find_elements_by_class_name("jqcheck")
                for j, box in enumerate(boxes, start=1):
                    print("测试" + str(j))
                    box.click()
            else:
                print("radio类型")
                radios = container.find_elements_by_class_name("jqradio")
                # Every radio is clicked in turn, so the last one stays selected.
                for j, radio in enumerate(radios, start=1):
                    print("测试" + str(j))
                    radio.click()
        except Exception as exc:
            # Best effort: a missing or unclickable question must not stop
            # the run, but the cause is reported instead of being hidden.
            print("error", exc)
        print("enter....")
        # Only report the element id when the lookup actually succeeded;
        # the original read `e.id` unconditionally, which fails (or shows a
        # stale element) after a lookup error.
        if container is not None:
            print(container.id)
    # Long pause (3000 s) before the next pass, leaving the browser open.
    time.sleep(3000)
- pyppeteer + stealth
分析页面:
先在页面上试探性地点击各个选项,观察源代码的变化,明确需要点击的元素,然后取得它的 id 或 class 用于定位。(调试时页面可能会被断点阻断,此时需要在 Sources 面板点击如下图所示的按钮,然后再继续调试)
import asyncio
from random import randint
from pyppeteer import launch
from pyppeteer_stealth import stealth # 反爬虫第三方库
import time
async def _click_nth(options, position):
    """Click the element at 1-based ``position``; do nothing if out of range."""
    if 1 <= position <= len(options):
        await options[position - 1].click()


async def main():
    """Open the questionnaire in Chrome, answer questions 1-9 and submit it.

    Questions are located through the containers ``#div1`` .. ``#div9``:
    question 3 uses ``.jqcheck`` elements, question 9 uses ``.rate-off``
    elements, and all other questions use ``.jqradio`` elements.

    The browser is always closed, even when a step fails.

    Raises:
        RuntimeError: if the submit button cannot be found on the page.
    """
    # launch() creates a new browser object.
    browser = await launch({
        # Path of the local Chrome installation.
        # NOTE(review): the literal contains doubled backslashes; kept as in
        # the original - confirm it resolves on the target machine.
        'executablePath': 'C:\\\\Program Files\\\\Google\\\\Chrome\\\\Application\\\\chrome.exe',
        # pyppeteer is headless by default; False shows the browser window.
        'headless': False,
        # Window size, so the page is displayed in full.
        'args': ['--no-sandbox', '--window-size=1366,850']
    })
    try:
        url = "https://www.wjx.cn/vm/PG4tFZd.aspx"
        # newPage() opens a new tab and returns its Page object.
        page = await browser.newPage()
        await page.setViewport({'width': 1366, 'height': 768})
        # Keep the page from detecting the automation (key anti-bot step).
        await stealth(page)
        # Navigate the tab to the questionnaire.
        await page.goto(url)

        for i in range(1, 10):
            # Container of question i.
            id_name = "#div" + str(i)
            print(id_name)
            if i == 3:
                # Multiple choice: tick every checkbox.
                click_toolkit = id_name + ' .jqcheck'
                checks = await page.querySelectorAll(click_toolkit)
                for check in checks:
                    await check.click()
            elif i == 9:
                # Rating: click one randomly chosen position in 3..8
                # (nothing is clicked if fewer elements exist).
                click_toolkit = id_name + ' .rate-off'
                checks = await page.querySelectorAll(click_toolkit)
                await _click_nth(checks, randint(3, 8))
            else:
                # Single choice, e.g. "#div1 .jqradio".
                click_toolkit = id_name + " .jqradio"
                print("test ", click_toolkit)
                checks = await page.querySelectorAll(click_toolkit)
                if i == 1 or i == 2:
                    # Random option among the first 2 (question 1) or
                    # the first 4 (question 2).
                    await _click_nth(checks, randint(1, 2 * i))
                elif i == 6 or i == 8:
                    # Always the first option; debug output kept as before.
                    if checks:
                        print(1, "打印")
                        await checks[0].click()
                    print(len(checks) + 1, "打印")
                elif i == 4:
                    # Random option between the 2nd and the 5th.
                    await _click_nth(checks, randint(2, 5))
                else:
                    # Click every radio in turn; the last one stays selected.
                    for check in checks:
                        await check.click()

        # Find the submit button and submit.
        submit = await page.querySelector('#divSubmit #ctlNext')
        if submit is None:
            raise RuntimeError("submit button '#divSubmit #ctlNext' not found")
        await submit.click()
        # Wait 2 s so the submission result can be seen.
        await asyncio.sleep(2)
    finally:
        # Always release the Chrome process, including on failure.
        await browser.close()
# Submit the questionnaire over and over, pausing a random 5-10 s between runs.
while True:
    event_loop = asyncio.get_event_loop()
    event_loop.run_until_complete(main())
    pause_seconds = randint(5, 10)
    time.sleep(pause_seconds)
- 常用模板
# 填空题:page.type(selector,text),在指定selector的元素上填写text
# await page.type('#q1', '姓名')
# await page.type('#q2', '学号')
# await page.type('#divquestion5 > table > tbody > tr:nth-child(1) > td > div > textarea', '体温')
# 单选题:先用page.querySelector(selector)找到指定的元素,再调用元素的click()方法, await?
button = await page.querySelector("#div1 .jqradio")
buttons = await page.querySelectorAll("#div1 .jqradio")
i = 1
for b in buttons:
if i == 2:
await b.click()
i = i+1
# 地址题:先点击手动填写地址,再在地址框内填写相应地址
# address = await page.querySelector("#divquestion7 > ul > li:nth-child(1) > label")
# await address.click()
# await page.type('#q9', '地址')
# 日期选择题:先点击日期选择框,在出现的iframe寻找元素并调用click()方法
# date1 = await page.querySelector("#q4")
# await date1.click()
# frame = page.frames
# date2 = await frame[1].querySelector('#selectTodayButton')
# await date2.click()
# 找到提交按钮提交
submit = await page.querySelector('#submit_button')
# await submit.click()
await asyncio.sleep(200) # 页面延迟2s看是否提交成功
# await browser.close()
以上是关于还在为写调查问卷发愁的你赶快来看看这个自动填写问卷(问卷星版)的主要内容,如果未能解决你的问题,请参考以下文章