discord.py-rewrite - 使用 PyQt5 的动态 Web 抓取无法正常工作
Posted
技术标签:
【中文标题】discord.py-rewrite - 使用 PyQt5 的动态 Web 抓取无法正常工作【英文标题】:discord.py-rewrite - Dynamic Web Scraping using PyQt5 not working properly 【发布时间】:2020-04-02 02:52:26 【问题描述】:简而言之,我正在制作一个 Discord 机器人,它将网站 https://growtopiagame.com 中的“今日世界”(World of the Day)图片下载为 D:\Kelbot/render.png,然后将图片发送到调用该命令的频道。但是,它不是静态网站,而且源代码中也没有图片的 URL,所以我找到了使用 PyQt5 的解决方案:
import re
import bs4 as bs
import sys
import urllib.request
from PyQt5.QtWebEngineWidgets import QWebEnginePage
from PyQt5.QtWidgets import QApplication
from PyQt5.QtCore import QUrl
@client.command()
@commands.cooldown(1, 60, commands.BucketType.user)
async def wotd(ctx):
    """Scrape the "World of the Day" image and send it to the channel.

    This is the asker's original approach, reproduced with its structure
    restored. It renders https://growtopiagame.com with a QWebEnginePage,
    parses the resulting HTML with BeautifulSoup, downloads the first URL
    found inside the ``world-of-day-image`` anchor and sends ``render.png``.

    NOTE: ``Page.__init__`` creates a ``QApplication`` and calls
    ``app.exec_()`` from inside this coroutine, i.e. a Qt event loop is run
    synchronously on every invocation of the command.
    """

    class Page(QWebEnginePage):
        """Web page that loads *url* and stores the rendered HTML in ``html``."""

        def __init__(self, url):
            # A new QApplication is created for every Page instance.
            self.app = QApplication(sys.argv)
            QWebEnginePage.__init__(self)
            self.html = ''
            self.loadFinished.connect(self._on_load_finished)
            self.load(QUrl(url))
            # Blocks until Callable() calls self.app.quit().
            self.app.exec_()

        def _on_load_finished(self):
            # toHtml() delivers the markup asynchronously through the
            # callback; the value assigned here is overwritten in Callable().
            self.html = self.toHtml(self.Callable)
            print('Load finished')

        def Callable(self, html_str):
            """Receive the rendered HTML and stop the Qt event loop."""
            self.html = html_str
            self.app.quit()

    def main():
        """Render the page, extract the image URL and download the image."""
        page = Page('https://growtopiagame.com')
        soup = bs.BeautifulSoup(page.html, 'html.parser')
        js_test = soup.find('a', class_='world-of-day-image')
        link = []
        for x in js_test:
            link.append(str(x))
        # Raw strings keep the same runtime values while avoiding the
        # invalid escape sequences (\( \) and \K) of the plain literals.
        urls = re.findall(r'http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*\(\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+', link[0])
        urllib.request.urlretrieve(urls[0], r"D:\Kelbot/render.png")

    if __name__ == '__main__':
        main()
    # NOTE(review): the image is saved under D:\Kelbot but sent by the
    # relative name 'render.png' — presumably this relies on the working
    # directory being D:\Kelbot; confirm.
    await ctx.send(file=discord.File('render.png'))
当我从任务计划程序运行机器人时,它不起作用。因此,我尝试使用 Python Shell 和 Visual Studio Code 来运行它,两者都可以正常工作。但是,当第二次调用该命令时,Python Shell 和 Visual Studio Code 都会重新启动,机器人也由于某种原因被终止。是因为这个类与 discord.py 不兼容吗?我该如何解决这个问题?有没有比使用 PyQt5 更好的解决方案?
(有时我没有得到图片,而是得到https://growtopiagame.com/resources/assets/images/load.gif,这是他们在显示实际的每日世界图片之前放置的图像,但当我重新启动我的电脑时它会自行修复)
【问题讨论】:
【参考方案1】:PyQt5 与 asyncio 不兼容,尽管有一些库(如 quamash、asyncqt、qasync)试图使两者兼容,但在您的情况下这并不是必需的,因为您希望 Qt 执行的唯一任务就是抓取网页以获取图像的 URL 并下载它。因此,解决方法是创建一个专门完成这一任务的外部应用程序,然后在 wotd 函数中调用它:
├── downloader.py
├── .env
└── main.py
main.py
import asyncio
import os
import sys
import uuid
import discord
from discord.ext import commands
from dotenv import load_dotenv
# Bot instance the commands in this file are registered on; "!" is the command prefix.
bot = commands.Bot(command_prefix="!")
@commands.cooldown(1, 60, commands.BucketType.user)
@bot.command()
async def wotd(ctx):
    """Download the "World of the Day" image in a subprocess and send it.

    ``downloader.py`` (located next to this file) is started with the
    current Python interpreter and is given the path of the image to write.
    Running Qt in a separate process keeps its event loop out of the bot's
    asyncio loop. The image is sent to the invoking channel only when the
    subprocess exits with return code 0; otherwise its stderr is printed.
    """
    current_dir = os.path.dirname(os.path.realpath(__file__))
    images_dir = os.path.join(current_dir, "images")
    os.makedirs(images_dir, exist_ok=True)

    # One unique file per invocation so concurrent calls never clash.
    output_filename = os.path.join(images_dir, "{}.png".format(uuid.uuid4()))
    args = [sys.executable, os.path.join(current_dir, "downloader.py"), output_filename]

    process = await asyncio.create_subprocess_exec(
        *args, stdout=asyncio.subprocess.PIPE, stderr=asyncio.subprocess.PIPE
    )
    print("Started: %s, pid=%s" % (args, process.pid), flush=True)

    # communicate() waits for the process to finish without blocking the loop.
    stdout, stderr = await process.communicate()

    if process.returncode == 0:
        print(
            "Done: %s, pid=%s, result: %s"
            % (args, process.pid, stdout.decode().strip()),
            flush=True,
        )
        await ctx.send(file=discord.File(output_filename))
        print("end", output_filename)
    else:
        print(
            "Failed: %s, pid=%s, result: %s"
            % (args, process.pid, stderr.decode().strip()),
            flush=True,
        )
        print("error")
@wotd.error
async def wotd_error(ctx, error):
    """Handle errors raised by the ``wotd`` command.

    When the command is on cooldown, the user is told how many seconds
    remain (two decimals). Every error is also printed for diagnosis.
    """
    if isinstance(error, commands.CommandOnCooldown):
        msg = "This command is ratelimited, please try again in {:.2f}s".format(
            error.retry_after
        )
        await ctx.send(msg)
    print(ctx, error)
def main():
    """Load the environment (e.g. from ``.env``) and run the bot.

    The token is read from the ``DISCORD_TOKEN`` environment variable.
    """
    load_dotenv()
    bot.run(os.getenv("DISCORD_TOKEN"))


if __name__ == "__main__":
    main()
downloader.py
import sys
from PyQt5 import QtCore, QtWidgets, QtNetwork, QtWebEngineWidgets
class DownLoader(QtCore.QObject):
    """Load growtopiagame.com, find the "World of the Day" image and save it.

    The page is rendered in a ``QWebEnginePage``. Once loading has finished,
    a small JavaScript snippet is evaluated to read the ``src`` of the image
    inside the ``world-of-day-image`` element. The image is then fetched
    with a ``QNetworkAccessManager`` and written to ``path``. The
    application exits with 0 on success and -1 on any failure.
    """

    def __init__(self, path, parent=None):
        """Start loading the page.

        Args:
            path: Destination file for the downloaded image.
            parent: Optional parent ``QObject``.
        """
        super().__init__(parent)
        self.path = path

        url = "https://growtopiagame.com"

        self.manager = QtNetwork.QNetworkAccessManager(self)

        # A profile with a freshly generated name is created for each run.
        profile = QtWebEngineWidgets.QWebEngineProfile(
            QtCore.QUuid.createUuid().toString(QtCore.QUuid.Id128), self
        )
        self.page = QtWebEngineWidgets.QWebEnginePage(profile, self)

        # Progress percentages are printed to stdout.
        self.page.loadProgress.connect(print)

        self.manager.finished.connect(self.on_finished)
        self.page.loadFinished.connect(self.on_load_finished)
        self.page.load(QtCore.QUrl(url))

    @QtCore.pyqtSlot(bool)
    def on_load_finished(self, ok):
        """Query the image URL once the page has loaded, or exit with -1."""
        if ok:
            self.request_url()
        else:
            print("error", ok, file=sys.stderr)
            QtCore.QCoreApplication.exit(-1)

    def request_url(self):
        """Evaluate JavaScript in the page and pass the result to ``download``.

        The script returns the ``src`` of the first ``IMG`` child of the
        first ``world-of-day-image`` element, or an empty string when that
        element is not available yet.
        """
        js = """
        function get_url(){
            var elements = document.getElementsByClassName("world-of-day-image")
            if(elements.length){
                var element = elements[0];
                if(element.children.length){
                    var e = element.children[0]
                    if(e.tagName == "IMG")
                        return e.src
                }
            }
            return "";
        }
        get_url();
        """
        self.page.runJavaScript(js, self.download)

    def download(self, url):
        """Request the image at *url*; poll again in 100 ms when it is empty."""
        if url:
            print(url)
            request = QtNetwork.QNetworkRequest(QtCore.QUrl(url))
            self.manager.get(request)
        else:
            # The image element is filled in dynamically, so keep polling
            # (there is no retry limit).
            QtCore.QTimer.singleShot(100, self.request_url)

    @QtCore.pyqtSlot(QtNetwork.QNetworkReply)
    def on_finished(self, reply):
        """Write the downloaded bytes to ``self.path`` and quit the application."""
        if reply.error() == QtNetwork.QNetworkReply.NoError:
            file = QtCore.QFile(self.path)
            if file.open(QtCore.QIODevice.WriteOnly):
                r = reply.readAll()
                print(len(r))
                file.write(r)
                file.close()
                QtCore.QCoreApplication.quit()
            else:
                # Report the failure through the exit code so the caller
                # does not try to use an image that was never written.
                print("error", file.errorString(), file=sys.stderr)
                QtCore.QCoreApplication.exit(-1)
        else:
            print(reply.error(), reply.errorString(), file=sys.stderr)
            QtCore.QCoreApplication.exit(-1)
if __name__ == "__main__":
    # Script entry point: expects the output image path as the only
    # positional command-line argument.
    app = QtWidgets.QApplication(sys.argv)

    cli = QtCore.QCommandLineParser()
    cli.addPositionalArgument("path", "Path of image")
    cli.process(app)

    positional = cli.positionalArguments()
    if len(positional) == 0:
        print("not path", file=sys.stderr)
        sys.exit(-1)

    # Bound to a name so the object stays referenced while the event loop runs.
    downloader = DownLoader(positional[0])
    sys.exit(app.exec_())
.env
DISCORD_TOKEN=YOUR_TOKEN_HERE
【讨论】:
以上是关于discord.py-rewrite - 使用 PyQt5 的动态 Web 抓取无法正常工作的主要内容,如果未能解决你的问题,请参考以下文章
Discord.py-rewrite wait_for() 我该如何使用?
Discord.py channel.connect() 永远不会返回