discord.py-rewrite - 使用 PyQt5 的动态 Web 抓取无法正常工作

Posted

技术标签:

【中文标题】discord.py-rewrite - 使用 PyQt5 的动态 Web 抓取无法正常工作【英文标题】:discord.py-rewrite - Dynamic Web Scraping using PyQt5 not working properly 【发布时间】:2020-04-02 02:52:26 【问题描述】:

简而言之,我正在制作一个 Discord 机器人,它将网站 https://growtopiagame.com 中的“今日世界”(World of the Day)图片下载为 D:\Kelbot/render.png,然后将图片发送到调用该命令的频道。但是,它不是静态网站,而且页面源代码中也没有该图片的 URL,所以我找到了使用 PyQt5 的解决方案:

import re
import bs4 as bs
import sys
import urllib.request
from PyQt5.QtWebEngineWidgets import QWebEnginePage
from PyQt5.QtWidgets import QApplication
from PyQt5.QtCore import QUrl
# Question snippet: a discord.py command that renders the page with Qt,
# extracts the "world of the day" image URL and sends the image to the channel.
# NOTE(review): everything below runs synchronously inside the coroutine,
# including the Qt event loop started in Page.__init__.
@client.command()
@commands.cooldown(1, 60, commands.BucketType.user)
async def wotd(ctx):
    # Helper page that loads `url` and keeps the rendered HTML in `self.html`.
    class Page(QWebEnginePage):
        def __init__(self, url):
            # A new QApplication is created every time a Page is constructed,
            # i.e. once per command invocation.
            self.app = QApplication(sys.argv)
            QWebEnginePage.__init__(self)
            self.html = ''
            self.loadFinished.connect(self._on_load_finished)
            self.load(QUrl(url))
            # Blocks here until Callable() calls self.app.quit().
            self.app.exec_()

        def _on_load_finished(self):
            # The HTML is delivered to self.Callable; the value assigned here
            # is later overwritten by Callable().
            self.html = self.toHtml(self.Callable)
            print('Load finished')

        def Callable(self, html_str):
            # Store the rendered HTML and stop the Qt event loop so that
            # __init__ can return.
            self.html = html_str
            self.app.quit()

    def main():
        page = Page('https://growtopiagame.com')
        soup = bs.BeautifulSoup(page.html, 'html.parser')
        # Anchor element wrapping the image of the day.
        js_test = soup.find('a', class_='world-of-day-image')
        # Stringify the children of the anchor so the URL regex can be applied.
        link = []
        for x in js_test:
            link.append(str(x))
        # Pull the first http(s) URL out of the first child's markup.
        urls = re.findall('http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*\(\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+', link[0])
        urllib.request.urlretrieve(urls[0], "D:\Kelbot/render.png")
    # main() only runs when this module is executed as the main script.
    if __name__ == '__main__': main()
    # NOTE(review): the image is saved under D:\Kelbot but sent via the
    # relative path 'render.png' -- presumably relies on the working directory.
    await ctx.send(file=discord.File('render.png'))

当我从任务计划程序运行机器人时,它不起作用。因此,我尝试使用 Python Shell 和 Visual Studio Code 来运行它,两者都可以正常工作。但是,当第二次调用该命令时,Python Shell 和 Visual Studio Code 都会重新启动,并且机器人由于某种原因被终止。是因为这个类与 discord.py 不兼容吗?我该如何解决这个问题?有没有比使用 PyQt5 更好的解决方案?

(有时我没有得到图片,而是得到https://growtopiagame.com/resources/assets/images/load.gif,这是他们在显示实际的每日世界图片之前放置的图像,但当我重新启动我的电脑时它会自行修复)

【问题讨论】:

【参考方案1】:

PyQt5 与 asyncio 不兼容。尽管有一些库(例如 quamash、asyncqt、qasync)试图使两者兼容,但在您的情况下并不需要它们,因为您希望 Qt 执行的唯一任务就是抓取网页以获取图像的 URL 并下载它。因此,解决方法是创建一个专门完成这项工作的外部应用程序,然后在 wotd 函数中调用它:

├── downloader.py
├── .env
└── main.py

main.py

import asyncio
import os
import sys
import uuid

import discord
from discord.ext import commands

from dotenv import load_dotenv

# Shared bot instance; commands are invoked with the "!" prefix.
bot = commands.Bot(command_prefix="!")


@commands.cooldown(1, 60, commands.BucketType.user)
@bot.command()
async def wotd(ctx):
    """Send today's "World of the Day" image to this channel."""
    # The scraping itself is delegated to downloader.py, run as a separate
    # process, so the Qt event loop never blocks the bot's asyncio loop.
    current_dir = os.path.dirname(os.path.realpath(__file__))
    images_dir = os.path.join(current_dir, "images")

    # Create the output directory on first use; a no-op when it already exists.
    os.makedirs(images_dir, exist_ok=True)

    # A unique name per invocation so concurrent calls never overwrite
    # each other's image.
    output_filename = os.path.join(images_dir, "{}.png".format(uuid.uuid4()))

    # Run the downloader with the same interpreter that runs the bot.
    args = [sys.executable, os.path.join(current_dir, "downloader.py"), output_filename]
    process = await asyncio.create_subprocess_exec(
        *args, stdout=asyncio.subprocess.PIPE, stderr=asyncio.subprocess.PIPE
    )
    print("Started: %s, pid=%s" % (args, process.pid), flush=True)
    # Wait (without blocking the event loop) until the downloader exits.
    stdout, stderr = await process.communicate()
    if process.returncode == 0:
        print(
            "Done: %s, pid=%s, result: %s"
            % (args, process.pid, stdout.decode().strip()),
            flush=True,
        )
        await ctx.send(file=discord.File(output_filename))
        print("end", output_filename)
    else:
        # Non-zero exit code: the downloader reported a failure on stderr.
        print(
            "Failed: %s, pid=%s, result: %s"
            % (args, process.pid, stderr.decode().strip()),
            flush=True,
        )
        print("error")


@wotd.error
async def wotd_error(ctx, error):
    """Handle errors raised by the ``wotd`` command.

    When the command is on cooldown, tell the user how many seconds remain.
    Every error, cooldown or not, is also printed to stdout.

    Args:
        ctx: Invocation context of the failed command.
        error: The exception raised while running the command.
    """
    if isinstance(error, commands.CommandOnCooldown):
        # "{:.2f}" renders the remaining cooldown with two decimals.
        msg = "This command is ratelimited, please try again in {:.2f}s".format(
            error.retry_after
        )
        await ctx.send(msg)
    print(ctx, error)


def main():
    """Load the environment file and run the bot with ``DISCORD_TOKEN``."""
    load_dotenv()
    # The token is read from the environment after load_dotenv() has run.
    bot.run(os.getenv("DISCORD_TOKEN"))


# Start the bot only when this file is executed directly.
if __name__ == "__main__":
    main()

downloader.py

import sys

from PyQt5 import QtCore, QtWidgets, QtNetwork, QtWebEngineWidgets


class DownLoader(QtCore.QObject):
    """Render the Growtopia home page and save its "world of the day" image.

    The page is loaded in an off-screen ``QWebEnginePage``. Once loading has
    finished, a JavaScript snippet is polled until it yields the image URL;
    the image is then fetched with ``QNetworkAccessManager`` and written to
    ``path``. The application exits with code 0 on success and -1 on any
    failure, so a parent process can check the return code.
    """

    def __init__(self, path, parent=None):
        """Start loading the page.

        Args:
            path: Destination file for the downloaded image.
            parent: Optional parent ``QObject``.
        """
        super().__init__(parent)
        self.path = path

        url = "https://growtopiagame.com"
        self.manager = QtNetwork.QNetworkAccessManager(self)

        # A profile with a freshly generated storage name is used for each
        # run -- presumably so state from earlier runs is not reused.
        profile = QtWebEngineWidgets.QWebEngineProfile(
            QtCore.QUuid.createUuid().toString(QtCore.QUuid.Id128), self
        )
        self.page = QtWebEngineWidgets.QWebEnginePage(profile, self)
        # Report load progress (0-100) on stdout.
        self.page.loadProgress.connect(print)

        self.manager.finished.connect(self.on_finished)
        self.page.loadFinished.connect(self.on_load_finished)

        self.page.load(QtCore.QUrl(url))

    @QtCore.pyqtSlot(bool)
    def on_load_finished(self, ok):
        """Begin looking for the image URL, or exit with -1 if loading failed."""
        if ok:
            self.request_url()
        else:
            print("error", ok, file=sys.stderr)
            QtCore.QCoreApplication.exit(-1)

    def request_url(self):
        """Ask the page for the image URL; the result goes to ``download``."""
        # Returns the src of the first <img> child of the first element with
        # class "world-of-day-image", or "" when it is not present yet.
        js = """
        function get_url(){
            var elements = document.getElementsByClassName("world-of-day-image")
            if(elements.length){
                var element = elements[0];
                if(element.children.length){
                    var e = element.children[0]
                    if(e.tagName == "IMG")
                        return e.src
                }
            }
            return "";
        }
        get_url();
        """
        self.page.runJavaScript(js, self.download)

    def download(self, url):
        """Fetch ``url``; when it is empty, retry the lookup after 100 ms."""
        if url:
            print(url)
            request = QtNetwork.QNetworkRequest(QtCore.QUrl(url))
            self.manager.get(request)
        else:
            # The image element is not available yet; poll again shortly.
            QtCore.QTimer.singleShot(100, self.request_url)

    @QtCore.pyqtSlot(QtNetwork.QNetworkReply)
    def on_finished(self, reply):
        """Write the downloaded image to ``self.path`` and quit the application."""
        if reply.error() != QtNetwork.QNetworkReply.NoError:
            print(reply.error(), reply.errorString(), file=sys.stderr)
            QtCore.QCoreApplication.exit(-1)
            return

        out_file = QtCore.QFile(self.path)
        if not out_file.open(QtCore.QIODevice.WriteOnly):
            # Report failure instead of exiting with 0 and no file on disk.
            print("cannot open", self.path, out_file.errorString(), file=sys.stderr)
            QtCore.QCoreApplication.exit(-1)
            return

        data = reply.readAll()
        print(len(data))
        out_file.write(data)
        out_file.close()
        QtCore.QCoreApplication.quit()


if __name__ == "__main__":
    # Script entry point: expects the output image path as a positional argument.
    app = QtWidgets.QApplication(sys.argv)

    cli = QtCore.QCommandLineParser()
    cli.addPositionalArgument("path", "Path of image")
    cli.process(app)

    positional = cli.positionalArguments()
    if len(positional) == 0:
        print("not path", file=sys.stderr)
        sys.exit(-1)

    # Bind the downloader to a name so it stays referenced while the event
    # loop runs.
    downloader = DownLoader(positional[0])
    sys.exit(app.exec_())

.env

DISCORD_TOKEN=YOUR_TOKEN_HERE

【讨论】:

以上是关于discord.py-rewrite - 使用 PyQt5 的动态 Web 抓取无法正常工作的主要内容,如果未能解决你的问题,请参考以下文章

Discord.py-rewrite wait_for() 我该如何使用?

Discord.py channel.connect() 永远不会返回

Discord.py 重写多服务器数据

free(p),是啥意思?是单纯p==NULL还是说p指向的内容变为空。使用free()之后 p和内存的状态是啥情况的

使用 p 值逐步回归以删除 p 值不显着的变量

如何在 p:dataTable 中使用 p:graphicImage 和 StreamedContent? [复制]