Tkinter

Posted 2020-10-29 呼兰河畔

tags:

篇首语：本文由小常识网(cha138.com)小编为大家整理，主要介绍了Tkinter相关的知识，希望对你有一定的参考价值。

采集小工具，目前主要针对知乎的文章与评论进行采集。今天刚开始做，之后会不断更新完善。

目前效果（测试站点：科技；测试链接：http://zhihu.sogou.com/include/pc/pc/topic/topic2_0.html）

1.在输入框中输入站点与链接

2.点击“提交链接”进行采集（会判断链接是否有效，文本框显示输入的站点与链接）

3.每次输入的站点与链接会保存到同目录下的txt文件中

4.点击“采集所有”（对txt文件中保存的所有链接进行采集）

数据库显示

tkinter 代码

import tkinter
import tkinter.filedialog
import tkinter.messagebox
from tkinter.scrolledtext import ScrolledText
from threading import Thread
from yanshi.hot import *

class Cmpfile:
    """Small Tkinter window that drives the Zhihu collector.

    The window has two entry fields (site name and link), a scrolled text
    area used as a log, and buttons to submit one link, reload the saved
    records, collect every saved record and clear the log.  Site/link pairs
    that were collected successfully are appended to ``RECORD_FILE`` as
    ``site:link`` lines.

    Constructing an instance builds the window and enters the Tk main loop,
    so the constructor only returns once the window is closed.
    """

    # Text file, relative to the working directory, holding one
    # ``site:link`` record per line.
    RECORD_FILE = './1.txt'

    def __init__(self):
        # Kept for backward compatibility with code that reads these
        # attributes; the methods below work on local variables instead so
        # that concurrent worker threads do not share mutable state.
        self.list1 = []
        self.list2 = []
        self.item = {}
        root = tkinter.Tk()
        self.root = root
        self.root.title('知乎')
        self.root.minsize(400, 350)
        self.Menu1()
        self.Label1()
        self.root.mainloop()

    @staticmethod
    def _parse_record(line):
        """Split one ``site:link`` record line into ``(site, link)``.

        Only the first ``:`` separates the two fields, because the link
        itself contains colons (``http://...``).  Returns ``None`` for blank
        or malformed lines.
        """
        site, sep, link = line.rstrip('\r\n').partition(':')
        if not sep or not site or not link:
            return None
        return site, link

    def _read_records(self):
        """Return the lines of ``RECORD_FILE``; ``[]`` if it does not exist yet."""
        try:
            with open(self.RECORD_FILE, 'r') as f:
                return f.readlines()
        except FileNotFoundError:
            # Nothing has been submitted successfully so far.
            return []

    def _collect(self, jobs):
        """Run the collector over *jobs* and show its status message.

        *jobs* is a list of ``{site: link}`` dicts.  Returns the status
        string produced by ``zhihu.starts()``.
        """
        status = zhihu(jobs).starts()
        tkinter.messagebox.showinfo('温馨提示', status)
        return status

    def thread_up(self, func):
        """Start *func* on a daemon thread and return immediately."""
        # ``daemon=True`` replaces the deprecated ``setDaemon(True)``.
        t = Thread(target=func, daemon=True)
        t.start()

    def up(self):
        """Collect the site/link currently typed into the entry fields.

        Logs the pair, runs the collector, and appends the pair to
        ``RECORD_FILE`` only when the collector reports success.  The entry
        fields are cleared afterwards in either case.
        """
        # NOTE(review): this method is started via thread_up(), so the
        # widget calls below run off the Tk main thread; Tkinter does not
        # guarantee that is safe -- consider marshalling through
        # ``root.after``.
        site = self.entry1.get()
        link = self.entry2.get()

        self.text.insert('insert', site + '-' + link + '\n')
        tkinter.messagebox.showinfo('温馨提示', '开始采集')
        job = {site: link}
        print(job)
        status = self._collect([job])
        if status == '采集结束':
            # ``with`` guarantees the record is flushed and the file closed.
            with open(self.RECORD_FILE, 'a') as f:
                f.write(site + ':' + link + '\n')
        self.entry1.delete(0, 'end')
        self.entry2.delete(0, 'end')

    def load(self):
        """Replace the log contents with the saved records."""
        self.clear()
        for one in self._read_records():
            self.text.insert('insert', one + '\n')

    def clear(self):
        """Empty the log text area."""
        self.text.delete('1.0', 'end')

    def all(self):
        """Log every saved record and collect all of them.

        Malformed lines are logged but skipped.  The collection itself runs
        on a worker thread so the window stays responsive.
        """
        jobs = []
        for one in self._read_records():
            self.text.insert('insert', one)
            record = self._parse_record(one)
            if record is not None:
                site, link = record
                jobs.append({site: link})
        if jobs:
            self.thread_up(lambda: self._collect(jobs))

    # Basic layout using place()
    def Menu1(self):
        """Attach the menu bar to the root window."""
        menu = tkinter.Menu(self.root)
        # One shared (currently empty) submenu is reused for every entry.
        lookmenu = tkinter.Menu(menu, tearoff=0, bg='purple', fg='white')
        # "Log" entry
        menu.add_cascade(menu=lookmenu, label='日志')
        # "Help" entry
        menu.add_cascade(menu=lookmenu, label='帮助')
        # "Login" entry
        menu.add_cascade(menu=lookmenu, label='登录')
        # "View" entry
        menu.add_cascade(menu=lookmenu, label='查看')
        self.root.config(menu=menu)

    def Label1(self):
        """Create and place the labels, entry fields, buttons and log area."""
        tkinter.Label(self.root, text='sitename', height=1, width=1, pady=3, bd=3).place(relx=0.02, rely=0.05, relwidth=0.2)
        tkinter.Label(self.root, text='link', height=1, width=1, pady=3, bd=3).place(relx=0.02, rely=0.15, relwidth=0.2)
        self.entry1 = tkinter.Entry(self.root, width=40, bg='white', bd=5)
        self.entry1.place(relx=0.25, rely=0.05, relwidth=0.7)
        self.entry2 = tkinter.Entry(self.root, width=40, bg='white', bd=5)
        self.entry2.place(relx=0.25, rely=0.15, relwidth=0.7)

        # Submitting runs on a worker thread because collection is slow.
        tkinter.Button(self.root, text='提交链接', height=1, width=8, pady=5, bd=1, command=lambda: self.thread_up(self.up)).place(x=105, y=100)
        tkinter.Button(self.root, text='载入文本', height=1, width=8, pady=5, bd=1, command=self.load).place(x=190, y=100)
        tkinter.Button(self.root, text='采集所有', height=1, width=8, pady=5, bd=1, command=self.all).place(x=275, y=100)
        self.text = ScrolledText(self.root, height=8, width=37, bg='white', pady=3, bd=3)
        self.text.place(x=100, y=150)
        tkinter.Button(self.root, text='清空', height=1, width=8, pady=5, bd=1, command=self.clear).place(x=105, y=280)

# Instantiate the window object.
one = Cmpfile()

采集代码放在另外的py文件（yanshi/hot.py）中，运行时由上面的界面代码引用

import re
import pymysql
import time
import datetime
import requests
from lxml import etree
from multiprocessing.dummy import Pool as ThreadPool
class zhihu(object):
    """Collector that stores Zhihu questions and their replies in MySQL.

    Each job is a ``{channel_name: topic_list_url}`` dict.  For every job
    the topic list page is fetched, the question links on it are followed,
    and one row per question plus one row per reply is inserted into the
    ``wenda`` table.
    """

    _USER_AGENT = 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.108 Safari/537.36'
    # Seconds to wait for any single HTTP response before giving up.
    _TIMEOUT = 30
    # Matches "发布于 ..." / "编辑于 ..." followed by either a date or a
    # clock time; group 1 is the whole phrase, group 3 the date/time part.
    _TIME_PATTERN = re.compile(r'((\u53d1\u5e03\u4e8e|\u7f16\u8f91\u4e8e).*?(\d+-\d+-\d+|\d+:\d+))')
    _NUMBER_PATTERN = re.compile(r'\d+')

    _QUESTION_SQL = (
        "INSERT INTO wenda (IR_GROUPNAME,IR_SITENAME,IR_CHANNEL,IR_URLTITLE,"
        "IR_QUESTION,IR_URLNAME,IR_LASTTIME,IR_VIEW,IR_FOLLOW,IR_RETURN,BBSNUM) "
        "VALUES (%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s)"
    )
    _REPLY_SQL = (
        "INSERT INTO wenda (IR_GROUPNAME,IR_SITENAME,IR_CHANNEL,IR_URLTITLE,"
        "IR_URLNAME,IR_URLTIME,IR_LASTTIME,BBSNUM,IR_AUTHOR,IR_RESPONSE,"
        "IR_AGREE,IR_COMMENT) "
        "VALUES (%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s)"
    )

    def __init__(self, urls):
        """Remember the jobs to run.

        Args:
            urls: list of ``{channel_name: topic_list_url}`` dicts.
        """
        self.url_list = urls
        # Headers for the topic list pages.
        self.headers = {
            'User-Agent': self._USER_AGENT,
            'Connection': 'keep-alive',
            'Host': 'zhihu.sogou.com',
            'Referer': 'http://zhihu.sogou.com/',
        }
        # Headers for the individual question pages.
        self.headers1 = {
            'User-Agent': self._USER_AGENT,
            'Referer': 'https://www.zhihu.com/',
        }

    def ToResponse(self, res):
        """Decode *res* with its detected encoding and parse it as HTML."""
        res.encoding = res.apparent_encoding
        args = etree.HTML(res.text)
        return args

    def request_url(self, url):
        """Fetch a topic list page and return the question links on it."""
        response = requests.get(url, headers=self.headers, timeout=self._TIMEOUT)
        response = self.ToResponse(response)
        link_list = response.xpath('//li/p[@class="tit"]/a/@href')
        return link_list

    def connectdb(self):
        """Open and return a new connection to the ``zhihu`` database."""
        print('连接到mysql服务器...')
        # NOTE(review): host and credentials are hard-coded placeholders;
        # move them to configuration before real use.
        # Keyword arguments are used because PyMySQL no longer accepts the
        # connection parameters positionally.
        db = pymysql.connect(host="服务器", user="root", password="123456", database="zhihu", charset='utf8')
        print('连接上了!')
        return db

    @staticmethod
    def _normalize_reply_time(raw, today=None):
        """Turn a reply's time label into a ``YYYY-MM-DD[ HH:MM]`` string.

        A label that already carries a full date is returned as that date.
        A label with only a clock time is prefixed with yesterday's date
        when it mentions "昨天" and with today's date otherwise.

        Args:
            raw: the label text, e.g. ``"发布于 昨天 08:05"``.
            today: date to treat as "today"; defaults to the current date.

        Returns:
            The normalized string, or ``None`` when *raw* has no
            recognisable time.
        """
        match = zhihu._TIME_PATTERN.search(raw)
        if match is None:
            return None
        phrase, stamp = match.group(1), match.group(3)
        if '-' in stamp:
            return stamp
        if today is None:
            today = datetime.date.today()
        if '昨天' in phrase:
            today = today - datetime.timedelta(days=1)
        # Same separator for every branch so stored values are uniform.
        return today.strftime('%Y-%m-%d') + ' ' + stamp

    @staticmethod
    def _parse_agree(text):
        """Expand an abbreviated vote count such as ``"1.5K"`` to ``"1500"``."""
        if 'K' in text:
            return str(int(float(text.replace('K', '')) * 1000))
        return text

    def _save(self, db, cursor, sql, params):
        """Execute one parameterized INSERT and commit; roll back on failure."""
        try:
            # Values are passed separately from the statement so quotes in
            # scraped text cannot break or alter the SQL.
            cursor.execute(sql, [str(p) for p in params])
            db.commit()
        except Exception as e:
            print('插入数据失败!')
            print(e)
            db.rollback()

    def _reply_fields(self, one):
        """Extract ``(time, author, text, agree, comments)`` from one reply node.

        Raises ``IndexError``/``ValueError`` when the node does not have the
        expected structure.
        """
        names = one.xpath(".//div[@class='AuthorInfo-head']//a[@class='UserLink-link']/text()")
        author = names[0] if names else '匿名用户'
        body = ''.join(one.xpath(".//div[@class='RichContent-inner']//text()"))
        raw_time = one.xpath(".//div[@class='ContentItem-time']//span/text()")[0]
        url_time = self._normalize_reply_time(raw_time)
        if url_time is None:
            raise ValueError('unrecognised reply time: %r' % (raw_time,))
        agree = self._parse_agree(one.xpath(".//button[@aria-label='赞同']/text()")[0])
        print(agree)
        comment_text = one.xpath(".//div[@class='ContentItem-actions RichContent-actions']/button[1]/text()")[0]
        found = self._NUMBER_PATTERN.search(comment_text)
        # A button label without digits means there are no comments yet.
        comments = found.group() if found else '0'
        return url_time, author, body, agree, comments

    def _store_question(self, db, cursor, page, channel, link):
        """Insert the question on *page* and each of its replies."""
        group_name = '问答社区'
        site_name = '搜狗知乎'
        last_time = datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')

        title = page.xpath('//h1[@class="QuestionHeader-title"]/text()')[0]
        question = ''.join(page.xpath("//span[@class='RichText']//text()"))
        answers = page.xpath("//div[@class='List-header']//h4/span/text()")[0]
        follow = page.xpath("//div[@class='QuestionFollowStatus']//div[@class='NumberBoard-item'][1]//strong/text()")[0]
        views = page.xpath("//div[@class='QuestionFollowStatus']//div[@class='NumberBoard-item'][2]//strong/text()")[0]
        print(group_name, title, question, answers, follow, views)

        # The question itself is stored with BBSNUM '0'.
        self._save(db, cursor, self._QUESTION_SQL,
                   (group_name, site_name, channel, title, question, link,
                    last_time, views, follow, answers, '0'))

        # Replies
        huifu = page.xpath("//div[@class='List']//div[@class='List-item']")
        print(huifu)
        for num, one in enumerate(huifu, start=1):
            try:
                url_time, author, body, agree, comments = self._reply_fields(one)
            except Exception as e:
                # Best effort: skip a reply whose markup is unexpected
                # instead of dropping every reply after it.
                print(e)
                continue
            self._save(db, cursor, self._REPLY_SQL,
                       (group_name, site_name, channel, title, link, url_time,
                        last_time, str(num), author, body, agree, comments))
            time.sleep(1)

    def get_link(self, item):
        """Collect every question linked from the topic pages in *item*.

        Args:
            item: ``{channel_name: topic_list_url}`` dict.

        Network and database-connection errors propagate to the caller;
        a question page whose content cannot be extracted is reported and
        skipped.
        """
        for groupname, url in item.items():
            link_list = self.request_url(url)
            db = self.connectdb()
            try:
                cursor = db.cursor()
                for link in link_list:
                    print(link)
                    response = requests.get(url=link, headers=self.headers1, timeout=self._TIMEOUT)
                    page = self.ToResponse(response)
                    try:
                        self._store_question(db, cursor, page, groupname, link)
                    except Exception as e:
                        print(e)
            finally:
                # Always release the connection, even if a request fails.
                db.close()

    def starts(self):
        """Run all jobs on a pool of 5 threads.

        Returns:
            ``'采集结束'`` when every job finished, ``'链接错误'`` when any
            job raised.
        """
        pool = ThreadPool(5)
        try:
            time3 = time.time()
            pool.map(self.get_link, self.url_list)
            time4 = time.time()
            print('多线程耗时 : ' + str(time4 - time3) + ' s')
            return '采集结束'
        except Exception as e:
            print(e)
            return '链接错误'
        finally:
            pool.close()
            pool.join()

以上是关于Tkinter的主要内容，如果未能解决你的问题，请参考以下文章