Python编程系统下的大数据处理(微信公众号的预测评价)
Posted
tags:
篇首语:本文由小常识网(cha138.com)小编为大家整理,主要介绍了Python编程系统下的大数据处理(微信公众号的预测评价)相关的知识,希望对你有一定的参考价值。
import sqlite3 import xlrd def read_excel(): c = [] # 打开文件 workbook = xlrd.open_workbook(r‘F:/期中作业/dataWx_org.xlsx‘) # 获取所有sheet sheet_name = workbook.sheet_names()[0] sheet = workbook.sheet_by_name(sheet_name) #获取一行的内容 for i in range(1,sheet.nrows): a = [] for j in range(0,sheet.ncols): a.append(sheet.cell(i,j).value) c.append(a[0:13]) #返回整个表单 return c def table_create(c): ‘‘‘创建表并导入数据‘‘‘ #创建表 conn = sqlite3.connect(‘./exp.db‘) curs = conn.cursor() try: curs.execute(‘‘‘CREATE TABLE IF NOT EXISTS TB_CHECK (item FLOAT, name TEXT, wx_name TEXT, title TEXT, top FLOAT, posttime TEXT, day INT, readnum_pm FLOAT, likenum_pm FLOAT, get_time_pm TEXT, status FLOAT, url TEXT, content TEXT);‘‘‘) except sqlite3.OperationalError as e: print(e) #导入数据 # for each in c: # curs.execute("INSERT INTO TB_CHECK VALUES (?,?,?,?,?,?,?,?,?,?,?,?,?)" # , each[0:13]) conn.commit() return conn, curs def final_number(conn, curs): ‘‘‘算法A估计超过10万的阅读量‘‘‘ #设置阅读总量、关键字、时间,三个列表 key = [i for i in range(1,31)] allnumber = [] DAY = [i for i in range(41883,41913)] #计算每天的阅读总量并记录在列表中 for i in range(0,30): onenumber = 0 for row in curs.execute("SELECT * FROM TB_CHECK WHERE TB_CHECK.day=%d"%DAY[i]): onenumber += row[7] allnumber.append(onenumber) #设置关键字key,排除无效数据 for row in curs.execute("SELECT * FROM TB_CHECK WHERE TB_CHECK.readnum_pm=100001"): for i in range(0,30): if DAY[i]==row[6]: key[i]=0 #计算平均每天的阅读总量sumnumber daytime = 30 sumnumber = 0 for i in range(0,30): if key[i]==0: daytime -= 1 else: sumnumber += allnumber[i] sumnumber /= daytime print(sumnumber) #利用算法计算阅读量超过100001的实际阅读量,存入finalnum finalnum = [] for row in curs.execute("SELECT * FROM TB_CHECK WHERE TB_CHECK.readnum_pm=100001"): for i in range(0,30): if DAY[i]==row[6]: finalnumber=sumnumber-allnumber[i] if finalnumber<0: finalnumber *= (-1) finalnumber += 100001 finalnum.append(finalnumber) print(finalnumber) #将新数据重新导入db文件 # for row in curs.execute("SELECT * FROM TB_CHECK WHERE TB_CHECK.readnum_pm=100001"): # for i in range(0,30): # curs.execute(‘UPDATE TB_CHECK SET TB_CHECK.readnum_pm=? WHERE TB_CHECK.readnum_pm=?‘,(finalnum[i], 100001)) conn.commit() def final_Rank(conn, curs): ‘‘‘算法B评价微信影响力‘‘‘ #设置三个等级参数、总等级参数、条目参数 allRank1 = 0 allRank2 = 0 allRank3 = 0 Rank = [] Name = [] ITEM = [i for i in range(1,33)] #计算三个总参数,分别是总头条数,总阅读量,总点赞数 for row in curs.execute("SELECT * FROM TB_CHECK"): allRank1 += row[4] allRank2 += row[7] allRank3 += row[8] #求每个微信号的影响力等级,存入Rank中 for i in range(0,32): Rank1 = 0 Rank2 = 0 Rank3 = 0 for row in curs.execute("SELECT * FROM TB_CHECK WHERE TB_CHECK.item=%f"%ITEM[i]): Rank1 += row[4] Rank2 += row[7] Rank3 += row[8] Rank.append(Rank1/allRank1*0.3 + Rank2/allRank2*0.4 +Rank3/allRank3*0.3) Name.append(row[1]) #冒泡排序 for i in range(0,32): for j in range(0,32): if Rank[i]>=Rank[j]: k = Rank[i] Rank[i] = Rank[j] Rank[j] = k k = ITEM[i] ITEM[i] = ITEM[j] ITEM[j] = k k = str(k) k = Name[i] Name[i] = Name[j] Name[j] = k #显示计算结果,分别是排名,微信号序列,微信号名称,微信号评分 for i in range(0,32): print(i+1,ITEM[i],Name[i],int(Rank[i]*1000)) conn.commit() def forecast(conn, curs): ‘‘‘算法C预计微信影响力变化‘‘‘ #设置三个参数,日期,条目,等级 DAY = [i for i in range(41883,41913)] ITEM = [i for i in range(1,33)] Rank_DAY = [] Rank = [] Name = [] NAME = [] IT = [] #计算每天每个微信号的排名变化,存入IT,算法过程类似于算法B部分 for i in range(0,30): allRank1 = 0 allRank2 = 0 allRank3 = 0 for row in curs.execute("SELECT * FROM TB_CHECK WHERE TB_CHECK.day=%d"%DAY[i]): allRank1 += row[4] allRank2 += row[7] allRank3 += row[8] for Item in ITEM: Rank1 = 0 Rank2 = 0 Rank3 = 0 for row in curs.execute("SELECT * FROM TB_CHECK WHERE TB_CHECK.day=%f AND TB_CHECK.item=%f"%(DAY[i],Item)): Rank1 += row[4] Rank2 += row[7] Rank3 += row[8] Rank_DAY.append(Rank1/allRank1*0.3 + Rank2/allRank2*0.4 +Rank3/allRank3*0.3) Name.append(row[1]) IT_DAY = [i for i in range(1,33)] for j in range(0,32): for k in range(0,32): if Rank_DAY[j]>=Rank_DAY[k]: k = Rank_DAY[i] Rank_DAY[i] = Rank_DAY[j] Rank_DAY[j] = k k = IT_DAY[i] IT_DAY[i] = IT_DAY[j] IT_DAY[j] = k k = str(k) k = Name[i] Name[i] = Name[j] Name[j] = k IT.append(IT_DAY) NAME.append(Name) Rank.append(Rank_DAY) # print(IT) #输入变化后的日期,输出变化结果 K = 1 while K == 1: inputday = int(input("输入未来天数")) for i in range(0,30): if inputday == i: for j in range(0,32): print(IT[i][j],NAME[i][j],int(Rank[i][j]*1000)) conn.commit() def main(): c = read_excel() conn, curs = table_create(c) i = 1 while i == 1: K = int(input("输入你要的操作:1,算法A 2,算法B 3,算法C :")) if K == 1: final_number(conn, curs) elif K == 2: final_Rank(conn, curs) elif K == 3: forecast(conn, curs) conn.close() main()
以上是关于Python编程系统下的大数据处理(微信公众号的预测评价)的主要内容,如果未能解决你的问题,请参考以下文章