python提取分析表格数据

Posted

tags:

篇首语:本文由小常识网(cha138.com)小编为大家整理,主要介绍了python提取分析表格数据相关的知识,希望对你有一定的参考价值。

#/bin/python3.4
# -*- coding: utf-8 -*-

import xlrd

def open_excel(file="file.xls"):
try:
data = xlrd.open_workbook(file)
return data
except Exception:
print("please check excel!")

# 根据索引获取Excel表格数据
# 参数:table:Excel文件路径 colnameindex:表头列名所在行,by_index:表索引
def excel_table_byindex(file="file.xls", colindex=0, rowindex=0, byindex=0):
data = open_excel(file)
table = data.sheets()[byindex]
nrows = table.nrows
ncols = table.ncols

rowname = table.row_values(colindex) #获取某一行数据
colname = table.col_values(rowindex) #获取某一列数据

return colname

# 获取项目组成员信息
def get_project_info(file="file.xls", byindex=0):
data = open_excel(file)
table = data.sheets()[byindex]

nrows = table.nrows
ncols = table.ncols

projectname = ["icotos", "cgsl","cgel", "tsp", "vp", "经营团队"]

ictos_colnames = table.col_values(0) # 某一列数据
cgsl_colnames = table.col_values(2)
cgel_colnames = table.col_values(4)
tsp_colnames = table.col_values(6)
vp_colnames = table.col_values(8)
manage_colnames = table.col_values(10)

ictos_memset = ictos_colnames[3:44]
cgsl_memset = cgsl_colnames[3:29]
cgel_memset = cgsl_colnames[3:36]
tsp_memset = cgel_colnames[3:25]
vp_memset = vp_colnames[3:50]
manage_set = manage_colnames[3:13]

projectset = [ictos_memset, cgsl_memset, cgel_memset, tsp_memset, vp_memset, manage_set]

for i in range(len(projectname)):
#print(projectname[i] + "项目组成员"+str(projectset[i])+"\n 共计:%d"%len(projectset[i]))
pass

return projectset


def main():
file2 = "/media/A/work/CI工作/项目组材料/项目组成员清单.xlsx"
data2 = get_project_info(file2)
print(data2)

file1= "/media/A/work/CI工作/9月/员工提交量统计导出20170930170546.xls"
data1 = excel_table_byindex(file1, 4, 4, 0)
commit_info = data1[1:len(data1)]
print("共提交信息%d"%len(commit_info) + "\n"+ str(commit_info))

result= {"ictos":0, "cgsl":0, "cgel":0, "tsp":0, "vp":0, "manager":0}
for item in set(commit_info):
print("the %s has found %d" % (item, commit_info.count(item)))
if item in data2[0]:
result[‘ictos‘] += commit_info.count(item)
if item in data2[1]:
result[‘cgsl‘] += commit_info.count(item)
if item in data2[2]:
result[‘cgel‘] += commit_info.count(item)
if item in data2[3]:
result[‘tsp‘] += commit_info.count(item)
if item in data2[4]:
result[‘vp‘] += commit_info.count(item)
if item in data2[5]:
result[‘manager‘] += commit_info.count(item)

for item in result.items():
print(item)

if __name__ == "__main__":
main()



























































































以上是关于python提取分析表格数据的主要内容,如果未能解决你的问题,请参考以下文章

人口普查分析:利用python+百度文字识别提取图片中的表格数据

人口普查分析:利用python+百度文字识别提取图片中的表格数据

浏览pdf文件以查找特定页面并使用python从图像中提取表格数据

python中 列表中有字典怎么提取数据

Python-Camelot:用三行代码提取PDF表格数据

如何使用 python 将给定的 PDF 提取到文本和表格并将数据存储在 .csv 文件中?