如何统计元组列表中的相同项目
Posted
tags:
篇首语:本文由小常识网(cha138.com)小编为大家整理,主要介绍了如何计算列表元组中的相同项目相关的知识,希望对你有一定的参考价值。
我有一个这样的元组列表:
# Sample data: a list of 2-tuples pairing a grade-name string with a float quantity.
tup_list = [('UL00628', 28.0), ('UL00628', 28.0), ('UL00628', 28.0), ('UL00628', 28.0), ('UL00628', 28.0), ('UL00628', 28.0), ('UL00628', 28.0), ('UL00628', 28.0), ('UL00628', 28.0), ('UL00628', 28.0), ('UL00628', 28.0), ('UL00628', 28.0), ('UL00628', 28.0), ('UL00628', 28.0), ('UL00628', 28.0), ('UL00628', 21.325), ('UL00628', 6.675), ('UL00628', 22.5), ('UL00628', 5.5), ('UL00628', 15.525), ('UL00628', 12.475), ('UL00628', 28.0), ('UL00628', 28.0), ('UL00628', 28.0), ('UL00628', 28.0), ('UL00628', 28.0), ('UL00428', 28.0), ('UL00428', 28.0), ('UL00428', 28.0), ('UL00428', 28.0), ('UL00428', 28.0), ('UL00628', 28.0), ('UL00628', 28.0), ('UL00628', 28.0), ('UL00628', 28.0), ('UL00628', 28.0), ('UL00628', 28.0), ('UL00628', 28.0), ('UL00628', 28.0), ('UL00628', 28.0), ('UL00628', 28.0), ('UL00628', 28.0), ('UL00628', 28.0), ('UL00628', 28.0), ('UL00628', 28.0), ('UL00628', 28.0), ('UL00428', 28.0), ('UL00428', 28.0), ('UL00428', 28.0), ('UL00428', 28.0), ('UL00428', 28.0), ('UL00428', 28.0), ('UL00428', 28.0), ('UL00628', 28.0), ('UL00628', 28.0), ('UL00628', 28.0), ('UL00628', 28.0), ('UL00628', 28.0), ('UL00428-OGA', 28.0)]
我想统计这些元组中相同的项目(例如 UL00628、UL00428)。我可以用什么方式进行迭代?
顺便说一下,tup_list来自excel文件。代码如下:
# Read the workbook and collect (grade, quantity) pairs for every row whose
# load date falls inside the month/day window entered by the user.
load_all = dict()
file_name = '***.xls'
wb = xlrd.open_workbook('d:**%s' % file_name)
table = wb.sheet_by_name('***')

# Inclusive bounds of the date window, entered interactively.
date_start_month = int(input('Pls enter the date of start month:'))
date_start_day = int(input('Pls enter the date of start day:'))
date_end_month = int(input('Pls enter the date of end month:'))
date_end_day = int(input('Pls enter the date of end day:'))

count = 0  # number of rows that matched the window
tup_list = list()
tup = tuple()
nrows = table.nrows

# Rows are only scanned when cell (1, 11) holds the expected marker text.
if table.cell(1, 11).value == '****':
    for num in range(2, nrows):
        date_of_load = table.cell(num, 11).value  # Excel serial date value
        # Convert with the workbook's own date mode rather than assuming 0,
        # so 1904-based workbooks are decoded correctly as well.
        year, month, day, hour, minute, second = xlrd.xldate.xldate_as_tuple(
            date_of_load, wb.datemode
        )
        # NOTE: month and day are checked independently, so a window that
        # spans several months keeps only days within
        # [date_start_day, date_end_day] of each month.
        if not date_start_month <= month <= date_end_month:
            continue
        if not date_start_day <= day <= date_end_day:
            continue
        grade_name = table.cell(num, 3).value  # grade
        grade_num = table.cell(num, 5).value  # quantity
        tup = (grade_name, grade_num)
        tup_list.append(tup)
        count += 1
^^^^^^^^^这是原始数据
NO Grade quantity Loadday
9 UL00628 28.0000 2018/2/7
10 UL00628 28.0000 2018/2/7
11 UL00628 28.0000 2018/2/7
12 EVA-OGC 28.0000 2018/2/7
13 EVA-OGC 28.0000 2018/2/7
14 UL00628 28.0000 2018/2/8
15 UL00628 28.0000 2018/2/8
16 UL00628 28.0000 2018/2/19
17 UL00628 28.0000 2018/2/19
18 UL00628 28.0000 2018/2/19
19 UL00628 28.0000 2018/2/19
20 UL00628 28.0000 2018/2/19
21 UL00628 28.0000 2018/2/19
22 UL00628 28.0000 2018/2/19
23 UL00628 28.0000 2018/2/19
24 UL00628 28.0000 2018/2/20
25 UL00628 28.0000 2018/2/20
26 UL00628 28.0000 2018/2/20
27 UL00628 28.0000 2018/2/20
28 UL00628 28.0000 2018/2/20
我需要先找出符合条件的装载日期,然后取得对应的等级和数量,并按等级分别进行统计。
答案
对于数据处理,特别是涉及 Excel 或 CSV 文件时,我会使用 pandas,而不是直接使用 openpyxl。
除此之外,一旦你有元组列表,你可以使用defaultdict
from collections import defaultdict
# Group every quantity under its grade name. Afterwards len(results[grade])
# is how many times the grade occurs and sum(results[grade]) is its total.
results = defaultdict(list)
for grade, quantity in tup_list:
    results[grade].append(quantity)
Other problems
我会做的其他改变是
- 将程序的不同部分放在不同的函数中
- 确保在收集输入时,能够处理传入非 `int` 值的情况
- 将输入放在 `dict` 中,这样如果有一天你要从另一个脚本或程序的其他部分获得所需的输入数据,就可以轻松地重用这段代码
- 尽可能使用生成器,而不是返回列表的函数
- 使用 `with` 语句打开资源
- 使用 `pathlib.Path` 处理文件和文件名
- 使用 `if __name__ == "__main__"` 判断来保护脚本入口
from collections import defaultdict
from pathlib import Path
import xlrd
def get_int_inputs(questions):
    """Prompt for one integer per question and yield ``(key, answer)`` pairs.

    Args:
        questions: Mapping of result key to the prompt text passed to
            ``input``.

    Yields:
        One ``(key, answer)`` tuple per entry of ``questions``, in the
        mapping's iteration order, where ``answer`` is the integer entered.
    """
    for key, msg in questions.items():
        while True:
            try:
                answer = int(input(msg))
            except ValueError:
                # Not an integer: ask the same question again.
                continue
            break
        # Yield outside the retry loop so that a valid answer of 0 is
        # accepted and emitted exactly once instead of re-prompting.
        yield key, answer
def parse_file(filename, inputs):
    """Yield ``(grade_name, grade_num)`` for rows inside the date window.

    Opens the workbook with xlrd, reads sheet ``'***'`` and, when cell
    (1, 11) holds the marker ``'****'``, walks the rows from index 2 on.
    Column 11 is decoded as an Excel date; columns 3 and 5 supply the
    yielded values.

    Args:
        filename: Path of the workbook to open.
        inputs: Mapping with the integer keys ``date_start_month``,
            ``date_end_month``, ``date_start_day`` and ``date_end_day``.
            All four bounds are inclusive.

    Yields:
        ``(grade_name, grade_num)`` tuples for every matching row. Nothing
        is yielded when the marker cell does not match.
    """
    with xlrd.open_workbook(filename) as wb:
        table = wb.sheet_by_name('***')
        if table.cell(1, 11).value != '****':
            return
        for num in range(2, table.nrows):
            date_of_load = table.cell(num, 11).value
            # Decode with the workbook's own date mode (1900- or 1904-based).
            _, month, day, *_ = xlrd.xldate.xldate_as_tuple(
                date_of_load, wb.datemode
            )
            # NOTE: month and day are checked independently, so a window
            # spanning several months keeps only the days within
            # [date_start_day, date_end_day] of each month.
            if not inputs['date_start_month'] <= month <= inputs['date_end_month']:
                continue
            if not inputs['date_start_day'] <= day <= inputs['date_end_day']:
                continue
            grade_name = table.cell(num, 3).value
            grade_num = table.cell(num, 5).value
            yield grade_name, grade_num
def aggregate(quantities):
    """Sum the quantities per grade name.

    Args:
        quantities: Iterable of ``(grade_name, grade_num)`` pairs. It is
            consumed once, so a generator is fine.

    Returns:
        A plain ``dict`` mapping each grade name to the sum of its
        quantities. Empty input gives an empty dict.
    """
    # Accumulate running totals directly instead of collecting a list per
    # grade first; starting from int 0 matches what ``sum`` would produce.
    totals = defaultdict(int)
    for grade_name, grade_num in quantities:
        totals[grade_name] += grade_num
    return dict(totals)
if __name__ == '__main__':
    # Key -> prompt text; the keys are the ones parse_file looks up.
    wanted_input = {
        'date_start_month': 'Pls enter the date of start month:',
        'date_start_day': 'Pls enter the date of start day:',
        'date_end_month': 'Pls enter the date of end month:',
        'date_end_day': 'Pls enter the date of end day:',
    }
    inputs = dict(get_int_inputs(wanted_input))
    filename = Path('D:/', '***.xls')
    quantities = parse_file(filename, inputs)
    result = aggregate(quantities)
    # Report the total per grade; the result was previously computed and
    # then silently dropped.
    for grade_name, total in result.items():
        print(grade_name, total)
由于没有样本数据,我无法测试这段代码,因此其中可能存在不少错误。
Pandas
另一种方法是使用 pandas 进行数据处理,这样代码大致如下:
from pathlib import Path
import pandas as pd
def parse_data(df, inputs, *, date_column='Loadday', grade_column='Grade',
               quantity_column='quantity'):
    """Sum the quantity per grade for rows inside the month/day window.

    Args:
        df: DataFrame holding the sheet contents.
        inputs: Mapping with the integer keys ``date_start_month``,
            ``date_end_month``, ``date_start_day`` and ``date_end_day``.
            All four bounds are inclusive.
        date_column: Label of the column holding the load date.
        grade_column: Label of the column to group by.
        quantity_column: Label of the column to sum.

    Returns:
        A Series of summed quantities indexed by grade, or ``None`` when
        the frame has no column at position 11 or that column is not
        labelled ``'****'``.

    NOTE(review): the default labels are taken from the sample table header
    (NO / Grade / quantity / Loadday) -- confirm against the real sheet.
    """
    # Position 11 might differ depending on whether there is an index column
    # and on 0- versus 1-based indexing.
    if len(df.columns) <= 11 or df.columns[11] != '****':
        return None
    # to_datetime leaves datetime columns as they are and converts the rest.
    dates = pd.to_datetime(df[date_column])
    # ``between`` is inclusive on both ends by default, so no ``+ 1`` is
    # needed and the boolean ``inclusive`` flag (removed in pandas 2.0) is
    # avoided.
    # NOTE: month and day are checked independently of each other.
    date_correct = dates.dt.month.between(
        inputs['date_start_month'],
        inputs['date_end_month'],
    ) & dates.dt.day.between(
        inputs['date_start_day'],
        inputs['date_end_day'],
    )
    return df[date_correct].groupby(grade_column)[quantity_column].sum()
if __name__ == '__main__':
    # Key -> prompt text; the keys are the ones parse_data looks up.
    wanted_input = {
        'date_start_month': 'Pls enter the date of start month:',
        'date_start_day': 'Pls enter the date of start day:',
        'date_end_month': 'Pls enter the date of end month:',
        'date_end_day': 'Pls enter the date of end day:',
    }
    inputs = dict(get_int_inputs(wanted_input))
    filename = Path('D:/', '***.xls')
    # Same (masked) sheet name as the xlrd version; an empty sheet name
    # cannot match any sheet.
    df = pd.read_excel(filename, sheet_name='***', header=0)
    # parse_data takes the date window as its second argument.
    result = parse_data(df, inputs)
    # parse_data returns None when the header check fails.
    if result is not None:
        print(result)
以上是关于如何计算列表元组中的相同项目的主要内容,如果未能解决你的问题,请参考以下文章