数据清洗之 聚合函数使用
Posted wx62c62b36cedf9
tags:
篇首语:本文由小常识网(cha138.com)小编为大家整理,主要介绍了数据清洗之 聚合函数使用相关的知识,希望对你有一定的参考价值。
聚合函数使用
- 对分组对象使用agg聚合函数
- Groupby.agg(func)
- 针对不同的变量使用不同的统计方法
import pandas as pd
import numpy as np
import os
os.getcwd()
'D:\\Jupyter\\notebook\\Python数据清洗实战\\数据清洗之数据统计'
os.chdir('D:\\Jupyter\\notebook\\Python数据清洗实战\\数据')
df = pd.read_csv('online_order.csv', encoding='gbk', dtype={'customer': str, 'order': str})
grouped = df.groupby('weekday')
grouped
<pandas.core.groupby.generic.DataFrameGroupBy object at 0x00000248E5285860>
# agg可同时统计多个,更加灵活
grouped.agg([np.mean, np.max, np.min])
total_items | discount% | hour | Food% | ... | Beauty% | Health% | Baby% | Pets% | |||||||||||||
mean | amax | amin | mean | amax | amin | mean | amax | amin | mean | ... | amin | mean | amax | amin | mean | amax | amin | mean | amax | amin | |
weekday | |||||||||||||||||||||
1 | 30.662177 | 182 | 1 | 8.580705 | 100.0 | -65.15 | 14.693122 | 23 | 0 | 22.690866 | ... | 0.0 | 1.152285 | 100.00 | 0.0 | 11.592562 | 100.0 | 0.0 | 1.007306 | 100.0 | 0.0 |
2 | 31.868612 | 282 | 1 | 8.638014 | 100.0 | -23.89 | 14.966197 | 23 | 0 | 23.994915 | ... | 0.0 | 1.079423 | 100.00 | 0.0 | 11.277284 | 100.0 | 0.0 | 1.272638 | 100.0 | 0.0 |
3 | 31.869796 | 298 | 1 | 7.794507 | 100.0 | -26.15 | 15.059898 | 23 | 0 | 24.309274 | ... | 0.0 | 1.156829 | 100.00 | 0.0 | 9.591389 | 100.0 | 0.0 | 0.937205 | 100.0 | 0.0 |
4 | 32.251899 | 221 | 1 | 8.068155 | 100.0 | -17.24 | 14.324185 | 23 | 0 | 24.374364 | ... | 0.0 | 1.031490 | 100.00 | 0.0 | 9.058201 | 100.0 | 0.0 | 1.080473 | 100.0 | 0.0 |
5 | 31.406619 | 167 | 1 | 9.159031 | 100.0 | -47.26 | 13.386919 | 23 | 0 | 24.602790 | ... | 0.0 | 1.248605 | 100.00 | 0.0 | 9.655343 | 100.0 | 0.0 | 0.908227 | 100.0 | 0.0 |
6 | 32.154814 | 190 | 1 | 8.414258 | 100.0 | -39.84 | 14.751084 | 23 | 0 | 23.743196 | ... | 0.0 | 1.170585 | 81.77 | 0.0 | 11.478343 | 100.0 | 0.0 | 1.150980 | 100.0 | 0.0 |
7 | 32.373837 | 222 | 1 | 8.710171 | 100.0 | -63.64 | 16.989535 | 23 | 0 | 22.271512 | ... | 0.0 | 1.145938 | 72.77 | 0.0 | 13.844250 | 100.0 | 0.0 | 0.950391 | 100.0 | 0.0 |
7 rows × 33 columns
# 同时计算 total_items 的总和 和 Food% 的均值
grouped.agg({'total_items': np.sum, 'Food%': np.mean})
total_items | Food% | |
weekday | ||
1 | 191240 | 22.690866 |
2 | 158387 | 23.994915 |
3 | 150043 | 24.309274 |
4 | 131620 | 24.374364 |
5 | 79710 | 24.602790 |
6 | 74149 | 23.743196 |
7 | 167049 | 22.271512 |
grouped.agg({'total_items': np.sum, 'Food%': [np.mean, np.max]})
total_items | Food% | ||
sum | mean | amax | |