python pandas中的GroupData
Posted
tags:
篇首语:本文由小常识网(cha138.com)小编为大家整理,主要介绍了python pandas中的GroupData相关的知识,希望对你有一定的参考价值。
import numpy as np
import pandas as pd
from pandas import DataFrame, Series
# Build a small demo frame: two string key columns (k1, k2) and two columns
# of random normal values.
dframe = DataFrame({
    'k1': ['X', 'X', 'Y', 'Y', 'Z'],
    'k2': ['alpha', 'beta', 'alpha', 'beta', 'alpha'],
    'dataset1': np.random.randn(5),
    'dataset2': np.random.randn(5),
})
# Show
dframe

# Basic groupby: take the dataset1 column and group it by the k1 key.
group1 = dframe['dataset1'].groupby(dframe['k1'])
# Show the groupby object (nothing is computed until an aggregation is called)
group1
# Aggregate each group, here with the mean.
group1.mean()

# Group keys do not have to come from the frame: any arrays of the right
# length can be used as keys.
cities = np.array(['NY', 'LA', 'LA', 'NY', 'NY'])
month = np.array(['JAN', 'FEB', 'JAN', 'FEB', 'JAN'])
# Mean of dataset1 for every (city, month) combination.
dframe['dataset1'].groupby([cities, month]).mean()

# Let's see the original dframe again.
dframe

# Column names can be passed as group keys. Grouping by k1 leaves the string
# column k2 in the data, so restrict the mean to numeric columns explicitly;
# without numeric_only=True newer pandas raises instead of dropping k2.
dframe.groupby('k1').mean(numeric_only=True)
# Or multiple column names (both string columns are keys here, so only the
# numeric columns remain to be averaged).
dframe.groupby(['k1', 'k2']).mean()

# Another useful groupby method is getting the group sizes.
dframe.groupby(['k1']).size()
# Iterating over a groupby yields (group key, sub-frame) pairs.
# print is called with a single parenthesised argument so the statements
# behave the same under Python 2 and Python 3.
for name, group in dframe.groupby('k1'):
    print("This is the %s group" % name)
    print(group)
    print('\n')

# With several keys the group key is a tuple, which can be unpacked in the
# loop header.
for (k1, k2), group in dframe.groupby(['k1', 'k2']):
    print("Key1 = %s Key2 = %s" % (k1, k2))
    print(group)
    print('\n')
# A possibly useful tactic is creating a dictionary of the data pieces,
# keyed by group name.
group_dict = dict(list(dframe.groupby('k1')))
# Show the group with X
group_dict['X']

# The columns can be split the same way, here by dtype. groupby(axis=1) is
# deprecated in recent pandas, so collect the column names per dtype by hand
# and slice the frame once per dtype.
_columns_by_dtype = {}
for _column, _dtype in dframe.dtypes.items():
    _columns_by_dtype.setdefault(_dtype, []).append(_column)
group_dict_axis1 = {
    _dtype: dframe[_columns]
    for _dtype, _columns in _columns_by_dtype.items()
}
# Show
group_dict_axis1

# A groupby can be restricted to selected columns: group by both keys but
# keep only dataset2. The double brackets keep the result a DataFrame.
dataset2_group = dframe.groupby(['k1', 'k2'])[['dataset2']]
dataset2_group.mean()
#Next we'll have a quick lesson on grouping with dictionaries and series!
import numpy as np
import pandas as pd
from pandas import Series,DataFrame
# Grouping with a dict or a Series as the key: first build a frame to work on.
animals = DataFrame(np.arange(16).reshape(4, 4),
                    columns=['W', 'X', 'Y', 'Z'],
                    index=['Dog', 'Cat', 'Bird', 'Mouse'])
# NaN cannot be stored in an integer column, so make the two target columns
# float up front instead of relying on an implicit upcast during assignment.
animals = animals.astype({'W': float, 'Y': float})
# Now add some NaN values: the second row (position 1) of columns W and Y.
# The removed .ix indexer is replaced by .loc on a positional slice of the index.
animals.loc[animals.index[1:2], ['W', 'Y']] = np.nan
# Show
animals
# A dictionary mapping each column label to a behavior category.
behavior_map = {'W': 'good', 'X': 'bad', 'Y': 'good', 'Z': 'bad'}
# Group the columns through that mapping. groupby(axis=1) is deprecated in
# recent pandas, so group the rows of the transposed frame instead.
animal_col = animals.T.groupby(behavior_map)
# Sum per category, transposed back so the animals are the rows again.
# For example [Dog][good] = [Dog][Y] + [Dog][W].
# NOTE: transposing a frame with mixed column dtypes upcasts the values, so
# integer totals may be displayed as floats.
animal_col.sum().T

# The same mapping works as a Series.
behav_series = Series(behavior_map)
# Show
behav_series
# Group by the Series and count the non-NaN entries per category.
animals.T.groupby(behav_series).count().T
# groupby also accepts a function; it is called on every index label and the
# return value becomes the group key.
# Show the frame again for reference
animals
# Group the animals by the length of their names by passing len as the key.
animals.groupby(by=len).sum()
# The resulting index is the number of letters in the animal name.

# Functions can be combined with arrays, dicts and Series in one key list.
# One explicit key per row:
keys = ['A', 'B'] * 2
# Group by (name length, key) and show the maximum of every group.
animals.groupby(by=[len, keys]).max()
# groupby can also work with hierarchical index levels.
# Two-level column index: a city on the outer level, a sub value on the inner.
hier_col = pd.MultiIndex.from_arrays(
    [
        ['NY', 'NY', 'NY', 'SF', 'SF'],
        [1, 2, 3, 1, 2],
    ],
    names=['City', 'sub_value'],
)
# 5x5 frame over those columns; the values 0..24 are scaled by 100 so they
# are easier to tell apart.
dframe_hr = DataFrame(100 * np.arange(25).reshape(5, 5), columns=hier_col)
# Show
dframe_hr
#Up next: Data Aggregation!!
以上是关于python pandas中的GroupData的主要内容,如果未能解决你的问题,请参考以下文章