import pandas as pd
import numpy as np
# Build a labelled Series: five standard-normal draws indexed by 'a'..'e'.
labels = list('abcde')
s = pd.Series(data=np.random.randn(5), index=labels)
# Build a six-row DataFrame with two random float columns ('a', 'c') and one
# string column ('b') that alternates between 'foo' and 'bar'.
df = pd.DataFrame(
    {
        'a': np.random.randn(6),
        'b': ['foo', 'bar'] * 3,
        'c': np.random.randn(6),
    }
)
# Load a DataFrame from a CSV file. read_csv is a module-level pandas function
# (not a DataFrame method) and returns a new DataFrame, so bind its result.
df = pd.read_csv("data/mydata.csv")
# Use the first CSV column as the index and try to parse it as dates.
close_px = pd.read_csv('stock_data.csv', index_col=0, parse_dates=True)
# First value of column 'AA' by position. Use .iloc for positional access:
# a plain [0] on a non-integer index is treated as a label lookup by newer
# pandas versions.
close_px['AA'].iloc[0]
# Show the first 5 rows (pass n, e.g. df.head(10), for the first n rows).
df.head()
# Show the last 5 rows (pass n, e.g. df.tail(10), for the last n rows).
df.tail()
# Select a range of rows with slice syntax (end is exclusive).
df[10:20]
# Select by column.
df['col1']  # single column -> Series
df[['col1', 'col2']]  # list of columns -> DataFrame
# Select rows by label range. `.ix` was removed from pandas; `.loc` is the
# label-based indexer, and label slices include both endpoints.
df.loc['2001-01-01':'2001-12-31']
# Select by row label range and column list at the same time.
df.loc['2001-01-01':'2001-12-31', ['col1', 'col2']]
# Largest and smallest value of a single column.
df['col'].max()
df['col'].min()
# Row labels (index) and column labels of the DataFrame.
df.index
df.columns
# Transposed DataFrame (rows and columns swapped). transpose is a method and
# has to be called; `df.T` is the equivalent shorthand.
df.transpose()
# Fill missing values.
# http://pandas.pydata.org/pandas-docs/stable/generated/pandas.DataFrame.fillna.html
# The first two calls return a new DataFrame and leave df unchanged.
df.fillna(value=0)  # fill with the number 0 (a "0" string would insert text)
# fillna(method=...) is deprecated; ffill()/bfill() are the direct equivalents.
df.ffill()  # forward fill: propagate the last valid value
df.bfill(inplace=True)  # backward fill: use the next valid value; inplace=True modifies df itself
# Plot two columns as lines on one chart.
import matplotlib.pyplot as plt
# Selecting several columns needs a list inside the brackets; df["col1","col2"]
# would look up the single tuple key ("col1", "col2").
df[["col1", "col2"]].plot()
plt.show()