python pandas ??????
Posted
tags:
篇首语:本文由小常识网(cha138.com)小编为大家整理,主要介绍了python pandas ??????相关的知识,希望对你有一定的参考价值。
?????????clip frame l????????? values head ?????? mysql index rank
# Pandas quick-reference script.
#
# Naming convention: `df` is a pandas DataFrame, `s` is a pandas Series.
#
# Only the statements OUTSIDE the triple-quoted blocks execute: connect to
# MySQL, build a small sample DataFrame, and plot it. The triple-quoted blocks
# are inert string literals holding reference snippets; copy a snippet out to
# try it.
#
# NOTE(review): this script was recovered from an encoding-damaged copy in
# which every non-ASCII character (including the quote characters) had been
# replaced by "?". Consequences to confirm against the original if available:
#   * sample string values and result-column names are ASCII placeholders;
#   * comments were rewritten in English from what the code itself does;
#   * the start/end of the inert blocks is inferred from where quote runs
#     appeared in the damaged text.
import matplotlib
import matplotlib.font_manager  # explicit: FontProperties is used below
from pandas import DataFrame
import numpy as np
import pandas as pd
import MySQLdb
import matplotlib.pyplot as plt

# NOTE(review): credentials are hard-coded; move them to configuration before
# using this anywhere but a local sandbox.
db = MySQLdb.connect(host="localhost", port=3306, user="root", passwd="1234",
                     db='spj', charset="utf8")  # database connection
filename = 'count_day.csv'  # data file used by the I/O snippets
query = 'select * from j'   # SQL statement used by the read_sql snippet

"""
# ---- Loading data ----
pd.read_excel(filename)    # load an Excel workbook
pd.read_table(filename)    # load a delimited text file
pd.read_json(json_string)  # load JSON (fixed: `json-string` parsed as a subtraction)
pd.read_html(url)          # parse the <table> elements of an HTML page; returns a list
pd.read_clipboard()        # parse the clipboard contents as delimited text
# Run an SQL query and load the result set.
df = pd.read_sql(query, db)
# Load a CSV file: header=None means the file has no header row,
# index_col=0 uses the first column as the row index.
df = pd.read_csv(filename, sep=',', header=None, index_col=0)

# ---- Exporting data ----
df.to_csv('ans.csv', index=False, sep=',')  # write CSV without the index
df.to_excel(filename)                       # write an Excel workbook
df.to_sql(table_name, db)                   # write to an SQL table
df.to_json(filename)                        # write JSON

# ---- Building DataFrames by hand ----
# From a list of rows plus column names.
df = pd.DataFrame([[1, 2, 3], [4, 5, 6]], columns=['f1', 'f2', 'f3'])
# From a dict mapping column name -> values.
df = pd.DataFrame({'user-id': ['u1', 'u2', 'u3'],
                   'item_id': ['item1', 'item2', 'item3']})
# From a list of records; keys missing from a record become NaN.
df = pd.DataFrame([{'user-id': 'user1', 'item-id': 'item1'},
                   {'user-id': 'user2'}])

# ---- Inspecting ----
show = df.head(1)      # first n rows
show = df.tail(1)      # last n rows
show = df.shape        # (rows, columns)
show = df.info()       # prints index/dtype/memory summary; returns None
show = df.describe()   # summary statistics of the numeric columns
show = df['item-id'].value_counts(dropna=False)  # frequency of each value, NaN included
show = df['item-id'].unique()                    # distinct values of a Series

# ---- Selecting ----
s = df['user-id']                  # a single name returns a Series
df = df[['user-id', 'item-id']]    # a list of names returns a DataFrame
show = s.iloc[0]                   # select by position
show = df.iloc[0, 0:2]             # (row, column-slice) by position
show = s.loc[0]                    # select by index label (fixed: 'item-id' is a column name, not a row label)
show = df.sample(frac=0.5)         # random fraction of the rows
show = df.sample(n=len(df))        # all rows in random order (shuffle)
"""

# ---- Sample data used by the live plotting section ----
# From a list of rows plus column names.
df = pd.DataFrame([[1, 2, 3], [4, 5, 6]], columns=['f1', 'f2', 'f3'])
# From a list of records; keys missing from a record become NaN.
df = pd.DataFrame([{'user-id': 'user1', 'item-id': 1}, {'user-id': 'user2'}])
# From a dict of columns; this is the frame the rest of the script works on.
df = pd.DataFrame({'user_id': ['u1', 'u1', 'u3'],
                   'item_id': ['item1', 'item2', 'item3'],
                   'test_id': [12, 11, 4]})

"""
# ---- Cleaning ----
# These snippets address a column named 'item-id', i.e. the list-of-records
# sample frame above.
show = df.isnull()                     # boolean mask of missing cells
show = df.notnull()                    # boolean mask of present cells
show = df.dropna(axis=0)               # drop rows containing a missing value
show = df.dropna(axis=1)               # drop columns containing a missing value
show = df.dropna(axis=0, thresh=2)     # keep rows with at least `thresh` non-missing values
show = df.fillna('missing-id')         # replace every missing value with a constant
show = df.fillna(df.mode().iloc[0])    # fill with each column's most frequent value
show = df.fillna(df.median(numeric_only=True))  # fill numeric columns with their median
show = df['item-id'].fillna('missing')  # fill a single column
show = df['item-id'].astype(float)      # convert a column's dtype
show = df['item-id'].replace(1, 'one')  # replace every 1 with 'one'
show = df.rename(columns=lambda x: x + '1')       # rename every column via a function
show = df.rename(index=lambda x: str(x) + '1')    # rename every row label (fixed: labels are ints)
show = df.rename(columns={'item-id': 'item_id'})  # rename selected columns
show = df.set_index('item-id', append=False)      # use a column as the index; append=True keeps the old one too
df1 = df.sample(n=1)
df2 = df.sample(frac=0.5)
df3 = pd.concat([df1, df2])            # stack the two samples row-wise
show = df3.reset_index(drop=False)     # renumber rows; drop=True discards the old index

# ---- Filtering, sorting, grouping ----
# These snippets address the user_id / item_id / test_id sample frame.
show = df[df['test_id'] > 4]                         # rows where test_id > 4
show = df.sort_values(by='test_id', ascending=True)  # sort by one column
show = df.sort_values(by=['test_id', 'item_id'], ascending=[True, False])  # sort by several columns
show = df.groupby('user_id')['test_id'].apply(np.mean)  # mean test_id per user_id
show = df.pivot_table(index='user_id', values=['item_id', 'test_id'], aggfunc='min')  # pivot table
# Several aggregates of one column; as_index=False keeps user_id as a column.
# Fixed: a {name: func} dict on a single grouped column is rejected by current
# pandas as a nested renamer; named aggregation is the replacement.
show = df.groupby('user_id', as_index=False)['test_id'].agg(test_count='count', test_sum='sum')
show = df['test_id'].apply(np.mean)   # call np.mean on each element of the column
show = df.apply(np.max, axis=1)       # call np.max on each row (row values must be mutually comparable)
for index, row in df.iterrows():      # index is the row label, row is that row as a Series
    print(index, row['user_id'], row['test_id'])
for key, group in df.groupby('user_id'):  # fixed: the loop variable used to overwrite df
    print(key, len(group), group)
# Rank test_id within each user_id group.
# rank: http://pandas.pydata.org/pandas-docs/stable/generated/pandas.Series.rank.html
show = df.groupby('user_id', as_index=False)['test_id'].rank(ascending=False, method='average')

# ---- Combining ----
df = pd.concat([df1, df2])          # rows of df2 after df1 (fixed: DataFrame.append was removed in pandas 2.0)
df = pd.concat([df1, df2], axis=1)  # side by side (axis=1); axis=0 stacks rows
df = pd.merge(df1, df2, on='user_id', how='inner')  # SQL-style join on a key column

# ---- Anti-join: rows of df1 whose key is absent from df2 ----
df1 = pd.DataFrame({'user': [1, 2, 3, 4], 'id': [101, 102, 103, 104]})
df2 = pd.DataFrame({'user': [1, 2]})
df2['flag'] = 1                                   # marker column
df = pd.merge(df1, df2, on='user', how='left')    # left join: unmatched rows get flag = NaN
df = df[df.flag.isnull()].drop('flag', axis=1)    # keep the unmatched rows, drop the marker

# ---- Statistics (on the user/id frame produced just above) ----
show = df.mean()                      # mean of every column
show = df.corr()                      # pairwise correlation between columns
show = df['user'].corr(df['id'])      # correlation of two columns (fixed: user_id/test_id do not exist in this frame)
show = df.count()                     # non-missing values per column
show = df.max()                       # maximum of every column
show = df.min()                       # minimum of every column
show = df.median()                    # median of every column
show = df.std()                       # standard deviation of every column
show = df.dtypes                      # dtype of every column (fixed: dtypes is an attribute, not a method)
show = df.isnull().sum()              # missing values per column (fixed: isnull must be called)
"""

# ---- Plotting ----
# Font handed to the legend so non-Latin labels render; Windows-specific path.
zhfont = matplotlib.font_manager.FontProperties(fname=r'C:/Windows/Fonts/FZYTK.TTF')

# Histogram of one column.
hist = df['user_id'].hist()
hist.plot()

# Line plots of a Series and of the whole frame.
s = df['test_id']
s.plot()
df.plot()

# Vertical and horizontal bar charts on two stacked axes.
fig, axes = plt.subplots(2, 1)
s = df['test_id']
s.plot(kind='bar', ax=axes[0], color='k', alpha=0.7)
s.plot(kind='barh', ax=axes[1], color='k', alpha=0.7)
plt.legend(prop=zhfont)
plt.show()
参考资料：
http://pandas.pydata.org/pandas-docs/stable/api.html#binary-operator-functions
https://blog.csdn.net/hustqb/article/details/54410670
以上是关于python pandas ??????的主要内容,如果未能解决你的问题,请参考以下文章