python pandas

Posted 2020-12-24

tags:

篇首语：本文由小常识网(cha138.com)小编为大家整理，主要介绍了python pandas相关的知识，希望对你有一定的参考价值。

?????????clip frame l????????? values head ?????? mysql index rank

import matplotlib
from pandas import DataFrame
import numpy as np
import pandas as pd
import MySQLdb
import matplotlib.pyplot as plt

# Naming convention used throughout this script:
#   df - a pandas DataFrame
#   s  - a pandas Series
# NOTE(review): the connection credentials are hard-coded; move them to
# configuration or environment variables before using this outside a demo.
db = MySQLdb.connect(host="localhost", port=3306, user="root", passwd="1234", db="spj", charset="utf8")  # database connection
filename = "count_day.csv"  # data file used by the I/O examples
query = "select * from j"  # SQL statement used by the read_sql example
'''
# Disabled reference snippets: this whole region is a bare string literal.
# Copy a statement out of it (or remove the surrounding triple quotes) to run it.

# --- Importing data ---

pd.read_excel(filename)  # load an Excel workbook
pd.read_table(filename)  # load a delimited text file (tab-separated by default)
pd.read_json(json_string)  # load JSON data
pd.read_html(url)  # parse the HTML at a URL and return the tables found in it
pd.read_clipboard()  # parse the clipboard contents as delimited text

# Load the result of a SQL query through the open connection.
df = pd.read_sql(query, db)

# Load a CSV file.
# sep is the field delimiter, header=None means the file has no header row,
# and index_col=0 uses the first column as the DataFrame index.
df = pd.read_csv(filename, sep=",", header=None, index_col=0)

# --- Exporting data ---
df.to_csv("ans.csv", index=False, sep=",")  # write to CSV without the index column
df.to_excel(filename)  # write to an Excel file
df.to_sql(table_name, db)  # write to a SQL table
df.to_json(filename)  # write to a file as JSON

# --- Creating DataFrames for the examples ---
df = pd.DataFrame([[1, 2, 3], [4, 5, 6]], columns=["f1", "f2", "f3"])  # list of rows plus column names
df = pd.DataFrame({"user-id": ["u1", "u2", "u3"], "item-id": ["item1", "item2", "item3"]})  # dict of columns
df = pd.DataFrame([{"user-id": "user1", "item-id": "item1"}, {"user-id": "user2"}])  # list of row dicts; missing keys become NaN

# --- Inspecting data ---
show = df.head(1)  # head(n): first n rows
show = df.tail(1)  # tail(n): last n rows
show = df.shape  # (number of rows, number of columns)
show = df.info()  # prints index, dtype and memory summary (returns None)
show = df.describe()  # summary statistics of the numeric columns
show = df["item-id"].value_counts(dropna=False)  # frequency of each value in the Series, NaN included
show = df["item-id"].unique()  # distinct values of the Series

# --- Selecting data ---
s = df["user-id"]  # a single column name returns a Series
df = df[["user-id", "item-id"]]  # a list of column names returns a DataFrame
show = s.iloc[0]  # select by position
show = df.iloc[0, 0:2]  # (row, column) positions: row 0, columns 0 and 1
show = s.loc[0]  # select by index label
show = df.sample(frac=0.5)  # random sample of half of the rows
show = df.sample(n=len(df))  # shuffle all rows
'''

# Build the example DataFrame. Each assignment overwrites the previous one, so
# only the last constructor (user_id / item_id / test_id) is in effect below.
df = pd.DataFrame([[1, 2, 3], [4, 5, 6]], columns=["f1", "f2", "f3"])  # list of rows plus column names
df = pd.DataFrame([{"user-id": "user1", "item-id": 1}, {"user-id": "user2"}])  # list of row dicts; missing keys become NaN
df = pd.DataFrame({"user_id": ["user1", "user1", "user3"], "item_id": ["item1", "item2", "item3"], "test_id": [12, 11, 4]})  # dict of columns
'''
# Disabled reference snippets: this whole region is a bare string literal.
# Copy a statement out of it (or remove the surrounding triple quotes) to run it.

# --- Cleaning data ---
# NOTE(review): the "item-id" examples appear to target the DataFrame built
# from the list of row dicts (numeric "item-id" with one NaN); confirm which
# constructor is active before running them.
show = df.isnull()  # boolean mask, True where a value is missing
show = df.notnull()  # boolean mask, True where a value is present
show = df.dropna(axis=0)  # drop every row that contains a missing value
show = df.dropna(axis=1)  # drop every column that contains a missing value
show = df.dropna(axis=0, thresh=2)  # keep only rows with at least `thresh` non-missing values
show = df.fillna("missing-id")  # replace missing values with a constant (also works per column)
show = df.fillna(df.mode().iloc[0])  # fill with the most frequent value of each column
show = df.fillna(df.median(numeric_only=True))  # fill numeric columns with their median
show = df["item-id"].fillna("missing")  # fill a single column

show = df["item-id"].astype(float)  # convert the column dtype
show = df["item-id"].replace(1, "one")  # replace every value equal to 1 with "one"
show = df.rename(columns=lambda x: x + "1")  # rename all columns with a function
show = df.rename(index=lambda x: str(x) + "1")  # rename all index labels (str() because the default index is integer)
show = df.rename(columns={"item-id": "product-id"})  # rename selected columns
show = df.set_index("item-id", append=False)  # use a column as the index; append=True keeps the existing index as well

df1 = df.sample(n=1)
df2 = df.sample(frac=0.5)  # random sample of half of the rows
df3 = pd.concat([df1, df2])  # stack the two samples vertically
show = df3.reset_index(drop=False)  # rebuild the index; drop=True discards the old index instead of keeping it as a column

# --- Processing data ---
show = df[df["test_id"] > 4]  # rows whose test_id is greater than 4
show = df.sort_values(by="test_id", ascending=True)  # sort by test_id, ascending
show = df.sort_values(by=["test_id", "item_id"], ascending=[True, False])  # test_id ascending, then item_id descending
show = df.groupby("user_id")["test_id"].apply(np.mean)  # mean test_id per user_id
show = df.pivot_table(index="user_id", values=["item_id", "test_id"], aggfunc=min)  # pivot table: minimum of each value column per user_id

# Count and sum of test_id per user_id. reset_index() turns the group key back
# into a regular column instead of leaving it as the index.
show = df.groupby("user_id")["test_id"].agg(test_count="count", test_sum="sum").reset_index()
show = df["test_id"].apply(np.mean)  # apply np.mean to every element of the column
show = df.apply(np.max, axis=1)  # apply np.max to every row (row values must be mutually comparable)

for index, row in df.iterrows():
    # index is the row label; row is a Series holding that row, addressable by column name
    print(index, row["user_id"], row["test_id"])

for key, group in df.groupby("user_id"):
    # key is the user_id value; group is the sub-DataFrame for that key
    print(key, len(group), group)

# Rank test_id within each user_id group.
# rank:http://pandas.pydata.org/pandas-docs/stable/generated/pandas.Series.rank.html
show = df.groupby("user_id", as_index=False)["test_id"].rank(ascending=False, method="average")

# --- Merging data ---
df = pd.concat([df1, df2])  # append the rows of df2 below df1
df = pd.concat([df1, df2], axis=1)  # axis=1 joins side by side, axis=0 stacks vertically
df = pd.merge(df1, df2, on="user_id", how="inner")  # SQL-style join of df1 and df2

# --- Anti-join: rows of df1 that are absent from df2 ---
df1 = pd.DataFrame({"user": [1, 2, 3, 4], "id": [101, 102, 103, 104]})
df2 = pd.DataFrame({"user": [1, 2]})
df2["flag"] = 1  # marker column
df = pd.merge(df1, df2, on="user", how="left")  # left join; unmatched rows get NaN in flag
df = df[df.flag.isnull()].drop("flag", axis=1)  # keep the rows of df1 without a match, then drop the marker

# --- Statistics ---
show = df.mean(numeric_only=True)  # mean of each numeric column
show = df.select_dtypes(include="number").corr()  # pairwise correlation of the numeric columns
show = df.user_id.corr(df.test_id)  # correlation between two columns (both must be numeric)
show = df.count()  # number of non-missing values per column
show = df.max()  # maximum of each column
show = df.min()  # minimum of each column
show = df.median(numeric_only=True)  # median of each numeric column
show = df.std(numeric_only=True)  # standard deviation of each numeric column
show = df.dtypes  # dtype of each column (an attribute, not a method)
show = df.isnull().sum()  # number of missing values per column
'''
# --- Plotting ---
# Font passed to the legend so Chinese labels render (Windows font path).
zhfont = matplotlib.font_manager.FontProperties(fname=r"C:/Windows/Fonts/FZYTK.TTF")  # chinese

# Histogram
hist = df["user_id"].hist()  # draws the histogram and returns the matplotlib Axes

# Line charts
s = df["test_id"]
s.plot()
df.plot()

# Bar charts: vertical bars on the first axes, horizontal bars on the second
fig, axes = plt.subplots(2, 1)
s = df["test_id"]
s.plot(kind="bar", ax=axes[0], color="k", alpha=0.7)
s.plot(kind="barh", ax=axes[1], color="k", alpha=0.7)


plt.legend(prop=zhfont)
plt.show()

参考资料：

http://pandas.pydata.org/pandas-docs/stable/api.html#binary-operator-functions

https://blog.csdn.net/hustqb/article/details/54410670

以上是关于python pandas的主要内容，如果未能解决你的问题，请参考以下文章