python数据分析-08透视表以及matplotlib库
Posted nikecode
tags:
篇首语:本文由小常识网(cha138.com)小编为大家整理,主要介绍了python数据分析-08透视表以及matplotlib库相关的知识,希望对你有一定的参考价值。
import pandas as pd
import numpy as np
from pandas import Series,DataFrame
"""
df = pd.read_excel("sales-funnel.xlsx")
#print(df)
# Account Name ... Price Status
# 0 714466 Trantow-Barrows ... 30000 Presented
# 1 714466 Trantow-Barrows ... 10000 Presented
# 2 714466 Trantow-Barrows ... 5000 Pending
# 3 737550 Fritsch,Russel and Anderson ... 35000 declined
# 4 146832 Kiehn-Spinka ... 65000 won
# 5 218895 Kulas Inc ... 40000 Pending
# 6 218895 Kulas Inc ... 10000 Presented
# 7 412290 Jerde-Hilpert ... 5000 Pending
# 8 740150 Barton-LLC ... 35000 declined
# 9 141962 Barton-LLC ... 65000 won
# 10 163416 Purdy-Kunde ... 30000 Presented
# 11 239344 Stokes-LLC ... 5000 Pending
# 12 239344 Stokes-LLC ... 10000 Presented
# 13 307599 Kassulke,Ondricka and Metz ... 7000 won
# 14 688981 Keeling LLC ... 100000 won
# 15 729833 Koepp-Ltd ... 65000 declined
# 16 729833 Koepp-Ltd ... 5000 Presented
#
# [17 rows x 8 columns]
print(pd.pivot_table(df,index=["Name"]))
# Account Price Quantity
# Name
# Barton-LLC 441056 50000 1.500000
# Fritsch,Russel and Anderson 737550 35000 1.000000
# Jerde-Hilpert 412290 5000 2.000000
# Kassulke,Ondricka and Metz 307599 7000 3.000000
# Keeling LLC 688981 100000 5.000000
# Kiehn-Spinka 146832 65000 2.000000
# Koepp-Ltd 729833 35000 2.000000
# Kulas Inc 218895 25000 1.500000
# Purdy-Kunde 163416 30000 1.000000
# Stokes-LLC 239344 7500 1.000000
# Trantow-Barrows 714466 15000 1.333333
print(pd.pivot_table(df,index=["Manager","Rep"],values=["Price","Quantity"],aggfunc=sum))
# Price Quantity
# Manager Rep
# Debra-Henley Craig-Booker 80000 5
# Daniel-Hilton 115000 5
# John-Smith 40000 3
# Fred-Anderson Cedric-Moss 110000 5
# Wendy-Yule 177000 12
"""
"""
#------------------
#分组和透视功能实战
link = "https://projects.fivethirtyeight.com/flights"
#假设文件usa_flights.csv 文件数据完整
df = pd.read_csv("usa_flights.csv")
print(df.shape)#(201664,14)
#获取延误时间最长的top10
print(df.sort_values("arr_delay",ascending=False)[:10])
#先查看航班取消情况,再计算延误和没有延误所占比例
print(df["cancelled"].value_counts())
# 0 196873
# 1 4791
# Name: cancelled,dtype:int64
df["delayed"] = df["arr_delay"].apply(lambda x:x>0)
print(df.head())
print(df["delayed"].value_counts())
#False 103037
#True 98627
#Name:delayed ,dtype:int64
delay_data = df["delayed"].value_counts()
print(delay_data[1]/(delay_data[0]+delay_data[1]))
#0.4890659...
#每个航空公司延误的情况
delay_group = df.groupby(["unique_carrier","delayed"])
print(delay_group.size())
df_delay = delay_group.size().unstack()
print(df_delay)
import matplotlib.pyplot as plt
df_delay.plot()
plt.show()
"""
#---------------------------------------------
#Matplotlib介绍
#为什么用Python画图
#GUI太复杂
#Excel太头疼
#Python简单免费
#什么是matplotlib?
#一个Python包
#用于2D绘图
#非常强大和流行
#有很多扩展
import matplotlib.pyplot as plt
import numpy as np
# x = np.linspace(0,2*np.pi,100)
# y = np.sin(x)
# plt.plot(x,y)
# plt.show()
#Matplotlib Architecture 架构
#Backend:主要处理把图显示到哪里和画到哪里:
#Artist:图像显示成什么样?
#Scripting:pyplot,python语法和API
"""
#matplotlib的简单绘图-plot
import numpy as np
import matplotlib.pyplot as plt
# a = [1,2,3]
# #plt.plot(a)#这里画图取值x轴分别是a的index指标,0,1,2,y轴是1,2,3
# b = [4,5,6]
# plt.plot(a,b)#这里画图取值x轴分别是a的值,1,2,3,y轴是4,5,6
# plt.show()
# #会报错:a有3个元素而b有4个元素,plt.plot要求x和y的长度一致
# a = [1,2,3]
# b = [4,5,6,7]
# plt.plot(a,b)
# plt.show()
# a = [1,2,3]
# #plt.plot(a)#这里画图取值x轴分别是a的index指标,0,1,2,y轴是1,2,3
# b = [4,5,6]
# # plt.plot(a,b,"*")#用*号表示点
# # plt.plot(a,b,"b--")
# # plt.show()
#
# c = [10,8,6]
# d = [1,8,3]
# plt.plot(a,b,"b--",c,d,"r*")
# plt.show()
t = np.arange(0.0,2.0,0.1)
print(t.size)
s = np.sin(t*np.pi)
print(s.size)
plt.plot(t,s,"r--",label="aaaa")
plt.plot(t*2,s,"--",label="bbbb")
plt.xlabel("This is X")
plt.ylabel("This is Y")
plt.title("This is a Demo")
plt.legend()#这里是将label显示出来
plt.show()
"""
#---------------------------------------
#matplotlib简单绘图之subplot
# x = np.linspace(0.0,5.0)
# y1 = np.sin(np.pi*x)
# y2 = np.sin(np.pi*x*2)
# plt.subplot(2,1,1)#表示切换到2行1列子图的第一个位置画图
# plt.plot(x,y1,"b--")
# plt.ylabel("y1")
# plt.subplot(2,1,2)#表示切换到2行1列子图的第二个位置画图
# plt.plot(x,y2,"r--")
# plt.ylabel("y2")
# plt.xlabel("X")
# plt.show()
#更改图片布局:改为两行两列,左右图片
# x = np.linspace(0.0,5.0)
# y1 = np.sin(np.pi*x)
# y2 = np.sin(np.pi*x*2)
# plt.subplot(2,2,1)
# plt.plot(x,y1,"b--")
# plt.ylabel("y1")
# plt.subplot(2,2,2)
# plt.plot(x,y2,"r--")
# plt.ylabel("y2")
# plt.xlabel("X")
# plt.show()
#更改图片布局:改为两行两列
# x = np.linspace(0.0,5.0)
# y1 = np.sin(np.pi*x)
# y2 = np.sin(np.pi*x*2)
#
# plt.subplot(2,2,1)
# plt.plot(x,y1,"b--")
# plt.ylabel("y1")
#
# plt.subplot(2,2,2)#表示切换到2行2列子图的第二个位置画图
# plt.plot(x,y2,"r--")
# plt.ylabel("y2")
#
#
# plt.subplot(2,2,3)#表示切换到2行2列子图的第三个位置画图
# plt.plot(x,y2,"r*")
# plt.ylabel("y2")
#
# plt.subplot(2,2,4)
# plt.plot(x,y1,"b*")
# plt.ylabel("y1")
#
# plt.xlabel("X")
# plt.show()
#a = plt.subplots()
#print(a)#(<Figure size 640x480 with 1 Axes>, <matplotlib.axes._subplots.AxesSubplot object at 0x000000000BE2B438>)
# print(type(a))#<class 'tuple'>
# print(a[0])#Figure(640x480) 表示画图,一块画布
# print(a[1])#AxesSubplot(0.125,0.11;0.775x0.77)#表示画笔
# figure,ax = plt.subplots()
# ax.plot([1,2,3,4,5])
# plt.show()#是正常的画图
# x = np.linspace(0.0,5.0)
# y1 = np.sin(np.pi*x)
# y2 = np.sin(np.pi*x*2)
# figure,ax = plt.subplots(2,2)#设为2行2列
# ax[0][0].plot(x,y1)
# ax[0][1].plot(x,y2)
# plt.show()#是正常的画图
以上是关于python数据分析-08透视表以及matplotlib库的主要内容,如果未能解决你的问题,请参考以下文章:
Python:在 python 中重现嵌套的 Excel 数据透视表