数据分析之matplotlib
Posted xiaoqianbook
tags:
篇首语:本文由小常识网(cha138.com)小编为大家整理,主要介绍了数据分析之matplotlib相关的知识,希望对你有一定的参考价值。
CONDA环境安装
官方地址:https://www.anaconda.com
配置环境:
在系统path环境中添加
C:\ProgramData\Anaconda3
C:\ProgramData\Anaconda3\Library\bin
C:\ProgramData\Anaconda3\Scripts
创建环境:
conda create -n python3 python=3.6
切换环境:
Windows:activate python3
linux:source activate python3
jupyter和conda的使用
安装jupyter
conda install jupyter
启动jupyter
jupyter notebook
matplotlib的安装
conda install matplotlib
绘制折线图
运行以下代码
from matplotlib import pyplot as plt

# Twelve (x, y) samples; x runs 2, 4, ..., 24.
x_values = range(2, 26, 2)
y_values = [15, 13, 14.5, 17, 20, 25, 26, 26, 27, 22, 18, 15]

# Draw the line chart, then display it.
plt.plot(x_values, y_values)
plt.show()
运行效果
注意:先保存图片再展示图片(plt.show() 之后当前图形会被释放,此时再调用 plt.savefig() 只会得到一张空白图片)
升级版
from matplotlib import pyplot as plt

x_values = range(2, 26, 2)
y_values = [15, 13, 14.5, 17, 20, 25, 26, 26, 27, 22, 18, 15]

# Create the figure before plotting so the size and dpi apply to this chart.
plt.figure(figsize=(28, 8), dpi=80)

# x ticks: build the half-steps 2.0, 2.5, ..., 25.5 and keep every second
# one, which leaves the whole numbers 2.0 through 25.0.
half_steps = [n / 2 for n in range(4, 52)]
plt.xticks(half_steps[::2])

# y ticks: one tick per unit between the smallest and largest sample.
lowest, highest = min(y_values), max(y_values)
plt.yticks(range(lowest, highest + 1))

# Draw the line.
plt.plot(x_values, y_values)

# Save before showing the figure.
plt.savefig("./t1.png")
plt.show()
设置中文
matplotlib默认不支持中文符号,因为默认英文字体无法显示汉字
linux/mac下支持字体;
fc-list 查看支持字体
fc-list :lang=zh 查看支持的中文
可以通过 matplotlib.rc 修改默认的字体配置
windows下的微软雅黑目录
C:\Windows\Fonts\msyh.ttc
题目1
如果列表a表示10点到12点的每一分钟的气温,如何绘制折线图观察每一分钟气温的变化情况?
a = [random.randint(20,35) for i in range(120)]
import random

from matplotlib import font_manager
from matplotlib import pyplot as plt

# Load a font that contains Chinese glyphs (Microsoft YaHei on Windows).
# The raw string keeps the backslashes in the path literal.
my_font = font_manager.FontProperties(fname=r"C:\Windows\Fonts\msyh.ttc")

# 120 random readings between 20 and 35, one per minute from 10:00 to 11:59.
x = range(0, 120)
y = [random.randint(20, 35) for _ in range(120)]

plt.figure(figsize=(28, 8), dpi=80)
plt.plot(x, y)

# Build one label per data point so that positions and labels have the same
# length; slicing both with the same step keeps them paired one-to-one.
_xticks_labels = ["10点{}分".format(i) for i in range(60)]
_xticks_labels += ["11点{}分".format(i) for i in range(60)]
# rotation tilts the labels by 45 degrees.
plt.xticks(list(x)[::3], _xticks_labels[::3], rotation=45, fontproperties=my_font)
plt.yticks(range(min(y), max(y) + 1))

# Axis labels and title; each needs the font passed explicitly.
plt.xlabel("时间", fontproperties=my_font)
plt.ylabel("温度 单位(℃)", fontproperties=my_font)
plt.title("10点到12点每分钟气温变化情况", fontproperties=my_font)

plt.show()
题目2
假设大家在30岁的时候,根据自己的实际情况统计出来从11岁到30岁每年交的女(男)朋友的数量如a,请绘制出该数据的折线图,以便分析每年交女(男)朋友的数量走势
a = [1,0,1,1,2,5,3,2,3,4,4,5,6,5,4,3,3,1,1,1]
要求:
y轴表示个数
x轴表示岁数,比如11岁,12岁
from matplotlib import font_manager
from matplotlib import pyplot as plt

# Load a font that contains Chinese glyphs (Microsoft YaHei on Windows).
# The raw string keeps the backslashes in the path literal.
my_font = font_manager.FontProperties(fname=r"C:\Windows\Fonts\msyh.ttc")

# One count per age, for ages 11 through 30.
x = range(11, 31)
y = [1, 0, 1, 1, 2, 5, 3, 2, 3, 4, 4, 5, 6, 5, 4, 3, 3, 1, 1, 1]

plt.figure(figsize=(28, 8), dpi=80)
plt.plot(x, y)

# Label every x position with its age.
_xticks_labels = ["{}岁".format(i) for i in range(11, 31)]
plt.xticks(x, _xticks_labels, fontproperties=my_font)
plt.yticks(range(min(y), max(y) + 1))

plt.xlabel("岁数", fontproperties=my_font)
plt.ylabel("男女朋友个数", fontproperties=my_font)
plt.title("女(男)朋友的数量走势", fontproperties=my_font)

plt.show()
升级版
假设大家在30岁的时候,根据自己的实际情况统计出来从11岁到30岁每年交的女(男)朋友的数量如a,b请绘制出该数据的折线图,以便分析每年交女(男)朋友的数量走势
a = [1, 0, 1, 1, 2, 5, 3, 2, 3, 4, 4, 5, 6, 5, 4, 3, 3, 1, 1, 1] b = [1, 0, 3, 1, 2, 2, 3, 3, 2, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1]
要求:
y轴表示个数
x轴表示岁数,比如11岁,12岁
from matplotlib import font_manager
from matplotlib import pyplot as plt

# Load a font that contains Chinese glyphs (Microsoft YaHei on Windows).
# The raw string keeps the backslashes in the path literal.
my_font = font_manager.FontProperties(fname=r"C:\Windows\Fonts\msyh.ttc")

# Two series over the same ages, 11 through 30.
x = range(11, 31)
y1 = [1, 0, 1, 1, 2, 5, 3, 2, 3, 4, 4, 5, 6, 5, 4, 3, 3, 1, 1, 1]
y2 = [1, 0, 3, 1, 2, 2, 3, 3, 2, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1]

# Set the figure size.
plt.figure(figsize=(28, 8), dpi=80)

# The label text is what the legend shows; colour, line style and line
# width tell the two series apart.
plt.plot(x, y1, label="自己", color="red", linestyle="--", linewidth=5)
plt.plot(x, y2, label="同桌", color="green", linestyle=":", linewidth=3)

# Label every x position with its age.
_xticks_labels = ["{}岁".format(i) for i in range(11, 31)]
plt.xticks(x, _xticks_labels, fontproperties=my_font)

plt.xlabel("岁数", fontproperties=my_font)
plt.ylabel("男女朋友个数", fontproperties=my_font)
plt.title("女(男)朋友的数量走势", fontproperties=my_font)

# Draw a faint grid.
plt.grid(alpha=0.4)

# Add the legend; it takes the font through `prop`, not `fontproperties`.
plt.legend(prop=my_font, loc="upper left")

plt.show()
总结
绘制散点图
题目3
假设通过爬虫你获取到了某地3月份和10月份每天白天的最高气温(分别位于列表a,b),那么此时如何寻找出气温随时间(天)变化的某种规律?
a = [11,17,16,11,12,6,6,7,8,9,8,12,15,14,17,18,21,15,17,20,14,15,15,15,19,21,22,22,22,23,20] b = [26,26,28,19,21,20,19,17,16,19,18,20,20,19,22,23,17,20,22,15,11,15,5,13,17,10,11,13,12,13,6]
from matplotlib import font_manager
from matplotlib import pyplot as plt

# March occupies x = 1..31. October is shifted to x = 51..81 so the two
# months sit side by side on one axis with a gap between them.
x_3 = range(1, 32)
x_10 = range(51, 82)
y_3 = [11, 17, 16, 11, 12, 6, 6, 7, 8, 9, 8, 12, 15, 14, 17, 18, 21, 15, 17, 20, 14, 15, 15, 15, 19, 21, 22, 22, 22, 23, 20]
y_10 = [26, 26, 28, 19, 21, 20, 19, 17, 16, 19, 18, 20, 20, 19, 22, 23, 17, 20, 22, 15, 11, 15, 5, 13, 17, 10, 11, 13, 12, 13, 6]

# Load a font that contains Chinese glyphs (Microsoft YaHei on Windows).
# The raw string keeps the backslashes in the path literal.
my_font = font_manager.FontProperties(fname=r"C:\Windows\Fonts\msyh.ttc")

# Set the figure size.
plt.figure(figsize=(28, 8), dpi=80)

# Draw one scatter series per month.
plt.scatter(x_3, y_3, label="3月份")
plt.scatter(x_10, y_10, label="10月份")

# Build the x tick labels; subtracting 50 undoes the October offset so the
# labels read day 1..31 again.
_x = list(x_3) + list(x_10)
_xtick_labels = ["3月{}日".format(i) for i in x_3]
_xtick_labels += ["10月{}日".format(i - 50) for i in x_10]
plt.xticks(_x[::3], _xtick_labels[::3], rotation=45, fontproperties=my_font)

# Show the legend.
plt.legend(prop=my_font, loc="upper left")

# Axis labels and title.
plt.xlabel("月份", fontproperties=my_font)
plt.ylabel("温度", fontproperties=my_font)
plt.title("气温和随时间(天)变化的某种规律", fontproperties=my_font)

plt.show()
绘制条形图
题目4
假设你获取了2018年内地电影票房前20的电影(列表a)和电影票房数据(列表b),那么如何更加直观地展示该数据
a = ["战狼2","红海行动","美人鱼","唐人街神探2","我不是药神","速度与激情8","西虹市首富","速度与激情7","捉妖记","复仇者联盟3:无限战争","捉妖记2","羞羞的铁拳","变形金刚4:绝迹求生","前任3:再见前任","功夫瑜伽","侏罗纪世界2"]
b = [56.32,36.22,33.9,33.71,30.75,26.46,25.25,24.26,24.21,23.7,22.19,21.9,19.79,19.26,17.53,16.79]
from matplotlib import font_manager
from matplotlib import pyplot as plt

# Load a font that contains Chinese glyphs (Microsoft YaHei on Windows).
# The raw string keeps the backslashes in the path literal.
my_font = font_manager.FontProperties(fname=r"C:\Windows\Fonts\msyh.ttc")

# Film titles and their box-office figures, in matching order.
a = ["战狼2", "红海行动", "美人鱼", "唐人街神探2", "我不是药神", "速度与激情8", "西虹市首富", "速度与激情7", "捉妖记", "复仇者联盟3:无限战争", "捉妖记2", "羞羞的铁拳", "变形金刚4:绝迹求生", "前任3:再见前任", "功夫瑜伽", "侏罗纪世界2"]
b = [56.32, 36.22, 33.9, 33.71, 30.75, 26.46, 25.25, 24.26, 24.21, 23.7, 22.19, 21.9, 19.79, 19.26, 17.53, 16.79]

# Set the figure size.
plt.figure(figsize=(20, 15), dpi=80)

# Bars are placed at integer positions 0..len(a)-1.
plt.bar(range(len(a)), b, width=0.3)

# Put the titles on the x axis at the same positions, rotated 90 degrees.
plt.xticks(range(len(a)), a, fontproperties=my_font, rotation=90)

# Save before showing the figure.
plt.savefig("./统计图.png")
plt.show()
升级版横排版
from matplotlib import font_manager
from matplotlib import pyplot as plt

# Load a font that contains Chinese glyphs (Microsoft YaHei on Windows).
# The raw string keeps the backslashes in the path literal.
my_font = font_manager.FontProperties(fname=r"C:\Windows\Fonts\msyh.ttc")

# Film titles and their box-office figures, in matching order.
a = ["战狼2", "红海行动", "美人鱼", "唐人街神探2", "我不是药神", "速度与激情8", "西虹市首富", "速度与激情7", "捉妖记", "复仇者联盟3:无限战争", "捉妖记2", "羞羞的铁拳", "变形金刚4:绝迹求生", "前任3:再见前任", "功夫瑜伽", "侏罗纪世界2"]
b = [56.32, 36.22, 33.9, 33.71, 30.75, 26.46, 25.25, 24.26, 24.21, 23.7, 22.19, 21.9, 19.79, 19.26, 17.53, 16.79]

# Set the figure size.
plt.figure(figsize=(20, 15), dpi=80)

# Horizontal bars: barh takes `height` for the bar thickness, where bar
# takes `width`.
plt.barh(range(len(a)), b, height=0.3, color="red")

# Put the titles on the y axis at the bar positions.
plt.yticks(range(len(a)), a, fontproperties=my_font)

# Add the grid before saving so the saved image matches the displayed one.
plt.grid(alpha=0.5)

# Save before showing the figure.
plt.savefig("./统计图2.png")
plt.show()
题目5
假设列表a中电影分别在2017-09-14(b_14),2017-09-15(b_15),2017-09-16(b_16)三天的票房,为展示列表中电影本身的票房以及同其他电影的数据对比情况,应该如何直观呈现该数据?
a = ["星球崛起3:终极之战","敦刻尔克","英雄归来","战狼2"]
b_16 =[15746,312,4497,319]
b_15 =[12357,156,2045,168]
b_14 =[2358,399,2358,362]
from matplotlib import font_manager
from matplotlib import pyplot as plt

# Load a font that contains Chinese glyphs (Microsoft YaHei on Windows).
# The raw string keeps the backslashes in the path literal.
my_font = font_manager.FontProperties(fname=r"C:\Windows\Fonts\msyh.ttc")

a = ["星球崛起3:终极之战", "敦刻尔克", "英雄归来", "战狼2"]

# Set the figure size.
plt.figure(figsize=(20, 8), dpi=80)

# Box office per film for each of the three days, in the same order as `a`.
b_16 = [15746, 312, 4497, 319]
b_15 = [12357, 156, 2045, 168]
b_14 = [2358, 399, 2358, 362]

# Each film gets three adjacent bars: the second and third series are
# shifted right by one and two bar widths respectively.
bar_width = 0.2
x_14 = list(range(len(a)))
x_15 = [i + bar_width for i in x_14]
x_16 = [i + bar_width * 2 for i in x_14]

# Give each series its own date so the legend can tell them apart.
plt.bar(x_14, b_14, width=bar_width, label="9月14日")
plt.bar(x_15, b_15, width=bar_width, label="9月15日")
plt.bar(x_16, b_16, width=bar_width, label="9月16日")

# Put the film titles under the middle bar of each group.
plt.xticks(x_15, a, fontproperties=my_font)

# Show the legend.
plt.legend(prop=my_font)

# Title.
plt.title("电影3天分析图", fontproperties=my_font)

plt.show()
绘制直方图
题目6
假设你获取了250部电影的时长a列表,希望从这些电影时长的分布状态(比如时长100分钟到120分钟电影的数量出现的频率)等信息你应该如何分析呈现的数据?
a=[131,99,126,129,142,120,113,90,94,135,131,129,136,129,102,120,103,90,114,135,121,119,136,119,112,120,113,100,94,115,101,99,126,129,142,120,133,90,94,135,161,99,126,129,142,120,143,90,94,135,141,99,126,129,162,120,113,90,94,135,101,99,126,129,142,120,112,90,94,135,130,99,126,129,142,140,113,90,94,135,136,99,126,129,162,120,113,90,94,135,134,99,126,129,142,120,113,120,94,135,135,99,126,129,142,120,133,90,94,135,136,99,126,129,142,124,113,90,94,135,137,99,126,129,142,120,113,95,135,111,138,99,126,129,142,126,113,90,94,135,139,99,126,129,142,128,113,96,94,135,131,99,126,129,142,129,113,90,94,135,133,99,126,129,142,120,113,90,94,135,121,99,126,129,142,130,113,92,94,135,131,99,126,129,142,120,113,90,94,135,141,99,126,129,142,120,113,90,94,135,151,99,126,129,142,120,113,90,94,135,131,99,126,129,142,140,113,90,94,135,131,99,126,129,142,120,113,91,94,135,131,99,126,129,142,120,113,90,94,135,131,161,99,126,129,142,120,113,90,111]
from matplotlib import pyplot as plt

# Film running times.
a = [131,99,126,129,142,120,113,90,94,135,131,129,136,129,102,120,103,90,114,135,121,119,136,119,112,120,113,100,94,115,101,99,126,129,142,120,133,90,94,135,161,99,126,129,142,120,143,90,94,135,141,99,126,129,162,120,113,90,94,135,101,99,126,129,142,120,112,90,94,135,130,99,126,129,142,140,113,90,94,135,136,99,126,129,162,120,113,90,94,135,134,99,126,129,142,120,113,120,94,135,135,99,126,129,142,120,133,90,94,135,136,99,126,129,142,124,113,90,94,135,137,99,126,129,142,120,113,95,135,111,138,99,126,129,142,126,113,90,94,135,139,99,126,129,142,128,113,96,94,135,131,99,126,129,142,129,113,90,94,135,133,99,126,129,142,120,113,90,94,135,121,99,126,129,142,130,113,92,94,135,131,99,126,129,142,120,113,90,94,135,141,99,126,129,142,120,113,90,94,135,151,99,126,129,142,120,113,90,94,135,131,99,126,129,142,140,113,90,94,135,131,99,126,129,142,120,113,91,94,135,131,99,126,129,142,120,113,90,94,135,131,161,99,126,129,142,120,113,90,111]

# Choose the bin width, then derive the number of bins from the data range.
# The x ticks below only line up with the bin edges when (max - min) is an
# exact multiple of d, as it is for this data (72 / 3 = 24 bins).
d = 3  # bin width
num_bins = (max(a) - min(a)) // d  # number of bins

# Set the figure size.
plt.figure(figsize=(20, 8), dpi=80)

# density=True normalises the histogram; it replaces the `normed` argument,
# which current matplotlib releases no longer accept.
plt.hist(a, num_bins, density=True)

# One x tick per bin edge.
plt.xticks(range(min(a), max(a) + d, d))

plt.grid()
plt.show()
题目7
根据他们所需要的时间通过抽样统计列出以下列表的数据,这些数据能绘制成直方图吗?
Data by absolute numbers
Interval | Width | Quantity | Quantity/width |
---|---|---|---|
0 | 5 | 4180 | 836 |
5 | 5 | 13687 | 2737 |
10 | 5 | 18618 | 3723 |
15 | 5 | 19634 | 3926 |
20 | 5 | 17981 | 3596 |
25 | 5 | 7190 | 1438 |
30 | 5 | 16369 | 3273 |
35 | 5 | 3212 | 642 |
40 | 5 | 4122 | 824 |
45 | 15 | 9200 | 613 |
60 | 30 | 6461 | 215 |
90 | 60 | 3435 | 57 |
interval = [0,5,10,15,20,25,30,35,40,45,60,90]
width = [5,5,5,5,5,5,5,5,5,15,30,60]
quantity = [4180,13687,18618,19634,17981,7190,16369,3212,4122,9200,6461,3435]
from matplotlib import pyplot as plt

# Pre-aggregated data: the start of each interval, its width, and the count
# that falls inside it.
starts = [0, 5, 10, 15, 20, 25, 30, 35, 40, 45, 60, 90]
widths = [5, 5, 5, 5, 5, 5, 5, 5, 5, 15, 30, 60]  # listed for reference; the bars below are all drawn one unit wide
counts = [4180, 13687, 18618, 19634, 17981, 7190, 16369, 3212, 4122, 9200, 6461, 3435]

bar_count = len(counts)

# Set the figure size.
plt.figure(figsize=(20, 8), dpi=80)

# The data is already counted, so draw it as bars of width 1 at integer
# positions; adjacent bars then touch like a histogram.
plt.bar(range(bar_count), counts, width=1)

# Place ticks on the bar edges (half a unit left of each bar centre, plus
# one extra edge at the right) and label them with the interval boundaries.
edge_positions = [idx - 0.5 for idx in range(bar_count + 1)]
edge_labels = starts + ["150"]
plt.xticks(edge_positions, edge_labels)

plt.grid()
plt.show()
以上是关于数据分析之matplotlib的主要内容,如果未能解决你的问题,请参考以下文章
数据可视化之MATPLOTLIB实战:PLT.POLAR()函数 绘制极线图 (转载)
数据可视化之MATPLOTLIB实战:PLT.POLAR()函数 绘制极线图 (转载)