如何在 Plotly 中创建按组着色的带注释的条形
Posted
技术标签:
【中文标题】如何在绘图中创建按组着色的带注释的条【英文标题】:How to create annotated bars colored by group in plotly 【发布时间】:2021-12-29 04:15:22 【问题描述】:Matplotlib
是一个用于 Python 的跨平台数据可视化和图形绘图库,可高度自定义。
Matplotlib 在定制方面有很大优势。下面是使用 matplotlib 绘制的自定义直方图:
import matplotlib as mpl
import matplotlib.cm as cm
import matplotlib.pyplot as plt
import matplotlib.ticker as ticker
import numpy as np
from matplotlib.gridspec import GridSpec
from matplotlib.patches import ConnectionPatch
from matplotlib.patches import Polygon
from matplotlib.patches import Rectangle
def customized_Histogram(df, j):
    """Plot a histogram of ``df[j]`` with percentile-coloured, annotated bars.

    Bars are coloured by where their bin sits relative to the 25th, 75th and
    95th percentiles of the column, and every bar is labelled with its count
    and its share of the total. The figure is displayed with ``plt.show()``;
    nothing is returned.

    Parameters
    ----------
    df : pandas.DataFrame
        Data to plot.
    j : str
        Name of the column of ``df`` to draw the histogram for.
    """
    # Colours for the different percentile bands.
    perc_25_colour = 'gold'
    perc_50_colour = 'mediumaquamarine'
    perc_75_colour = 'deepskyblue'
    perc_95_colour = 'peachpuff'

    fig, ax = plt.subplots(figsize=(14, 8))

    # counts  : ndarray with the number of data points in each bin
    # bins    : ndarray with the bin edges (one more entry than counts)
    # patches : one Rectangle patch per drawn bar
    # Every bar starts out in the "middle" colour; edge bars are recoloured
    # further down.
    counts, bins, patches = ax.hist(
        df[j], facecolor=perc_50_colour, edgecolor='gray'
    )

    # Put the x ticks on the bin edges.
    ax.set_xticks(bins.round(2))
    plt.xticks(rotation=70)

    # Figure title and axis titles.
    plt.title(f'Distribution of {j}', fontsize=20)
    plt.ylabel('Count', fontsize=15)
    plt.xlabel(j, fontsize=15)

    # Recolour the bars according to the percentile band of their bin.
    twentyfifth, seventyfifth, ninetyfifth = np.percentile(df[j], [25, 75, 95])
    for patch, leftside, rightside in zip(patches, bins[:-1], bins[1:]):
        if rightside < twentyfifth:
            patch.set_facecolor(perc_25_colour)
        elif leftside > ninetyfifth:
            patch.set_facecolor(perc_95_colour)
        elif leftside > seventyfifth:
            patch.set_facecolor(perc_75_colour)

    # Horizontal centre of every bar, and one shared height for the labels
    # (a quarter of the first non-zero y tick).
    bin_x_centers = 0.5 * np.diff(bins) + bins[:-1]
    bin_y_centers = ax.get_yticks()[1] * 0.25

    # Label every bar with its count and its percentage of all data points.
    total = counts.sum()
    for x_center, count in zip(bin_x_centers, counts):
        bin_label = f"{count:,} ({count / total * 100:,.2f}%)"
        plt.text(
            x_center,
            bin_y_centers,
            bin_label,
            rotation=90,
            rotation_mode='anchor',
        )

    # Side note explaining what the bar labels mean.
    ax.annotate(
        'Each bar shows count and percentage of total',
        xy=(.80, .30),
        xycoords='figure fraction',
        horizontalalignment='center',
        verticalalignment='bottom',
        fontsize=10,
        bbox=dict(boxstyle="round", fc="white"),
        rotation=-90,
    )

    # Legend built from proxy rectangles, one per colour.
    # NOTE(review): the label texts do not exactly match the 25/75/95
    # thresholds used for colouring above; kept as in the original.
    handles = [
        Rectangle((0, 0), 1, 1, color=c, ec="k")
        for c in (
            perc_25_colour,
            perc_50_colour,
            perc_75_colour,
            perc_95_colour,
        )
    ]
    labels = [
        "0-25 Percentile",
        "25-50 Percentile",
        "50-75 Percentile",
        ">95 Percentile",
    ]
    plt.legend(handles, labels, bbox_to_anchor=(0.5, 0., 0.80, 0.99))

    # To write the figure to disk instead, call e.g.
    # fig.savefig("filename.jpg", dpi=150, bbox_inches='tight') before show().
    plt.show()
# Example run: load seaborn's "tips" dataset and draw the customised
# histogram for its "total_bill" column.
import seaborn as sns
tips = sns.load_dataset("tips")
customized_Histogram(tips, "total_bill")
自定义直方图
如何使用Plotly
绘制上面的图?
【问题讨论】:
【参考方案1】: plotly 可以用同样的方式构建自定义图形。归根结底,关键在于了解其 API 和功能(与 matplotlib 方案一样)。matplotlib 的 hist() 和 numpy 的 histogram() 基本相同,两者都返回 counts 和 bins:https://numpy.org/doc/stable/reference/generated/numpy.histogram.html
然后使用矢量化逻辑而不是过程逻辑来定义条形的颜色
plotly 没有直接把刻度放在条形边缘的概念,因此这里将每个条形的 x 值移到 bin 的中点,再用 bin 的边缘来定义 x 轴刻度。
def plotly_histogram(df_in, col):
    """Build a plotly bar chart that mimics an annotated, coloured histogram.

    The column is binned with ``np.histogram``; each bin becomes one bar
    centred between its edges, coloured by percentile band and annotated with
    its count and share of all rows.

    Parameters
    ----------
    df_in : pandas.DataFrame
        Data to plot.
    col : str
        Name of the column of ``df_in`` to bin.

    Returns
    -------
    The plotly figure produced by ``px.bar`` with layout and annotations
    applied.
    """
    counts, edges = np.histogram(df_in[col])
    # There is one more edge than there are counts; aligning two Series on
    # their index pads "count" with NaN in the last row.
    df = pd.DataFrame({"count": pd.Series(counts), "edge": pd.Series(edges)})

    # Centre the bars between the edges: left edge plus half a bin width.
    df["x"] = df["edge"] + df["edge"].diff().mode()[0] / 2

    cmap = {
        "0-25 Percentile": "gold",
        "25-50 Percentile": "mediumaquamarine",
        "50-75 Percentile": "deepskyblue",
        ">95 Percentile": "peachpuff",
    }
    band_names = list(cmap)

    # Vectorised colour choice: the first matching condition wins, anything
    # left over falls into the last band. The shift picks which edge is
    # compared: -1 is the bin's right edge, 1 the previous bin's left edge,
    # 0 the bin's own left edge.
    thresholds = np.percentile(df_in[col], [25, 50, 95])
    df["color"] = np.select(
        [
            df["edge"].shift(s) < threshold
            for threshold, s in zip(thresholds, [-1, 1, 0])
        ],
        band_names[0:3],
        band_names[3],
    )

    # Bar label: count followed by its percentage of all rows.
    n_rows = len(df_in)
    df["text"] = df["count"].apply(lambda c: f"{c} ({c / n_rows * 100:.2f})")

    fig = px.bar(
        df,
        x="x",
        y="count",
        color="color",
        color_discrete_map=cmap,
        hover_data={"x": False, "edge": ":.2f"},
    ).update_layout(
        # Ticks sit on the bin edges, not on the bar centres.
        xaxis={
            "tickmode": "array",
            "tickvals": df["edge"],
            "title": col,
            "tickangle": 285,
            "tickformat": ".2f",
        }
    )

    # dropna() removes the padding row (NaN count) so only real bars are
    # annotated.
    for row in df.dropna().loc[:, ["x", "text"]].itertuples(index=False):
        fig.add_annotation(
            x=row.x,
            y=0,
            text=row.text,
            showarrow=False,
            textangle=270,
            yanchor="bottom",
            yshift=10,
        )
    return fig
import seaborn as sns
import pandas as pd
import numpy as np
import plotly.express as px
# Example run on seaborn's "tips" dataset. The returned figure is not stored
# or shown explicitly here.
# NOTE(review): presumably meant for a notebook cell, where the last
# expression is rendered; in a plain script call .show() on the result.
plotly_histogram(sns.load_dataset("tips"), "total_bill")
【讨论】:
感谢回答,如何减少/去除各条形之间的空白?
将 bargap=0 作为附加参数传给 update_layout()。
使用 bargap=0 会增加条宽,有没有办法设置条宽?
设置图形宽度。
以上是关于如何在 Plotly 中创建按组着色的带注释的条形的主要内容,如果未能解决你的问题,请参考以下文章