在ggplot直方图中排序计数

Posted

技术标签:

【中文标题】在ggplot直方图中排序计数【英文标题】:Ordering number of counts in ggplot histogram 【发布时间】:2021-11-10 05:13:39 【问题描述】:

我有以下数据表dt.test

dput(dt.test)
structure(list(Year = c(2020L, 2020L, 2020L, 2020L, 2020L, 2020L, 
2020L, 2020L, 2020L, 2020L, 2020L, 2020L, 2020L, 2020L, 2020L, 
2020L, 2020L, 2020L, 2020L, 2020L, 2020L, 2020L, 2020L, 2020L, 
2020L, 2020L, 2020L, 2020L, 2020L, 2020L, 2020L, 2020L, 2020L, 
2020L, 2020L, 2020L, 2020L, 2020L, 2020L, 2020L, 2020L, 2020L, 
2020L, 2020L, 2020L, 2020L, 2020L, 2020L, 2020L, 2020L), GPNRPlan = c(100236L, 
111328L, 128003L, 130058L, 133874L, 135886L, 138153L, 142309L, 
151988L, 157024L, 158101L, 158757L, 163260L, 200324L, 200792L, 
207342L, 214062L, 217331L, 218005L, 218454L, 219836L, 238154L, 
254868L, 254869L, 254874L, 259947L, 264205L, 267717L, 282089L, 
283151L, 292166L, 306032L, 307995L, 330523L, 335787L, 348047L, 
348664L, 355409L, 363008L, 365676L, 368657L, 392114L, 394894L, 
395993L, 404010L, 500269L, 502879L, 515783L, 516704L, 613479L
), TB.Info = c("Below TB", "In TB", "Over TB", "Below TB", "Over TB", 
"Below TB", "Over TB", "In TB", "In TB", "In TB", "In TB", "In TB", 
"Below TB", "In TB", "Over TB", "In TB", "Below TB", "In TB", 
"Below TB", "Over TB", "Over TB", "Below TB", "Over TB", "Over TB", 
"In TB", "Over TB", "Over TB", "Below TB", "Below TB", "In TB", 
"Below TB", "Below TB", "In TB", "In TB", "Over TB", "In TB", 
"Below TB", "Over TB", "Below TB", "Below TB", "Over TB", "Below TB", 
"Below TB", "Below TB", "Over TB", "Over TB", "Below TB", "In TB", 
"Below TB", "Below TB"), EV = c(0, 8991.71216732506, 0, 0, 952495.536027306, 
15787.4125389769, 5881.79901225216, 10020.1539658745, 29679.260860874, 
6623.90844477817, 19956.607049949, 10795.0898132095, 20110.9214012254, 
154265.744138562, 0, 18022.1733304456, 0, 46930.0874076566, 5031.92796650957, 
14766.994353086, 17428.2699502266, 1139.95904700027, 0, 0, 74480.5539097248, 
0, 0, 3125.39890360564, 41135.8019532497, 45455.901141461, 18068.8194807139, 
1574.19941919776, 71695.2819940052, 840619.522035336, 19137.7135190254, 
17462.2344585882, 6001.7267195835, 14056.3435516429, 0, 0, 51034.1108681587, 
2680.21093607705, 0, 0, 0, 0, 0, 897749.506753749, 7272.99092335283, 
-772.886617083522), EK = c(0, 8727.76940892211, 0, 0, 968654.154020939, 
15784.2477645437, 5852.58295283679, 10126.9467047725, 30178.7537734498, 
6452.5979909353, 19917.497780561, 10505.985181701, 20386.342324393, 
157344.902967186, 0, 18979.9946111749, 0, 45191.3139241601, 5152.46448254231, 
14606.1474532383, 17485.4003681122, 956.14494674998, 0, 0, 76242.2008458597, 
0, 0, 3271.78810230747, 43153.4577000728, 46305.3394606706, 18865.559964053, 
1676.38074270217, 73792.5743431249, 816694.588902057, 19259.8905579868, 
17273.9513316854, 5558.58747405537, 14081.8692372851, 0, 0, 53417.9083227921, 
2766.49603915519, 0, 0, 0, 0, 0, 925462.446710217, 7411.27949339667, 
-2269.10945132687), EH = c(0, -263.94275840295, 0, 0, 16158.6179936329, 
-3.16477443318076, -29.216059415372, 106.792738898008, 499.492912575829, 
-171.310453842869, -39.1092693880457, -289.10463150853, 275.420923167619, 
3079.15882862376, 0, 957.821280729258, 0, -1738.77348349646, 
120.536516032746, -160.84689984772, 57.13041788553, -183.814100250285, 
0, 0, 1761.64693613498, 0, 0, 146.38919870183, 2017.65574682313, 
849.438319209584, 796.740483339144, 102.181323504405, 2097.29234911977, 
-23924.9331332788, 122.177038961415, -188.283126902762, -443.139245528132, 
25.5256856422447, 0, 0, 2383.79745463335, 86.2851030781408, 0, 
0, 0, 0, 0, 27712.9399564679, 138.288570043838, -1496.22283424335
), pBu = c(0.1, 0.05, 0.1, 0.1, 0.05, 0.1, 0.05, 0.15, 0.05, 
0.1, 0.05, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 
0.1, 0.1, 0.1, 0.1, 0.1, 0.15, 0.1, 0.1, 0.1, 0.1, 0.1, 0.05, 
0.1, 0.1, 0.2, 0.1, 0.1, 0.2, 0.1, 0.05, 0.1, 0.1, 0.1, 0.05, 
0.05, 0.1, 0.1, 0.1, 0.1)), class = c("data.table", "data.frame"
), row.names = c(NA, -50L))

现在我针对不同的公差带pBu 为列EV 构建了一个ggplot 直方图:

# Interactive histogram of EV (year 2020), outlined by tolerance band `pBu`.
#
# geom_histogram() uses position = "stack" by default: within one bin the
# segments of the pBu groups are drawn on top of each other, so the top edge
# of a segment is a cumulative count and not the count of that group alone.
EV <- (dt.test %>% dplyr::mutate(pBu = as.factor(pBu)) %>%
         ggplot(aes(x = EV)) +
         # `binwidth` is a parameter, not an aesthetic. Mapped inside aes() it
         # was ignored with a warning and stat_bin() silently used 30 bins, so
         # the bogus mapping is dropped and the 30 bins are requested explicitly.
         geom_histogram(aes(color = pBu), bins = 30, fill = "transparent") +
         ggtitle("Histogram EV 2020") +
         ylab("Counts") +
         theme(panel.background = element_blank(), axis.line = element_line(colour = "black"),
               plot.title = element_text(lineheight = .8, hjust = 0.5, face = "bold"),
               legend.title = element_blank(),
               legend.background = element_rect(fill = "white", size = 0.5, linetype = "solid", colour ="black"),
               legend.box.background = element_rect(colour = "black"), legend.box.margin = margin(t = 1, l = 1))
) %>%
  ggplotly()

生成的图是这样的:

现在我想知道为什么pBu = 0.05 的条比绿色的条高?因为当我使用plotly 的交互工具时,我可以看到,红色的(pBu=0.05)计数为 4,绿色的(pBu=0.1)计数为 25。

如何正确排序?看图时,每个人都会认为在 EV=0 处 pBu=0.05 的计数比 pBu=0.1 的计数多,但这是不正确的。

【问题讨论】:

您的意思是要更改堆叠直方图中 pBu 的顺序吗?或者,如果您的意思是 pBu 的高度看起来很奇怪,那么这在这里是正常的,因为直方图是堆叠的。 是的,高度很奇怪,我不知道为什么:pBu=0.05 的 4 个计数没有被正确绘制。 因为它是堆叠的:pBu = 0.05 的条形顶部高度是 1 + 2 + 25 + 4。 【参考方案1】:

希望这些版本对您有所帮助。

非堆叠直方图

# Non-stacked variant: position = "dodge" places the bars of the pBu groups
# side by side within each bin, so every bar height is that group's own count.
(dt.test %>% dplyr::mutate(pBu = as.factor(pBu)) %>%
  ggplot(aes(x = EV)) +
  # `binwidth` is not an aesthetic; inside aes() it was ignored with a warning
  # and 30 bins were used, so the 30 bins are now requested explicitly.
  # `pBu` is already a factor after mutate(), so no second factor() is needed.
  geom_histogram(aes(color = pBu), bins = 30, fill = "transparent", alpha = 0.7, position = "dodge") +
  ggtitle("Histogram EV 2020") +
  ylab("Counts") +
  theme(panel.background = element_blank(), axis.line = element_line(colour = "black"),
        plot.title = element_text(lineheight = .8, hjust = 0.5, face = "bold"),
        legend.title = element_blank(),
        legend.background = element_rect(fill = "white", size = 0.5, linetype = "solid", colour ="black"),
        legend.box.background = element_rect(colour = "black"), legend.box.margin = margin(t = 1, l = 1))
) %>%
  ggplotly()

使用facet_wrap

# Faceted variant: one panel per tolerance band `pBu`, so the counts of the
# groups are never stacked on top of each other.
(dt.test %>% dplyr::mutate(pBu = as.factor(pBu)) %>%
  ggplot(aes(x = EV)) +
  # `binwidth` is not an aesthetic; inside aes() it was ignored with a warning
  # and 30 bins were used, so the 30 bins are now requested explicitly.
  # Each panel contains a single pBu group, so no dodging is required.
  geom_histogram(aes(color = pBu), bins = 30, fill = "transparent", alpha = 0.7) +
  ggtitle("Histogram EV 2020") +
  ylab("Counts") +
  theme(panel.background = element_blank(), axis.line = element_line(colour = "black"),
        plot.title = element_text(lineheight = .8, hjust = 0.5, face = "bold"),
        legend.title = element_blank(),
        legend.background = element_rect(fill = "white", size = 0.5, linetype = "solid", colour ="black"),
        legend.box.background = element_rect(colour = "black"), legend.box.margin = margin(t = 1, l = 1)) +
  # The original `facet_wrap(.~)` had no faceting variable and does not parse.
  facet_wrap(~ pBu)
) %>%
  ggplotly()

【讨论】:

以上是关于在ggplot直方图中排序计数的主要内容,如果未能解决你的问题,请参考以下文章

ggplot中分组/计数/数据的直方图

使用 ggplot 划分每组的直方图计数

R语言ggplot2可视化:可视化堆叠的直方图添加每个分组的每个bin的计数标签在堆叠直方图的bin中的每个分组部分添加数值标签

如何在ggplot的直方图条上打印频率

用 ggplot 填充直方图 - 改变颜色

使用自定义渐变填充直方图箱