如何绘制混合箱线图:另一半有抖动点的半箱线图?
Posted
技术标签:
【中文标题】如何绘制混合箱线图:另一半有抖动点的半箱线图?【英文标题】:How to plot a hybrid boxplot: half boxplot with jitter points on the other half? 【发布时间】:2018-08-06 19:35:09 【问题描述】:我想制作一幅与今年发表在 Nature 上的一篇文章(article published on Nature)中图 2d-f 类似的图。它基本上是半个箱线图,另一半则是散点。
谁能给我一些提示?非常感谢!
以下是我的数据和代码,它们生成的是带有散点的完整箱线图(而不是半箱线图)
require(magrittr)
require(tidyverse)
dat <- structure(list(p1 = c(0.0854261831077604, 0.408418657218253,
0.577793646477315, 0.578028229977424, 0.48933166218204, 0.53117814324334,
0.526653494462464, 0.00687616283435221, 0.444300425796509, 0.00287319455358522,
0.949821402532831, 0.96832469523368, 0.953281969982759, 0.360125244759434,
0.407921095422844, 0.885776732104954, 0.159882184516691, 0.911094990767761,
0.0444367172734037, 0.144888951725151, 0.508858686640707, 0.694913731085945,
0.117270366119258, 0.78227546070467, 0.980457304886186, 0.711464034564424,
0.753944466390685, 0.0474210438747038, 0.00344183466223558, 0.0290017465534545,
0.75092385236303, 0.868873921257987, 0.744396990487425, 0.0140007244233847,
0.0332266395043963, 0.482897084793009, 0.0535516646483004, 0.452926358923891,
0.0144057727301603, 0.171918034525543), p2 = c(0.101262675229211,
0.196913109208586, 0.37814311161382, 0.0677625689405156, 0.12517090579686,
0.409083554335168, 0.158886941347288, 0.847394861862651, 0.180560031076741,
0.967122694294885, 0.000901627067665116, 0.00039495110143705,
9.70707318411806e-05, 0.546200038486894, 0.435475454787648, 5.95555269800323e-06,
0.0178837768834925, 8.42690065415846e-06, 0.00777059697751842,
0.0020397073541544, 0.486699073016371, 0.283679673247571, 0.857183359146641,
0.200712003853458, 0.0164911141652784, 0.0542250670734297, 0.232340206984506,
0.948523714169708, 0.169881661474024, 0.968983592882272, 0.00250367590158291,
0.000792323746977033, 0.000185068166140097, 0.0193600071757997,
0.114775271592724, 4.65931778380389e-05, 0.000754760900847164,
2.07521623816406e-05, 0.00782764273312856, 0.00276993826117348
), p3 = c(0.0118642223785376, 0.0267362912322735, 6.60753171741111e-08,
0.053576051466652, 0.00375873110094442, 9.85095078844696e-08,
0.0525436528683484, 0.0193735809639814, 8.44717454802822e-07,
0.00608007737576027, 0.0205563904131287, 0.0104638062130591,
0.0249997053664864, 0.0587924727726031, 0.0443600964770995, 0.067125687916273,
0.758612877724648, 0.0618158334848203, 0.0251025592849138, 0.790905778949543,
0.00126904829915329, 0.00760772364901772, 0.00119821088328392,
0.0115117347754715, 0.000863676435448072, 0.000996891439583434,
0.0115279148630096, 0.00249122388568909, 5.21508620418823e-05,
0.00144050407848742, 0.120373444447631, 0.0534773096149069, 0.110284261289338,
0.571243879053544, 0.438152084363961, 0.364887514202121, 0.696293189762153,
0.414870716968937, 0.0557358576822093, 0.783929426716999), p4 = c(0.000107231042599948,
0.000379648762557529, 8.25102162601208e-06, 0.000343829024899591,
0.000140680688077216, 1.90076798696051e-06, 0.000214507212681323,
1.38587688080716e-05, 3.48104084092359e-06, 6.50782599216903e-07,
0.0114584884733498, 0.00652170746426181, 0.0143309604192116,
0.0275718029789144, 0.0352327288308957, 0.022950800779703, 0.0569939247302654,
0.0190248244391564, 0.0305921420687752, 0.00589871320676732,
0.000805515847378872, 1.97674357551495e-05, 8.30853708305541e-06,
1.32462751169762e-06, 4.8731965929686e-05, 0.0057411315642433,
4.82406700397824e-05, 0.000204633566379066, 0.0552263911781015,
0.000181994007177494, 0.0585729576787707, 0.0273685460128338,
0.0568746134466117, 0.299309335625926, 0.278980446497419, 0.105600715225359,
0.176549247514501, 0.101420411455169, 0.01003894550707, 0.0010803018725911
), p5 = c(0.786823338804824, 0.151956168584644, 0.0433468890359269,
0.19556481029922, 0.380808150243027, 0.0389798680141623, 0.260481184897901,
0.101147673996922, 0.0184624278061585, 0.0222416874775066, 0.000113517761014704,
0.00329593083795693, 0.000476682365422989, 0.00571997662739322,
0.0697473913851358, 0.0216803412883361, 0.00631472476841249,
0.00628215584877364, 0.540944692186543, 0.0135127011440213, 0.00235752761214414,
3.10282042735927e-06, 0.0239147204208516, 4.97334784773176e-05,
0.00213837866453402, 0.000212207014031345, 0.00180443364400107,
8.15954685083038e-05, 0.00445169398173509, 0.000391265642772285,
0.0676128522356959, 0.0494864355994384, 0.0882575475549674, 0.0960799089263987,
0.134853114895623, 0.0465661014986807, 0.0728456746626632, 0.0307607877988244,
0.476388236185883, 0.00831263646470973), p6 = c(0.0145163494370677,
0.215596124993685, 0.00070803577599434, 0.104724510291289, 0.000789869989050939,
0.0207564351298348, 0.00122021921131791, 0.0251938615732845,
0.356672789562296, 0.00168169551566413, 0.0171485737520108, 0.0109989091496048,
0.00681361113427885, 0.00159046437476052, 0.00726323309637717,
0.00246048235803604, 0.000312511376490686, 0.00177376855883463,
0.351153292208846, 0.0427541476203625, 1.01485842454486e-05,
0.0137760017612841, 0.000425034892882118, 0.0054497425604112,
7.93882623673471e-07, 0.227360668344289, 0.000334737447758259,
0.0012777890350116, 0.766946267841861, 8.96835836820999e-07,
1.32173732897771e-05, 1.46376785664669e-06, 1.51905551715105e-06,
6.14479494697213e-06, 1.24431458762028e-05, 1.99110299298599e-06,
5.46251153509928e-06, 9.72690797485877e-07, 0.435603545161549,
0.0319896621589845), type = c("small", "small", "small", "small",
"small", "small", "small", "small", "small", "small", "small",
"small", "small", "small", "small", "small", "small", "small",
"small", "small", "big", "big", "big", "big", "big", "big", "big",
"big", "big", "big", "big", "big", "big", "big", "big", "big",
"big", "big", "big", "big"), loc = c("abro", "abro", "abro",
"abro", "abro", "abro", "abro", "abro", "abro", "abro", "dome",
"dome", "dome", "dome", "dome", "dome", "dome", "dome", "dome",
"dome", "abro", "abro", "abro", "abro", "abro", "abro", "abro",
"abro", "abro", "abro", "dome", "dome", "dome", "dome", "dome",
"dome", "dome", "dome", "dome", "dome")), .Names = c("p1", "p2",
"p3", "p4", "p5", "p6", "type", "loc"), class = c("tbl_df", "tbl",
"data.frame"), row.names = c(NA, -40L))
glimpse(dat)
#> Observations: 40
#> Variables: 8
#> $ p1 <dbl> 0.085426183, 0.408418657, 0.577793646, 0.578028230, 0.489...
#> $ p2 <dbl> 1.012627e-01, 1.969131e-01, 3.781431e-01, 6.776257e-02, 1...
#> $ p3 <dbl> 1.186422e-02, 2.673629e-02, 6.607532e-08, 5.357605e-02, 3...
#> $ p4 <dbl> 1.072310e-04, 3.796488e-04, 8.251022e-06, 3.438290e-04, 1...
#> $ p5 <dbl> 7.868233e-01, 1.519562e-01, 4.334689e-02, 1.955648e-01, 3...
#> $ p6 <dbl> 1.451635e-02, 2.155961e-01, 7.080358e-04, 1.047245e-01, 7...
#> $ type <chr> "small", "small", "small", "small", "small", "small", "sm...
#> $ loc <chr> "abro", "abro", "abro", "abro", "abro", "abro", "abro", "...
将数据转换为长格式
# Reshape columns 1:6 of `dat` (p1..p6) from wide to long format: one row
# per (observation, column) pair, with the column name stored in `key` and
# its number in `value`. The grouping columns are then turned into factors.
#
# pivot_longer() (tidyr >= 1.0.0) replaces the superseded gather(). The
# resulting columns and rows are the same; only the row order differs
# (row-wise instead of column-wise), which the plots below do not rely on.
dat_long <- dat %>%
  pivot_longer(cols = 1:6, names_to = "key", values_to = "value") %>%
  mutate(
    loc = factor(loc, levels = c("abro", "dome")),
    type = factor(type),
    key = factor(key)
  )
用点绘制箱线图
# Full boxplots with the jittered raw observations drawn underneath them,
# one panel per loc (rows) x key (columns).
ggplot(data = dat_long, mapping = aes(x = type, y = value, color = key)) +
  # points are added first, so the boxes are painted on top of them
  geom_point(
    position = position_jitter(width = 0.3),
    alpha = 0.3,
    size = 2
  ) +
  # outlier markers get an NA colour so they are not drawn
  # (the point layer above already shows every observation)
  geom_boxplot(outlier.color = NA) +
  facet_grid(loc ~ key) +
  theme_light() +
  theme(legend.position = "bottom") +
  # keep the legend on a single row
  guides(col = guide_legend(nrow = 1))
【问题讨论】:
只是出于好奇,this reddit post 也是您发的吗?那里的最佳答案相当接近,但我认为您贴出的那幅图并不是那样做出来的。 这里有一个提示可能会有所帮助:您可以考虑看看 position_nudge(而不是 position_jitter),也可以在 geom_boxplot 中使用 width 参数。 @LAP:不是我。谢谢你的链接。 @DS_UNI:谢谢你的提示! 【参考方案1】:再补充一个选项:@erocoar 开发的 gghalves 包
library(tidyverse)
library(ggbeeswarm)
# if (!require(devtools))
# install.packages('devtools')
#
# devtools::install_github('erocoar/gghalves')
library(gghalves)
# gghalves with its default settings: a half boxplot combined with a half
# strip of points, coloured by type; y scales are free across facet rows.
ggplot(data = dat_long, mapping = aes(x = type, y = value, color = type)) +
  geom_half_boxplot(nudge = 0.05, outlier.color = NA) +
  geom_half_point() +
  facet_grid(loc ~ key, scales = "free_y") +
  theme_light() +
  theme(legend.position = "bottom") +
  # keep the legend on a single row
  guides(color = guide_legend(nrow = 1))
# Half boxplot combined with a half violin (side = "r"), filled by type.
ggplot(data = dat_long, mapping = aes(x = type, y = value)) +
  geom_half_boxplot(nudge = 0.05) +
  geom_half_violin(
    mapping = aes(fill = type),
    side = "r",
    nudge = 0.01
  ) +
  facet_grid(loc ~ key, scales = "free_y") +
  theme_light() +
  theme(legend.position = "bottom") +
  # keep the legend on a single row
  guides(fill = guide_legend(nrow = 1))
# Dodged half boxplots per key (coloured by type); the points are placed
# with ggbeeswarm's position_quasirandom(), handed to geom_half_point()
# through its `transformation` argument.
ggplot(data = dat_long, mapping = aes(x = key, y = value, color = type)) +
  geom_half_boxplot(
    position = position_dodge(width = 0.9),
    nudge = 0.05,
    outlier.color = NA
  ) +
  geom_half_point(
    transformation = position_quasirandom(width = .9, groupOnX = TRUE)
  ) +
  facet_grid(loc ~ ., scales = "free_y") +
  theme_light() +
  theme(legend.position = "bottom") +
  # keep the legend on a single row
  guides(color = guide_legend(nrow = 1))
由reprex package (v0.3.0) 于 2020 年 4 月 30 日创建
【讨论】:
【参考方案2】:一个非常快速的解决方案是使用 position_nudge 给点加上一点水平偏移。
# Quick variant: full boxplots, with the raw points shifted 0.5 x-units
# sideways by position_nudge() so they sit next to each box.
ggplot(data = dat_long, mapping = aes(x = type, y = value, fill = key)) +
  geom_boxplot(outlier.color = NA) +
  geom_point(
    position = position_nudge(x = 0.5),
    shape = 21,
    size = 2
  ) +
  facet_grid(loc ~ key)
或者将 x 轴的因子转换为数值,再加上一个偏移量
# Same idea without position_nudge(): the point layer overrides x with the
# numeric code of the `type` factor plus 0.5.
ggplot(data = dat_long, mapping = aes(x = type, y = value, fill = key)) +
  geom_boxplot(outlier.color = NA) +
  geom_point(
    # as.numeric() on a factor returns its integer level codes
    mapping = aes(x = as.numeric(type) + 0.5),
    shape = 21,
    size = 2
  ) +
  facet_grid(loc ~ key)
下面是一种在 x 轴位置上更通用的方法。简而言之,思路是把同样的数据再添加一份,从而多画出一组箱体;这第二组箱体通过合适的线型和 alpha 被隐藏(参见代码中的 scale_* 调用),其位置正好可以用来放置散点。
# Rebuild the long table (columns 1:6 of `dat` -> key/value) and tag every
# row with gr = 1, the copy of the data that stays visible.
# pivot_longer() (tidyr >= 1.0.0) replaces the superseded gather(); only the
# row order differs, which the plot does not depend on.
dat_long <- dat %>%
  pivot_longer(cols = 1:6, names_to = "key", values_to = "value") %>%
  mutate(
    loc = factor(loc, levels = c("abro", "dome")),
    type = factor(type),
    key = factor(key),
    gr = 1 # marks the first (visible) copy
  )

# Stack a second copy of the data (gr = 2) on top of the first one. Both
# copies are mapped to alpha and linetype through factor(gr); the scales
# below give level "2" alpha 0 and a blank linetype, so its boxes are not
# visible. The points of the visible copy (gr == 1) are nudged by 0.2
# x-units.
dat_long %>%
  mutate(gr = 2) %>%
  bind_rows(dat_long) %>%
  ggplot(aes(
    x = type, y = value, fill = key,
    alpha = factor(gr), linetype = factor(gr)
  )) +
  geom_boxplot(outlier.color = NA) +
  facet_grid(loc ~ key) +
  geom_point(
    # layer-level data function: keep only the visible copy of the rows
    data = . %>% filter(gr == 1),
    position = position_nudge(y = 0, x = 0.2),
    shape = 21,
    size = 2
  ) +
  # level "1" -> alpha 1, level "2" -> alpha 0; same mapping as
  # scale_alpha_discrete(range = c(1, 0)) but without its
  # "Using alpha for a discrete variable is not advised" warning
  scale_alpha_manual(values = c(1, 0)) +
  scale_linetype_manual(values = c("solid", "blank")) +
  guides(alpha = "none", linetype = "none")
使用 zankuralt 在下方(below)发布的代码,并针对分面做了调整,您可以尝试:
# Hand-built half boxplot: box, whisker line and caps are drawn from
# per-group summary statistics at numeric x positions, so the box sits to
# the left of each category position and the jittered points to the right.
dat %>%
  # pivot_longer() (tidyr >= 1.0.0) replaces the superseded gather()
  pivot_longer(cols = 1:6, names_to = "key", values_to = "value") %>%
  mutate(
    loc = factor(loc, levels = c("abro", "dome")),
    type = factor(type),
    key = factor(key),
    # integer level code of `type`, used as a numeric x position
    type2 = as.numeric(type)
  ) %>%
  # attach the summary statistics of each type x loc x key cell to its rows
  group_by(type, loc, key) %>%
  mutate(
    d_ymin = min(value),
    d_ymax = max(value),
    d_lower = quantile(value, 0.25),
    d_middle = median(value),
    d_upper = quantile(value, 0.75)
  ) %>%
  # drop the grouping so it does not leak into later steps
  ungroup() %>%
  ggplot() +
  # box only: ymin/ymax are set to the quartiles, so the built-in whiskers
  # have zero length; the min-max line is drawn by the segments below
  geom_boxplot(
    aes(
      x = type2 - 0.2,
      ymin = d_lower,
      ymax = d_upper,
      lower = d_lower,
      middle = d_middle,
      upper = d_upper,
      width = 2 * 0.2,
      fill = key
    ),
    stat = "identity"
  ) +
  # raw observations, jittered horizontally only, right of the category
  geom_jitter(
    aes(
      x = type2 + 0.2,
      y = value,
      color = key
    ),
    width = 0.2 - 0.25 * 0.2,
    height = 0
  ) +
  # vertical segment from the group minimum to the group maximum
  geom_segment(aes(
    x = type2,
    y = d_ymin,
    xend = type2,
    yend = d_ymax
  )) +
  # top horizontal segment (cap at the maximum)
  geom_segment(aes(
    x = type2 - 0.1,
    y = d_ymax,
    xend = type2,
    yend = d_ymax
  )) +
  # bottom horizontal segment (cap at the minimum)
  geom_segment(aes(
    x = type2 - 0.1,
    y = d_ymin,
    xend = type2,
    yend = d_ymin
  )) +
  # have to manually add in the x scale because we made everything numeric
  # to do the shifting; factor(type) sorts its levels, so 1 = "big" and
  # 2 = "small"
  scale_x_continuous(
    breaks = c(1, 2),
    labels = c("big", "small")
  ) +
  facet_grid(loc ~ key)
【讨论】:
@IloveCatRPython 我正在使用ggplot2_2.2.1
我从 CRAN 安装了最新的 tidyverse
包。 install.packages("tidyverse")
我重装 ggplot2 (2016-12-30) 后,得到了和你一样的图。再次感谢您!
我看到你已经更新了答案——我意识到 position_nudge 并没有真正解决问题,这也是我在这里发现它之后首先说明的一点:它把 x 固定在一个位置,无法让点散开。此外,对于你现在的第二个解决方案,值得指出的是 x 变量必须是因子!如果它是以字符型保存的,就必须使用 as.numeric(as.factor(x)) 或 as.numeric(factor(x))。
【参考方案3】:
我发现这个混合箱线图非常非常可爱,所以我也想重新创建它。
我写了一个 geom_boxjitter,它继承自 geom_boxplot,只做了几处小改动:
- 它只在左半边绘制箱体(geom_rect)。
- 它在右半边按默认宽度对点进行抖动,默认抖动高度为 0.4 * 分辨率(resolution),还可以接受一个 seed 参数。
- 如果把 errorbar.draw 设为 TRUE,它会额外添加(水平方向的)须线端帽,其长度也可以调整。
You can check the code here. 我觉得很棒的是,如今只需稍作改动就能轻松修改现有的 geom。下面使用您的部分数据:
library(tidyverse)
library(cowplot)
library(ggparl)
# Hybrid box/jitter plot built with geom_boxjitter(), restricted to the
# keys p1 and p2; the result is stored in P and then printed.
P <- dat_long %>%
  filter(key %in% c("p1", "p2")) %>%
  ggplot(mapping = aes(x = type, y = value, fill = key)) +
  geom_boxjitter(
    outlier.color = NA,
    jitter.shape = 21,
    jitter.color = NA,
    jitter.height = 0.05,
    jitter.width = 0.075,
    errorbar.draw = TRUE
  ) +
  scale_fill_manual(values = c("#ecb21e", "#812e91")) +
  ylim(c(-0.05, 1.05)) +
  theme(legend.position = "none")
P
【讨论】:
尝试您的代码后出现此错误:Error: Found object is not a position
哦……哎呀,我已经修复了这个错误。是 ggplot 开发版里的东西混了进来——必须把 dodge2 换成 dodge :)
在为 R 3.4.4 重新安装 ggplot2 后,点又出现了。谢谢!
只是想提一下,我已经改进了 ggpol 库中的混合箱线图!
@MatiasAndina 延迟响应,但这对于 gghalves 库是可行的,使用参数 boxplot.center = TRUE
:)以上是关于如何绘制混合箱线图:另一半有抖动点的半箱线图?的主要内容,如果未能解决你的问题,请参考以下文章