dplyr
Posted 不落的月亮
tags:
篇首语:本文由小常识网(cha138.com)小编为大家整理,主要介绍了dplyr相关的知识,希望对你有一定的参考价值。
arrange(.data, ..., .by_group = FALSE)
msleep%>%group_by(vore)%>%arrange(sleep_total)%>%write_csv("C:/Users/panda/Desktop/R-result/arrange0.csv")
msleep%>%group_by(vore)%>%arrange(sleep_total,.by_group=TRUE)%>%write_csv("C:/Users/panda/Desktop/R-result/arrange1.csv")
第一次执行(未指定.by_group = TRUE)时分组仍然存在,但排序忽略分组,所以从结果表中看不出分组;第二次指定.by_group = TRUE后,先按分组变量vore排序,再在组内按sleep_total排序
count(x, ..., wt = NULL, sort = FALSE, name = NULL)
tally(x, wt = NULL, sort = FALSE, name = NULL)
add_count(x, ..., wt = NULL, sort = FALSE, name = NULL, .drop = deprecated())
add_tally(x, wt = NULL, sort = FALSE, name = NULL)
msleep%>%count(vore,sort=TRUE,name='n')%>%write_csv("C:/Users/panda/Desktop/R-result/count0.csv")
msleep%>%count(vore,wt=sleep_total,sort=TRUE,name='n')%>%write_csv("C:/Users/panda/Desktop/R-result/count1.csv")
msleep%>%group_by(vore)%>%tally(sort=TRUE,name='n')%>%write_csv("C:/Users/panda/Desktop/R-result/tally.csv")
msleep%>%add_count(vore,sort=TRUE,name='n')%>%write_csv("C:/Users/panda/Desktop/R-result/add_count.csv")
msleep%>%add_tally(wt=sleep_total,sort=TRUE,name='n')%>%write_csv("C:/Users/panda/Desktop/R-result/add_tally.csv")
<font color=red> .drop = deprecated()</font>
count(列名)==group_by(列名)%>%tally()
add_count()/add_tally()保留原数据的所有行和列,并添加一个计数新列(同一组内各行的新列值相同,所以重复值很多)
指定wt(数值型列)时,新列不再是行数,而是各组内wt列求和sum(wt)的结果
distinct(.data, ..., .keep_all = FALSE)
msleep%>%distinct(vore,.keep_all=TRUE)%>%write_csv("C:/Users/panda/Desktop/R-result/distinct0.csv")
msleep%>%distinct(vore)%>%write_csv("C:/Users/panda/Desktop/R-result/distinct1.csv")
.keep_all = TRUE时保留所有列;指定的列出现重复值时,只保留第一次出现的那一行
filter(.data, ..., .preserve = FALSE)
msleep%>%group_by(vore)%>%filter(sleep_total>10)%>%write_csv("C:/Users/panda/Desktop/R-result/filter0.csv")
.preserve = FALSE(默认)时,筛选后会重新计算分组结构:若某组没有满足条件的行,组的数量会减少;.preserve = TRUE时保持原有分组结构不变
mutate(
.data,
...,
.keep = c("all", "used", "unused", "none"),
.before = NULL,
.after = NULL
)
transmute(.data, ...)
msleep%>%mutate(test=sleep_total*2,.keep='none')%>%write_csv("C:/Users/panda/Desktop/R-result/mutate.csv")
msleep%>%transmute(test=sleep_total*2)%>%write_csv("C:/Users/panda/Desktop/R-result/transmute.csv")
mutate(.data,.keep = "none")==transmute(.data)
all保留所有列,used保留使用到的列,unused保留没使用的列,none只保留新列
pull(.data, var = -1, name = NULL, ...)
msleep%>%pull(name ='vore')
pull()返回的是向量,只有数据没有列名;此段代码未指定var,取最后一列的值,并用name参数指定的vore列作为向量各元素的名称,因此涉及两列数据
var为正整数时从左侧开始计数,为负整数时从右侧开始计数;默认值-1返回最后一列
relocate(.data, ..., .before = NULL, .after = NULL)
msleep%>%relocate(vore,.before='name')%>%write_csv("C:/Users/panda/Desktop/R-result/relocate.csv")
rename(.data, ...)
rename_with(.data, .fn, .cols = everything(), ...)
msleep%>%rename(test=vore)%>%write_csv("C:/Users/panda/Desktop/R-result/rename.csv")
msleep%>%rename_with(toupper)%>%write_csv("C:/Users/panda/Desktop/R-result/rename_with.csv")
<font color='red'>.cols = everything()</font>
select(.data, ...)
msleep%>%select(vore,sleep_total)%>%write_csv("C:/Users/panda/Desktop/R-result/select.csv")
summarise(.data, ..., .groups = NULL)
summarize(.data, ..., .groups = NULL)
msleep%>%group_by(vore)%>%summarize(mean(sleep_total))%>%write_csv("C:/Users/panda/Desktop/R-result/summarise.csv")
.groups参数控制结果的分组结构,<font color='red'>"drop_last"去掉最后一个分组</font>,"drop"时去掉所有分组水平,此时没有组属性;"keep"时保持与.data相同的分组结构; "rowwise"每一行自成一组
slice(.data, ..., .preserve = FALSE)
slice_head(.data, ..., n, prop)
slice_tail(.data, ..., n, prop)
slice_min(.data, order_by, ..., n, prop, with_ties = TRUE)
slice_max(.data, order_by, ..., n, prop, with_ties = TRUE)
slice_sample(.data, ..., n, prop, weight_by = NULL, replace = FALSE)
msleep%>%group_by(vore)%>%slice(1:5)%>%write_csv("C:/Users/panda/Desktop/R-result/slice.csv")
msleep%>%slice_head(prop=0.2)%>%write_csv("C:/Users/panda/Desktop/R-result/slice_head.csv")
msleep%>%slice_tail(n=10)%>%write_csv("C:/Users/panda/Desktop/R-result/slice_tail.csv")
msleep%>%group_by(vore)%>%slice_min(sleep_total,n=5,with_ties = TRUE)%>%write_csv("C:/Users/panda/Desktop/R-result/slice_min.csv")
msleep%>%group_by(vore)%>%slice_max(sleep_total,n=5,with_ties = FALSE)%>%write_csv("C:/Users/panda/Desktop/R-result/slice_max.csv")
msleep%>%slice_sample(n=30,replace=TRUE)%>%write_csv("C:/Users/panda/Desktop/R-result/slice_sample.csv")
.preserve = TRUE与上文作用相同
n, prop提供行数,或选择行的比例
with_ties = TRUE(默认)时,若存在并列值,slice_min()/slice_max()可能会返回比您请求的更多的行;with_ties = FALSE时只返回前n行
weight_by增加此列值大的抽中概率
bind_rows(..., .id = NULL)
bind_cols(
...,
.name_repair = c("unique", "universal", "check_unique", "minimal")
)
one<-msleep%>%slice_head(n=10)
two<-msleep%>%slice_tail(n=10)
bind_rows(one,two,.id='id')%>%write_csv("C:/Users/panda/Desktop/R-result/bind_rows.csv")
bind_cols(one,two)%>%write_csv("C:/Users/panda/Desktop/R-result/bind_cols.csv")
.id将增加一列,用来区分此行数据来自哪个表
相当于pandas的concat():bind_rows()纵向(按行)拼接,bind_cols()横向(按列)拼接
<font color='red'>.name_repair =c("unique", "universal", "check_unique", "minimal")</font>
检索和修复名称-vec_as_names•vctrs (r-lib.org)
union_all(x, y, ...)
one<-msleep%>%slice_head(n=10)
two<-msleep%>%slice_head(n=10)
union_all(one,two)%>%write_csv("C:/Users/panda/Desktop/R-result/union_all.csv")
纵向拼接(按行合并两个表),不删除重复值
inner_join(
x,
y,
by = NULL,
copy = FALSE,
suffix = c(".x", ".y"),
...,
keep = FALSE,
na_matches = c("na", "never")
)
left_join(
x,
y,
by = NULL,
copy = FALSE,
suffix = c(".x", ".y"),
...,
keep = FALSE,
na_matches = c("na", "never")
)
right_join(
x,
y,
by = NULL,
copy = FALSE,
suffix = c(".x", ".y"),
...,
keep = FALSE,
na_matches = c("na", "never")
)
full_join(
x,
y,
by = NULL,
copy = FALSE,
suffix = c(".x", ".y"),
...,
keep = FALSE,
na_matches = c("na", "never")
)
one<-msleep%>%slice_head(n=5)
two<-msleep%>%slice_head(n=8)
left_join(one,two,by='name')%>%write_csv("C:/Users/panda/Desktop/R-result/left_join.csv")
inner_join(one,two,by='name')%>%write_csv("C:/Users/panda/Desktop/R-result/inner_join.csv")
right_join(one,two,by='name')%>%write_csv("C:/Users/panda/Desktop/R-result/right_join.csv")
full_join(one,two,by='name')%>%write_csv("C:/Users/panda/Desktop/R-result/full_join.csv")
<font color='red'>copy = FALSE,suffix = c(".x", ".y"), ..., keep = FALSE,na_matches = c("na", "never")</font>
nest_join(x, y, by = NULL, copy = FALSE, keep = FALSE, name = NULL, ...)
one<-msleep%>%slice_head(n=5)
two<-msleep%>%slice_head(n=8)
nest_join(one,two,by='name')
nest_join()类似left_join(),但返回的形式不一样:x的所有行和列都保留,并新增一个列表列(name = NULL时以y的名称命名,此例中为two <list>),其中每个元素是y中与该行匹配的行
<font color='red'> copy = FALSE, keep = FALSE, name = NULL</font>
Core Socialist Value
prosperity democracy civility harmony
freedom equality justice rule of law
patriotism dedication integrity friendship
semi_join(x, y, by = NULL, copy = FALSE, ..., na_matches = c("na", "never"))
anti_join(x, y, by = NULL, copy = FALSE, ..., na_matches = c("na", "never"))
one<-msleep%>%slice_head(n=5)
two<-msleep%>%slice_head(n=8)
semi_join(two,one,by='name')%>%write_csv("C:/Users/panda/Desktop/R-result/semi_join.csv")
anti_join(two,one,by='name')%>%write_csv("C:/Users/panda/Desktop/R-result/anti_join.csv")
<font color='red'>copy = FALSE, ..., na_matches = c("na", "never")</font>
group_by(.data, ..., .add = FALSE, .drop = group_by_drop_default(.data))
ungroup(x, ...)
msleep%>%group_by(vore)%>%ungroup()
<font color='red'>.add = FALSE, .drop = group_by_drop_default(.data)</font>
group_cols(vars = NULL, data = NULL)
msleep%>%group_by(vore)%>%select(group_cols())%>%write_csv("C:/Users/panda/Desktop/R-result/group_cols.csv")
<font color='red'>vars = NULL, data = NULL</font>
rowwise(data, ...)
msleep%>%rowwise(vore)
以上是关于dplyr的主要内容,如果未能解决你的问题,请参考以下文章