将列表列表转换为数据框

Posted

技术标签:

【中文标题】将列表列表转换为数据框【英文标题】:Convert list of lists to dataframe 【发布时间】:2017-06-07 20:34:24 【问题描述】:

我有一个嵌套列表,名为 mylist,长度为 4。

此列表的每个元素都是一个实验:exp1.1、exp1.2、exp2.1、exp2.2

每个实验都包含对四个植物生长阶段长度(以天为单位)的观察:EM-V6、V6-R0、R0-R4、R4-R9

每个生长阶段都被组织成一个包含 year 和 mean 两列的数据框。

这是完整的数据:

mylist=structure(list(exp1.1 = structure(list(`EM-V6` = structure(list(
    year = 2011:2100, mean = c(34, 34, 32, 28, 25, 32, 32, 28, 
    27, 30, 32, 31, 33, 28, 26, 31, 33, 27, 34, 26, 28, 27, 27, 
    30, 29, 31, 34, 30, 26, 31, 33, 33, 27, 30, 28, 32, 31, 29, 
    32, 31, 25, 28, 28, 26, 32, 29, 26, 31, 28, 29, 30, 25, 27, 
    32, 27, 28, 28, 30, 24, 30, 29, 29, 29, 28, 26, 28, 26, 26, 
    28, 31, 30, 27, 26, 28, 25, 24, 24, 30, 27, 26, 26, 27, 26, 
    26, 24, 26, 28, 25, 30, 26)), .Names = c("year", "mean"), row.names = c(NA, 
-90L), class = "data.frame"), `V6-R0` = structure(list(year = 2011:2100, 
    mean = c(30, 33, 33, 32, 29, 30, 32, 31, 32, 30, 33, 30, 
    32, 33, 33, 32, 29, 31, 32, 28, 31, 29, 36, 29, 30, 30, 33, 
    31, 33, 30, 34, 32, 29, 31, 28, 30, 30, 29, 34, 31, 32, 31, 
    30, 28, 32, 29, 29, 32, 29, 28, 29, 29, 32, 31, 27, 32, 29, 
    31, 29, 29, 30, 29, 29, 29, 28, 28, 30, 30, 30, 32, 29, 29, 
    30, 29, 29, 29, 28, 28, 29, 30, 29, 29, 29, 30, 28, 30, 30, 
    29, 29, 29)), .Names = c("year", "mean"), row.names = c(NA, 
-90L), class = "data.frame"), `R0-R4` = structure(list(year = 2011:2100, 
    mean = c(31, 32, 32, 33, 32, 32, 33, 31, 34, 32, 33, 33, 
    32, 31, 33, 31, 32, 32, 32, 30, 32, 31, 34, 30, 31, 32, 34, 
    33, 34, 32, 36, 33, 32, 32, 31, 30, 32, 32, 32, 32, 32, 32, 
    31, 30, 30, 31, 32, 32, 30, 30, 32, 31, 31, 32, 30, 32, 29, 
    32, 31, 30, 32, 30, 30, 31, 32, 30, 31, 30, 31, 32, 31, 31, 
    30, 30, 30, 31, 30, 30, 31, 30, 31, 30, 30, 30, 31, 32, 30, 
    31, 30, 30)), .Names = c("year", "mean"), row.names = c(NA, 
-90L), class = "data.frame"), `R4-R9` = structure(list(year = 2011:2100, 
    mean = c(27, 29, 28, 28, 27, 30, 29, 27, 30, 26, 30, 28, 
    29, 28, 29, 27, 29, 28, 25, 26, 26, 25, 27, 27, 27, 28, 30, 
    28, 29, 27, 29, 28, 29, 28, 26, 26, 28, 28, 30, 28, 27, 25, 
    26, 25, 25, 26, 26, 27, 25, 25, 26, 25, 27, 28, 24, 27, 25, 
    28, 26, 24, 27, 26, 27, 25, 26, 26, 24, 26, 25, 26, 24, 25, 
    25, 26, 26, 25, 25, 25, 25, 25, 26, 25, 25, 25, 25, 26, 26, 
    26, 25, 24)), .Names = c("year", "mean"), row.names = c(NA, 
-90L), class = "data.frame")), .Names = c("EM-V6", "V6-R0", "R0-R4", 
"R4-R9")), exp1.2 = structure(list(`EM-V6` = structure(list(year = 2011:2100, 
    mean = c(34, 34, 32, 28, 25, 32, 32, 28, 27, 30, 32, 31, 
    33, 28, 26, 31, 33, 27, 34, 26, 28, 27, 27, 30, 29, 31, 34, 
    30, 26, 31, 33, 33, 27, 30, 28, 32, 31, 29, 32, 31, 25, 28, 
    28, 26, 32, 29, 26, 31, 28, 29, 30, 25, 27, 32, 27, 28, 28, 
    30, 24, 30, 29, 29, 29, 28, 26, 28, 26, 26, 28, 31, 30, 27, 
    26, 28, 25, 24, 24, 30, 27, 26, 26, 27, 26, 26, 24, 26, 28, 
    25, 30, 26)), .Names = c("year", "mean"), row.names = c(NA, 
-90L), class = "data.frame"), `V6-R0` = structure(list(year = 2011:2100, 
    mean = c(30, 33, 33, 32, 29, 30, 32, 31, 32, 30, 33, 30, 
    32, 33, 33, 32, 29, 31, 32, 28, 31, 29, 36, 29, 30, 30, 33, 
    31, 33, 30, 34, 32, 29, 31, 28, 30, 30, 29, 34, 31, 32, 31, 
    30, 28, 32, 29, 29, 32, 29, 28, 29, 29, 32, 31, 27, 32, 29, 
    31, 29, 29, 30, 29, 29, 29, 28, 28, 30, 30, 30, 32, 29, 29, 
    30, 29, 29, 29, 28, 28, 29, 30, 29, 29, 29, 30, 28, 30, 30, 
    29, 29, 29)), .Names = c("year", "mean"), row.names = c(NA, 
-90L), class = "data.frame"), `R0-R4` = structure(list(year = 2011:2100, 
    mean = c(31, 32, 32, 33, 32, 32, 33, 31, 34, 32, 33, 33, 
    32, 31, 33, 31, 32, 32, 32, 30, 32, 31, 34, 30, 31, 32, 34, 
    33, 34, 32, 36, 33, 32, 32, 31, 30, 32, 32, 32, 32, 32, 32, 
    31, 30, 30, 31, 32, 32, 30, 30, 32, 31, 31, 32, 30, 32, 29, 
    32, 31, 30, 32, 30, 30, 31, 32, 30, 31, 30, 31, 32, 31, 31, 
    30, 30, 30, 31, 30, 30, 31, 30, 31, 30, 30, 30, 31, 32, 30, 
    31, 30, 30)), .Names = c("year", "mean"), row.names = c(NA, 
-90L), class = "data.frame"), `R4-R9` = structure(list(year = 2011:2100, 
    mean = c(27, 29, 28, 28, 27, 30, 29, 27, 30, 26, 30, 28, 
    29, 28, 29, 27, 29, 28, 25, 26, 26, 25, 27, 27, 27, 28, 30, 
    28, 29, 27, 29, 28, 29, 28, 26, 26, 28, 28, 30, 28, 27, 25, 
    26, 25, 25, 26, 26, 27, 25, 25, 26, 25, 27, 28, 24, 27, 25, 
    28, 26, 24, 27, 26, 27, 25, 26, 26, 24, 26, 25, 26, 24, 25, 
    25, 26, 26, 25, 25, 25, 25, 25, 26, 25, 25, 25, 25, 26, 26, 
    26, 25, 24)), .Names = c("year", "mean"), row.names = c(NA, 
-90L), class = "data.frame")), .Names = c("EM-V6", "V6-R0", "R0-R4", 
"R4-R9")), exp2.1 = structure(list(`EM-V6` = structure(list(year = 2011:2100, 
    mean = c(34, 34, 32, 28, 25, 32, 32, 28, 27, 30, 32, 31, 
    33, 28, 26, 31, 33, 27, 34, 26, 28, 27, 27, 30, 29, 31, 34, 
    30, 26, 31, 33, 33, 27, 30, 28, 32, 31, 29, 32, 31, 25, 28, 
    28, 26, 32, 29, 26, 31, 28, 29, 30, 25, 27, 32, 27, 28, 28, 
    30, 24, 30, 29, 29, 29, 28, 26, 28, 26, 26, 28, 31, 30, 27, 
    26, 28, 25, 24, 24, 30, 27, 26, 26, 27, 26, 26, 24, 26, 28, 
    25, 30, 26)), .Names = c("year", "mean"), row.names = c(NA, 
-90L), class = "data.frame"), `V6-R0` = structure(list(year = 2011:2100, 
    mean = c(30, 33, 33, 32, 29, 30, 32, 31, 32, 30, 33, 30, 
    32, 33, 33, 32, 29, 31, 32, 28, 31, 29, 36, 29, 30, 30, 33, 
    31, 33, 30, 34, 32, 29, 31, 28, 30, 30, 29, 34, 31, 32, 31, 
    30, 28, 32, 29, 29, 32, 29, 28, 29, 29, 32, 31, 27, 32, 29, 
    31, 29, 29, 30, 29, 29, 29, 28, 28, 30, 30, 30, 32, 29, 29, 
    30, 29, 29, 29, 28, 28, 29, 30, 29, 29, 29, 30, 28, 30, 30, 
    29, 29, 29)), .Names = c("year", "mean"), row.names = c(NA, 
-90L), class = "data.frame"), `R0-R4` = structure(list(year = 2011:2100, 
    mean = c(31, 32, 32, 33, 32, 32, 33, 31, 34, 32, 33, 33, 
    32, 31, 33, 31, 32, 32, 32, 30, 32, 31, 34, 30, 31, 32, 34, 
    33, 34, 32, 36, 33, 32, 32, 31, 30, 32, 32, 32, 32, 32, 32, 
    31, 30, 30, 31, 32, 32, 30, 30, 32, 31, 31, 32, 30, 32, 29, 
    32, 31, 30, 32, 30, 30, 31, 32, 30, 31, 30, 31, 32, 31, 31, 
    30, 30, 30, 31, 30, 30, 31, 30, 31, 30, 30, 30, 31, 32, 30, 
    31, 30, 30)), .Names = c("year", "mean"), row.names = c(NA, 
-90L), class = "data.frame"), `R4-R9` = structure(list(year = 2011:2100, 
    mean = c(27, 29, 28, 28, 27, 30, 29, 27, 30, 26, 30, 28, 
    29, 28, 29, 27, 29, 28, 25, 26, 26, 25, 27, 27, 27, 28, 30, 
    28, 29, 27, 29, 28, 29, 28, 26, 26, 28, 28, 30, 28, 27, 25, 
    26, 25, 25, 26, 26, 27, 25, 25, 26, 25, 27, 28, 24, 27, 25, 
    28, 26, 24, 27, 26, 27, 25, 26, 26, 24, 26, 25, 26, 24, 25, 
    25, 26, 26, 25, 25, 25, 25, 25, 26, 25, 25, 25, 25, 26, 26, 
    26, 25, 24)), .Names = c("year", "mean"), row.names = c(NA, 
-90L), class = "data.frame")), .Names = c("EM-V6", "V6-R0", "R0-R4", 
"R4-R9")), exp2.2 = structure(list(`EM-V6` = structure(list(year = 2011:2100, 
    mean = c(34, 34, 32, 28, 25, 32, 32, 28, 27, 30, 32, 31, 
    33, 28, 26, 31, 33, 27, 34, 26, 28, 27, 27, 30, 29, 31, 34, 
    30, 26, 31, 33, 33, 27, 30, 28, 32, 31, 29, 32, 31, 25, 28, 
    28, 26, 32, 29, 26, 31, 28, 29, 30, 25, 27, 32, 27, 28, 28, 
    30, 24, 30, 29, 29, 29, 28, 26, 28, 26, 26, 28, 31, 30, 27, 
    26, 28, 25, 24, 24, 30, 27, 26, 26, 27, 26, 26, 24, 26, 28, 
    25, 30, 26)), .Names = c("year", "mean"), row.names = c(NA, 
-90L), class = "data.frame"), `V6-R0` = structure(list(year = 2011:2100, 
    mean = c(30, 33, 33, 32, 29, 30, 32, 31, 32, 30, 33, 30, 
    32, 33, 33, 32, 29, 31, 32, 28, 31, 29, 36, 29, 30, 30, 33, 
    31, 33, 30, 34, 32, 29, 31, 28, 30, 30, 29, 34, 31, 32, 31, 
    30, 28, 32, 29, 29, 32, 29, 28, 29, 29, 32, 31, 27, 32, 29, 
    31, 29, 29, 30, 29, 29, 29, 28, 28, 30, 30, 30, 32, 29, 29, 
    30, 29, 29, 29, 28, 28, 29, 30, 29, 29, 29, 30, 28, 30, 30, 
    29, 29, 29)), .Names = c("year", "mean"), row.names = c(NA, 
-90L), class = "data.frame"), `R0-R4` = structure(list(year = 2011:2100, 
    mean = c(31, 32, 32, 33, 32, 32, 33, 31, 34, 32, 33, 33, 
    32, 31, 33, 31, 32, 32, 32, 30, 32, 31, 34, 30, 31, 32, 34, 
    33, 34, 32, 36, 33, 32, 32, 31, 30, 32, 32, 32, 32, 32, 32, 
    31, 30, 30, 31, 32, 32, 30, 30, 32, 31, 31, 32, 30, 32, 29, 
    32, 31, 30, 32, 30, 30, 31, 32, 30, 31, 30, 31, 32, 31, 31, 
    30, 30, 30, 31, 30, 30, 31, 30, 31, 30, 30, 30, 31, 32, 30, 
    31, 30, 30)), .Names = c("year", "mean"), row.names = c(NA, 
-90L), class = "data.frame"), `R4-R9` = structure(list(year = 2011:2100, 
    mean = c(27, 29, 28, 28, 27, 30, 29, 27, 30, 26, 30, 28, 
    29, 28, 29, 27, 29, 28, 25, 26, 26, 25, 27, 27, 27, 28, 30, 
    28, 29, 27, 29, 28, 29, 28, 26, 26, 28, 28, 30, 28, 27, 25, 
    26, 25, 25, 26, 26, 27, 25, 25, 26, 25, 27, 28, 24, 27, 25, 
    28, 26, 24, 27, 26, 27, 25, 26, 26, 24, 26, 25, 26, 24, 25, 
    25, 26, 26, 25, 25, 25, 25, 25, 26, 25, 25, 25, 25, 26, 26, 
    26, 25, 24)), .Names = c("year", "mean"), row.names = c(NA, 
-90L), class = "data.frame")), .Names = c("EM-V6", "V6-R0", "R0-R4", 
"R4-R9"))), .Names = c("exp1.1", "exp1.2", "exp2.1", "exp2.2"
))

我需要做的是将此嵌套列表“取消列出”到如下所示的数据框中:

YEAR   EXP   EM-V6   V6-R0   R0-R4   R4-R9
2011  exp1.1  34      30      31      27
2011  exp1.2  34      30      31      27
2011  exp2.1  34      30      31      27
2011  exp2.2  34      30      31      27

这意味着:

 - first year, first experiment, and growth stages.
 - first year, second experiment and growth stages.
 - first year, third experiment and growth stages
 - first year, fourth experiment and growth stages
 - second year, first experiment and growth stages

等等。

如何执行该数据转换?

【问题讨论】:

@akrun 你确定这个问题是重复的吗?另一个问题 How to reshape data from long to wide format? 并不处理嵌套列表。 我同意,另一个问题解决的是完全不同的需求。请考虑取消将此问题标记为重复,@akrun。 @UweBlock 重复标记已经被移除了。 【参考方案1】:

我们可以使用tidyverse,代码更简洁易读

library(dplyr)
library(tidyr)
library(purrr)

# Flatten the two-level list into one long table, then turn the growth stages
# into columns.
res1 <- mylist %>%
  # Inner level: stack the stage data frames of each experiment; the list
  # names (the stage labels) are stored in an "id" column.
  map(bind_rows, .id = "id") %>%
  # Outer level: stack the experiments; their names are stored in "EXP".
  bind_rows(.id = "EXP") %>%
  # Reshape to wide format, one column per stage. pivot_wider() supersedes
  # spread() and keeps the stages in their order of first appearance
  # (EM-V6, V6-R0, R0-R4, R4-R9) instead of sorting them alphabetically.
  pivot_wider(names_from = id, values_from = mean)

head(res1, 4)
# Expected values (the exact print layout depends on the tidyr version):
#      EXP year EM-V6 V6-R0 R0-R4 R4-R9
# 1 exp1.1 2011    34    30    31    27
# 2 exp1.1 2012    34    33    32    29
# 3 exp1.1 2013    32    33    32    28
# 4 exp1.1 2014    28    32    33    28

或者只用 base R 来解决:先用 lapply 遍历 mylist,在每个实验内部通过 Map 和 cbind,用内层列表元素的 names 新建一列 'name',再用 do.call(rbind, ...) 把这些数据框按行合并;然后再用一次 Map,根据 'mylist' 的 names 新建一列 'EXP',并再次用 rbind 合并各列表元素;最后用 reshape 将结果转换为“宽”格式

# Base R only: build one long data frame, then reshape it to wide format.
res <- do.call(
  rbind,
  Map(
    cbind,
    lapply(mylist, function(stages) {
      # Tag every row of each stage data frame with its stage label, then
      # stack the stages of this experiment on top of each other.
      do.call(rbind, Map(cbind, stages, name = names(stages)))
    }),
    # Tag every row with the name of the experiment it came from.
    EXP = names(mylist)
  )
)
# One row per year/experiment; the `mean` of each stage becomes its own column.
res2 <- reshape(
  res,
  direction = "wide",
  idvar = c("year", "EXP"),
  timevar = "name"
)
row.names(res2) <- NULL  # reset to the default 1..n row names
head(res2, n = 4)
#   year    EXP mean.EM-V6 mean.V6-R0 mean.R0-R4 mean.R4-R9
#1 2011 exp1.1         34         30         31         27
#2 2012 exp1.1         34         33         32         29
#3 2013 exp1.1         32         33         32         28
#4 2014 exp1.1         28         32         33         28

注意:未使用外部包 (100% base R)

或使用 reshape2 中的 dcast 转换为“宽”格式

library(reshape2)
# dcast() orders the new columns by the factor levels of `name`, or
# alphabetically when `name` is a plain character vector (the case since
# R 4.0.0, where data frames no longer convert strings to factors by default).
# Fixing the levels to the order of first appearance keeps the stage columns
# in the order shown below on any R version.
res2 <- dcast(
  transform(res, name = factor(name, levels = unique(name))),
  year + EXP ~ name,
  value.var = "mean"
)
head(res2, 4)
#   year    EXP EM-V6 V6-R0 R0-R4 R4-R9
#1 2011 exp1.1    34    30    31    27
#2 2011 exp1.2    34    30    31    27
#3 2011 exp2.1    34    30    31    27
#4 2011 exp2.2    34    30    31    27

【讨论】:

谢谢,它有效!我很高兴我问了这个问题,因为答案远远超出了我的 R 技能。 @thiagoveloso 我还更新了一个不使用外部包的解决方案。【参考方案2】:

使用data.table-package 中的rbindlist 两次的替代方法:

library(data.table)
# Bind the stage data frames inside each experiment together; `idcol` stores
# the list names (the growth stages, not the year) in a "stages" column.
# The result is a list holding one data.table per experiment.
# `idcol` is spelled out in full rather than relying on partial argument
# matching of `id`.
step1 <- lapply(mylist, rbindlist, idcol = "stages")
# Bind the experiments together and store their names in "experiment".
step2 <- rbindlist(step1, idcol = "experiment")
# Reshape to wide format: one row per year/experiment, one column per stage.
dcast(step2, year + experiment ~ stages, value.var = "mean")

或者一口气:

# Same pipeline in a single expression: bind stages within each experiment,
# bind the experiments, then reshape to wide. `idcol` is written out in full
# instead of relying on partial argument matching of `id`.
dcast(
  rbindlist(lapply(mylist, rbindlist, idcol = "stages"), idcol = "experiment"),
  year + experiment ~ stages,
  value.var = "mean"
)

给出:

     year experiment EM-V6 R0-R4 R4-R9 V6-R0
  1: 2011     exp1.1    34    31    27    30
  2: 2011     exp1.2    34    31    27    30
  3: 2011     exp2.1    34    31    27    30
  4: 2011     exp2.2    34    31    27    30
  5: 2012     exp1.1    34    32    29    33
 ---                                        
356: 2099     exp2.2    30    30    25    29
357: 2100     exp1.1    26    30    24    29
358: 2100     exp1.2    26    30    24    29
359: 2100     exp2.1    26    30    24    29
360: 2100     exp2.2    26    30    24    29

【讨论】:

它也可以,非常感谢。我会接受你的回答,因为它使用了我已经在我的代码中加载的包(无需加载额外的包)。【参考方案3】:

备用tidyverse:

library(tidyverse)

# map_df() row-binds the per-experiment results and records the experiment
# names in "EXP"; the inner bind_rows() records the stage names in "id".
map_df(mylist, ~ bind_rows(.x, .id = "id"), .id = "EXP") %>%
  # pivot_wider() supersedes spread(); the stage columns keep their order of
  # first appearance (EM-V6, V6-R0, R0-R4, R4-R9) rather than being sorted
  # alphabetically.
  pivot_wider(names_from = id, values_from = mean)

【讨论】:

它还展示了一种不同且更简洁的 purrr 语法。 一个赞成票多少说明至少有一个人认为它已经足够不同了。

以上是关于将列表列表转换为数据框的主要内容,如果未能解决你的问题,请参考以下文章

将数据框转换为列表时出错

将列表列表转换为数据框

将列表转换为 pyspark 数据框

将数据框转换为列表的树结构列表

将嵌套列表中的元素转换为数据框

为啥 list 应该先转换为 RDD 再转换为 Dataframe?有啥方法可以将列表转换为数据框?