R语言实战 - 基本数据管理
Posted 你的踏板车要滑向哪里
tags:
篇首语：本文由小常识网 (cha138.com) 小编为大家整理，主要介绍了 R语言实战 - 基本数据管理 相关的知识，希望对你有一定的参考价值。
5. 缺失值
> y <- c(1,2,3,NA)
> is.na(y)
[1] FALSE FALSE FALSE  TRUE
> leadership
  manager testDate country gender age item1 item2 item3 item4 item5
1       1 10/24/08      US      M  32     5     4     5     5     5
2       2 10/28/08      US      F  45     3     5     2     5     5
3       3  10/1/08      UK      F  25     3     5     5     5     2
4       4 10/12/08      UK      M  39     3     3     4    NA    NA
5       5   5/1/09      UK      F  NA     2     2     1     2     1
  stringAsFactors agecat
1           FALSE  Young
2           FALSE  Young
3           FALSE  Young
4           FALSE  Young
5           FALSE   <NA>
> leadership[,6:10]
  item1 item2 item3 item4 item5
1     5     4     5     5     5
2     3     5     2     5     5
3     3     5     5     5     2
4     3     3     4    NA    NA
5     2     2     1     2     1
> is.na(leadership[,6:10])
  item1 item2 item3 item4 item5
1 FALSE FALSE FALSE FALSE FALSE
2 FALSE FALSE FALSE FALSE FALSE
3 FALSE FALSE FALSE FALSE FALSE
4 FALSE FALSE FALSE  TRUE  TRUE
5 FALSE FALSE FALSE FALSE FALSE
>
5.1 重编码某些值为缺失值
> leadership$age[leadership$age == 99] <- NA
> leadership$age
[1] 32 45 25 39 NA
>
5.2 在分析中排除缺失值
> x <- c(1,2,NA,3)
> y <- x[1] + x[2] + x[3] + x[4]
> z <- sum(x)
> x
[1]  1  2 NA  3
> y
[1] NA
> z
[1] NA
>
>
> x <- c(1,2,NA,3)
> y <- sum(x, na.rm=TRUE)
>
> X
Error: object 'X' not found
> x
[1]  1  2 NA  3
> y
[1] 6
> leadership
  manager testDate country gender age item1 item2 item3 item4 item5
1       1 10/24/08      US      M  32     5     4     5     5     5
2       2 10/28/08      US      F  45     3     5     2     5     5
3       3  10/1/08      UK      F  25     3     5     5     5     2
4       4 10/12/08      UK      M  39     3     3     4    NA    NA
5       5   5/1/09      UK      F  NA     2     2     1     2     1
  stringAsFactors agecat
1           FALSE  Young
2           FALSE  Young
3           FALSE  Young
4           FALSE  Young
5           FALSE   <NA>
> newdata <- na.omit(leadership)
> newdata
  manager testDate country gender age item1 item2 item3 item4 item5
1       1 10/24/08      US      M  32     5     4     5     5     5
2       2 10/28/08      US      F  45     3     5     2     5     5
3       3  10/1/08      UK      F  25     3     5     5     5     2
  stringAsFactors agecat
1           FALSE  Young
2           FALSE  Young
3           FALSE  Young
>
6. 日期值
> mydates <- as.Date(c("2007-06-22", "2004-02-13"))
> mydates
[1] "2007-06-22" "2004-02-13"
>
>
> strDates <- c("01/05/1965", "08/16/1975")
> strDates
[1] "01/05/1965" "08/16/1975"
> dates <- as.Date(strDates, "%m/%d/%Y")
> dates
[1] "1965-01-05" "1975-08-16"
>
> myformat <- "%m/%d/%y"
> leadership$date <- as.Date(leadership$date, myformat)
Error in as.Date.default(leadership$date, myformat) :
  do not know how to convert 'leadership$date' to class “Date”
> leadership$testdate <- as.Date(leadership$date, myformat)
Error in as.Date.default(leadership$date, myformat) :
  do not know how to convert 'leadership$date' to class “Date”
> leadership$testDate <- as.Date(leadership$testDate, myformat)
> leadership$testDate
[1] "2008-10-24" "2008-10-28" "2008-10-01" "2008-10-12" "2009-05-01"
> leadership
  manager   testDate country gender age item1 item2 item3 item4 item5
1       1 2008-10-24      US      M  32     5     4     5     5     5
2       2 2008-10-28      US      F  45     3     5     2     5     5
3       3 2008-10-01      UK      F  25     3     5     5     5     2
4       4 2008-10-12      UK      M  39     3     3     4    NA    NA
5       5 2009-05-01      UK      F  NA     2     2     1     2     1
  stringAsFactors agecat
1           FALSE  Young
2           FALSE  Young
3           FALSE  Young
4           FALSE  Young
5           FALSE   <NA>
>
> Sys.Date()
[1] "2017-09-07"
> date()
[1] "Thu Sep 07 22:40:04 2017"
> today <- Sys.Date()
> format(today, format="%B %d %Y")
[1] "September 07 2017"
> format(today, format="%A")
[1] "Thursday"
>
>
> startdate <- as.Date("2004-02-13")
> enddate <- as.Date("2011-01-22")
> days <- enddate - startdate
> days
Time difference of 2535 days
>
>
> today <- Sys.Date()
> dob <- as.Date("1988-06-30")
> difftime(today, dob, units="weeks")
Time difference of 1523 weeks
> difftime(today, dob, units="days")
Time difference of 10661 days
> format(dob, format="%A")
[1] "Thursday"
> format(as.Date("2017-09-07"), format="%A")
[1] "Thursday"
> format(as.Date("2017-09-06"), format="%A")
[1] "Wednesday"
>
6.1 将日期转换为字符型变量
> dates
[1] "1965-01-05" "1975-08-16"
> strDates <- as.character(dates)
> strDates
[1] "1965-01-05" "1975-08-16"
>
6.2 更进一步
help(as.Date)
help(strftime)
help(ISOdatetime)
lubridate 包
fCalendar 包（该包现已被 timeDate 包取代）
7. 类型转换
> a <- c(1, 2, 3)
> a
[1] 1 2 3
> is.numeric(a)
[1] TRUE
> is.vector(a)
[1] TRUE
>
> a <- as.character(a)
> a
[1] "1" "2" "3"
> is.numeric(a)
[1] FALSE
> is.vector(a)
[1] TRUE
> is.character(a)
[1] TRUE
>
以上是关于 R语言实战 - 基本数据管理 的主要内容，如果未能解决你的问题，请参考以下文章。