R语言实战 - 高级数据管理
Posted 你的踏板车要滑向哪里
tags:
篇首语:本文由小常识网(cha138.com)小编为大家整理,主要介绍了R语言实战 - 高级数据管理相关的知识,希望对你有一定的参考价值。
2.4 字符处理函数
> x <- c("ab", "cde", "fghij") > length(x) [1] 3 > nchar(x[3]) [1] 5 > > > x <- "abcdef" > substr(x, 2, 4) [1] "bcd" > substr(x, 2, 4) <- "22222" > x [1] "a222ef" > > > grep("A", c("b", "A", "c"), fixed=TRUE) [1] 2 > > > sub("\\s", ".", "Hello There") [1] "Hello.There" > > > y <- strsplit("abc", "") > y [[1]] [1] "a" "b" "c" > unlist(y)[2] [1] "b" > sapply(y, "[", 2) [1] "b" > > > paste("x", 1:3, sep="") [1] "x1" "x2" "x3" > paste("x", 1:3, sep="M") [1] "xM1" "xM2" "xM3" > paste("Today is", date()) [1] "Today is Sun Sep 10 20:39:26 2017" > > > toupper("abc") [1] "ABC" > tolower("ABC") [1] "abc" >
2.5 其他实用函数
> x <- c(2, 5, 6, 9) > length(x) [1] 4 > > > indices <- seq(1, 10, 2) > indices [1] 1 3 5 7 9 > > > y <- rep(1:3, 2) > y [1] 1 2 3 1 2 3 > > > z <- cut(y, 3) > z [1] (0.998,1.67] (1.67,2.33] (2.33,3] (0.998,1.67] (1.67,2.33] [6] (2.33,3] Levels: (0.998,1.67] (1.67,2.33] (2.33,3] > > > u <- pretty(y, 3) > u [1] 1.0 1.5 2.0 2.5 3.0 > > > firstname <- c("Jane") > cat("Hello", firstname, "\n") Hello Jane > > > name <- "Bob" > cat("Hello", name, "\b.\n", "Isn\'t R", "\t", "GREAT?\n") Hello Bob. Isn't R GREAT? >
2.6 将函数应用于矩阵和数据框
> a <- 5 > sqrt(a) [1] 2.24 > b <- c(1.243, 5.654, 2.99) > round(b) [1] 1 6 3 > c <- matrix(runif(12), nrow=3) > c [,1] [,2] [,3] [,4] [1,] 0.9636 0.216 0.289 0.913 [2,] 0.2068 0.240 0.804 0.353 [3,] 0.0862 0.197 0.378 0.931 > log(c) [,1] [,2] [,3] [,4] [1,] -0.0371 -1.53 -1.241 -0.0912 [2,] -1.5762 -1.43 -0.218 -1.0402 [3,] -2.4511 -1.62 -0.972 -0.0710 > mean(c) [1] 0.465 >
> mydata <- matrix(rnorm(30), nrow=6) > mydata [,1] [,2] [,3] [,4] [,5] [1,] 0.459 1.203 1.234 0.591 -0.281 [2,] -1.261 0.769 -1.891 -0.435 0.812 [3,] -0.527 0.238 -0.223 -0.251 -0.208 [4,] -0.557 -1.415 0.768 -0.926 1.451 [5,] -0.374 2.934 0.388 1.087 0.841 [6,] -0.604 0.935 0.609 -1.944 -0.866 > apply(mydata, 1, mean) [1] 0.641 -0.401 -0.194 -0.136 0.975 -0.374 > apply(mydata, 2, mean) [1] -0.478 0.777 0.148 -0.313 0.292 > apply(mydata, 2, mean, trim=0.2) [1] -0.516 0.786 0.386 -0.255 0.291 >
3. 数据处理难题的一套解决方案
> options(digits=2) > > Student <- c("John Davis", "Angela Williams", "Bullwinkle Moose", + "David Jones", "Janice Markhammer", "Cheryl Cushing", + "Reuven Ytzrhak", "Greg Knox", "Joel England", + "Mary Rayburn") > Math <- c(502, 600, 412, 358, 495, 512, 410, 625, 573, 522) > Science <- c(95, 99, 80, 82, 75, 85, 80, 95, 89, 86) > English <- c(25, 22, 18, 15, 20, 28, 15, 30, 27, 18) > roster <- data.frame(Student, Math, Science, English, + stringsAsFactors=FALSE) > roster Student Math Science English 1 John Davis 502 95 25 2 Angela Williams 600 99 22 3 Bullwinkle Moose 412 80 18 4 David Jones 358 82 15 5 Janice Markhammer 495 75 20 6 Cheryl Cushing 512 85 28 7 Reuven Ytzrhak 410 80 15 8 Greg Knox 625 95 30 9 Joel England 573 89 27 10 Mary Rayburn 522 86 18 > z <- scale(roster[, 2:4]) > z Math Science English [1,] 0.013 1.078 0.587 [2,] 1.143 1.591 0.037 [3,] -1.026 -0.847 -0.697 [4,] -1.649 -0.590 -1.247 [5,] -0.068 -1.489 -0.330 [6,] 0.128 -0.205 1.137 [7,] -1.049 -0.847 -1.247 [8,] 1.432 1.078 1.504 [9,] 0.832 0.308 0.954 [10,] 0.243 -0.077 -0.697 attr(,"scaled:center") Math Science English 501 87 22 attr(,"scaled:scale") Math Science English 86.7 7.8 5.5 > score <- apply(z, 1, mean) > score [1] 0.56 0.92 -0.86 -1.16 -0.63 0.35 -1.05 1.34 0.70 -0.18 > roster <- cbind(roster, score) > roster Student Math Science English score 1 John Davis 502 95 25 0.56 2 Angela Williams 600 99 22 0.92 3 Bullwinkle Moose 412 80 18 -0.86 4 David Jones 358 82 15 -1.16 5 Janice Markhammer 495 75 20 -0.63 6 Cheryl Cushing 512 85 28 0.35 7 Reuven Ytzrhak 410 80 15 -1.05 8 Greg Knox 625 95 30 1.34 9 Joel England 573 89 27 0.70 10 Mary Rayburn 522 86 18 -0.18 > y <- quantile(roster$score, c(.8, .6, .4, .2)) > y 80% 60% 40% 20% 0.74 0.44 -0.36 -0.89 > roster$grade[score >= y[1]] <- "A" > roster$grade[score < y[1] & score >= y[2]] <- "B" > roster$grade[score < y[2] & score >= y[3]] <- "C" > roster$grade[score < y[3] & score >= y[4]] <- "D" > roster$grade[score < y[4]] <- "F" > roster 
Student Math Science English score grade 1 John Davis 502 95 25 0.56 B 2 Angela Williams 600 99 22 0.92 A 3 Bullwinkle Moose 412 80 18 -0.86 D 4 David Jones 358 82 15 -1.16 F 5 Janice Markhammer 495 75 20 -0.63 D 6 Cheryl Cushing 512 85 28 0.35 C 7 Reuven Ytzrhak 410 80 15 -1.05 F 8 Greg Knox 625 95 30 1.34 A 9 Joel England 573 89 27 0.70 B 10 Mary Rayburn 522 86 18 -0.18 C > name <- strsplit((roster$Student), " ") > name [[1]] [1] "John" "Davis" [[2]] [1] "Angela" "Williams" [[3]] [1] "Bullwinkle" "Moose" [[4]] [1] "David" "Jones" [[5]] [1] "Janice" "Markhammer" [[6]] [1] "Cheryl" "Cushing" [[7]] [1] "Reuven" "Ytzrhak" [[8]] [1] "Greg" "Knox" [[9]] [1] "Joel" "England" [[10]] [1] "Mary" "Rayburn" > Firstname <- sapply(name, "[", 1) > Firstname [1] "John" "Angela" "Bullwinkle" "David" "Janice" [6] "Cheryl" "Reuven" "Greg" "Joel" "Mary" > Lastname <- sapply(name, "[", 2) > Lastname [1] "Davis" "Williams" "Moose" "Jones" "Markhammer" [6] "Cushing" "Ytzrhak" "Knox" "England" "Rayburn" > roster <- cbind(Firstname, Lastname, roster[ , -1]) > roster Firstname Lastname Math Science English score grade 1 John Davis 502 95 25 0.56 B 2 Angela Williams 600 99 22 0.92 A 3 Bullwinkle Moose 412 80 18 -0.86 D 4 David Jones 358 82 15 -1.16 F 5 Janice Markhammer 495 75 20 -0.63 D 6 Cheryl Cushing 512 85 28 0.35 C 7 Reuven Ytzrhak 410 80 15 -1.05 F 8 Greg Knox 625 95 30 1.34 A 9 Joel England 573 89 27 0.70 B 10 Mary Rayburn 522 86 18 -0.18 C > roster[order(Lastname, Firstname), ] Firstname Lastname Math Science English score grade 6 Cheryl Cushing 512 85 28 0.35 C 1 John Davis 502 95 25 0.56 B 9 Joel England 573 89 27 0.70 B 4 David Jones 358 82 15 -1.16 F 8 Greg Knox 625 95 30 1.34 A 5 Janice Markhammer 495 75 20 -0.63 D 3 Bullwinkle Moose 412 80 18 -0.86 D 10 Mary Rayburn 522 86 18 -0.18 C 2 Angela Williams 600 99 22 0.92 A 7 Reuven Ytzrhak 410 80 15 -1.05 F >
quantile() http://blog.csdn.net/u012543538/article/details/17025789
scale() http://blog.sina.com.cn/s/blog_b623d3f40102v2zg.html
以上是关于R语言实战 - 高级数据管理的主要内容,如果未能解决你的问题,请参考以下文章:
R语言实战英文书籍,配套源代码,帮助你学习R语言!(可下载)