R语言实战:机器学习与数据分析源代码5

Posted 白马负金羁

tags:

篇首语:本文由小常识网(cha138.com)小编为大家整理,主要介绍了R语言实战:机器学习与数据分析源代码5相关的知识,希望对你有一定的参考价值。

本文辑录了《R语言实战——机器学习与数据分析》(电子工业出版社2016年出版)一书第6章至第7章前半部分(至136页)之代码。本书引言请见如下链接:
http://blog.csdn.net/baimafujinji/article/details/51596171



内容简介:本书系统地介绍了统计分析和机器学习领域中最为重要和流行的多种技术及它们的基本原理,在详解有关算法的基础上,结合大量R语言实例演示了这些理论在实践中的使用方法。具体内容被分成三个部分,即R语言编程基础、基于统计的数据分析方法以及机器学习理论。统计分析与机器学习部分又具体介绍了包括参数估计、假设检验、极大似然估计、非参数检验方法(包括列联分析、符号检验、符号秩检验等)、方差分析、线性回归(包括岭回归和Lasso方法)、逻辑回归、支持向量机、聚类分析(包括K均值算法和EM算法)和人工神经网络等内容。同时,统计理论的介绍也为深化读者对于后续机器学习部分的理解提供了很大助益。知识结构和阅读进度的安排上既兼顾了循序渐进的学习规律,亦统筹考虑了夯实基础的必要性。

网上书店地址:

电子工业出版社官网
中国互动出版网China-pub
京东商城(1)
京东商城(2)


Chapter 6

P100~101

# Load the `geyser` data set from the MASS package and print it.
data(geyser, package = "MASS")
geyser

# Read a whitespace-delimited text file whose first line holds the column
# names. Only the double quote is treated as a quoting character, so an
# apostrophe inside a field is kept as ordinary text.
data <- read.table("c:/car.txt", header = TRUE, quote = "\"")
data[1:2, ]

# Storage mode of the imported object.
mode(data)

P102

# Column names and dimensions (rows, columns) of the imported data.
names(data)

dim(data)

# A column is reachable through `$` ...
data$lp100km
# ... but not as a bare name while `data` is not on the search path. The
# lookup is expected to fail (unless a global `lp100km` exists), so it is
# wrapped in try() to show the error without aborting a non-interactive run.
try(lp100km)
# attach() puts the columns of `data` on the search path so the bare name
# resolves; detach() removes them again.
attach(data)
lp100km
detach(data)
try(lp100km)

# Fixed-width import: three 7-character fields per record, named explicitly.
data.fwf <- read.fwf("c:/cities.txt", widths = c(7, 7, 7),
                     col.names = c("city", "latitude", "longitude"))
data.fwf

P103~104

# Import the table currently held on the clipboard (tab-delimited text).
# The result is stored under the same name that is read on the next line.
data_excel <- read.delim("clipboard")
data_excel[1:2, ]

# The odbc*/sql* functions below are provided by the RODBC package.
library(RODBC)
channel <- odbcConnectExcel2007("c:/car.xlsx")
sqlTables(channel)

# Two alternative ways to pull the first worksheet: fetch the whole sheet by
# name, or run an SQL query against it. The second result overwrites the first.
data_excel2 <- sqlFetch(channel, "Sheet1")
data_excel2 <- sqlQuery(channel, "select * from[Sheet1$]")
close(channel)
data_excel2[1:2, ]

# read.spss() is provided by the foreign package.
library(foreign)
data_spss <- read.spss("c:/car.sav", to.data.frame = TRUE)
data_spss[1:2, ]

P105

# readHTMLTable() is provided by the XML package.
library(XML)

# Assemble the long URL from two pieces so that no line break (or pasted
# console prompt) ends up inside the string.
baseURL <- paste0(
  "http://data.worldbank.org/indicator/NY.GDP.PCAP.CD/",
  "countries/1W?display=default"
)

# Parse the first HTML table on the page, keep its first five columns and
# give them readable names.
table <- readHTMLTable(baseURL, header = TRUE, which = 1)
table <- table[, 1:5]
names(table) <- c("country", "2011", "2012", "2013", "2014")

# Show a hand-picked selection of rows.
table[c(40, 95, 71, 11), ]

P106~107

# Read the `racv` table from an Access database through ODBC (RODBC package)
# and close the channel once the data has been fetched.
library(RODBC)
channel <- odbcConnectAccess2007("c:/car.accdb")

data_access <- sqlFetch(channel, "racv")
close(channel)
data_access[1:2, ]

# Read the same table from an SQLite file. The db* generics come from DBI and
# the driver from RSQLite, so DBI is attached directly.
library(DBI)
con <- dbConnect(RSQLite::SQLite(), "C:/car.db")
dbListTables(con)
data_SQLite <- dbGetQuery(con, "select * from racv")
# Release the database connection once the query result is in memory.
dbDisconnect(con)
data_SQLite[1:2, ]

P108

# Write a small table to a text file through a connection: one header line
# and two records, separated by newlines. The make names contain a space and
# are therefore wrapped in (escaped) double quotes.
car <- file("d:/car.txt")
cat("Make lp100km mass.kg List.price",
    "\"Alpha Romeo\" 9.5 1242 38500",
    "\"Audi A3\" 8.8 1160 38700", file = car, sep = "\n")
close(car)

# Round trip through a whitespace-delimited text file. Row names are written
# as the first field and restored with row.names = 1.
data <- USArrests[1:10, ]
write.table(data, file = "c:/data.txt", col.names = TRUE, quote = FALSE)
read.table("c:/data.txt", header = TRUE, row.names = 1)

# Same round trip through a CSV file.
data2 <- read.table("c:/data.txt", header = TRUE, row.names = 1)
write.csv(data2, file = "c:/data.csv", row.names = TRUE, quote = FALSE)
data.csv <- read.csv("c:/data.csv", header = TRUE, row.names = 1)

P111

# Import the ufc table and display the type of every column.
ufc <- read.csv("c:/ufc.csv")
str(ufc)

# Frequency counts: species alone, then species cross-classified by position.
table(ufc$species)
table(ufc$species, ufc$position)

# Centre and spread of the dbh.cm column over the whole table.
mean(ufc$dbh.cm)
median(ufc$dbh.cm)
sd(ufc$dbh.cm)

P112

# Per-species mean, median and standard deviation of dbh.cm.
tapply(ufc$dbh.cm, ufc$species, mean)
tapply(ufc$dbh.cm, ufc$species, median)
tapply(ufc$dbh.cm, ufc$species, sd)

library(lattice)
# One scatter-plot panel per species.
xyplot(height.m ~ dbh.cm | species, data = ufc)

# A single panel with species distinguished by plotting symbol; the legend is
# placed to the right of the plot.
xyplot(height.m ~ dbh.cm, groups = species,
       auto.key = list(space = "right"), data = ufc)

P113~114

# Work on the first ten rows of the built-in USArrests data.
US_data <- USArrests[1:10, ]
US_data

# Current column names.
names(US_data)

# Replace every column name at once ...
names(US_data) <- c("MURDER", "ASSAULT", "URBANPOP", "RAPE")
names(US_data)

# ... or just a single one, addressed by position.
names(US_data)[3] <- "UrbanPop"
names(US_data)

# Column labels and row labels of the data frame.
colnames(US_data)
rownames(US_data)

# Abbreviate a few of the row labels, addressed by position.
rownames(US_data)[1:3] <- c("Alb", "Als", "Arz")
rownames(US_data)[6:8] <- c("Col", "Cnt", "Del")
rownames(US_data)

P115

# First seven rows and first four columns of the built-in airquality data.
air_data <- airquality[seq_len(7), seq_len(4)]

# Cell-by-cell map of missing values, followed by their total count.
is.na(air_data)

sum(is.na(air_data))

# TRUE for every row (first call) or every Ozone value (second call) that
# contains no missing value.
complete.cases(air_data)
complete.cases(air_data$Ozone)

# Visualise the missing-value pattern with aggr() from the VIM package,
# this time on the first 31 rows of airquality (first four columns).
library(VIM)
air_data <- airquality[1:31, 1:4]
aggr(air_data, las = 1, numbers = TRUE)

P116~117

# Keep only the rows without any missing value.
data1 <- air_data[complete.cases(air_data), ]
dim(data1)

# Keep the rows in which both Ozone and Solar.R are present.
data2 <- air_data[!is.na(air_data$Ozone) & !is.na(air_data$Solar.R), ]
dim(data2)

# na.omit() drops every row that contains at least one NA.
data3 <- na.omit(air_data)
dim(data3)

# Imputation on a copy: missing Ozone values become the median of the
# observed ones, missing Solar.R values the rounded mean of the observed ones.
air_data2 <- air_data
air_data2$Ozone[is.na(air_data2$Ozone)] <-
  median(air_data$Ozone, na.rm = TRUE)
air_data2$Solar.R[is.na(air_data2$Solar.R)] <-
  round(mean(air_data$Solar.R, na.rm = TRUE))

Chapter 7

P119

# A list may mix element types; here every element also carries a tag.
goods <- list(
  name = "Cookie",
  price = 4.00,
  outdate = FALSE
)

goods

# Each element keeps its own storage type.
typeof(goods[["name"]])
typeof(goods[["price"]])
typeof(goods[["outdate"]])

# The same values stored without tags.
goods2 <- list("Cookie", 4.00, FALSE)
goods2

P120

# Start from an empty list and add a tagged element to it.
temp <- list()
temp[["name"]] <- "Cookie"
temp

# Three ways to extract a single element: by tag with `$`, by tag with `[[`,
# and by position with `[[`.
goods$name
goods[["name"]]
goods[[1]]

# Single brackets return a sub-list instead of the element itself.
h1 <- goods["name"]
h2 <- goods[1]

class(h1) # inspect the type of h1
h1
class(h2) # inspect the type of h2
h2
# Double brackets return the element, so its own class is reported.
class(goods[["name"]])
class(goods[[1]])

P121~122

# Single brackets accept several indices and return a sub-list.
goods[1:2]
# Double brackets with a vector index recurse into the first element instead
# of selecting two elements, which fails with a subscript error here. try()
# shows that error without aborting a non-interactive run.
try(goods[[1:2]])

names(goods)

goods

goods$producer <- "A Company" # add a new tag and initialise it
goods

# Elements can also be appended by tag or by position via double brackets.
goods[["material"]] <- "flour"
goods[[6]] <- 1
goods

P123~124

# Assigning NULL removes an element from the list.
goods[["material"]] <- NULL
goods

# c() concatenates lists.
c(list(A = 1, c = "C"), list(new = "NEW"))

# unlist() flattens the list into an atomic vector.
unlist(goods)

# Drop the names of the flattened vector by assigning NULL ...
ngoods <- unlist(goods)
names(ngoods)

names(ngoods) <- NULL
ngoods

# ... or obtain a name-free copy with unname().
mgoods <- unlist(goods)
names(mgoods)
unname(mgoods)

# c() with recursive = TRUE also flattens the list.
c(goods, recursive = TRUE)

P125~126

# Apply a function to every list element: lapply() always returns a list,
# sapply() simplifies the result to a vector when it can.
temp <- list(1:10, -2:-9)
lapply(temp, mean)

sapply(temp, mean)
sapply(temp, mean, simplify = FALSE, USE.NAMES = FALSE)

# A list whose elements are lists themselves.
a1 <- list(name = "Cookie", price = 4.0, outdate = FALSE)
a2 <- list(name = "Milk", price = 2.0, outdate = TRUE)
warehouse <- list(a1, a2)
warehouse

# Build a data frame from three vectors of equal length; the variable names
# become the column names.
male <- c(124, 88, 200)
female <- c(108, 56, 221)
degree <- c("low", "middle", "high")
myopia <- data.frame(degree, male, female)
myopia

P127

# Data frame built from anonymous vectors: the column names are generated
# from the argument expressions.
myopia2 <- data.frame(c("low", "middle", "high"),
                      c(124, 88, 200), c(108, 56, 221))
myopia2

# Columns of different length: the shorter vector is recycled to the length
# of the longer one (4 is a multiple of 2).
weight <- c(50, 70.6, 80, 59.5)
age <- c(20, 30)
wag <- data.frame(weight, age)
wag

# Compact summary of the structure of the data frame.
str(myopia)

# A data frame can also come straight from a CSV file.
rat <- read.csv("F:/R/data/rat_fibres.csv")
rat

# One column, extracted by name (two notations) and by position.
myopia$degree
myopia[["degree"]]
myopia[[1]]

# Matrix-style indexing: first row, second column, single cell.
myopia[1, ]
myopia[, 2]
myopia[3, 2]

P129~130

# Selecting several columns keeps a data frame; selecting a single column
# collapses the result to a plain vector ...
(sub <- myopia[2:3, 1:2])
class(sub)
(sub1 <- myopia[2:3, 2])
class(sub1)

# ... unless drop = FALSE is given.
(sub2 <- myopia[2:3, 2, drop = FALSE])
class(sub2)

# List-style indexing selects whole columns, by position or by name.
myopia[1:2]
myopia[1]
myopia[c("male", "female")]

# Row filter on a column of the data frame, then the same filter written with
# the global vector `male` instead of the column.
myopia[myopia$male > 100, ]
myopia[male > 100, ]

male

# Once the global vector is changed, the second form no longer follows the
# data frame, while the column-based form is unaffected.
male <- c(1, 2, 3)
myopia[male > 100, ]

myopia[myopia$male > 100, ]

P131~134

# Two-row data frame; the character column is kept as character.
names <- c("Jack", "Steven")
ages <- c(15, 16)
students <- data.frame(names, ages, stringsAsFactors = FALSE)
students

# rbind() returns a copy with one more row; the result is only printed.
rbind(students, list("Sariah", 15))

# cbind() returns a copy with one more column; the result is only printed.
cbind(students, gender = c("M", "M"))

# `students` itself is unchanged by the two calls above.
students

# Assigning to a new column name adds the column in place.
students$gender <- c("M", "M")
students

# Assigning NULL removes the column again.
students
students$gender <- NULL
students

# Joining data frames with merge().
# NOTE(review): `students2`, `students3` and `students4` are printed and merged
# below but are never created in the visible part of this file; they must be
# defined before this section can run.
students
students2
# With no `by` argument, merge() joins on the columns the two inputs share.
merge(students,students2)

students
students3
# Join on differently named key columns: `names` in students, `na` in students3.
merge(students,students3,by.x="names",by.y="na")

# all.x keeps every row of the first input, all.y every row of the second.
merge(students,students3,by.y="na",by.x="names",all.x=T)
merge(students,students3,by.y="na",by.x="names",all.y=T)

# all keeps every row of both inputs.
merge(students,students3,by.y="na",by.x="names",all=T)

students4
students
merge(students,students4,by.x="names",by.y="na")

students
tt<-rbind(students,list("Kevin",30))
# NOTE(review): four grades are assigned, so `tt` must have four rows here.
# If `students` still has the two rows it is created with in this file, `tt`
# has three rows and this assignment fails - confirm the intended contents.
tt$grade <- c(88,74,90,82)
tt

# Column-wise mean of columns 2 and 3; drop=F keeps the selection a data frame.
apply(tt[,2:3,drop=F],2,mean)

P135~136

# Sort every column independently: lapply() returns a list, sapply() tries to
# simplify the result.
(s1 <- lapply(students, sort))
(s2 <- sapply(students, sort))

# Turn both results back into data frames.
as.data.frame(s1)
as.data.frame(s2)

# Factor built from a character vector.
ssample <- c("BJ", "SH", "CQ", "SH")
(sf <- factor(ssample))

# Factor built from a numeric vector.
nsample <- c(2, 3, 3, 5)
(nf <- factor(nsample))

# Internal representation of each factor: integer codes plus a levels
# attribute.
str(nf)
unclass(nf)

str(sf)
unclass(sf)

以上是关于R语言实战:机器学习与数据分析源代码5的主要内容,如果未能解决你的问题,请参考以下文章

机器学习推荐书籍

1024程序员节|代码改变世界,科技创造未来 虚竹哥联合机械工业出版社好书相赠

1024程序员节|代码改变世界,科技创造未来 虚竹哥联合机械工业出版社好书相赠

Go学习资料

《自然语言处理实战入门》 ---- 笔试面试题:机器学习基础(41-60)

R语言机器学习篇——随机森林