在单个 ROC 图上绘制线性判别分析、分类树和朴素贝叶斯曲线
Posted
技术标签:
【中文标题】在单个 ROC 图上绘制线性判别分析、分类树和朴素贝叶斯曲线【英文标题】:Plotting a linear discriminant analysis, classification tree and Naive Bayes Curve on a single ROC plot 【发布时间】:2015-12-18 11:08:32 【问题描述】:数据显示在页面的最底部,称为 LDA.scores'。这是一个分类任务,我在数据集上执行了三种监督机器学习分类技术。提供所有编码以显示这些 ROC 曲线是如何产生的。我很抱歉问了一个加载的问题,但我已经尝试使用不同的代码组合解决这些问题近两周了,所以如果有人可以帮助我,那么谢谢你。主要问题是朴素贝叶斯曲线显示满分1,这显然是错误的,我无法解决如何将线性判别分析曲线合并到单个ROC图中与提供的编码进行比较。
-
在“MASS”包中执行的线性判别分析 (LDA)
“kLAR”包中的朴素贝叶斯 (NB)
“rpart”包中的分类树 (CT)
目标
显示 ROC 曲线的单个 ROC 图,用于比较每种分类技术,并附有图例。 计算每种分类技术的曲线下面积问题
-
每种分类技术都在不同的 R 包中执行,我无法将这些 ROC 曲线合并到一个图上。所有错误消息都显示在页面底部
LDA 和 NB 的 ROC 曲线看起来都是虚假的
我无法应用图例并遇到错误消息
我提供了所有三种技术的编码,因此任何人都可以逐步评估我的逻辑
线性判别分析
library(MASS)
predictors<-as.matrix(LDA.scores[,2:13])
response<-as.factor(LDA.scores[,1])
#Perform LDA
Family.lda<-lda(response~predictors, CV=TRUE)
predict.Family <-predict(Family.lda)
tab <- table(response, Family.lda$class)
构造混淆矩阵来预测类别
conCV1 <- rbind(tab[1, ]/sum(tab[1, ]), tab[2, ]/sum(tab[2, ]))
dimnames(conCV1) <- list(Actual = c("No", "Yes"), "Predicted (cv)"= c("No", "Yes"))
print(round(conCV1, 3))
绘制判别分数
library(lattice)
windows(width=10, height=7)
densityplot(~predict.Family$x, groups=LDA.scores$Family)
计算混淆矩阵的函数
confusion <- function(actual, predicted, names = NULL, printit = TRUE, prior = NULL)
if (is.null(names))
names <- levels(actual)
tab <- table(actual, predicted)
acctab <- t(apply(tab, 1, function(x) x/sum(x)))
dimnames(acctab) <- list(Actual = names, "Predicted (cv)" = names)
if (is.null(prior))
relnum <- table(actual)
prior <- relnum/sum(relnum)
acc <- sum(tab[row(tab) == col(tab)])/sum(tab)
else
acc <- sum(prior * diag(acctab))
names(prior) <- names
if (printit)
print(round(c("Overall accuracy" = acc, "Prior frequency" = prior),
+ 4))
if (printit)
cat("\nConfusion matrix", "\n")
print(round(acctab, 4))
invisible(acctab)
更改比例以创建训练和测试集 (70:30)
prior <- c(0.7, 0.3)
lda.70.30 <- lda(response~predictors, CV=TRUE, prior=prior)
confusion(response, lda.70.30$class, prior = c(0.7, 0.3))
创建 ROC 曲线的函数
truepos <- numeric(19)
falsepos <- numeric(19)
p1 <- (1:19)/20
for (i in 1:19)
p <- p1[i]
Family.ROC <- lda(response~predictors, CV = TRUE, prior = c(p, 1 - p))
confmat <- confusion(LDA.scores$Family, Family.ROC$class, printit = FALSE)
falsepos[i] <- confmat[1, 2]
truepos[i] <- confmat[2, 2]
绘制 ROC 曲线
windows(width=10, height=7)
LDA.ROC<-plot(truepos~falsepos, type = "l", lwd=2,
xlab = "False positive rate (Specificity)",
ylab = "True positive rate (Sensitivity)" col ="green")
abline(a=0,b=1, col="red")
图1
分类树
生成测试和训练集 70:30
index<-1:nrow(LDA.scores)
trainindex.LDA=sample(index, trunc(length(index)*0.70), replace=FALSE)
LDA.70.trainset<-LDA.scores[trainindex,]
LDA.30.testset<-LDA.scores[-trainindex,]
使用 70 % 的训练集种植树木
#Grow Tree the tree with the 70 % training set
library(rpart)
tree.split3<-rpart(Family~., data=LDA.70.trainset3, method="class")
summary(tree.split3)
print(tree.split3)
plot(tree.split3)
text(tree.split3,use.n=T,digits=0)
printcp(tree.split3)
使用测试集和训练集进行分类树预测 (70:30)
res3=predict(tree.split3,newdata=LDA.30.testset3)
res4=as.data.frame(res3)
为分类分组因子的二项分布创建二元系统(0 或 1)
res4$actual2 = NA
res4$actual2[res4$actual=="G8"]= 1
res4$actual2[res4$actual=="V4"]= 0
绘制 ROC 曲线
roc_pred <- prediction(re4$Predicted.prob, res4$actual2)
perf <- performance(roc_pred, "tpr", "fpr")
plot(perf, col="blue", lwd=2)
abline(0,1,col="grey")
图2
朴素贝叶斯
library(klaR)
library(caret)
生成测试和训练集 70:30
trainIndex <- createDataPartition(LDA.scores$Family, p=0.70, list=FALSE)
sig.train=LDA.scores[trainIndex,]
sig.test=LDA.scores[-trainIndex,]
建立NB模型并对测试集进行预测
sig.train$Family<-as.factor(sig.train$Family)
sig.test$Family<-as.factor(sig.test$Family)
nbmodel<-NaiveBayes(Family~., data=sig.train)
prediction<-predict(nbmodel, sig.test[2:13])
NB<-as.data.frame(prediction)
colnames(NB)<-c("Family", "Actual", "Predicted")
为分类因子的二项分布创建二元系统(0 或 1)
NB$actual2 = NA
NB$actual2[NB$Family=="V4"]=1
NB$actual2[NB$Family=="G8"]=0
NB2<-as.data.frame(NB)
绘制 ROC 曲线 - 这条曲线看起来很可疑
library(ROCR)
windows(width=10, height=7)
roc_pred.NB<- prediction(NB2$Predicted, NB2$actual2)
perf.NB <- performance(roc_pred.NB, "tpr", "fpr")
plot(perf.NB, col="orange", lwd=2)
abline(0,1,col="grey")
图 3:这条 ROC 曲线明显错误
将所有 ROC 曲线绘制到一个图上
windows(width=10, height=7)
plot(fit.perf, col="blue", lwd=2); #CT
plot(LDA.ROC, col="green", lwd=2, add=T); #LDA
plot(perf.NB,lwd=2,col="orange", lwd=2, add=T);NB
abline(0,1,col="red", lwd=2)
错误信息
Warning in min(x) : no non-missing arguments to min; returning Inf
Warning in max(x) : no non-missing arguments to max; returning -Inf
Warning in min(x) : no non-missing arguments to min; returning Inf
Warning in max(x) : no non-missing arguments to max; returning -Inf
Warning in plot.window(...) : "add" is not a graphical parameter
Error in plot.window(...) : need finite 'xlim' values
plot(fit.NB,lwd=2,col="orange", lwd=2, add=T); #NB
Error in plot(fit.NB, lwd = 2, col = "orange", lwd = 2, add = T) :
error in evaluating the argument 'x' in selecting a method for function
Warning in plot.window(...) : "add" is not a graphical parameter
Error in plot.window(...) : need finite 'xlim' values
曲线下面积
auc1<-performance(fit.pred,"auc")#CT
auc2<-performance(fit.NB, "auc")#NB
我不确定如何计算提供的 LDA 代码的曲线下面积
传奇的制作
此代码产生错误消息
legend(c('fit.pred',fit.NB','LDA.ROC'), col=c('blue',orange','green'),lwd=3)
名为 LDA.scores 的数据
Family Swimming Not.Swimming Running Not.Running
1 v4 -0.48055680 -0.086292700 -0.157157188 -0.438809944
2 v4 0.12600625 -0.074481895 0.057316151 -0.539013927
3 v4 0.06823834 -0.056765686 0.064711783 -0.539013927
4 v4 0.67480139 -0.050860283 0.153459372 -0.539013927
5 v4 0.64591744 -0.050860283 0.072107416 -0.472211271
6 v4 0.21265812 -0.068576492 0.057316151 -0.071395338
7 v4 -0.01841352 -0.068576492 -0.053618335 -0.071395338
8 v4 0.12600625 0.055436970 0.012942357 0.296019267
9 v4 -0.22060120 0.114491000 -0.038827070 0.563229889
10 v4 0.27042603 -0.021333268 0.049920519 -0.037994010
11 v4 0.03935439 -0.044954880 0.012942357 0.195815284
12 v4 -0.45167284 0.008193747 -0.075805232 -0.171599321
13 v4 -0.04729748 -0.056765686 0.035129254 -0.305204632
14 v4 -0.10506539 0.008193747 -0.046222702 0.062209973
15 v4 0.09712230 0.037720761 0.109085578 -0.104796666
16 v4 -0.07618143 0.014099150 -0.038827070 0.095611301
17 v4 0.29930998 0.108585597 0.057316151 0.028808645
18 v4 0.01047043 -0.074481895 0.020337989 -0.071395338
19 v4 -0.24948516 0.002288344 0.035129254 0.329420595
20 v4 -0.04729748 0.049531567 0.057316151 0.296019267
21 v4 -0.01841352 0.043626164 0.005546724 -0.171599321
22 v4 -0.19171725 0.049531567 -0.016640173 -0.071395338
23 v4 -0.48055680 0.020004552 -0.142365923 0.596631217
24 v4 0.01047043 0.008193747 0.220020063 0.062209973
25 v4 -0.42278889 0.025909955 -0.149761556 0.028808645
26 v4 -0.45167284 0.031815358 -0.134970291 -0.138197994
27 v4 -0.30725307 0.049531567 0.042524886 0.095611301
28 v4 0.24154207 -0.039049477 0.072107416 -0.104796666
29 v4 1.45466817 -0.003617059 0.064711783 0.296019267
30 v4 -0.01841352 0.002288344 0.020337989 0.028808645
31 G8 0.38596185 0.084963985 0.049920519 -0.037994010
32 G8 0.15489021 -0.080387298 0.020337989 -0.338605960
33 G8 -0.04729748 0.067247776 0.138668107 0.129012629
34 G8 0.27042603 0.031815358 0.049920519 0.195815284
35 G8 -0.07618143 0.037720761 0.020337989 -0.037994010
36 G8 -0.10506539 0.025909955 -0.083200864 0.396223251
37 G8 -0.01841352 0.126301805 -0.024035805 0.362821923
38 G8 0.01047043 0.031815358 -0.016640173 -0.138197994
39 G8 0.06823834 0.037720761 -0.038827070 0.262617940
40 G8 -0.16283329 -0.050860283 -0.038827070 -0.405408616
41 G8 -0.01841352 -0.039049477 0.005546724 -0.205000649
42 G8 -0.39390493 -0.003617059 -0.090596497 0.129012629
43 G8 -0.04729748 0.008193747 -0.009244540 0.195815284
44 G8 0.01047043 -0.039049477 -0.016640173 -0.205000649
45 G8 0.01047043 -0.003617059 -0.075805232 -0.004592683
46 G8 0.06823834 0.008193747 -0.090596497 -0.205000649
47 G8 -0.04729748 0.014099150 0.012942357 -0.071395338
48 G8 -0.22060120 -0.015427865 -0.075805232 -0.171599321
49 G8 -0.16283329 0.020004552 -0.061013967 -0.104796666
50 G8 -0.07618143 0.031815358 -0.038827070 -0.138197994
51 G8 -0.22060120 0.020004552 -0.112783394 -0.104796666
52 G8 -0.19171725 -0.033144074 -0.068409599 -0.071395338
53 G8 -0.16283329 -0.039049477 -0.090596497 -0.104796666
54 G8 -0.22060120 -0.009522462 -0.053618335 -0.037994010
55 G8 -0.13394934 -0.003617059 -0.075805232 -0.004592683
56 G8 -0.27836911 -0.044954880 -0.090596497 -0.238401977
57 G8 -0.04729748 -0.050860283 0.064711783 0.028808645
58 G8 0.01047043 -0.044954880 0.012942357 -0.305204632
59 G8 0.12600625 -0.068576492 0.042524886 -0.305204632
60 G8 0.06823834 -0.033144074 -0.061013967 -0.271803305
61 G8 0.06823834 -0.027238671 -0.061013967 -0.037994010
62 G8 0.32819394 -0.068576492 0.064711783 -0.372007288
63 G8 0.32819394 0.014099150 0.175646269 0.095611301
64 G8 -0.27836911 0.002288344 -0.068409599 0.195815284
65 G8 0.18377416 0.025909955 0.027733621 0.162413956
66 G8 0.55926557 -0.009522462 0.042524886 0.229216612
67 G8 -0.19171725 -0.009522462 -0.038827070 0.229216612
68 G8 -0.19171725 0.025909955 -0.009244540 0.396223251
69 G8 0.01047043 0.155828820 0.027733621 0.630032545
70 G8 -0.19171725 0.002288344 -0.031431438 0.463025906
71 G8 -0.01841352 -0.044954880 -0.046222702 0.496427234
72 G8 -0.07618143 -0.015427865 -0.031431438 0.062209973
73 G8 -0.13394934 0.008193747 -0.068409599 -0.071395338
74 G8 -0.39390493 0.037720761 -0.120179026 0.229216612
75 G8 -0.04729748 0.008193747 0.035129254 -0.071395338
76 G8 -0.27836911 -0.015427865 -0.061013967 -0.071395338
77 G8 0.70368535 -0.056765686 0.397515240 -0.205000649
78 G8 0.29930998 0.079058582 0.138668107 0.229216612
79 G8 -0.13394934 -0.056765686 0.020337989 -0.305204632
80 G8 0.21265812 0.025909955 0.035129254 0.396223251
Family Fighting Not.Fighting Resting Not.Resting
1 v4 -0.67708172 -0.097624192 0.01081204879 -0.770462870
2 v4 -0.58224128 -0.160103675 -0.03398160805 0.773856776
3 v4 -0.11436177 -0.092996082 0.05710879700 -2.593072768
4 v4 -0.34830152 -0.234153433 -0.04063432116 -2.837675606
5 v4 -0.84568695 -0.136963126 -0.13084281035 -1.680828329
6 v4 -0.32933343 -0.157789620 -0.02997847693 -0.947623773
7 v4 0.35984044 -0.157789620 0.12732080268 -0.947623773
8 v4 -0.32511830 -0.023574435 -0.10281705810 -2.607366431
9 v4 1.51478626 0.001880170 0.08155320398 -0.637055341
10 v4 0.11114773 -0.224897213 -0.17932134171 -1.818396455
11 v4 0.27975296 -0.109194467 -0.14338902206 2.170944974
12 v4 -0.89626852 -0.069855533 -0.02058415581 -0.658126752
13 v4 0.12379312 -0.123078796 -0.11528274705 -0.808243774
14 v4 0.66965255 -0.111508522 -0.11764091337 2.377766908
15 v4 1.56536783 -0.143905291 0.04389156236 2.111220276
16 v4 0.56427428 -0.099938247 0.01399844913 -0.322326312
17 v4 -0.71291033 -0.118450687 -0.05755560242 2.218858946
18 v4 -0.75927677 1.519900201 0.04711630687 3.920878638
19 v4 -0.75295407 0.177748344 0.01584280360 -0.304945754
20 v4 -1.00164679 0.108326696 0.09348590900 1.038591535
21 v4 -1.03958296 0.652129604 0.09677967302 1.752268128
22 v4 0.82139726 0.638245274 0.02053612974 0.907465624
23 v4 -1.07541157 -0.072169588 -0.03608286844 1.137774798
24 v4 -1.03115270 0.087500202 0.07805238146 -3.663486997
25 v4 -0.98900139 -0.180930170 -0.00009686695 2.350924346
26 v4 -1.06908888 -0.146219346 -0.02285413055 0.067293462
27 v4 -1.20186549 -0.049029039 -0.00424187149 -1.898454393
28 v4 0.58324237 -0.125392851 0.01446241356 -2.497647463
29 v4 -0.97003330 -0.134649071 0.03187450017 -4.471716512
30 v4 0.22917139 -0.060599313 0.11323315542 -1.465081244
31 G8 0.41042201 -0.086053918 -0.01171898422 -0.232806371
32 G8 -1.11545531 -0.197128554 -0.06499053655 -3.043893581
33 G8 -0.19023412 -0.083739863 -0.07758659568 -2.323908986
34 G8 0.25446217 -0.092996082 -0.07399758157 1.437404886
35 G8 -0.05324237 0.844196163 -0.11503350996 1.079056696
36 G8 0.09007207 0.055103433 0.02167111711 1.110865131
37 G8 1.21129685 1.971140911 0.01904454162 1.404724068
38 G8 0.62539368 -0.111508522 0.05768779393 -1.706664294
39 G8 1.32932051 -0.224897213 0.05555202379 0.736746935
40 G8 0.40199175 -0.187872334 -0.01031175326 -0.005516985
41 G8 0.44625062 -0.160103675 -0.00458313459 1.727170333
42 G8 0.60221046 -0.194814499 0.17430774591 1.685228831
43 G8 0.33665722 -0.053657149 0.00481502094 1.836016918
44 G8 -0.63493041 -0.206384774 -0.00928412956 0.466173920
45 G8 -0.28296700 0.108326696 0.09047589183 1.697173771
46 G8 -0.32722587 -0.164731785 0.08917985896 1.057314221
47 G8 -0.11646933 0.187004564 -0.05671203072 0.933704227
48 G8 -0.10171637 0.025020719 -0.05333390954 0.482480775
49 G8 0.13643851 0.057417488 0.08541446168 0.680713089
50 G8 -0.57802615 0.434608441 0.10140397965 0.090780703
51 G8 0.05002833 0.057417488 -0.02509342995 0.680713089
52 G8 -0.16072820 0.073615872 -0.03698779080 -0.982921741
53 G8 -0.29139726 -0.035144709 0.04609635201 -2.281900378
54 G8 0.13222338 -0.051343094 0.06524159499 0.972089090
55 G8 -0.41152848 -0.134649071 0.08459773090 0.027767791
56 G8 0.68229794 -0.185558279 -0.03239032508 -0.162881500
57 G8 -0.24292325 0.013450444 -0.03208740616 -0.530221948
58 G8 -0.11646933 -0.134649071 0.06264952925 -0.385741863
59 G8 -0.21341734 -0.215640993 0.05241547086 -0.972251823
60 G8 -0.24292325 -0.185558279 -0.03437271856 0.002267358
61 G8 -0.24292325 -0.005061995 -0.03437271856 -1.134447998
62 G8 0.09007207 -0.238781543 -0.06747523863 0.626424009
63 G8 -0.34197883 -0.099938247 -0.01270059491 -0.722750217
64 G8 -0.30825778 -0.167045840 0.10014629095 -0.382722075
65 G8 -0.08696342 -0.208698829 -0.02872845706 -0.356550578
66 G8 -0.81196590 0.048161268 -0.00950652573 -1.851614124
67 G8 0.49683219 0.048161268 0.04867308008 -1.851614124
68 G8 -0.13754498 -0.037458764 0.02486518629 1.731465143
69 G8 -0.48318570 0.161549960 -0.05951115497 0.254319006
70 G8 0.39988418 0.031962884 -0.02353665674 2.043778341
71 G8 0.90148474 -0.102252302 -0.01967923345 -0.289913920
72 G8 0.28396809 -0.123078796 -0.10148651548 1.386940871
73 G8 1.05322945 -0.139277181 -0.00480936518 0.054207713
74 G8 1.24923303 -0.208698829 -0.00098261723 0.594212936
75 G8 0.47154141 -0.118450687 -0.13970798195 1.551821303
76 G8 1.27873894 -0.072169588 -0.00286148145 3.100704184
77 G8 0.05002833 -0.044400929 -0.05492902692 0.327263666
78 G8 1.54218461 -0.030516599 0.10732815358 -1.055195336
79 G8 0.74763247 -0.132335016 0.11660744219 -1.134447998
80 G8 0.11747042 -0.037458764 -0.02016620439 1.730726972
Family Fighting Hunting Not.Hunting Grooming
1 v4 -0.67708172 0.114961983 0.2644238 0.105443109
2 v4 -0.58224128 0.556326739 -1.9467488 -0.249016133
3 v4 -0.11436177 0.326951992 2.1597867 -0.563247851
4 v4 -0.34830152 0.795734469 2.1698228 -0.611969290
5 v4 -0.84568695 0.770046573 0.2554708 -0.230476117
6 v4 -0.32933343 0.736574466 0.1225477 -0.270401826
7 v4 0.35984044 0.215724268 0.1225477 1.057451389
8 v4 -0.32511830 -0.200731013 0.2593696 -0.260830004
9 v4 1.51478626 -2.160535836 0.8687508 1.030589923
10 v4 0.11114773 0.660462182 1.7955299 -0.809959417
11 v4 0.27975296 -0.293709087 -0.8377330 -0.292132450
12 v4 -0.89626852 0.565754284 1.3339454 -0.573854465
13 v4 0.12379312 -0.499644710 -0.5100101 -0.372285683
14 v4 0.66965255 0.080624964 -2.6852985 -0.470590886
15 v4 1.56536783 -4.076143639 -0.8432925 1.657328707
16 v4 0.56427428 -0.127040484 -0.8662526 -0.161145079
17 v4 -0.71291033 0.661240603 -2.1990933 -0.381900622
18 v4 -0.75927677 0.294950237 -3.5062302 -0.121909231
19 v4 -0.75295407 0.548369546 -1.3326746 -0.338568723
20 v4 -1.00164679 0.137622686 -1.7580862 -0.312742050
21 v4 -1.03958296 0.019302681 -2.2730277 0.708985315
22 v4 0.82139726 -0.043057497 -3.1829838 -0.378408200
23 v4 -1.07541157 0.351515502 -0.3762928 -0.304161903
24 v4 -1.03115270 -0.007163636 1.3605877 -0.431053223
25 v4 -0.98900139 0.253780410 -1.1388134 -0.554883286
26 v4 -1.06908888 0.700680605 0.6629041 0.113074697
27 v4 -1.20186549 0.340704098 0.9979915 -0.693545361
28 v4 0.58324237 -1.727041782 1.5589254 0.180163686
29 v4 -0.97003330 0.209410408 1.7613786 -0.258156792
30 v4 0.22917139 -2.441026901 1.3929340 0.276959818
31 G8 0.41042201 0.383257784 -0.5374467 0.165978418
32 G8 -1.11545531 -1.098682982 2.9654839 0.148947473
33 G8 -0.19023412 0.873144122 2.5120581 -0.846910101
34 G8 0.25446217 0.968889915 -0.4130434 -0.938661624
35 G8 -0.05324237 0.936455703 -2.5993065 -0.949914982
36 G8 0.09007207 -0.467815937 -1.0766479 1.474170593
37 G8 1.21129685 -1.239490708 -4.1335895 1.357023559
38 G8 0.62539368 0.177235670 2.4989896 1.393241265
39 G8 1.32932051 -4.736158229 -0.5718146 2.467225606
40 G8 0.40199175 0.342693397 0.5675981 0.648320657
41 G8 0.44625062 0.488950070 -1.6998195 0.709588943
42 G8 0.60221046 -0.415575233 -1.4313741 0.728473890
43 G8 0.33665722 0.353937257 -2.2985148 0.379706002
44 G8 -0.63493041 0.262083568 0.2245685 -0.367629121
45 G8 -0.28296700 0.574316915 -1.0020637 0.280710938
46 G8 -0.32722587 0.323665326 -1.1559252 0.119455912
47 G8 -0.11646933 0.786566398 0.1746772 -0.858206576
48 G8 -0.10171637 0.718065343 -0.2673407 -0.552555005
49 G8 0.13643851 0.584868846 -0.1203383 -0.335378116
50 G8 -0.57802615 -0.053955393 0.6359729 0.057885811
51 G8 0.05002833 0.738563765 -0.1203383 -0.188308359
52 G8 -0.16072820 0.778263240 2.1906890 -0.545138998
53 G8 -0.29139726 0.751018502 1.6039070 0.198100074
54 G8 0.13222338 0.297804447 -0.5217068 -0.514310832
55 G8 -0.41152848 0.102161281 0.3866610 -0.036323341
56 G8 0.68229794 0.371667959 1.6179863 -0.176365139
57 G8 -0.24292325 0.631574111 1.4206594 -0.269668849
58 G8 -0.11646933 -0.004568899 1.6827511 0.003731717
59 G8 -0.21341734 0.214080935 1.0590019 0.036586351
60 G8 -0.24292325 0.796339908 1.2727184 -0.615289246
61 G8 -0.24292325 0.796339908 2.6745838 -0.615289246
62 G8 0.09007207 -0.396720145 0.2644238 0.290800156
63 G8 -0.34197883 0.441985331 1.4545220 -0.520648930
64 G8 -0.30825778 -2.489721464 1.3587105 1.711267220
65 G8 -0.08696342 0.407907785 0.8136610 -0.273333736
66 G8 -0.81196590 0.554423932 1.3666527 -0.594420949
67 G8 0.49683219 0.697912886 1.3666527 -0.446661330
68 G8 -0.13754498 0.491198842 -1.3307974 -0.333825929
69 G8 -0.48318570 0.604848320 -0.1305910 -0.601492025
70 G8 0.39988418 0.773938679 -0.5078441 -0.712559657
71 G8 0.90148474 0.734412186 -0.1166561 -0.548803885
72 G8 0.28396809 1.145505011 -1.3062489 -0.921846260
73 G8 1.05322945 0.616784110 0.9039851 -0.165629176
74 G8 1.24923303 0.329287256 0.3647117 0.111867440
75 G8 0.47154141 -0.016764163 -1.1586689 -0.476713403
76 G8 1.27873894 0.007799347 -3.0386529 0.215087903
77 G8 0.05002833 0.209496900 -1.5080522 0.324560232
78 G8 1.54218461 -5.031179821 1.6811626 2.366893936
79 G8 0.74763247 -0.325105405 1.6851337 1.351590903
80 G8 0.11747042 -0.756350687 -1.3315194 0.375911766
Family Not.Grooming
1 v4 0.019502286
2 v4 -0.290451956
3 v4 0.359948884
4 v4 0.557840914
5 v4 0.117453376
6 v4 0.126645924
7 v4 0.126645924
8 v4 0.196486873
9 v4 0.152780228
10 v4 0.354469789
11 v4 -0.261430968
12 v4 0.176448238
13 v4 -0.007374708
14 v4 -0.557848621
15 v4 -0.213674557
16 v4 -0.005819262
17 v4 -0.470070992
18 v4 -0.786078864
19 v4 0.006063789
20 v4 -0.271842650
21 v4 -0.349418792
22 v4 -0.338096262
23 v4 -0.165119403
24 v4 0.346566439
25 v4 -0.344191931
26 v4 0.074321265
27 v4 0.179825379
28 v4 0.278407054
29 v4 0.593125727
30 v4 0.199177375
31 G8 -0.058900625
32 G8 0.633875622
33 G8 0.428150308
34 G8 -0.206023441
35 G8 -0.436958199
36 G8 -0.291839246
37 G8 -0.907641911
38 G8 0.448567295
39 G8 -0.127186127
40 G8 0.024715134
41 G8 -0.416345030
42 G8 -0.330697382
43 G8 -0.469720666
44 G8 -0.047494017
45 G8 -0.301732446
46 G8 -0.138901021
47 G8 0.098101379
48 G8 -0.002063769
49 G8 -0.028324190
50 G8 0.071630763
51 G8 -0.028324190
52 G8 0.295110588
53 G8 0.347112947
54 G8 -0.083577573
55 G8 -0.036886152
56 G8 0.189045953
57 G8 0.467596992
58 G8 0.303378276
59 G8 0.218879697
60 G8 0.092005711
61 G8 0.270111340
62 G8 -0.012909856
63 G8 0.262292068
64 G8 0.107125772
65 G8 0.123422927
66 G8 0.299426602
67 G8 0.299426602
68 G8 -0.326871824
69 G8 -0.022088391
70 G8 -0.428508341
71 G8 -0.014675497
72 G8 -0.114462294
73 G8 0.087227267
74 G8 -0.031519161
75 G8 -0.159318008
76 G8 -0.397875854
77 G8 0.101520559
78 G8 0.244481505
79 G8 0.529968994
80 G8 -0.326619590
【问题讨论】:
【参考方案1】:首先,将数据子集化为训练和测试子集的最重要问题之一是在子集化之前,数据必须随机化,否则您将在训练和测试数据子集中对类别进行不均等划分。
以下代码的一些注释。为简化模型拟合方法,我使用了 caret
包。
library(pROC)
library(MASS)
library(caret)
set.seed(1234)
mydat <- read.table("~/Desktop/family.txt", header = TRUE, stringsAsFactors= FALSE)
mydat$Family <- factor(mydat$Family, levels = c("v4", "G8"))
# Randomly permute the data before subsetting
mydat_idx <- sample(1:nrow(mydat), replace = FALSE)
mydat <- mydat[mydat_idx, ]
mydat_resampled_idx <- createDataPartition(mydat_idx, times = 1, p = 0.7, list = FALSE)
mydat_resampled <- mydat[mydat_resampled_idx, ] # Training portion of the data
线性判别分析
lda_mod <-train(x = mydat_resampled[, 2:9], y = as.factor(mydat_resampled[, 1]),
method = "lda", trControl = trainControl(method = "cv", classProbs = TRUE))
# Generate model predictions
lda_pred <- predict(lda_mod, newdata = mydat[ , 2:9], type = "prob")
# Store the predictions with the data set
mydat['lda_pred'] <- lda_pred["G8"] # Here we only want the probability associated
# with the class (Y = 1), or in this case, G8
朴素贝叶斯
nb_tune <- data.frame(usekernel =TRUE, fL = 0)
nb_mod <- train(x = mydat_resampled[, 2:9], y = as.factor(mydat_resampled[, 1]),
method = "nb", trControl = trainControl(method = "cv", classProbs = TRUE), tuneGrid = nb_tune)
# Model predictions
nb_pred <- predict(nb_mod, newdata = mydat[ , 2:9], type = "prob")
mydat['nb_pred'] <- nb_pred["G8"]
分类树
ct_mod <- train(x = mydat_resampled[, 2:9], y = as.factor(mydat_resampled[, 1]),
method = "rpart", trControl = trainControl(method = "cv", classProbs = TRUE))
ct_pred <- predict(ct_mod, newdata = mydat[ , 2:9], type = "prob")
mydat['ct_pred'] <- ct_pred["G8"]
数据的训练和测试部分的 ROC 曲线
编辑:更改了 AUC 曲线的计算和绘图以使用 pROC 包
mydat$binary_response <- as.numeric(mydat$Family) - 1 # convert factor to 0, 1 with G8 = 1
lda_train_roc <- roc(binary_response ~ lda_pred, data = mydat[mydat_resampled_idx, ], ci = TRUE)
nb_train_roc <- roc(binary_response ~ nb_pred, data = mydat[mydat_resampled_idx, ], ci = TRUE)
ct_train_roc <- roc(binary_response ~ ct_pred, data = mydat[mydat_resampled_idx, ], ci = TRUE)
lda_test_roc <- roc(binary_response ~ lda_pred, data = mydat[-mydat_resampled_idx, ], ci = TRUE)
nb_test_roc <- roc(binary_response ~ nb_pred, data = mydat[-mydat_resampled_idx, ], ci = TRUE)
ct_test_roc <- roc(binary_response ~ ct_pred, data = mydat[-mydat_resampled_idx, ], ci = TRUE)
par(mfrow = c(2, 1))
plot(lda_train_roc, las = 1, main = "Training data")
plot(nb_train_roc, add = TRUE, col = "red")
plot(ct_train_roc, add = TRUE, col = "blue")
legend(0.4, 0.4, legend = c("lda", "nb", "ct"), lty = c(1,1,1), col = c("black", "red", "blue"))
plot(lda_test_roc, las = 1, main = "Testing data")
plot(nb_test_roc, add = TRUE, col = "red")
plot(ct_test_roc, add = TRUE, col = "blue")
legend(0.4, 0.4, legend = c("lda", "nb", "ct"), lty = c(1,1,1), col = c("black", "red", "blue"))
par(mfrow = c(1, 1))
# AUC with 95% CL
lda_train_roc$ci[c(2, 1, 3)] # 0.8353741 0.7235472 0.9472011
nb_train_roc$ci[c(2, 1, 3)] # 0.9714286 [0.9303684, 1.0000000]
ct_train_roc$ci[c(2, 1, 3)] # 0.7619048 [0.6524637, 0.8713458]
lda_test_roc$ci[c(2, 1, 3)] # 0.6148148 [0.3555396, 0.8740900]
nb_test_roc$ci[c(2, 1, 3)] # 0.7407407 [0.5345984, 0.9468831]
ct_test_roc$ci[c(2, 1, 3)] # 0.6000000 [0.4139795, 0.7860205]
【讨论】:
嗨,吉姆,太好了,非常感谢。你是一颗闪亮的星星。除了 NB 模型,所有代码都可以工作。我只是想知道您是否对这些警告的含义有任何想法。如果可以的话,谢谢。 FUN(X[[i]], ...) 中的警告:数值为 0,所有类别的概率为观察 2 FUN(X[[i]], ...) 中的警告:数值观察到的所有类的 0 概率 2 FUN 中的警告(X[[i]],...):所有观察到的类的数字 0 概率 3 警告中的 FUN(X[[i]],...): 嗨,吉姆,这太棒了。我不明白为什么 NB 不适合我。我刚收到这个温暖的信息。对不起,我不是忘恩负义。非常感谢。 警告消息:1:在 FUN(X[[i]], ...) 中:所有类别的数字 0 概率,观察值为 1 2:在 FUN(X[[i]], . ..) : 观察值为 1 的所有类的数值 0 概率 嗨 [Jim],我在我的 9 个专栏中进行了尝试,它确实有效。我的问题是总共有 12 个参数 [2:13],它不适用于完整的数据集。其余的代码很棒。谢谢你。很困惑为什么会这样:) –以上是关于在单个 ROC 图上绘制线性判别分析、分类树和朴素贝叶斯曲线的主要内容,如果未能解决你的问题,请参考以下文章