朴素贝叶斯函数的下标越界错误
Posted
技术标签:
【中文标题】朴素贝叶斯函数的下标越界错误【英文标题】:Subscript out of bounds error from naive bayes function 【发布时间】:2020-08-17 17:12:11 【问题描述】:我正在创建一个用户定义的朴素贝叶斯函数,用于显示准确性、错误分类和召回率。我还没有对召回进行编码。非常感谢您的帮助下面是我的朴素贝叶斯函数代码:
naive_bayes <- function(training.dataset, test.dataset)
##read data from file to a data frame
training.table <- trainSparse
test.table <- testSparse
##retrieve class and features from training data
training.class <- training.table[, 200]
training.features <- training.table[,-200]
remove(training.table)
##funciton for calculating priors
calculate.priors <- function(class.vector)
priors <- c()
for (class in unique(class.vector))
priors <- rbind(priors, c(class, length(class.vector[class.vector==class])/length(class.vector)))
colnames(priors) <- c("classification", "probability")
return (priors)
priors <- calculate.priors(training.class)
##Learn the features by calculating likelihood
likelihood.list <- list()
#calculate CPD by feature
for (i in 1:dim(training.features)[2])
feature.values <- training.features[, i]
unique.feature.values <- unique(feature.values)
likelihood.matrix <- matrix(rep(NA), nrow=dim(priors)[1], ncol=length(unique.feature.values))
colnames(likelihood.matrix) <- unique.feature.values
rownames(likelihood.matrix) <- priors[, "classification"]
for (item in unique.feature.values)
likelihood.item <- vector()
for (class in priors[, "classification"])
feature.value.inclass <- feature.values[training.class==class]
likelihood.value <- length(feature.value.inclass[feature.value.inclass==item])/length(feature.value.inclass)
likelihood.item <- c(likelihood.item, likelihood.value)
likelihood.matrix[, item] <- likelihood.item
likelihood.list[[i]] <- likelihood.matrix
##Predict class for the test dataset
#retrieve the features and target class of the testing dataset
test.features <- test.table[, -200]
test.target.class <- test.table[, 200]
test.predict.class <- rep(NA, length(test.target.class))
remove(test.table)
#calculate posteriors for each test data record
for (i in 1:dim(test.features)[1])
record <- test.features[i, ]
posterior <- vector()
#calculate posteriors for each possible class of that record
for (class in priors[, "classification"])
#initialize posterior as the prior value of that class
posterior.value <- as.numeric(priors[priors[, "classification"]==class, 2])
likelihood.v <- c()
for (item in 1:length(record))
likelihood.value <- likelihood.list[[item]][class, as.character(record[1, item])]
likelihood.v <- c(likelihood.v, likelihood.value)
posterior.value <- as.numeric(posterior.value) * as.numeric(likelihood.value)
posterior <- rbind(posterior, c(class, posterior.value))
predict.class <- posterior[posterior[,2]==max(as.numeric(posterior[,2])),1]
test.predict.class[i] <- predict.class
accuracy <- length(test.predict.class[test.predict.class==test.target.class])/length(test.target.class)
missclassification <- 1 - accuracy
print(paste("Accuracy:", accuracy))
print(paste("\nMissclassification:", missclassification))
我不确定为什么会收到以下错误:
Error in `[<-`(`*tmp*`, , item, value = likelihood.item) :
subscript out of bounds
下面是我的火车数据集的输出:
> dput(head(trainSparse,5))
structure(list(also = c(0, 0, 0, 0, 0), anoth = c(1, 0, 1, 1,
0), anyon = c(0, 0, 0, 0, 0), anyth = c(1, 0, 0, 0, 0), articl = c(0,
0, 0, 0, 0), avail = c(0, 0, 0, 0, 0), back = c(0, 0, 0, 0, 0
), base = c(0, 0, 0, 1, 0), believ = c(2, 0, 0, 0, 0), best = c(0,
0, 0, 0, 0), better = c(0, 0, 0, 0, 0), book = c(0, 0, 0, 0,
0), can = c(4, 0, 0, 0, 0), case = c(1, 0, 0, 0, 0), consid = c(0,
0, 0, 0, 0), day = c(0, 0, 0, 0, 0), differ = c(0, 0, 0, 0, 0
), distribut = c(0, 0, 0, 0, 0), drive = c(0, 0, 0, 0, 0), etc = c(0,
0, 0, 0, 0), even = c(3, 0, 0, 0, 0), exampl = c(0, 0, 0, 2,
0), exist = c(0, 0, 0, 0, 0), file = c(0, 0, 0, 0, 0), first = c(1,
0, 0, 0, 0), follow = c(2, 0, 0, 0, 0), found = c(0, 0, 0, 0,
0), get = c(0, 0, 0, 0, 0), god = c(3, 0, 0, 0, 0), great = c(0,
0, 0, 0, 0), group = c(0, 0, 0, 0, 0), help = c(0, 0, 0, 0, 0
), high = c(0, 0, 0, 0, 0), howev = c(0, 0, 0, 1, 0), idea = c(1,
0, 0, 0, 0), includ = c(0, 0, 0, 0, 0), inform = c(0, 0, 0, 0,
0), john = c(0, 0, 0, 0, 0), keyword = c(0, 0, 0, 0, 0), kind = c(0,
0, 0, 0, 0), know = c(1, 0, 0, 0, 0), like = c(0, 0, 0, 0, 0),
line = c(1, 1, 1, 3, 1), list = c(0, 0, 0, 0, 0), live = c(0,
0, 0, 0, 0), look = c(0, 0, 0, 0, 0), make = c(2, 0, 0, 0,
0), mani = c(0, 0, 0, 0, 0), may = c(0, 0, 0, 0, 0), must = c(0,
0, 0, 1, 0), nation = c(0, 0, 0, 0, 0), new = c(0, 0, 0,
0, 0), number = c(0, 0, 0, 0, 0), old = c(0, 0, 0, 0, 0),
one = c(1, 0, 0, 0, 0), opinion = c(0, 0, 0, 0, 0), organ = c(1,
1, 1, 1, 1), origin = c(0, 0, 0, 0, 0), peopl = c(1, 0, 2,
0, 0), person = c(0, 0, 0, 0, 0), place = c(2, 0, 0, 0, 0
), possibl = c(0, 0, 0, 0, 0), post = c(0, 0, 0, 0, 0), probabl = c(0,
0, 0, 0, 0), quit = c(0, 0, 0, 0, 0), rather = c(0, 0, 0,
0, 0), read = c(0, 0, 0, 0, 0), reason = c(1, 0, 0, 1, 0),
right = c(0, 0, 0, 0, 0), say = c(0, 0, 0, 0, 0), second = c(0,
0, 0, 0, 0), see = c(0, 0, 0, 1, 0), seem = c(0, 0, 0, 0,
0), send = c(0, 0, 0, 0, 0), set = c(0, 0, 0, 0, 0), state = c(0,
0, 0, 1, 0), subject = c(1, 1, 1, 1, 1), sure = c(0, 0, 0,
0, 0), system = c(0, 0, 0, 6, 0), take = c(2, 0, 0, 0, 0),
technolog = c(0, 1, 0, 1, 1), think = c(0, 0, 0, 1, 0), thought = c(0,
0, 0, 0, 0), time = c(1, 0, 0, 0, 0), tri = c(0, 0, 0, 0,
0), true = c(0, 0, 1, 0, 0), univers = c(1, 0, 1, 0, 0),
usa = c(0, 0, 0, 0, 0), use = c(1, 0, 2, 0, 0), version = c(0,
0, 0, 0, 0), way = c(0, 0, 0, 0, 0), well = c(1, 0, 0, 3,
0), will = c(4, 0, 0, 0, 0), without = c(0, 0, 0, 0, 0),
word = c(0, 0, 0, 0, 0), work = c(0, 0, 0, 0, 0), world = c(0,
0, 0, 0, 0), write = c(0, 0, 0, 0, 0), wrote = c(0, 0, 0,
0, 0), abl = c(0, 0, 0, 0, 0), actual = c(0, 0, 0, 0, 0),
allow = c(1, 0, 0, 0, 0), alway = c(0, 0, 0, 0, 0), answer = c(0,
0, 0, 0, 0), around = c(0, 0, 0, 0, 0), ask = c(0, 0, 0,
0, 0), call = c(0, 0, 0, 0, 0), cant = c(1, 0, 0, 1, 0),
care = c(0, 0, 0, 0, 0), caus = c(1, 0, 1, 0, 0), certain = c(0,
0, 0, 1, 0), chang = c(0, 0, 1, 0, 1), claim = c(1, 0, 0,
0, 0), control = c(0, 0, 0, 0, 0), cours = c(1, 0, 0, 0,
0), doesnt = c(0, 0, 0, 0, 0), done = c(0, 0, 1, 0, 0), dont = c(1,
0, 0, 0, 0), either = c(0, 0, 0, 0, 0), els = c(0, 0, 0,
0, 0), end = c(0, 0, 0, 1, 0), enough = c(0, 0, 0, 0, 0),
ever = c(0, 0, 0, 0, 0), everi = c(0, 0, 0, 1, 0), fact = c(1,
0, 0, 0, 0), far = c(0, 0, 0, 0, 0), find = c(0, 0, 0, 0,
0), game = c(0, 0, 0, 0, 0), general = c(0, 0, 0, 0, 0),
give = c(2, 0, 0, 0, 0), good = c(3, 0, 0, 1, 0), govern = c(0,
0, 0, 0, 0), happen = c(1, 0, 0, 0, 0), hard = c(0, 0, 0,
0, 0), hope = c(0, 0, 0, 0, 0), institut = c(0, 1, 0, 1,
1), isnt = c(0, 0, 0, 0, 0), just = c(0, 1, 0, 0, 0), keep = c(0,
0, 0, 0, 0), law = c(0, 0, 0, 0, 0), least = c(0, 0, 0, 0,
0), let = c(1, 0, 0, 0, 0), littl = c(0, 0, 0, 0, 0), long = c(2,
0, 0, 0, 0), made = c(0, 0, 1, 0, 0), mayb = c(0, 0, 0, 0,
0), mean = c(0, 0, 0, 0, 0), messag = c(0, 0, 0, 0, 0), might = c(0,
0, 0, 0, 0), much = c(0, 0, 0, 0, 0), need = c(0, 0, 0, 0,
0), note = c(0, 0, 0, 0, 0), noth = c(1, 0, 0, 0, 0), now = c(0,
0, 0, 0, 1), order = c(0, 0, 0, 0, 0), other = c(0, 0, 2,
0, 0), part = c(0, 0, 0, 0, 0), play = c(0, 0, 0, 0, 0),
pleas = c(0, 0, 0, 0, 0), point = c(0, 0, 0, 0, 0), power = c(1,
0, 0, 0, 0), problem = c(0, 0, 0, 0, 0), public = c(0, 0,
0, 0, 0), put = c(0, 0, 0, 0, 0), question = c(0, 0, 0, 0,
0), real = c(0, 0, 0, 0, 0), realli = c(0, 0, 0, 0, 0), requir = c(0,
1, 0, 0, 0), respons = c(0, 0, 0, 0, 0), run = c(0, 0, 0,
0, 0), said = c(1, 0, 0, 0, 0), scienc = c(0, 0, 0, 0, 0),
seen = c(0, 0, 0, 0, 0), sever = c(0, 0, 0, 0, 0), show = c(1,
0, 0, 0, 0), sinc = c(0, 1, 0, 0, 0), someon = c(0, 0, 0,
0, 0), someth = c(1, 0, 0, 0, 0), start = c(0, 0, 0, 0, 0
), still = c(0, 0, 0, 0, 0), support = c(0, 0, 0, 0, 0),
talk = c(0, 0, 0, 0, 1), tell = c(0, 0, 0, 0, 0), thank = c(0,
0, 0, 0, 0), that = c(0, 0, 0, 0, 0), thing = c(0, 0, 0,
0, 0), though = c(0, 0, 0, 0, 0), two = c(0, 0, 0, 0, 0),
want = c(1, 0, 0, 0, 0), year = c(0, 0, 0, 0, 1), yes = c(0,
0, 0, 1, 0), bad = c(0, 0, 2, 0, 0), xnewsread = c(0, 0,
0, 0, 0), research = c(0, 0, 0, 0, 0), interest = c(1, 0,
0, 0, 0), lot = c(1, 0, 0, 0, 0), didnt = c(0, 1, 0, 0, 0
), nntppostinghost = c(0, 1, 0, 1, 1), name = c(0, 0, 2,
0, 0), ive = c(0, 0, 0, 0, 0), never = c(0, 0, 0, 0, 0),
inc = c(0, 0, 0, 0, 0), comput = c(0, 0, 0, 0, 0), replyto = c(0,
0, 0, 0, 0), email = c(0, 0, 0, 0, 0), program = c(0, 0,
0, 0, 0), servic = c(0, 0, 0, 0, 0), bit = c(0, 0, 0, 0,
0), dept = c(0, 0, 0, 0, 0), come = c(0, 0, 0, 0, 0), access = c(0,
0, 0, 0, 0), got = c(0, 0, 0, 0, 0), yet = c(0, 0, 0, 0,
0), suggest = c(0, 0, 0, 0, 0), engin = c(0, 0, 0, 0, 0),
last = c(0, 0, 0, 0, 0), your = c(0, 0, 0, 0, 0), next. = c(0,
0, 0, 0, 0), david = c(0, 0, 0, 0, 0), internet = c(0, 0,
0, 0, 0), depart = c(0, 0, 0, 0, 0), softwar = c(0, 0, 0,
0, 0), center = c(0, 0, 0, 0, 0), window = c(0, 0, 0, 0,
0), Negative = structure(c(2L, 1L, 2L, 1L, 2L), .Label = c("FALSE",
"TRUE"), class = "factor")), row.names = c("6", "7", "8",
"9", "11"), class = "data.frame")
测试数据遵循相同的布局!
【问题讨论】:
只需在您的问题中添加几行数据即可。使用dput(head(data,20)
提取示例。外部链接不是展示数据的好方法。
【参考方案1】:
好的。这是一个调试任务,但我想我发现了错误。 你的定义是
unique.feature.values <- unique(feature.values)
likelihood.matrix <- matrix(rep(NA),
nrow=dim(priors)[1],
ncol=length(unique.feature.values))
让
> unique.feature.values
[1] 4 0
和
> length(unique.feature.values)
[1] 2
在这种情况下,您的矩阵 likelihood.matrix
有
> dim(likelihood.matrix)
[1] 1 2
此时我们忽略dim(priors)[1]
部分并假设它是1
。您的第二个 for 循环由
for (item in unique.feature.values)
[ some code ]
likelihood.matrix[, item] <- likelihood.item
这就是它崩溃的地方。 item
是 unique.feature.values
的一个元素。所以正如上面假设的unique.feature.values
的第一个元素等于4
。但是likelihood.matrix
的维度为1x2
,因此
likelihood.matrix[,item] <- likelihood.item
结果
likelihood.matrix[,4] <- likelihood.item
这是超出范围的。要么您必须通过 nrow = max(unique.feature.values)
定义您的 likelihood.matrix
,要么您必须将第二个 for 循环更改为
for (item in 1:length(unique.feature.values))
【讨论】:
解决了 for 循环问题!尽管在最后一个 for 循环语句中出现了另一条错误消息: 可能性.list[[item]][class, as.character(record[1, item])] 中的错误:下标超出范围 我认为问题出在record[1, item]
。 record
不是向量,所以你不能使用 [1, item]
而只是 [1]
或 [item]
取决于你的任务?以上是关于朴素贝叶斯函数的下标越界错误的主要内容,如果未能解决你的问题,请参考以下文章
朴素贝叶斯:朴素贝叶斯定义朴素贝叶斯公式分解朴素贝叶斯分类流程高斯型朴素贝叶斯多项式朴素贝叶斯伯努利型朴素贝叶斯朴素贝叶斯预测概率校准朴素贝叶斯优缺点