算法思路:对于给出的待分类项,求解在此项出现的条件下各个类别出现的概率(即后验概率),哪个类别的概率最大,就认为此待分类项属于哪个类别。
naiveBayes() 函数(e1071 包):
# Naive Bayes classification with e1071::naiveBayes():
# read the data, split it roughly 70/30 into training and test sets, fit the
# model on the training set, and print a confusion matrix for each subset.
library(e1071)

# Read the tab-separated input through an explicit path instead of calling
# setwd(), so the session's working directory is left untouched.
data_dir <- "F:\\桌面"
Data <- read.csv(file.path(data_dir, "test.txt"), sep = "\t")

# MN is the response of the model formula below; convert it to a factor so it
# is treated as a categorical class label.
Data[, "MN"] <- as.factor(Data[, "MN"])

# Randomly assign each row to group 1 (training, probability 0.7) or group 2
# (test, probability 0.3). The fixed seed makes the split reproducible.
set.seed(1234)
ind <- sample(2, nrow(Data), replace = TRUE, prob = c(0.7, 0.3))
traindata <- Data[ind == 1, ]
testdata <- Data[ind == 2, ]

# Fit on the training rows, using every other column as a predictor.
naiveBayes.model <- naiveBayes(MN ~ ., data = traindata)

# Predictions for both subsets.
train_predict <- predict(naiveBayes.model, newdata = traindata)
test_predict <- predict(naiveBayes.model, newdata = testdata)

# Original rows with the prediction appended as an extra column.
train_predictdata <- cbind(traindata, predictedclass = train_predict)
test_predictdata <- cbind(testdata, predictedclass = test_predict)

# Confusion matrices (rows = actual class, columns = predicted class).
# The outer parentheses make R print each table as it is assigned.
(train_confusion <- table(
  actual = traindata$MN,
  predictedclass = train_predict
))
(test_confusion <- table(
  actual = testdata$MN,
  predictedclass = test_predict
))
训练数据混淆矩阵
      predictedclass
actual   0   1
     0  32 185
     1  18 588
测试数据混淆矩阵
      predictedclass
actual   0   1
     0   9  63
     1   9 261
# Naive Bayes classification with klaR::NaiveBayes(), using the training and
# test subsets (traindata / testdata) created earlier in this script.
library(klaR)

# Fit on the training rows, using every column other than MN as a predictor.
NaiveBayes.model <- NaiveBayes(MN ~ ., data = traindata)

# predict() is called without newdata for the training subset and with
# newdata = testdata for the test subset; the predicted labels are read from
# the `class` element of each result.
train_predict <- predict(NaiveBayes.model)
test_predict <- predict(NaiveBayes.model, newdata = testdata)

# Original rows with the predicted class appended as an extra column.
train_predictdata <- cbind(
  traindata,
  predictedclass = train_predict[["class"]]
)
test_predictdata <- cbind(
  testdata,
  predictedclass = test_predict[["class"]]
)

# Confusion matrices (rows = actual class, columns = predicted class).
# The outer parentheses make R print each table as it is assigned.
(train_confusion <- table(
  actual = traindata[["MN"]],
  predictedclass = train_predict[["class"]]
))
(test_confusion <- table(
  actual = testdata[["MN"]],
  predictedclass = test_predict[["class"]]
))
训练数据混淆矩阵
      predictedclass
actual   0   1
     0  32 185
     1  18 588
测试数据混淆矩阵
      predictedclass
actual   0   1
     0   9  63
     1   9 261
|