-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathBayes.R
More file actions
51 lines (37 loc) · 1.56 KB
/
Bayes.R
File metadata and controls
51 lines (37 loc) · 1.56 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
# Naive Bayes - 37.01899%
# Error rate for normal Naive Bayes with and without scaling: 40.11119
# Error rate for normal Naive Bayes without sig5: 39.73638 (without scaling = 39.76137)
# from e1071 I got 39.87381 error rate -> 39.54898
# Number of folds for k-fold cross-validation
k <- 9
# caret is used instead of e1071 because it offers cross-validation
library(caret)
library(pROC)
# Read both CSV files, then work on copies so the raw_* objects keep the
# data exactly as loaded
raw_train <- read.csv("training.csv")
raw_test <- read.csv("test.csv")
train <- raw_train
test <- raw_test
# Correct the data types
# relevance becomes a two-level factor labelled "No" (0) / "Yes" (1).
# Base factor() is used instead of revalue(): revalue() belongs to plyr,
# which this script never attaches, so the call would fail with
# "could not find function". Any value other than 0/1 becomes NA.
# NOTE(review): word labels are presumably needed because class
# probabilities are requested later and "0"/"1" are not valid R names.
train$relevance <- factor(
  train$relevance,
  levels = c(0, 1),
  labels = c("No", "Yes")
)
train$is_homepage <- as.factor(train$is_homepage)
# Log-transform sig3..sig5; log1p(x) computes log(1 + x) and stays
# accurate when x is close to zero
train$sig3 <- log1p(train$sig3)
train$sig4 <- log1p(train$sig4)
train$sig5 <- log1p(train$sig5)
# Keep 80% of the labelled rows for fitting; the remaining 20% are held
# out for validation. The seed makes the partition reproducible.
set.seed(2325)
inTraining <- createDataPartition(train$relevance, p = 0.8, list = FALSE)
training <- train[inTraining, ]
testing <- train[-inTraining, ]
# Columns 1, 2 and 13 are left out of the predictors; column 13 is used
# as the response (presumably relevance - TODO confirm against the CSV).
# Leftover note from the original script: index 9 is for sig5.
excluded_cols <- c(1, 2, 13)
response_col <- 13
train.predictors <- training[, -excluded_cols]
train.response <- training[[response_col]]
test.predictors <- testing[, -excluded_cols]
test.response <- testing[[response_col]]
# Resampling setup: k-fold cross-validation ("cv") scored with the
# two-class summary, with class probabilities switched on
train_control <- trainControl(
  method = "cv",
  number = k,
  returnResamp = "none",
  summaryFunction = twoClassSummary,
  classProbs = TRUE
)
# Fit the naive Bayes model, using ROC as the selection metric
fit_nb <- train(
  x = train.predictors,
  y = train.response,
  method = "nb",
  trControl = train_control,
  metric = "ROC"
)
# Predict the held-out rows and report the share of wrong labels in percent
pred_nb <- predict(fit_nb, test.predictors)
error_nb <- 100 * mean(test.response != pred_nb)
cat("Error rate for normal Naive Bayes: ", error_nb)