# Fit and evaluate an rpart decision tree that predicts column V15 from
# columns V1-V14 of data/adult.data, using a single 75/25 hold-out split.
#
# Steps run in dependency order: load packages, seed the RNG, import the data,
# describe it, split it, train, predict, and summarise.

# Load packages
library(caret)  # createDataPartition(), train(), varImp(), confusionMatrix()
library(Hmisc)  # describe()

# Always set the seed to get the same answer. This has to happen before the
# first random operation (the partition below), otherwise it has no effect on
# the split or on the model fit.
set.seed(1337)

# Import data. The file has no header row, so columns are named V1, V2, ...
# stringsAsFactors = TRUE keeps text columns as factors on every R version
# (the default became FALSE in R 4.0.0); confusionMatrix() below needs the
# reference column V15 to be a factor.
# NOTE(review): if fields in the file are padded with spaces, "?" will not
# match na.strings exactly; strip.white = TRUE may be needed -- confirm
# against the data file.
training <- read.csv(
  "data/adult.data",
  header = FALSE,
  na.strings = "?",
  stringsAsFactors = TRUE
)

# Data description
summary(training)
describe(training)
head(training)
sapply(training, class)
str(training)

# Split the data into a training part (75% of rows) and a hold-out test part
# (the remaining rows) so the model is evaluated on data it was not fitted to.
inTrain <- createDataPartition(training$V15, p = 0.75, list = FALSE)
train.train <- training[inTrain, ]
train.test <- training[-inTrain, ]

# Train the model: V15 is the outcome, every other column is a predictor
model <- train(V15 ~ ., data = train.train, method = "rpart")
vari <- varImp(model)

# Make predictions on the hold-out rows using the 14 predictor columns only
rpartpred <- predict(model, train.test[, 1:14])

# Summarize results: predicted labels against the true V15 labels
results <- confusionMatrix(rpartpred, train.test$V15)