# Create the training and testing data samples
df_car <- read.csv("https://raw.githubusercontent.com/jcbonilla/BusinessAnalytics/master/BAData/car-data.csv", stringsAsFactors = T)
head(df_car)
# Ensure the categorical predictors are treated as factors
df_car$Boot.Space <- as.factor(df_car$Boot.Space)
df_car$Safety <- as.factor(df_car$Safety)
# Fit a baseline logistic regression of acceptability on all predictors
car_model <- glm(Car.Acceptability ~ Car.Price + Main..Price + Doors + Persons + Boot.Space + Safety,
                 data = df_car, family = binomial(link = "logit"))
summary(car_model)
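# Optional sanity check (a sketch assuming Car.Acceptability is a two-level factor):
# compare the fitted probabilities from the logistic model with the observed labels
glm_prob  <- predict(car_model, type = "response")
glm_class <- ifelse(glm_prob > 0.5,
                    levels(df_car$Car.Acceptability)[2],
                    levels(df_car$Car.Acceptability)[1])
mean(glm_class == df_car$Car.Acceptability)  # in-sample accuracy of the glm baseline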
# Split the data 80/20 into training and testing sets
set.seed(100)
split <- 0.8
trainingRowIndex <- sample(1:nrow(df_car), floor(split * nrow(df_car)))
trainingData <- df_car[trainingRowIndex, ]
testData <- df_car[-trainingRowIndex, ]
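# Quick check that the split roughly preserves the class distribution of the target
prop.table(table(trainingData$Car.Acceptability))
prop.table(table(testData$Car.Acceptability))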
# Develop the model on training data and make prediction on testing data
library(rpart)
formula <- Car.Acceptability ~ Car.Price + Main..Price + Doors + Persons + Boot.Space + Safety
# Grow a classification tree; a small minsplit lets the tree grow deep before pruning
car.rpart <- rpart(formula, data = trainingData, method = "class",
                   control = rpart.control(minsplit = 2))
# install.packages("rattle")   # run once if rattle is not already installed
library(rattle)
fancyRpartPlot(car.rpart, tweak = 1.2)
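# Inspect the complexity parameter table before pruning; xerror is the
# cross-validated error used below to pick the best-sized tree
printcp(car.rpart)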
#Select the tree with the minimum prediction error
opt <- which.min(car.rpart$cptable[, "xerror"])
cp <- car.rpart$cptable[opt, "CP"]
# Prune tree
car.prune <- prune(car.rpart, cp=cp)
fancyRpartPlot(car.prune,tweak=1)
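# Optional comparison (not part of the original flow): the pruned tree should have
# no more nodes than the full tree; frame holds one row per node of an rpart object
nrow(car.rpart$frame)
nrow(car.prune$frame)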
# Make prediction and calculate accuracy
car.predict <- predict(car.prune, testData, type="class")
accuracy <- mean(testData$Car.Acceptability == car.predict)
accuracy
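# A confusion matrix shows which acceptability classes the pruned tree confuses,
# giving more detail than the single accuracy figure
table(Actual = testData$Car.Acceptability, Predicted = car.predict)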