|
# Setup for the single-tree revenue model: attach the packages and move into
# the folder from which 'train.csv' and 'test.csv' are read further down.
#
# library() is used instead of require(): require() only warns and returns
# FALSE when a package is missing, so the script would carry on and fail
# later at the first caret call. library() stops immediately with a clear
# error message.
library(ggplot2)
library(caret)


# Echo the working directory before and after the change; the setwd() path is
# relative, so a wrong starting directory is easy to spot from the output.
getwd()
setwd('!!Regression/ML Process/!2 Restaurent Revenue Prediction/! 1 Single Tree Model/')
getwd()
# Load the training data and explore it interactively.
#
# "NA", empty and single-space fields are all read as missing values.
# stringsAsFactors is set explicitly: since R 4.0.0 read.csv() keeps text
# columns as character by default, which makes the levels() calls below
# return NULL and prevents plot() from drawing one box per City.Group.
# TRUE is spelled out because `T` is an ordinary variable that can be
# reassigned.
training <- read.csv('train.csv', header = TRUE, sep = ",",
                     na.strings = c("NA", "", " "),
                     stringsAsFactors = TRUE)

# Overall shape, column types, column names and the first rows.
dim(training)
str(training)
names(training)
head(training)
summary(training$revenue)

# Same overview restricted to the first rows, for a compact printout.
summary(head(training))
str(head(training))

# Distinct values of the categorical columns.
levels(training$City)
levels(training$City.Group)
levels(training$Type)

summary(training$P1)

# Open a graphics window for the plots that follow.
x11()
# Total revenue per city.
xtabs(revenue ~ City, training)
# With a factor on the x axis, plot() draws one boxplot of revenue per group.
plot(training$City.Group, training$revenue)
# NOTE(review): dotplot() is a lattice function; it is presumably attached as
# a dependency of caret -- confirm, or add library(lattice) at the top.
dotplot(revenue ~ Open.Date, training)
| 27 | + |
# Resampling schemes compared by the three CART fits below.
# (Run ?trainControl interactively for the full list of options; the help
# lookup does not belong in the script itself.)
#
# `repeats` only has a meaning for method = "repeatedcv". For "cv" and "boot"
# it is ignored (recent caret versions warn about it), so it is dropped from
# control1 and control2 without changing the resampling that is performed.
set.seed(1000)
control1 <- trainControl(method = "cv", number = 30)          # 30-fold cross-validation
control2 <- trainControl(method = "boot", number = 10)        # 10 bootstrap resamples
control3 <- trainControl(method = "repeatedcv", number = 20,  # 20-fold cross-validation,
                         repeats = 5)                         # repeated 5 times
| 33 | + |
# First CART model: rpart tuned with the resampling scheme in control1,
# followed by an interactive inspection of the fitted caret object.
#
# Predictors are every column except positions 1-5 and 43; the outcome is
# the `revenue` column.
# NOTE(review): the dropped positions are presumably the id/date/location/type
# fields plus revenue itself -- confirm against names(training).
set.seed(10001)
cart_model1 <- train(x = training[, -c(1, 2, 3, 4, 5, 43)],
                     y = training[, c("revenue")],
                     method = "rpart",
                     trControl = control1,
                     tuneLength = 20)  # ask caret for 20 candidate tuning values

# Printed summary of the resampling results, then the individual components
# of the returned train object.
cart_model1
summary(cart_model1)
cart_model1$method
cart_model1$modelInfo
cart_model1$modelType
cart_model1$results
cart_model1$resample
cart_model1$bestTune
cart_model1$call
cart_model1$metric
cart_model1$control
cart_model1$finalModel

# Variable importance: caret's scaled table and plot, then the raw values
# stored on the underlying rpart fit.
varImp(cart_model1)
X11()
plot(varImp(cart_model1))
cart_model1$finalModel$variable.importance
# type = 'b' draws points joined by lines. The previous value 'both' is not a
# valid plot type; R truncated it to its first character and warned.
plot(cart_model1$finalModel$variable.importance, type = 'b')
cart_model1$yLimits
cart_model1$perfNames
# Covariance matrix of the columns of the resampling results table.
var(cart_model1$results)
| 56 | + |
# Second CART model: same predictors and outcome as before, resampled with
# control2 and a grid of 35 candidate tuning values.
set.seed(10002)
cart_model2 <- train(
  x          = training[, -c(1, 2, 3, 4, 5, 43)],
  y          = training[, c("revenue")],
  method     = "rpart",
  trControl  = control2,
  tuneLength = 35
)

# Resampling summary, the per-candidate results table and the per-resample
# performance values.
cart_model2
cart_model2$results
cart_model2$resample

# New graphics window, then variable importance as a table and as a plot.
X11()
varImp(cart_model2)
plot(varImp(cart_model2))
| 65 | + |
# Third CART model: same predictors and outcome, resampled with control3 and
# caret's default tuning grid size (no tuneLength given).
set.seed(10003)
cart_model3 <- train(
  x         = training[, -c(1, 2, 3, 4, 5, 43)],
  y         = training[, c("revenue")],
  method    = "rpart",
  trControl = control3
)
cart_model3

# Plot the resampling profile in a fresh graphics window, then print the
# underlying results table.
x11()
plot(cart_model3)
cart_model3$results
| 72 | + |
| 73 | + |
# Score the test file with the first CART model and write the predictions to
# 'rpartmodel.csv' (columns Id and Prediction).
#
# "NA", single-space and empty fields are read as missing values.
test <- read.csv('test.csv', header = TRUE, sep = ",",
                 na.strings = c("NA", " ", ""))

# Compare the shape and column names of the two data sets.
dim(test)
dim(training)
names(test)
names(training)

# Give Type the same level set in both data frames.
# The previous `levels(training$Type) = levels(test$Type)` replaced the
# training labels by position: it silently relabels rows if the level order
# differs and errors if the test data has fewer levels. Building both factors
# from the sorted union of observed values keeps every value unchanged.
type_levels <- sort(union(as.character(training$Type),
                          as.character(test$Type)))
training$Type <- factor(as.character(training$Type), levels = type_levels)
test$Type <- factor(as.character(test$Type), levels = type_levels)

# Same column positions are dropped as when the model was trained.
# NOTE(review): position 43 is the outcome column in `training`; presumably it
# does not exist in `test`, in which case the negative index is simply
# ignored -- confirm against names(test).
test$Prediction <- predict(cart_model1, test[, -c(1, 2, 3, 4, 5, 43)])
summary(test$Prediction)

# The stray x11() call that used to sit here was removed: nothing is plotted
# afterwards, and on a machine without a display it would abort the script
# before the predictions are written.
write.csv(test[, c("Id", "Prediction")], file = "rpartmodel.csv",
          row.names = FALSE)
| 86 | + |
| 87 | + |
| 88 | + |
| 89 | + |
| 90 | + |
| 91 | + |
0 commit comments