#' Create new features from a previously learned model
#'
#' Augments the training data with new features based on the decision trees of a
#' previously learned model, which may improve learning.
#'
#' @importFrom magrittr %>%
#' @importFrom Matrix cBind
#' @importFrom Matrix sparse.model.matrix
#'
#' @param model decision tree boosting model learned on the original data
#' @param training.data original data (usually provided as a \code{dgCMatrix} matrix)
#'
#' @return \code{dgCMatrix} matrix including both the original data and the new features.
#'
#' @details
#' This function is inspired by section 3.1 of the paper:
#'
#' \strong{"Practical Lessons from Predicting Clicks on Ads at Facebook"}
#'
#' \emph{(Xinran He, Junfeng Pan, Ou Jin, Tianbing Xu, Bo Liu, Tao Xu, Yanxin Shi, Antoine Atallah, Ralf Herbrich, Stuart Bowers,
#' Joaquin Quiñonero Candela)}
#'
#' International Workshop on Data Mining for Online Advertising (ADKDD) - August 24, 2014
#'
#' \url{https://research.facebook.com/publications/758569837499391/practical-lessons-from-predicting-clicks-on-ads-at-facebook/}.
#'
#' Extract explaining the method:
#'
#' "\emph{We found that boosted decision trees are a powerful and very
#' convenient way to implement non-linear and tuple transformations
#' of the kind we just described. We treat each individual
#' tree as a categorical feature that takes as value the
#' index of the leaf an instance ends up falling in. We use
#' 1-of-K coding of this type of features.
#'
#' For example, consider the boosted tree model in Figure 1 with 2 subtrees,
#' where the first subtree has 3 leafs and the second 2 leafs. If an
#' instance ends up in leaf 2 in the first subtree and leaf 1 in
#' second subtree, the overall input to the linear classifier will
#' be the binary vector \code{[0, 1, 0, 1, 0]}, where the first 3 entries
#' correspond to the leaves of the first subtree and last 2 to
#' those of the second subtree.
#'
#' [...]
#'
#' We can understand boosted decision tree
#' based transformation as a supervised feature encoding that
#' converts a real-valued vector into a compact binary-valued
#' vector. A traversal from root node to a leaf node represents
#' a rule on certain features.}"
#'
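#' To make the encoding concrete, here is a minimal sketch that reproduces the
#' five-entry binary vector of the quoted example (leaf and subtree sizes are
#' taken from the quote, not from a real model):
#'
#' \preformatted{
#' leaf.index <- c(2, 1) # instance falls in leaf 2 of subtree 1, leaf 1 of subtree 2
#' n.leaves <- c(3, 2)   # subtree 1 has 3 leaves, subtree 2 has 2
#' unlist(lapply(seq_along(leaf.index),
#'               function(i) as.integer(seq_len(n.leaves[i]) == leaf.index[i])))
#' # [1] 0 1 0 1 0
#' }
#'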
#' @examples
#' data(agaricus.train, package = 'xgboost')
#' data(agaricus.test, package = 'xgboost')
#' dtrain <- xgb.DMatrix(data = agaricus.train$data, label = agaricus.train$label)
#' dtest <- xgb.DMatrix(data = agaricus.test$data, label = agaricus.test$label)
#'
#' param <- list(max.depth = 2, eta = 1, silent = 1, objective = 'binary:logistic')
#' nround <- 4
#'
#' bst <- xgb.train(params = param, data = dtrain, nrounds = nround, nthread = 2)
#'
#' # Model accuracy without new features
#' accuracy.before <- sum((predict(bst, agaricus.test$data) >= 0.5) == agaricus.test$label) /
#'   length(agaricus.test$label)
#'
#' # Convert previous features to one-hot encoding
#' new.features.train <- xgb.create.features(model = bst, training.data = agaricus.train$data)
#' new.features.test <- xgb.create.features(model = bst, training.data = agaricus.test$data)
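#'
#' # The appended columns are the per-tree leaf indicators; their exact number
#' # depends on the learned trees
#' dim(agaricus.train$data)
#' dim(new.features.train)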
#'
#' # Learning with the new features
#' new.dtrain <- xgb.DMatrix(data = new.features.train, label = agaricus.train$label)
#' new.dtest <- xgb.DMatrix(data = new.features.test, label = agaricus.test$label)
#' watchlist <- list(train = new.dtrain)
#' bst <- xgb.train(params = param, data = new.dtrain, nrounds = nround, watchlist = watchlist, nthread = 2)
#'
#' # Model accuracy with new features
#' accuracy.after <- sum((predict(bst, new.dtest) >= 0.5) == agaricus.test$label) /
#'   length(agaricus.test$label)
#'
#' # Here the accuracy was already good and is now perfect.
#' cat(paste("The accuracy was", accuracy.before, "before adding leaf features and it is now",
#'           accuracy.after, "!\n"))
#'
#' @export
xgb.create.features <- function(model, training.data) {
  # Matrix of leaf indices: one row per instance, one column per boosted tree
  pred_with_leaf <- predict(model, training.data, predleaf = TRUE)
  cols <- list()
  for (i in seq_len(ncol(pred_with_leaf))) {
    # The distinct leaf indices observed for tree i become the factor levels
    leaf.id <- sort(unique(pred_with_leaf[, i]))
    cols[[i]] <- factor(x = pred_with_leaf[, i], levels = leaf.id)
  }
  # Expand each factor into sparse 1-of-K indicator columns and append them
  # to the original data
  cBind(training.data, sparse.model.matrix(~ . - 1, as.data.frame(cols)))
}
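
# For intuition, the factor / sparse.model.matrix() step above can be tried on
# made-up leaf indices (a sketch, not tied to a real model). With a single
# factor column and the "- 1" term, every level gets its own indicator column:
#
#   leaf.ids <- data.frame(tree1 = factor(c(2, 1, 3, 2)))
#   Matrix::sparse.model.matrix(~ . - 1, leaf.ids)
#   # 4 x 3 sparse matrix with exactly one 1 per row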