|
1 | | -#' Convert tree model dump to data.table |
| 1 | +#' Parse boosted tree model text dump |
2 | 2 | #' |
3 | | -#' Read a tree model text dump and return a data.table. |
| 3 | +#' Parse a boosted tree model text dump and return a \code{data.table}. |
4 | 4 | #' |
5 | 5 | #' @importFrom data.table data.table |
6 | 6 | #' @importFrom data.table set |
|
13 | 13 | #' @importFrom stringr str_extract |
14 | 14 | #' @importFrom stringr str_split |
15 | 15 | #' @importFrom stringr str_trim |
16 | | -#' @param feature_names names of each feature as a character vector. Can be extracted from a sparse matrix (see example). If model dump already contains feature names, this argument should be \code{NULL}. |
17 | | -#' @param model dump generated by the \code{xgb.train} function. Avoid the creation of a dump file. |
18 | | -#' @param text dump generated by the \code{xgb.dump} function. Avoid the creation of a dump file. Model dump must include the gain per feature and per tree (parameter \code{with.stats = T} in function \code{xgb.dump}). |
19 | | -#' @param n_first_tree limit the plot to the n first trees. If \code{NULL}, all trees of the model are plotted. Performance can be low for huge models. |
| 16 | +#' @param feature_names names of each feature as a character vector. Can be extracted from a sparse matrix (see example). If the model already contains feature names, this argument should be \code{NULL} (default value). |
| 17 | +#' @param model object created by the \code{xgb.train} function. |
| 18 | +#' @param text \code{character} vector generated by the \code{xgb.dump} function. Model dump must include the gain per feature and per tree (parameter \code{with.stats = TRUE} in function \code{xgb.dump}). |
| 19 | +#' @param n_first_tree limit the parsing to the \code{n} first trees. If set to \code{NULL}, all trees of the model are parsed. Performance can be low depending on the size of the model.
20 | 20 | #' |
21 | | -#' @return A \code{data.table} of the features used in the model with their gain, cover and few other thing. |
| 21 | +#' @return A \code{data.table} of the features used in the model with their gain, cover and a few other pieces of information.
22 | 22 | #' |
23 | 23 | #' @details |
24 | | -#' General function to convert a text dump of tree model to a Matrix. The purpose is to help user to explore the model and get a better understanding of it. |
| 24 | +#' General function to convert a text dump of tree model to a \code{data.table}. |
25 | 25 | #' |
26 | | -#' The content of the \code{data.table} is organised that way: |
| 26 | +#' The purpose is to help the user explore the model and get a better understanding of it.
| 27 | +#' |
| 28 | +#' The columns of the \code{data.table} are: |
27 | 29 | #' |
28 | 30 | #' \itemize{ |
29 | 31 | #' \item \code{ID}: unique identifier of a node ; |
|
35 | 37 | #' \item \code{Quality}: it's the gain related to the split in this specific node ; |
36 | 38 | #' \item \code{Cover}: metric to measure the number of observations affected by the split ;
37 | 39 | #' \item \code{Tree}: ID of the tree. It is included in the main ID ; |
38 | | -#' \item \code{Yes.X} or \code{No.X}: data related to the pointer in \code{Yes} or \code{No} column ; |
| 40 | +#' \item \code{Yes.Feature}, \code{No.Feature}, \code{Yes.Cover}, \code{No.Cover}, \code{Yes.Quality} and \code{No.Quality}: data related to the pointer in \code{Yes} or \code{No} column ; |
39 | 41 | #' } |
40 | 42 | #' |
41 | 43 | #' @examples |
42 | 44 | #' data(agaricus.train, package='xgboost') |
43 | 45 | #' |
44 | | -#' #Both dataset are list with two items, a sparse matrix and labels |
45 | | -#' #(labels = outcome column which will be learned). |
46 | | -#' #Each column of the sparse Matrix is a feature in one hot encoding format. |
47 | | -#' train <- agaricus.train |
48 | | -#' |
49 | | -#' bst <- xgboost(data = train$data, label = train$label, max.depth = 2, |
| 46 | +#' bst <- xgboost(data = agaricus.train$data, label = agaricus.train$label, max.depth = 2, |
50 | 47 | #' eta = 1, nthread = 2, nround = 2,objective = "binary:logistic") |
51 | 48 | #' |
52 | | -#' #agaricus.test$data@@Dimnames[[2]] represents the column names of the sparse matrix. |
| 49 | +#' # agaricus.train$data@@Dimnames[[2]] represents the column names of the sparse matrix. |
53 | 50 | #' xgb.model.dt.tree(feature_names = agaricus.train$data@@Dimnames[[2]], model = bst) |
54 | 51 | #' |
55 | 52 | #' @export |
|
0 commit comments