|
6 | 6 |
|
7 | 7 | import numpy as np |
8 | 8 | import scipy.sparse as sp |
| 9 | + |
| 10 | +from abc import ABCMeta, abstractmethod |
9 | 11 | import warnings |
10 | 12 |
|
11 | 13 | from ..externals.joblib import Parallel, delayed |
12 | 14 |
|
13 | | -from ..base import RegressorMixin |
14 | | -from ..base import ClassifierMixin |
| 15 | +from ..base import BaseEstimator, ClassifierMixin, RegressorMixin |
15 | 16 | from ..feature_selection.selector_mixin import SelectorMixin |
16 | | -from .base import BaseSGD |
17 | | -from ..utils import atleast2d_or_csr, check_arrays |
| 17 | +from ..utils import array2d, atleast2d_or_csr, check_arrays, safe_asarray |
18 | 18 | from ..utils.extmath import safe_sparse_dot |
19 | | -from ..utils import safe_asarray |
20 | 19 | from ..utils import deprecated |
21 | 20 |
|
22 | 21 | from .sgd_fast import plain_sgd as plain_sgd |
|
29 | 28 | from .sgd_fast import EpsilonInsensitive |
30 | 29 |
|
31 | 30 |
|
| 31 | +class BaseSGD(BaseEstimator): |
| 32 | + """Base class for dense and sparse SGD.""" |
| 33 | + |
| 34 | + __metaclass__ = ABCMeta |
| 35 | + |
| 36 | + def __init__(self, loss, penalty='l2', alpha=0.0001, |
| 37 | + rho=0.85, fit_intercept=True, n_iter=5, shuffle=False, |
| 38 | + verbose=0, epsilon=0.1, seed=0, learning_rate="optimal", |
| 39 | + eta0=0.0, power_t=0.5, warm_start=False): |
| 40 | + self.loss = str(loss) |
| 41 | + self.penalty = str(penalty).lower() |
| 42 | + self.epsilon = float(epsilon) |
| 43 | + self._set_loss_function(self.loss) |
| 44 | + self._set_penalty_type(self.penalty) |
| 45 | + |
| 46 | + self.alpha = float(alpha) |
| 47 | + if self.alpha < 0.0: |
| 48 | + raise ValueError("alpha must be non-negative")
| 49 | + self.rho = float(rho) |
| 50 | + if self.rho < 0.0 or self.rho > 1.0: |
| 51 | + raise ValueError("rho must be in [0, 1]") |
| 52 | + self.fit_intercept = bool(fit_intercept) |
| 53 | + self.n_iter = int(n_iter) |
| 54 | + if self.n_iter <= 0: |
| 55 | + raise ValueError("n_iter must be greater than zero") |
| 56 | + if not isinstance(shuffle, bool): |
| 57 | + raise ValueError("shuffle must be either True or False") |
| 58 | + self.shuffle = bool(shuffle) |
| 59 | + self.seed = seed |
| 60 | + self.verbose = int(verbose) |
| 61 | + |
| 62 | + self.learning_rate = str(learning_rate) |
| 63 | + self._set_learning_rate(self.learning_rate) |
| 64 | + self.eta0 = float(eta0) |
| 65 | + self.power_t = float(power_t) |
| 66 | + if self.learning_rate != "optimal": |
| 67 | + if eta0 <= 0.0: |
| 68 | + raise ValueError("eta0 must be greater than 0.0") |
| 69 | + self.coef_ = None |
| 70 | + self.warm_start = warm_start |
| 71 | + |
| 72 | + self._init_t() |
| 73 | + |
| 74 | + @abstractmethod |
| 75 | + def fit(self, X, y): |
| 76 | + """Fit model.""" |
| 77 | + |
| 78 | + @abstractmethod |
| 79 | + def predict(self, X): |
| 80 | + """Predict using model.""" |
| 81 | + |
| 82 | + def _init_t(self): |
| 83 | + self.t_ = 1.0 |
| 84 | + if self.learning_rate == "optimal": |
| 85 | + typw = np.sqrt(1.0 / np.sqrt(self.alpha)) |
| 86 | + # computing eta0, the initial learning rate |
| 87 | + eta0 = typw / max(1.0, self.loss_function.dloss(-typw, 1.0)) |
| 88 | + # initialize t such that eta at first example equals eta0 |
| 89 | + self.t_ = 1.0 / (eta0 * self.alpha) |
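| | + # (with the "optimal" schedule eta(t) = 1.0 / (alpha * t), as in
| | + # Leon Bottou's sgd project, this choice of t_ makes the step size
| | + # of the very first update equal to eta0)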
| 90 | + |
| 91 | + def _set_learning_rate(self, learning_rate): |
| 92 | + learning_rate_codes = {"constant": 1, "optimal": 2, "invscaling": 3} |
| 93 | + try: |
| 94 | + self.learning_rate_code = learning_rate_codes[learning_rate] |
| 95 | + except KeyError: |
| 96 | + raise ValueError("learning rate %s "
| 97 | + "is not supported." % learning_rate)
| 98 | + |
| 99 | + def _set_loss_function(self, loss): |
| 100 | + """Get concrete LossFunction""" |
| 101 | + raise NotImplementedError("BaseSGD is an abstract class.") |
| 102 | + |
| 103 | + def _set_penalty_type(self, penalty): |
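| | + # map penalty names to the integer codes consumed by the Cython
| | + # plain_sgd routine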
| 104 | + penalty_types = {"none": 0, "l2": 2, "l1": 1, "elasticnet": 3} |
| 105 | + try: |
| 106 | + self.penalty_type = penalty_types[penalty] |
| 107 | + except KeyError: |
| 108 | + raise ValueError("Penalty %s is not supported." % penalty)
| 109 | + |
| 110 | + def _validate_sample_weight(self, sample_weight, n_samples): |
| 111 | + """Set the sample weight array.""" |
| 112 | + if sample_weight is None:
| 113 | + # uniform sample weights |
| 114 | + sample_weight = np.ones(n_samples, dtype=np.float64, order='C') |
| 115 | + else: |
| 116 | + # user-provided array |
| 117 | + sample_weight = np.asarray(sample_weight, dtype=np.float64, |
| 118 | + order="C") |
| 119 | + if sample_weight.shape[0] != n_samples: |
| 120 | + raise ValueError("Shapes of X and sample_weight do not match.") |
| 121 | + return sample_weight |
| 122 | + |
| 123 | + def _set_coef(self, coef_): |
| 124 | + """Make sure that coef_ is fortran-style and 2d. |
| 125 | +
| 126 | + Fortran-style memory layout is needed to ensure that computing |
| 127 | + the dot product between input ``X`` and ``coef_`` does not trigger |
| 128 | + a memory copy. |
| 129 | + """ |
| 130 | + self.coef_ = np.asfortranarray(array2d(coef_)) |
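| | + # coef_.T is then C-contiguous, so np.dot(X, coef_.T) can be
| | + # dispatched to BLAS without copying either operand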
| 131 | + |
| 132 | + def _allocate_parameter_mem(self, n_classes, n_features, coef_init=None, |
| 133 | + intercept_init=None): |
| 134 | + """Allocate mem for parameters; initialize if provided.""" |
| 135 | + if n_classes > 2: |
| 136 | + # allocate coef_ for multi-class |
| 137 | + if coef_init is not None: |
| 138 | + coef_init = np.asarray(coef_init, order="C") |
| 139 | + if coef_init.shape != (n_classes, n_features): |
| 140 | + raise ValueError("Provided coef_init does not match dataset.")
| 141 | + self.coef_ = coef_init |
| 142 | + else: |
| 143 | + self.coef_ = np.zeros((n_classes, n_features), |
| 144 | + dtype=np.float64, order="C") |
| 145 | + |
| 146 | + # allocate intercept_ for multi-class |
| 147 | + if intercept_init is not None: |
| 148 | + intercept_init = np.asarray(intercept_init, order="C") |
| 149 | + if intercept_init.shape != (n_classes, ): |
| 150 | + raise ValueError("Provided intercept_init "
| 151 | + "does not match dataset.")
| 152 | + self.intercept_ = intercept_init |
| 153 | + else: |
| 154 | + self.intercept_ = np.zeros(n_classes, dtype=np.float64, |
| 155 | + order="C") |
| 156 | + else: |
| 157 | + # allocate coef_ for binary problem |
| 158 | + if coef_init is not None: |
| 159 | + coef_init = np.asarray(coef_init, dtype=np.float64, |
| 160 | + order="C") |
| 161 | + coef_init = coef_init.ravel() |
| 162 | + if coef_init.shape != (n_features,): |
| 163 | + raise ValueError("Provided coef_init does not "
| 164 | + "match dataset.")
| 165 | + self.coef_ = coef_init |
| 166 | + else: |
| 167 | + self.coef_ = np.zeros(n_features, dtype=np.float64, order="C") |
| 168 | + |
| 169 | + # allocate intercept_ for binary problem |
| 170 | + if intercept_init is not None: |
| 171 | + intercept_init = np.asarray(intercept_init, dtype=np.float64) |
| 172 | + if intercept_init.shape != (1,) and intercept_init.shape != (): |
| 173 | + raise ValueError("Provided intercept_init "
| 174 | + "does not match dataset.")
| 175 | + self.intercept_ = intercept_init.reshape(1,) |
| 176 | + else: |
| 177 | + self.intercept_ = np.zeros(1, dtype=np.float64, order="C") |
| 178 | + |
| 179 | + def _check_fit_data(self, X, y): |
| 180 | + n_samples, _ = X.shape |
| 181 | + if n_samples != y.shape[0]: |
| 182 | + raise ValueError("Shapes of X and y do not match.") |
| 183 | + |
| 184 | + |
32 | 185 | def _make_dataset(X, y_i, sample_weight): |
33 | 186 | """Returns Dataset object + intercept_decay""" |
34 | 187 | if sp.issparse(X): |
|