@@ -137,14 +137,9 @@ def _check_precisions(precisions, covariance_type, n_components, n_features):
137137
138138###############################################################################
139139# Gaussian mixture parameters estimators (used by the M-Step)
140- ESTIMATE_PRECISION_ERROR_MESSAGE = ("The algorithm has diverged because of "
141- "too few samples per components. Try to "
142- "decrease the number of components, "
143- "or increase reg_covar." )
144140
145-
146- def _estimate_gaussian_precisions_cholesky_full (resp , X , nk , means , reg_covar ):
147- """Estimate the full precision matrices.
141+ def _estimate_gaussian_covariances_full (resp , X , nk , means , reg_covar ):
142+ """Estimate the full covariance matrices.
148143
149144 Parameters
150145 ----------
@@ -160,27 +155,20 @@ def _estimate_gaussian_precisions_cholesky_full(resp, X, nk, means, reg_covar):
160155
161156 Returns
162157 -------
163- precisions_chol : array, shape (n_components, n_features, n_features)
164- The cholesky decomposition of the precision matrix .
158+ covariances : array, shape (n_components, n_features, n_features)
159+ The covariance matrix of the current components .
165160 """
166161 n_components , n_features = means .shape
167- precisions_chol = np .empty ((n_components , n_features , n_features ))
162+ covariances = np .empty ((n_components , n_features , n_features ))
168163 for k in range (n_components ):
169164 diff = X - means [k ]
170- covariance = np .dot (resp [:, k ] * diff .T , diff ) / nk [k ]
171- covariance .flat [::n_features + 1 ] += reg_covar
172- try :
173- cov_chol = linalg .cholesky (covariance , lower = True )
174- except linalg .LinAlgError :
175- raise ValueError (ESTIMATE_PRECISION_ERROR_MESSAGE )
176- precisions_chol [k ] = linalg .solve_triangular (cov_chol ,
177- np .eye (n_features ),
178- lower = True ).T
179- return precisions_chol
165+ covariances [k ] = np .dot (resp [:, k ] * diff .T , diff ) / nk [k ]
166+ covariances [k ].flat [::n_features + 1 ] += reg_covar
167+ return covariances
180168
181169
182- def _estimate_gaussian_precisions_cholesky_tied (resp , X , nk , means , reg_covar ):
183- """Estimate the tied precision matrix.
170+ def _estimate_gaussian_covariances_tied (resp , X , nk , means , reg_covar ):
171+ """Estimate the tied covariance matrix.
184172
185173 Parameters
186174 ----------
@@ -196,26 +184,20 @@ def _estimate_gaussian_precisions_cholesky_tied(resp, X, nk, means, reg_covar):
196184
197185 Returns
198186 -------
199- precisions_chol : array, shape (n_features, n_features)
200- The cholesky decomposition of the precision matrix .
187+ covariance : array, shape (n_features, n_features)
188+ The tied covariance matrix of the components .
201189 """
202- n_samples , n_features = X .shape
190+ n_samples , _ = X .shape
203191 avg_X2 = np .dot (X .T , X )
204192 avg_means2 = np .dot (nk * means .T , means )
205- covariances = avg_X2 - avg_means2
206- covariances /= n_samples
207- covariances .flat [::len (covariances ) + 1 ] += reg_covar
208- try :
209- cov_chol = linalg .cholesky (covariances , lower = True )
210- except linalg .LinAlgError :
211- raise ValueError (ESTIMATE_PRECISION_ERROR_MESSAGE )
212- precisions_chol = linalg .solve_triangular (cov_chol , np .eye (n_features ),
213- lower = True ).T
214- return precisions_chol
193+ covariance = avg_X2 - avg_means2
194+ covariance /= n_samples
195+ covariance .flat [::len (covariance ) + 1 ] += reg_covar
196+ return covariance
215197
216198
217- def _estimate_gaussian_precisions_cholesky_diag (resp , X , nk , means , reg_covar ):
218- """Estimate the diagonal precision matrices .
199+ def _estimate_gaussian_covariances_diag (resp , X , nk , means , reg_covar ):
200+ """Estimate the diagonal covariance vectors .
219201
220202 Parameters
221203 ----------
@@ -231,21 +213,17 @@ def _estimate_gaussian_precisions_cholesky_diag(resp, X, nk, means, reg_covar):
231213
232214 Returns
233215 -------
234- precisions_chol : array, shape (n_components, n_features)
235- The cholesky decomposition of the precision matrix .
216+ covariances : array, shape (n_components, n_features)
217+ The covariance vector of the current components .
236218 """
237219 avg_X2 = np .dot (resp .T , X * X ) / nk [:, np .newaxis ]
238220 avg_means2 = means ** 2
239221 avg_X_means = means * np .dot (resp .T , X ) / nk [:, np .newaxis ]
240- covariances = avg_X2 - 2 * avg_X_means + avg_means2 + reg_covar
241- if np .any (np .less_equal (covariances , 0.0 )):
242- raise ValueError (ESTIMATE_PRECISION_ERROR_MESSAGE )
243- return 1. / np .sqrt (covariances )
222+ return avg_X2 - 2 * avg_X_means + avg_means2 + reg_covar
244223
245224
246- def _estimate_gaussian_precisions_cholesky_spherical (resp , X , nk , means ,
247- reg_covar ):
248- """Estimate the spherical precision matrices.
225+ def _estimate_gaussian_covariances_spherical (resp , X , nk , means , reg_covar ):
226+ """Estimate the spherical variance values.
249227
250228 Parameters
251229 ----------
@@ -261,16 +239,11 @@ def _estimate_gaussian_precisions_cholesky_spherical(resp, X, nk, means,
261239
262240 Returns
263241 -------
264- precisions_chol : array, shape (n_components,)
265- The cholesky decomposition of the precision matrix .
242+ variances : array, shape (n_components,)
243+ The variance values of each components .
266244 """
267- avg_X2 = np .dot (resp .T , X * X ) / nk [:, np .newaxis ]
268- avg_means2 = means ** 2
269- avg_X_means = means * np .dot (resp .T , X ) / nk [:, np .newaxis ]
270- covariances = (avg_X2 - 2 * avg_X_means + avg_means2 + reg_covar ).mean (1 )
271- if np .any (np .less_equal (covariances , 0.0 )):
272- raise ValueError (ESTIMATE_PRECISION_ERROR_MESSAGE )
273- return 1. / np .sqrt (covariances )
245+ return _estimate_gaussian_covariances_diag (resp , X , nk ,
246+ means , reg_covar ).mean (1 )
274247
275248
276249def _estimate_gaussian_parameters (X , resp , reg_covar , covariance_type ):
@@ -292,29 +265,77 @@ def _estimate_gaussian_parameters(X, resp, reg_covar, covariance_type):
292265
293266 Returns
294267 -------
295- nk : array, shape (n_components,)
268+ nk : array-like , shape (n_components,)
296269 The numbers of data samples in the current components.
297270
298- means : array, shape (n_components, n_features)
271+ means : array-like , shape (n_components, n_features)
299272 The centers of the current components.
300273
301- precisions_cholesky : array
302- The cholesky decomposition of sample precisions of the current
303- components. The shape depends of the covariance_type.
274+ covariances : array-like
275+ The covariance matrix of the current components.
276+ The shape depends of the covariance_type.
304277 """
305278 nk = resp .sum (axis = 0 ) + 10 * np .finfo (resp .dtype ).eps
306279 means = np .dot (resp .T , X ) / nk [:, np .newaxis ]
307- precs_chol = {"full" : _estimate_gaussian_precisions_cholesky_full ,
308- "tied" : _estimate_gaussian_precisions_cholesky_tied ,
309- "diag" : _estimate_gaussian_precisions_cholesky_diag ,
310- "spherical" : _estimate_gaussian_precisions_cholesky_spherical
311- }[covariance_type ](resp , X , nk , means , reg_covar )
312- return nk , means , precs_chol
280+ covariances = {"full" : _estimate_gaussian_covariances_full ,
281+ "tied" : _estimate_gaussian_covariances_tied ,
282+ "diag" : _estimate_gaussian_covariances_diag ,
283+ "spherical" : _estimate_gaussian_covariances_spherical
284+ }[covariance_type ](resp , X , nk , means , reg_covar )
285+ return nk , means , covariances
286+
287+
def _compute_precision_cholesky(covariances, covariance_type):
    """Compute the Cholesky decomposition of the precisions.

    Parameters
    ----------
    covariances : array-like
        The covariance matrix of the current components.
        The shape depends on the covariance_type.

    covariance_type : {'full', 'tied', 'diag', 'spherical'}
        The type of precision matrices.

    Returns
    -------
    precisions_cholesky : array-like
        The cholesky decomposition of sample precisions of the current
        components. The shape depends on the covariance_type.

    Raises
    ------
    ValueError
        If a 'full' or 'tied' covariance matrix cannot be Cholesky
        factorized, or if a 'diag' or 'spherical' covariance contains a
        value that is not strictly positive.
    """
    estimate_precision_error_message = (
        "The algorithm has diverged because of too few samples per "
        "components. Try to decrease the number of components, "
        "or increase reg_covar.")

    # Compare by value: ``in 'full'`` is a substring test and ``is 'tied'``
    # is an identity test, neither of which is a reliable string equality.
    if covariance_type == 'full':
        n_components, n_features, _ = covariances.shape
        precisions_chol = np.empty((n_components, n_features, n_features))
        for k, covariance in enumerate(covariances):
            try:
                cov_chol = linalg.cholesky(covariance, lower=True)
            except linalg.LinAlgError:
                raise ValueError(estimate_precision_error_message)
            # Solving L Z = I gives Z = L^-1; its transpose is the (upper)
            # Cholesky factor of the precision matrix.
            precisions_chol[k] = linalg.solve_triangular(cov_chol,
                                                         np.eye(n_features),
                                                         lower=True).T
    elif covariance_type == 'tied':
        _, n_features = covariances.shape
        try:
            cov_chol = linalg.cholesky(covariances, lower=True)
        except linalg.LinAlgError:
            raise ValueError(estimate_precision_error_message)
        precisions_chol = linalg.solve_triangular(cov_chol, np.eye(n_features),
                                                  lower=True).T
    else:
        # 'diag' and 'spherical' store variances directly, so the factor of
        # the precision is the element-wise inverse square root.
        if np.any(np.less_equal(covariances, 0.0)):
            raise ValueError(estimate_precision_error_message)
        precisions_chol = 1. / np.sqrt(covariances)
    return precisions_chol
313335
314336
315337###############################################################################
316338# Gaussian mixture probability estimators
317-
318339def _estimate_log_gaussian_prob_full (X , means , precisions_chol ):
319340 """Estimate the log Gaussian probability for 'full' precision.
320341
@@ -497,21 +518,21 @@ class GaussianMixture(BaseMixture):
497518
498519 Attributes
499520 ----------
500- weights_ : array, shape (n_components,)
521+ weights_ : array-like , shape (n_components,)
501522 The weights of each mixture components.
502523
503- means_ : array, shape (n_components, n_features)
524+ means_ : array-like , shape (n_components, n_features)
504525 The mean of each mixture component.
505526
506- covariances_ : array
527+ covariances_ : array-like
507528 The covariance of each mixture component.
508529 The shape depends on `covariance_type`::
509530 (n_components,) if 'spherical',
510531 (n_features, n_features) if 'tied',
511532 (n_components, n_features) if 'diag',
512533 (n_components, n_features, n_features) if 'full'
513534
514- precisions_ : array
535+ precisions_ : array-like
515536 The precision matrices for each component in the mixture. A precision
516537 matrix is the inverse of a covariance matrix. A covariance matrix is
517538 symmetric positive definite so the mixture of Gaussian can be
@@ -524,7 +545,7 @@ class GaussianMixture(BaseMixture):
524545 (n_components, n_features) if 'diag',
525546 (n_components, n_features, n_features) if 'full'
526547
527- precisions_cholesky_ : array
548+ precisions_cholesky_ : array-like
528549 The cholesky decomposition of the precision matrices of each mixture
529550 component. A precision matrix is the inverse of a covariance matrix.
530551 A covariance matrix is symmetric positive definite so the mixture of
@@ -594,7 +615,7 @@ def _initialize(self, X, resp):
594615 """
595616 n_samples , _ = X .shape
596617
597- weights , means , precisions_cholesky = _estimate_gaussian_parameters (
618+ weights , means , covariances = _estimate_gaussian_parameters (
598619 X , resp , self .reg_covar , self .covariance_type )
599620 weights /= n_samples
600621
@@ -603,7 +624,9 @@ def _initialize(self, X, resp):
603624 self .means_ = means if self .means_init is None else self .means_init
604625
605626 if self .precisions_init is None :
606- self .precisions_cholesky_ = precisions_cholesky
627+ self .covariances_ = covariances
628+ self .precisions_cholesky_ = _compute_precision_cholesky (
629+ covariances , self .covariance_type )
607630 elif self .covariance_type is 'full' :
608631 self .precisions_cholesky_ = np .array (
609632 [linalg .cholesky (prec_init , lower = True )
@@ -619,10 +642,13 @@ def _e_step(self, X):
619642 return np .mean (log_prob_norm ), np .exp (log_resp )
620643
621644 def _m_step (self , X , resp ):
622- self .weights_ , self .means_ , self .precisions_cholesky_ = (
645+ n_samples , _ = X .shape
646+ self .weights_ , self .means_ , self .covariances_ = (
623647 _estimate_gaussian_parameters (X , resp , self .reg_covar ,
624648 self .covariance_type ))
625- self .weights_ /= X .shape [0 ]
649+ self .weights_ /= n_samples
650+ self .precisions_cholesky_ = _compute_precision_cholesky (
651+ self .covariances_ , self .covariance_type )
626652
627653 def _estimate_log_prob (self , X ):
628654 return {"full" : _estimate_log_gaussian_prob_full ,
@@ -649,22 +675,14 @@ def _set_parameters(self, params):
649675
650676 if self .covariance_type is 'full' :
651677 self .precisions_ = np .empty (self .precisions_cholesky_ .shape )
652- self .covariances_ = np .empty (self .precisions_cholesky_ .shape )
653678 for k , prec_chol in enumerate (self .precisions_cholesky_ ):
654679 self .precisions_ [k ] = np .dot (prec_chol , prec_chol .T )
655- cov_chol = linalg .solve_triangular (prec_chol ,
656- np .eye (n_features ))
657- self .covariances_ [k ] = np .dot (cov_chol .T , cov_chol )
658680
659681 elif self .covariance_type is 'tied' :
660682 self .precisions_ = np .dot (self .precisions_cholesky_ ,
661683 self .precisions_cholesky_ .T )
662- cov_chol = linalg .solve_triangular (self .precisions_cholesky_ ,
663- np .eye (n_features ))
664- self .covariances_ = np .dot (cov_chol .T , cov_chol )
665684 else :
666685 self .precisions_ = self .precisions_cholesky_ ** 2
667- self .covariances_ = 1. / self .precisions_
668686
669687 def _n_parameters (self ):
670688 """Return the number of free parameters in the model."""
0 commit comments