 import warnings
 import numpy as np
 from scipy import linalg
+from time import time
 
 from ..base import BaseEstimator
 from ..utils import check_random_state, check_array
@@ -156,6 +157,11 @@ class GMM(BaseEstimator):
         process. Can contain any combination of 'w' for weights,
         'm' for means, and 'c' for covars. Defaults to 'wmc'.
 
+    verbose : int, default: 0
+        Enable verbose output. If 1 then it always prints the current
+        initialization and iteration step. If greater than 1 then it
+        additionally prints the change and the time needed for each step.
+
     Attributes
     ----------
     weights_ : array, shape (`n_components`,)
@@ -203,7 +209,7 @@ class GMM(BaseEstimator):
     >>> g.fit(obs)  # doctest: +NORMALIZE_WHITESPACE
     GMM(covariance_type='diag', init_params='wmc', min_covar=0.001,
         n_components=2, n_init=1, n_iter=100, params='wmc',
-        random_state=None, thresh=None, tol=0.001)
+        random_state=None, thresh=None, tol=0.001, verbose=0)
     >>> np.round(g.weights_, 2)
     array([ 0.75, 0.25])
     >>> np.round(g.means_, 2)
@@ -221,15 +227,16 @@ class GMM(BaseEstimator):
     >>> g.fit(20 * [[0]] + 20 * [[10]])  # doctest: +NORMALIZE_WHITESPACE
     GMM(covariance_type='diag', init_params='wmc', min_covar=0.001,
         n_components=2, n_init=1, n_iter=100, params='wmc',
-        random_state=None, thresh=None, tol=0.001)
+        random_state=None, thresh=None, tol=0.001, verbose=0)
     >>> np.round(g.weights_, 2)
     array([ 0.5, 0.5])
 
     """
 
     def __init__(self, n_components=1, covariance_type='diag',
                  random_state=None, thresh=None, tol=1e-3, min_covar=1e-3,
-                 n_iter=100, n_init=1, params='wmc', init_params='wmc'):
+                 n_iter=100, n_init=1, params='wmc', init_params='wmc',
+                 verbose=0):
         if thresh is not None:
             warnings.warn("'thresh' has been replaced by 'tol' in 0.16 "
                           "and will be removed in 0.18.",
@@ -244,6 +251,7 @@ def __init__(self, n_components=1, covariance_type='diag',
         self.n_init = n_init
         self.params = params
         self.init_params = init_params
+        self.verbose = verbose
 
         if covariance_type not in ['spherical', 'tied', 'diag', 'full']:
             raise ValueError('Invalid value for covariance_type: %s' %
@@ -458,15 +466,26 @@ def _fit(self, X, y=None, do_prediction=False):
 
         max_log_prob = -np.infty
 
-        for _ in range(self.n_init):
+        if self.verbose > 0:
+            print('Expectation-maximization algorithm started.')
+
+        for init in range(self.n_init):
+            if self.verbose > 0:
+                print('Initialization ' + str(init + 1))
+                start_init_time = time()
+
             if 'm' in self.init_params or not hasattr(self, 'means_'):
                 self.means_ = cluster.KMeans(
                     n_clusters=self.n_components,
                     random_state=self.random_state).fit(X).cluster_centers_
+                if self.verbose > 1:
+                    print('\tMeans have been initialized.')
 
             if 'w' in self.init_params or not hasattr(self, 'weights_'):
                 self.weights_ = np.tile(1.0 / self.n_components,
                                         self.n_components)
+                if self.verbose > 1:
+                    print('\tWeights have been initialized.')
 
             if 'c' in self.init_params or not hasattr(self, 'covars_'):
                 cv = np.cov(X.T) + self.min_covar * np.eye(X.shape[1])
@@ -475,6 +494,8 @@ def _fit(self, X, y=None, do_prediction=False):
                 self.covars_ = \
                     distribute_covar_matrix_to_match_covariance_type(
                         cv, self.covariance_type, self.n_components)
+                if self.verbose > 1:
+                    print('\tCovariance matrices have been initialized.')
 
             # EM algorithms
             current_log_likelihood = None
@@ -486,23 +507,33 @@ def _fit(self, X, y=None, do_prediction=False):
                    else self.thresh / float(X.shape[0]))
 
             for i in range(self.n_iter):
+                if self.verbose > 0:
+                    print('\tEM iteration ' + str(i + 1))
+                    start_iter_time = time()
                 prev_log_likelihood = current_log_likelihood
                 # Expectation step
                 log_likelihoods, responsibilities = self.score_samples(X)
                 current_log_likelihood = log_likelihoods.mean()
 
                 # Check for convergence.
-                # (should compare to self.tol when dreprecated 'thresh' is
+                # (should compare to self.tol when deprecated 'thresh' is
                 # removed in v0.18)
                 if prev_log_likelihood is not None:
                     change = abs(current_log_likelihood - prev_log_likelihood)
+                    if self.verbose > 1:
+                        print('\t\tChange: ' + str(change))
                     if change < tol:
                         self.converged_ = True
+                        if self.verbose > 0:
+                            print('\t\tEM algorithm converged.')
                         break
 
                 # Maximization step
                 self._do_mstep(X, responsibilities, self.params,
                                self.min_covar)
+                if self.verbose > 1:
+                    print('\t\tEM iteration ' + str(i + 1) + ' took {0:.5f}s'.format(
+                        time() - start_iter_time))
 
             # if the results are better, keep it
             if self.n_iter:
@@ -511,6 +542,13 @@ def _fit(self, X, y=None, do_prediction=False):
                     best_params = {'weights': self.weights_,
                                    'means': self.means_,
                                    'covars': self.covars_}
+                    if self.verbose > 1:
+                        print('\tBetter parameters were found.')
+
+            if self.verbose > 1:
+                print('\tInitialization ' + str(init + 1) + ' took {0:.5f}s'.format(
+                    time() - start_init_time))
+
         # check the existence of an init param that was not subject to
         # likelihood computation issue.
         if np.isneginf(max_log_prob) and self.n_iter:
@@ -661,7 +699,8 @@ def _log_multivariate_normal_density_full(X, means, covars, min_covar=1.e-7):
                 cv_chol = linalg.cholesky(cv + min_covar * np.eye(n_dim),
                                           lower=True)
         except linalg.LinAlgError:
-            raise ValueError("'covars' must be symmetric, positive-definite")
+            raise ValueError("'covars' must be symmetric, "
+                             "positive-definite")
 
     cv_log_det = 2 * np.sum(np.log(np.diagonal(cv_chol)))
     cv_sol = linalg.solve_triangular(cv_chol, (X - mu).T, lower=True).T
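
For quick reference, a usage sketch of the new verbose option (not part of the patch; it assumes the scikit-learn version this change targets, where GMM is importable from sklearn.mixture, and uses arbitrary toy data):

    >>> import numpy as np
    >>> from sklearn.mixture import GMM
    >>> np.random.seed(0)
    >>> # two well-separated 1-D blobs, so EM converges in a few iterations
    >>> obs = np.concatenate((np.random.randn(100, 1),
    ...                       10 + np.random.randn(300, 1)))
    >>> g = GMM(n_components=2, n_init=2, verbose=2)
    >>> g = g.fit(obs)  # prints initializations, iterations, changes and timings

With verbose=1 only the initialization and iteration counters (and a convergence message) are printed; verbose=0, the default, keeps the fit silent.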