@@ -65,12 +65,12 @@ class calls the ``fit`` method of each sub-estimator on random samples
6565
6666
6767def _parallel_build_trees (n_trees , forest , X , y , sample_weight ,
68- sample_mask , X_argsorted , seed , verbose ):
68+ sample_mask , X_argsorted , seeds , verbose ):
6969 """Private function used to build a batch of trees within a job."""
70- random_state = check_random_state (seed )
7170 trees = []
7271
7372 for i in range (n_trees ):
73+ random_state = check_random_state (seeds [i ])
7474 if verbose > 1 :
7575 print ("building tree %d of %d" % (i + 1 , n_trees ))
7676 seed = random_state .randint (MAX_INT )
@@ -356,6 +356,9 @@ def fit(self, X, y, sample_weight=None):
356356 # Assign chunk of trees to jobs
357357 n_jobs , n_trees , _ = _partition_trees (self )
358358
359+ # Precompute the random seeds: one array per job, holding one seed per tree
360+ seeds = [random_state .randint (MAX_INT , size = n_trees [i ]) for i in xrange (len (n_trees ))]
361+
359362 # Parallel loop
360363 all_trees = Parallel (n_jobs = n_jobs , verbose = self .verbose )(
361364 delayed (_parallel_build_trees )(
@@ -366,7 +369,7 @@ def fit(self, X, y, sample_weight=None):
366369 sample_weight ,
367370 sample_mask ,
368371 X_argsorted ,
369- random_state . randint ( MAX_INT ) ,
372+ seeds [ i ] ,
370373 verbose = self .verbose )
371374 for i in range (n_jobs ))
372375
@@ -563,32 +566,17 @@ def predict_proba(self, X):
563566 if getattr (X , "dtype" , None ) != DTYPE or X .ndim != 2 :
564567 X = array2d (X , dtype = DTYPE )
565568
566- # Assign chunk of trees to jobs
567- n_jobs , n_trees , starts = _partition_trees (self )
568-
569- # Parallel loop
570- all_proba = Parallel (n_jobs = n_jobs , verbose = self .verbose )(
571- delayed (_parallel_predict_proba )(
572- self .estimators_ [starts [i ]:starts [i + 1 ]],
569+ # Predict serially over all estimators: running with n_jobs > 1 is slower
570+ proba = _parallel_predict_proba (
571+ self .estimators_ ,
573572 X ,
574573 self .n_classes_ ,
575574 self .n_outputs_ )
576- for i in range (n_jobs ))
577-
578- # Reduce
579- proba = all_proba [0 ]
580575
581576 if self .n_outputs_ == 1 :
582- for j in xrange (1 , len (all_proba )):
583- proba += all_proba [j ]
584-
585577 proba /= self .n_estimators
586578
587579 else :
588- for j in xrange (1 , len (all_proba )):
589- for k in xrange (self .n_outputs_ ):
590- proba [k ] += all_proba [j ][k ]
591-
592580 for k in xrange (self .n_outputs_ ):
593581 proba [k ] /= self .n_estimators
594582
@@ -674,17 +662,8 @@ def predict(self, X):
674662 if getattr (X , "dtype" , None ) != DTYPE or X .ndim != 2 :
675663 X = array2d (X , dtype = DTYPE )
676664
677- # Assign chunk of trees to jobs
678- n_jobs , n_trees , starts = _partition_trees (self )
679-
680- # Parallel loop
681- all_y_hat = Parallel (n_jobs = n_jobs , verbose = self .verbose )(
682- delayed (_parallel_predict_regression )(
683- self .estimators_ [starts [i ]:starts [i + 1 ]], X )
684- for i in range (n_jobs ))
685-
686- # Reduce
687- y_hat = sum (all_y_hat ) / self .n_estimators
665+ y_hat = _parallel_predict_regression (self .estimators_ , X )
666+ y_hat /= self .n_estimators
688667
689668 return y_hat
690669
0 commit comments