Skip to content

Commit 9c30c92

Browse files
erg, amueller
authored and committed
FIX: sklearn.ensemble.forest: Refactor to remove references to parallelism in predict() functions.
1 parent 704ee89 commit 9c30c92

File tree

1 file changed

+30
-50
lines changed

1 file changed

+30
-50
lines changed

sklearn/ensemble/forest.py

Lines changed: 30 additions & 50 deletions
Original file line number | Diff line number | Diff line change
@@ -112,48 +112,6 @@ def _parallel_build_trees(n_trees, forest, X, y, sample_weight,
112112
return trees
113113

114114

115-
def _parallel_predict_proba(trees, X, n_classes, n_outputs):
116-
"""Private function used to compute a batch of predictions within a job."""
117-
n_samples = X.shape[0]
118-
119-
if n_outputs == 1:
120-
proba = np.zeros((n_samples, n_classes))
121-
122-
for tree in trees:
123-
proba_tree = tree.predict_proba(X)
124-
125-
if n_classes == tree.n_classes_:
126-
proba += proba_tree
127-
128-
else:
129-
for j, c in enumerate(tree.classes_):
130-
proba[:, c] += proba_tree[:, j]
131-
132-
else:
133-
proba = []
134-
135-
for k in xrange(n_outputs):
136-
proba.append(np.zeros((n_samples, n_classes[k])))
137-
138-
for tree in trees:
139-
proba_tree = tree.predict_proba(X)
140-
141-
for k in xrange(n_outputs):
142-
if n_classes[k] == tree.n_classes_[k]:
143-
proba[k] += proba_tree[k]
144-
145-
else:
146-
for j, c in enumerate(tree.classes_[k]):
147-
proba[k][:, c] += proba_tree[k][:, j]
148-
149-
return proba
150-
151-
152-
def _parallel_predict_regression(trees, X):
153-
"""Private function used to compute a batch of predictions within a job."""
154-
return sum(tree.predict(X) for tree in trees)
155-
156-
157115
def _partition_trees(forest):
158116
"""Private function used to partition trees between jobs."""
159117
# Compute the number of jobs
@@ -566,17 +524,40 @@ def predict_proba(self, X):
566524
if getattr(X, "dtype", None) != DTYPE or X.ndim != 2:
567525
X = array2d(X, dtype=DTYPE)
568526

569-
# Running with n_jobs > 1 is slower
570-
proba = _parallel_predict_proba(
571-
self.estimators_,
572-
X,
573-
self.n_classes_,
574-
self.n_outputs_)
527+
n_samples = X.shape[0]
575528

576529
if self.n_outputs_ == 1:
530+
proba = np.zeros((n_samples, self.n_classes_))
531+
532+
for tree in self.estimators_:
533+
proba_tree = tree.predict_proba(X)
534+
535+
if self.n_classes_ == tree.n_classes_:
536+
proba += proba_tree
537+
538+
else:
539+
for j, c in enumerate(tree.classes_):
540+
proba[:, c] += proba_tree[:, j]
541+
577542
proba /= self.n_estimators
578543

579544
else:
545+
proba = []
546+
547+
for k in xrange(self.n_outputs_):
548+
proba.append(np.zeros((n_samples, self.n_classes_[k])))
549+
550+
for tree in self.estimators_:
551+
proba_tree = tree.predict_proba(X)
552+
553+
for k in xrange(self.n_outputs_):
554+
if self.n_classes_[k] == tree.n_classes_[k]:
555+
proba[k] += proba_tree[k]
556+
557+
else:
558+
for j, c in enumerate(tree.classes_[k]):
559+
proba[k][:, c] += proba_tree[k][:, j]
560+
580561
for k in xrange(self.n_outputs_):
581562
proba[k] /= self.n_estimators
582563

@@ -662,9 +643,8 @@ def predict(self, X):
662643
if getattr(X, "dtype", None) != DTYPE or X.ndim != 2:
663644
X = array2d(X, dtype=DTYPE)
664645

665-
y_hat = _parallel_predict_regression(self.estimators_, X)
646+
y_hat = sum(tree.predict(X) for tree in self.estimators_)
666647
y_hat /= self.n_estimators
667-
668648
return y_hat
669649

670650

0 commit comments

Comments
 (0)