Commit eeb6dcc

amueller authored and GaelVaroquaux committed
ENH use random states everywhere, never call np.random.
1 parent 6c42e9d commit eeb6dcc
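
The change this commit applies across the test suite: instead of seeding the process-global RNG with np.random.seed and then drawing from np.random, each module or test builds its own np.random.RandomState instance and draws from that. A minimal sketch of the before/after pattern (a composite, not taken from any single file in the diff):

    import numpy as np

    # Before: seed the process-global RNG; any other code that touches
    # np.random in the meantime changes what the test sees.
    np.random.seed(0)
    X = np.random.randn(50, 100)

    # After: a private generator, unaffected by the rest of the process.
    rnd = np.random.RandomState(0)
    X = rnd.randn(50, 100)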

22 files changed: +100 −95 lines changed


sklearn/cluster/tests/test_hierarchical.py

Lines changed: 12 additions & 11 deletions
@@ -17,9 +17,9 @@ def test_structured_ward_tree():
     """
     Check that we obtain the correct solution for structured ward tree.
     """
-    np.random.seed(0)
+    rnd = np.random.RandomState(0)
     mask = np.ones([10, 10], dtype=np.bool)
-    X = np.random.randn(50, 100)
+    X = rnd.randn(50, 100)
     connectivity = grid_to_graph(*mask.shape)
     children, n_components, n_leaves = ward_tree(X.T, connectivity)
     n_nodes = 2 * X.shape[1] - 1
@@ -30,8 +30,8 @@ def test_unstructured_ward_tree():
     """
     Check that we obtain the correct solution for unstructured ward tree.
     """
-    np.random.seed(0)
-    X = np.random.randn(50, 100)
+    rnd = np.random.RandomState(0)
+    X = rnd.randn(50, 100)
     children, n_nodes, n_leaves = ward_tree(X.T)
     n_nodes = 2 * X.shape[1] - 1
     assert_true(len(children) + n_leaves == n_nodes)
@@ -41,9 +41,9 @@ def test_height_ward_tree():
     """
     Check that the height of ward tree is sorted.
     """
-    np.random.seed(0)
+    rnd = np.random.RandomState(0)
     mask = np.ones([10, 10], dtype=np.bool)
-    X = np.random.randn(50, 100)
+    X = rnd.randn(50, 100)
     connectivity = grid_to_graph(*mask.shape)
     children, n_nodes, n_leaves = ward_tree(X.T, connectivity)
     n_nodes = 2 * X.shape[1] - 1
@@ -54,9 +54,9 @@ def test_ward_clustering():
     """
     Check that we obtain the correct number of clusters with Ward clustering.
     """
-    np.random.seed(0)
+    rnd = np.random.RandomState(0)
     mask = np.ones([10, 10], dtype=np.bool)
-    X = np.random.randn(100, 50)
+    X = rnd.randn(100, 50)
     connectivity = grid_to_graph(*mask.shape)
     clustering = Ward(n_clusters=10, connectivity=connectivity)
     clustering.fit(X)
@@ -67,9 +67,9 @@ def test_ward_agglomeration():
     """
     Check that we obtain the correct solution in a simplistic case
     """
-    np.random.seed(0)
+    rnd = np.random.RandomState(0)
     mask = np.ones([10, 10], dtype=np.bool)
-    X = np.random.randn(50, 100)
+    X = rnd.randn(50, 100)
     connectivity = grid_to_graph(*mask.shape)
     ward = WardAgglomeration(n_clusters=5, connectivity=connectivity)
     ward.fit(X)
@@ -98,10 +98,11 @@ def test_scikit_vs_scipy():
     """
     from scipy.sparse import lil_matrix
     n, p, k = 10, 5, 3
+    rnd = np.random.RandomState(0)

     connectivity = lil_matrix(np.ones((n, n)))
     for i in range(5):
-        X = .1 * np.random.normal(size=(n, p))
+        X = .1 * rnd.normal(size=(n, p))
         X -= 4 * np.arange(n)[:, np.newaxis]
         X -= X.mean(axis=1)[:, np.newaxis]

sklearn/cluster/tests/test_k_means.py

Lines changed: 2 additions & 1 deletion
@@ -43,7 +43,8 @@ def test_square_norms():


 def test_kmeans_dtype():
-    X = np.random.normal(size=(40, 2))
+    rnd = np.random.RandomState(0)
+    X = rnd.normal(size=(40, 2))
     X = (X * 10).astype(np.uint8)
     km = KMeans(n_init=1).fit(X)
     with warnings.catch_warnings(record=True) as w:

sklearn/covariance/tests/test_robust_covariance.py

Lines changed: 2 additions & 2 deletions
@@ -72,8 +72,8 @@ def test_outlier_detection():
     """

     """
-    np.random.RandomState(0)
-    X = np.random.randn(100, 10)
+    rnd = np.random.RandomState(0)
+    X = rnd.randn(100, 10)
     clf = EllipticEnvelope(contamination=0.1)
     clf.fit(X)
     y_pred = clf.predict(X)
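
The hunk above fixes a real bug, not just style: the old code constructed a RandomState but discarded it, so X was still drawn from the unseeded global RNG. A minimal illustration of the difference:

    import numpy as np

    np.random.RandomState(0)          # no-op: the seeded object is discarded
    X_bad = np.random.randn(100, 10)  # still drawn from the unseeded global RNG

    rnd = np.random.RandomState(0)    # bind the seeded generator
    X_good = rnd.randn(100, 10)       # deterministic draws from it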

sklearn/decomposition/tests/test_fastica.py

Lines changed: 2 additions & 2 deletions
@@ -51,7 +51,7 @@ def test_fastica(add_noise=False):
     """ Test the FastICA algorithm on very simple data.
     """
     # scipy.stats uses the global RNG:
-    np.random.seed(0)
+    rng = np.random.RandomState(0)
     n_samples = 1000
     # Generate two sources:
     s1 = (2 * np.sin(np.linspace(0, 100, n_samples)) > 0) - 1
@@ -67,7 +67,7 @@ def test_fastica(add_noise=False):
     m = np.dot(mixing, s)

     if add_noise:
-        m += 0.1 * np.random.randn(2, 1000)
+        m += 0.1 * rng.randn(2, 1000)

     center_and_norm(m)

sklearn/ensemble/tests/test_forest.py

Lines changed: 3 additions & 3 deletions
@@ -30,15 +30,15 @@
 # also load the iris dataset
 # and randomly permute it
 iris = datasets.load_iris()
-np.random.seed([1])
-perm = np.random.permutation(iris.target.size)
+rng = np.random.RandomState(0)
+perm = rng.permutation(iris.target.size)
 iris.data = iris.data[perm]
 iris.target = iris.target[perm]

 # also load the boston dataset
 # and randomly permute it
 boston = datasets.load_boston()
-perm = np.random.permutation(boston.target.size)
+perm = rng.permutation(boston.target.size)
 boston.data = boston.data[perm]
 boston.target = boston.target[perm]

sklearn/ensemble/tests/test_gradient_boosting.py

Lines changed: 6 additions & 5 deletions
@@ -20,17 +20,18 @@
 T = [[-1, -1], [2, 2], [3, 2]]
 true_result = [-1, 1, 1]

+rng = np.random.RandomState(0)
 # also load the boston dataset
 # and randomly permute it
 boston = datasets.load_boston()
-perm = np.random.permutation(boston.target.size)
+perm = rng.permutation(boston.target.size)
 boston.data = boston.data[perm]
 boston.target = boston.target[perm]

 # also load the iris dataset
 # and randomly permute it
 iris = datasets.load_iris()
-perm = np.random.permutation(iris.target.size)
+perm = rng.permutation(iris.target.size)
 iris.data = iris.data[perm]
 iris.target = iris.target[perm]

@@ -248,7 +249,7 @@ def test_check_inputs_predict():
     assert_raises(ValueError, clf.predict, x)

     clf = GradientBoostingRegressor(n_estimators=100, random_state=1)
-    clf.fit(X, np.random.rand(len(X)))
+    clf.fit(X, rng.rand(len(X)))

     x = np.array([1.0, 2.0])[:, np.newaxis]
     assert_raises(ValueError, clf.predict, x)
@@ -312,6 +313,6 @@ def test_degenerate_targets():

     clf = GradientBoostingRegressor(n_estimators=100, random_state=1)
     clf.fit(X, np.ones(len(X)))
-    clf.predict(np.random.rand(2))
+    clf.predict(rng.rand(2))
     assert_array_equal(np.ones((1,), dtype=np.float64),
-                       clf.predict(np.random.rand(2)))
+                       clf.predict(rng.rand(2)))
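
One subtlety in this file: rng is consumed at import time (the permutations) and again inside test bodies (rng.rand), so the values a given test sees depend on how many draws happened before it. A per-test generator avoids that coupling; a hedged sketch of the alternative (test names are illustrative, not from the diff):

    import numpy as np

    rng = np.random.RandomState(0)  # module-level, shared by all tests

    def test_shared():
        x = rng.rand(2)  # depends on every draw made before this call

    def test_isolated():
        local_rng = np.random.RandomState(0)   # fresh generator per test
        y = local_rng.rand(2)  # identical on every run, in any test order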

sklearn/feature_selection/tests/test_feature_select.py

Lines changed: 5 additions & 4 deletions
@@ -19,8 +19,9 @@

 def test_f_oneway_vs_scipy_stats():
     """Test that our f_oneway gives the same result as scipy.stats"""
-    X1 = np.random.randn(10, 3)
-    X2 = 1 + np.random.randn(10, 3)
+    rng = np.random.RandomState(0)
+    X1 = rng.randn(10, 3)
+    X2 = 1 + rng.randn(10, 3)
     f, pv = stats.f_oneway(X1, X2)
     f2, pv2 = f_oneway(X1, X2)
     assert_true(np.allclose(f, f2))
@@ -67,8 +68,8 @@ def test_f_regression_input_dtype():
     Test whether f_regression returns the same value
     for any numeric data_type
     """
-
-    X = np.random.rand(10, 20)
+    rng = np.random.RandomState(0)
+    X = rng.rand(10, 20)
     y = np.arange(10).astype(np.int)

     F1, pv1 = f_regression(X, y)

sklearn/hmm.py

Lines changed: 3 additions & 3 deletions
@@ -135,7 +135,7 @@ def __init__(self, n_components=1, startprob=None, transmat=None,
             self._algorithm = algorithm
         else:
             self._algorithm = "viterbi"
-        self.random_state = random_state
+        self.random_state = check_random_state(random_state)

     def eval(self, obs):
         """Compute the log probability under the model and compute posteriors
@@ -900,8 +900,8 @@ def _init(self, obs, params='ste'):
         super(MultinomialHMM, self)._init(obs, params=params)

         if 'e' in params:
-            emissionprob = normalize(np.random.rand(self.n_components,
-                                                    self.n_symbols), 1)
+            emissionprob = normalize(self.random_state.rand(self.n_components,
+                                                            self.n_symbols), 1)
             self.emissionprob_ = emissionprob

     def _initialize_sufficient_statistics(self):
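
The __init__ hunk relies on sklearn.utils.check_random_state to turn whatever the caller passed into a usable generator. A simplified sketch of its behavior (the real helper lives in sklearn/utils; this is an approximation, not the exact source):

    import numbers
    import numpy as np

    def check_random_state(seed):
        """Simplified sketch of sklearn.utils.check_random_state."""
        if seed is None or seed is np.random:
            return np.random.mtrand._rand       # the global RandomState
        if isinstance(seed, (numbers.Integral, np.integer)):
            return np.random.RandomState(seed)  # fresh, seeded generator
        if isinstance(seed, np.random.RandomState):
            return seed                         # pass through unchanged
        raise ValueError('%r cannot be used to seed a RandomState' % seed)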

sklearn/linear_model/tests/test_logistic.py

Lines changed: 4 additions & 4 deletions
@@ -77,7 +77,8 @@ def test_predict_iris():


 def test_inconsistent_input():
     """Test that an exception is raised on inconsistent input"""
-    X_ = np.random.random((5, 10))
+    rng = np.random.RandomState(0)
+    X_ = rng.random_sample((5, 10))
     y_ = np.ones(X_.shape[0])

     clf = logistic.LogisticRegression()
@@ -87,9 +88,8 @@ def test_inconsistent_input():
     assert_raises(ValueError, clf.fit, X, y_wrong)

     # Wrong dimensions for test data
-    assert_raises(ValueError,
-                  clf.fit(X_, y_).predict,
-                  np.random.random((3, 12)))
+    assert_raises(ValueError, clf.fit(X_, y_).predict,
+                  rng.random_sample((3, 12)))


 @raises(ValueError)
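
The rename from np.random.random to rng.random_sample above is not cosmetic: np.random.random is a module-level alias for random_sample, and (at least in the NumPy of this era) RandomState instances only expose the random_sample spelling. A quick check:

    import numpy as np

    rng = np.random.RandomState(0)
    a = rng.random_sample((3, 12))  # the RandomState method
    b = np.random.random((3, 12))   # module-level alias for random_sample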

sklearn/linear_model/tests/test_ridge.py

Lines changed: 12 additions & 15 deletions
@@ -17,11 +17,11 @@

 from sklearn.cross_validation import KFold

+rng = np.random.RandomState(0)
 diabetes = datasets.load_diabetes()
-
 X_diabetes, y_diabetes = diabetes.data, diabetes.target
 ind = np.arange(X_diabetes.shape[0])
-np.random.shuffle(ind)
+rng.shuffle(ind)
 ind = ind[:200]
 X_diabetes, y_diabetes = X_diabetes[ind], y_diabetes[ind]

@@ -30,8 +30,6 @@
 X_iris = sp.csr_matrix(iris.data)
 y_iris = iris.target

-np.random.seed(0)
-
 DENSE_FILTER = lambda X: X
 SPARSE_FILTER = lambda X: sp.csr_matrix(X)

@@ -46,8 +44,8 @@ def test_ridge():

     # With more samples than features
     n_samples, n_features = 6, 5
-    y = np.random.randn(n_samples)
-    X = np.random.randn(n_samples, n_features)
+    y = rng.randn(n_samples)
+    X = rng.randn(n_samples, n_features)

     ridge = Ridge(alpha=alpha)
     ridge.fit(X, y)
@@ -59,8 +57,8 @@ def test_ridge():

     # With more features than samples
     n_samples, n_features = 5, 10
-    y = np.random.randn(n_samples)
-    X = np.random.randn(n_samples, n_features)
+    y = rng.randn(n_samples)
+    X = rng.randn(n_samples, n_features)
     ridge = Ridge(alpha=alpha)
     ridge.fit(X, y)
     assert_greater(ridge.score(X, y), .9)
@@ -73,8 +71,8 @@ def test_ridge_shapes():
     """Test shape of coef_ and intercept_
     """
     n_samples, n_features = 5, 10
-    X = np.random.randn(n_samples, n_features)
-    y = np.random.randn(n_samples)
+    X = rng.randn(n_samples, n_features)
+    y = rng.randn(n_samples)
     Y1 = y[:, np.newaxis]
     Y = np.c_[y, 1 + y]

@@ -97,8 +95,8 @@ def test_ridge_intercept():
     """Test intercept with multiple targets GH issue #708
     """
     n_samples, n_features = 5, 10
-    X = np.random.randn(n_samples, n_features)
-    y = np.random.randn(n_samples)
+    X = rng.randn(n_samples, n_features)
+    y = rng.randn(n_samples)
     Y = np.c_[y, 1. + y]

     ridge = Ridge()
@@ -140,9 +138,8 @@ def test_ridge_vs_lstsq():

     # we need more samples than features
     n_samples, n_features = 5, 4
-    np.random.seed(0)
-    y = np.random.randn(n_samples)
-    X = np.random.randn(n_samples, n_features)
+    y = rng.randn(n_samples)
+    X = rng.randn(n_samples, n_features)

     ridge = Ridge(alpha=0., fit_intercept=False)
     ols = LinearRegression(fit_intercept=False)
