Skip to content

Commit c0d1d88

Browse files
committed
FIX remove dtype from covertype, add fetch_covtype to init, add missing docstrings.
1 parent 1c60618 commit c0d1d88

File tree

2 files changed

+13
-5
lines changed

2 files changed

+13
-5
lines changed

sklearn/datasets/__init__.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -59,6 +59,7 @@
5959
'fetch_olivetti_faces',
6060
'fetch_species_distributions',
6161
'fetch_california_housing',
62+
'fetch_covtype',
6263
'get_data_home',
6364
'load_20newsgroups',
6465
'load_boston',

sklearn/datasets/covtype.py

Lines changed: 12 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -41,12 +41,19 @@ def fetch_covtype(data_home=None, download_if_missing=True,
4141
Specify another download and cache folder for the datasets. By default
4242
all scikit-learn data is stored in '~/scikit_learn_data' subfolders.
4343
44-
download_if_missing: boolean, optional
44+
download_if_missing : boolean, default=True
4545
If False, raise an IOError if the data is not locally available
4646
instead of trying to download the data from the source site.
4747
48-
dtype: dtype specifier, optional
49-
dtype of feature array.
48+
random_state : int, RandomState instance or None, optional (default=None)
49+
Random state for shuffling the dataset.
50+
If int, random_state is the seed used by the random number generator;
51+
If RandomState instance, random_state is the random number generator;
52+
If None, the random number generator is the RandomState instance used
53+
by `np.random`.
54+
55+
shuffle : bool, default=False
56+
Whether to shuffle the dataset.
5057
"""
5158

5259
data_home = get_data_home(data_home=data_home)
@@ -59,7 +66,7 @@ def fetch_covtype(data_home=None, download_if_missing=True,
5966
_mkdirp(covtype_dir)
6067
logger.warn("Downloading %s" % URL)
6168
f = BytesIO(urlopen(URL).read())
62-
Xy = np.genfromtxt(GzipFile(fileobj=f), delimiter=',', dtype=dtype)
69+
Xy = np.genfromtxt(GzipFile(fileobj=f), delimiter=',')
6370

6471
X = Xy[:, :-1]
6572
y = Xy[:, -1].astype(np.int32)
@@ -68,7 +75,7 @@ def fetch_covtype(data_home=None, download_if_missing=True,
6875
joblib.dump(y, targets_path, compress=9)
6976

7077
try:
71-
X, y
78+
X, y
7279
except NameError:
7380
X = joblib.load(samples_path)
7481
y = joblib.load(targets_path)

0 commit comments

Comments
 (0)