Skip to content

Commit 14dd3cb

Browse files
qinhanmin2014 and jnothman
authored and committed
FIX Support float min_samples and min_cluster_size in OPTICS (scikit-learn#14496)
1 parent 74ae6a0 commit 14dd3cb

File tree

3 files changed

+20
-13
lines changed

3 files changed

+20
-13
lines changed

doc/whats_new/v0.21.rst

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,11 @@ Changelog
3333
threaded when `n_jobs > 1` or `n_jobs = -1`.
3434
:pr:`12955` by :user:`Prabakaran Kumaresshan <nixphix>`.
3535

36+
- |Fix| Fixed a bug in :class:`cluster.OPTICS` where users were unable to pass
37+
float `min_samples` and `min_cluster_size`. :pr:`14496` by
38+
:user:`Fabian Klopfer <someusername1>`
39+
and :user:`Hanmin Qin <qinhanmin2014>`.
40+
3641
:mod:`sklearn.compose`
3742
......................
3843

sklearn/cluster/optics_.py

Lines changed: 9 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -44,7 +44,7 @@ class OPTICS(BaseEstimator, ClusterMixin):
4444
4545
Parameters
4646
----------
47-
min_samples : int > 1 or float between 0 and 1 (default=None)
47+
min_samples : int > 1 or float between 0 and 1 (default=5)
4848
The number of samples in a neighborhood for a point to be considered as
4949
a core point. Also, up and down steep regions can't have more than
5050
``min_samples`` consecutive non-steep points. Expressed as an absolute
@@ -341,7 +341,7 @@ def compute_optics_graph(X, min_samples, max_eps, metric, p, metric_params,
341341
A feature array, or array of distances between samples if
342342
metric='precomputed'
343343
344-
min_samples : int (default=5)
344+
min_samples : int > 1 or float between 0 and 1
345345
The number of samples in a neighborhood for a point to be considered
346346
as a core point. Expressed as an absolute number or a fraction of the
347347
number of samples (rounded to be at least 2).
@@ -437,7 +437,7 @@ def compute_optics_graph(X, min_samples, max_eps, metric, p, metric_params,
437437
n_samples = X.shape[0]
438438
_validate_size(min_samples, n_samples, 'min_samples')
439439
if min_samples <= 1:
440-
min_samples = max(2, min_samples * n_samples)
440+
min_samples = max(2, int(min_samples * n_samples))
441441

442442
# Start all points as 'unprocessed' ##
443443
reachability_ = np.empty(n_samples)
@@ -582,7 +582,7 @@ def cluster_optics_xi(reachability, predecessor, ordering, min_samples,
582582
ordering : array, shape (n_samples,)
583583
OPTICS ordered point indices (`ordering_`)
584584
585-
min_samples : int > 1 or float between 0 and 1 (default=None)
585+
min_samples : int > 1 or float between 0 and 1
586586
The same as the min_samples given to OPTICS. Up and down steep regions
587587
can't have more than ``min_samples`` consecutive non-steep points.
588588
Expressed as an absolute number or a fraction of the number of samples
@@ -619,12 +619,12 @@ def cluster_optics_xi(reachability, predecessor, ordering, min_samples,
619619
n_samples = len(reachability)
620620
_validate_size(min_samples, n_samples, 'min_samples')
621621
if min_samples <= 1:
622-
min_samples = max(2, min_samples * n_samples)
622+
min_samples = max(2, int(min_samples * n_samples))
623623
if min_cluster_size is None:
624624
min_cluster_size = min_samples
625625
_validate_size(min_cluster_size, n_samples, 'min_cluster_size')
626626
if min_cluster_size <= 1:
627-
min_cluster_size = max(2, min_cluster_size * n_samples)
627+
min_cluster_size = max(2, int(min_cluster_size * n_samples))
628628

629629
clusters = _xi_cluster(reachability[ordering], predecessor[ordering],
630630
ordering, xi,
@@ -753,16 +753,12 @@ def _xi_cluster(reachability_plot, predecessor_plot, ordering, xi, min_samples,
753753
reachability plot is defined by the ratio from one point to its
754754
successor being at most 1-xi.
755755
756-
min_samples : int > 1 or float between 0 and 1 (default=None)
756+
min_samples : int > 1
757757
The same as the min_samples given to OPTICS. Up and down steep regions
758758
can't have more than ``min_samples`` consecutive non-steep points.
759-
Expressed as an absolute number or a fraction of the number of samples
760-
(rounded to be at least 2).
761759
762-
min_cluster_size : int > 1 or float between 0 and 1
763-
Minimum number of samples in an OPTICS cluster, expressed as an
764-
absolute number or a fraction of the number of samples (rounded
765-
to be at least 2).
760+
min_cluster_size : int > 1
761+
Minimum number of samples in an OPTICS cluster.
766762
767763
predecessor_correction : bool
768764
Correct clusters based on the calculated predecessors.

sklearn/cluster/tests/test_optics.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -101,6 +101,12 @@ def test_extract_xi():
101101
xi=0.4).fit(X)
102102
assert_array_equal(clust.labels_, expected_labels)
103103

104+
# check float min_samples and min_cluster_size
105+
clust = OPTICS(min_samples=0.1, min_cluster_size=0.08,
106+
max_eps=20, cluster_method='xi',
107+
xi=0.4).fit(X)
108+
assert_array_equal(clust.labels_, expected_labels)
109+
104110
X = np.vstack((C1, C2, C3, C4, C5, np.array([[100, 100]] * 2), C6))
105111
expected_labels = np.r_[[1] * 5, [3] * 5, [2] * 5, [0] * 5, [2] * 5,
106112
-1, -1, [4] * 5]

0 commit comments

Comments
 (0)