@@ -257,6 +257,84 @@ class HierarchicalClusteringIndex : public NNIndex<Distance>
}
+    /**
+     * Chooses the initial centers in a way inspired by Gonzales (by Pierre-Emmanuel Viel):
+     * select the first point of the list as a candidate, then scan the points list. If another
+     * point is further from the current centers than the current candidate, test whether it is a
+     * good center of a local aggregation. If it is, replace the current candidate by this point,
+     * and so on.
+     *
+     * Used with KMeansIndex, which computes center coordinates by averaging the positions of the
+     * points in each cluster, this makes no real difference compared with the previous methods.
+     * But used with the HierarchicalClusteringIndex class, which picks centers among the existing
+     * points instead of computing barycenters, it yields a real improvement.
+     *
+     * Params:
+     *     k = number of centers to choose
+     *     dsindices = indices of the candidate points in the dataset
+     *     indices_length = number of entries in dsindices
+     *     centers = output array receiving the chosen center indices
+     *     centers_length = output number of centers actually chosen
+     */
+    void GroupWiseCenterChooser(int k, int* dsindices, int indices_length, int* centers, int& centers_length)
+    {
+        const float kSpeedUpFactor = 1.3f;
+
+        int n = indices_length;
+
+        DistanceType* closestDistSq = new DistanceType[n];
+
+        // Choose one random center and initialize the distances to the closest center
+        int index = rand_int(n);
+        assert(index >= 0 && index < n);
+        centers[0] = dsindices[index];
+
+        for (int i = 0; i < n; i++) {
+            closestDistSq[i] = distance(dataset[dsindices[i]], dataset[dsindices[index]], dataset.cols);
+        }
+
+        // Choose each remaining center
+        int centerCount;
+        for (centerCount = 1; centerCount < k; centerCount++) {
+
+            double bestNewPot = -1;
+            int bestNewIndex = 0;
+            DistanceType furthest = 0;
+            for (index = 0; index < n; index++) {
+
+                // Only evaluate the potential of points notably further from the
+                // current centers than the current candidate
+                if (closestDistSq[index] > kSpeedUpFactor * (float)furthest) {
+
+                    // Compute the new potential: the sum over all points of the
+                    // distance to their closest center if this point were added
+                    double newPot = 0;
+                    for (int i = 0; i < n; i++) {
+                        newPot += std::min(distance(dataset[dsindices[i]], dataset[dsindices[index]], dataset.cols),
+                                           closestDistSq[i]);
+                    }
+
+                    // Store the best result
+                    if ((bestNewPot < 0) || (newPot <= bestNewPot)) {
+                        bestNewPot = newPot;
+                        bestNewIndex = index;
+                        furthest = closestDistSq[index];
+                    }
+                }
+            }
+
+            // Add the chosen center and update the distances to the closest center
+            centers[centerCount] = dsindices[bestNewIndex];
+            for (int i = 0; i < n; i++) {
+                closestDistSq[i] = std::min(distance(dataset[dsindices[i]], dataset[dsindices[bestNewIndex]], dataset.cols),
+                                            closestDistSq[i]);
+            }
+        }
+
+        centers_length = centerCount;
+
+        delete[] closestDistSq;
+    }
+
public:
@@ -290,6 +368,9 @@ class HierarchicalClusteringIndex : public NNIndex<Distance>
        else if (centers_init_==FLANN_CENTERS_KMEANSPP) {
            chooseCenters = &HierarchicalClusteringIndex::chooseCentersKMeanspp;
        }
+       else if (centers_init_==FLANN_CENTERS_GROUPWISE) {
+           chooseCenters = &HierarchicalClusteringIndex::GroupWiseCenterChooser;
+       }
        else {
            throw FLANNException("Unknown algorithm for choosing initial centers.");
        }
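
For reference, a minimal usage sketch of selecting the new chooser through the public flann C++ API. It assumes a FLANN_CENTERS_GROUPWISE value has been added to the flann_centers_init_t enum (that change is not part of these hunks), and the dataset shown is a hypothetical placeholder:

    #include <flann/flann.hpp>
    #include <vector>

    int main()
    {
        // Hypothetical toy dataset: 1000 points in 8 dimensions.
        std::vector<float> buf(1000 * 8, 0.0f);
        flann::Matrix<float> data(&buf[0], 1000, 8);

        // HierarchicalClusteringIndexParams(branching, centers_init, trees, leaf_max_size):
        // pick the group-wise chooser for selecting cluster centers during tree construction.
        flann::HierarchicalClusteringIndexParams params(32, flann::FLANN_CENTERS_GROUPWISE, 4, 100);

        flann::Index<flann::L2<float> > index(data, params);
        index.buildIndex();
        return 0;
    }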