#include "hnsw.h"

#include <algorithm>
#include <iostream>
#include <queue>
#include <random>
#include <set>
#include <unordered_map>
#include <unordered_set>
#include <utility>
#include <vector>
using namespace std;
11+
12+ vector<int > HNSWGraph::searchLayer (Item& q, int ep, int ef, int lc) {
13+ set<pair<double , int >> candidates;
14+ set<pair<double , int >> nearestNeighbors;
15+ unordered_set<int > isVisited;
16+
17+ double td = q.dist (items[ep]);
18+ candidates.insert (make_pair (td, ep));
19+ nearestNeighbors.insert (make_pair (td, ep));
20+ isVisited.insert (ep);
21+ while (!candidates.empty ()) {
22+ auto ci = candidates.begin (); candidates.erase (candidates.begin ());
23+ int nid = ci->second ;
24+ auto fi = nearestNeighbors.end (); fi--;
25+ if (ci->first > fi->first ) break ;
26+ for (int ed: layerEdgeLists[lc][nid]) {
27+ if (isVisited.find (ed) != isVisited.end ()) continue ;
28+ fi = nearestNeighbors.end (); fi--;
29+ isVisited.insert (ed);
30+ td = q.dist (items[ed]);
31+ if ((td < fi->first ) || nearestNeighbors.size () < ef) {
32+ candidates.insert (make_pair (td, ed));
33+ nearestNeighbors.insert (make_pair (td, ed));
34+ if (nearestNeighbors.size () > ef) nearestNeighbors.erase (fi);
35+ }
36+ }
37+ }
38+ vector<int > results;
39+ for (auto &p: nearestNeighbors) results.push_back (p.second );
40+ return results;
41+ }
42+
43+ vector<int > HNSWGraph::KNNSearch (Item& q, int K) {
44+ int maxLyer = layerEdgeLists.size () - 1 ;
45+ int ep = enterNode;
46+ for (int l = maxLyer; l >= 1 ; l--) ep = searchLayer (q, ep, 1 , l)[0 ];
47+ return searchLayer (q, ep, K, 0 );
48+ }
49+
50+ void HNSWGraph::addEdge (int st, int ed, int lc) {
51+ if (st == ed) return ;
52+ layerEdgeLists[lc][st].push_back (ed);
53+ layerEdgeLists[lc][ed].push_back (st);
54+ }
55+
56+ void HNSWGraph::Insert (Item& q) {
57+ int nid = items.size ();
58+ itemNum++; items.push_back (q);
59+ // sample layer
60+ int maxLyer = layerEdgeLists.size () - 1 ;
61+ int l = 0 ;
62+ uniform_real_distribution<double > distribution (0.0 ,1.0 );
63+ while (l < ml && (1.0 / ml <= distribution (generator))) {
64+ l++;
65+ if (layerEdgeLists.size () <= l) layerEdgeLists.push_back (unordered_map<int , vector<int >>());
66+ }
67+ if (nid == 0 ) {
68+ enterNode = nid;
69+ return ;
70+ }
71+ // search up layer entrance
72+ int ep = enterNode;
73+ for (int i = maxLyer; i > l; i--) ep = searchLayer (q, ep, 1 , i)[0 ];
74+ for (int i = min (l, maxLyer); i >= 0 ; i--) {
75+ int MM = l == 0 ? MMax0 : MMax;
76+ vector<int > neighbors = searchLayer (q, ep, efConstruction, i);
77+ vector<int > selectedNeighbors = vector<int >(neighbors.begin (), neighbors.begin ()+min (int (neighbors.size ()), M));
78+ for (int n: selectedNeighbors) addEdge (n, nid, i);
79+ for (int n: selectedNeighbors) {
80+ if (layerEdgeLists[i][n].size () > MM) {
81+ vector<pair<double , int >> distPairs;
82+ for (int nn: layerEdgeLists[i][n]) distPairs.emplace_back (items[n].dist (items[nn]), nn);
83+ sort (distPairs.begin (), distPairs.end ());
84+ layerEdgeLists[i][n].clear ();
85+ for (int d = 0 ; d < min (int (distPairs.size ()), MM); d++) layerEdgeLists[i][n].push_back (distPairs[d].second );
86+ }
87+ }
88+ ep = selectedNeighbors[0 ];
89+ }
90+ if (l == layerEdgeLists.size () - 1 ) enterNode = nid;
91+ }
0 commit comments