Skip to content

Commit 8748802

Browse files
authored
Merge pull request #3 from CSCI-739/initial-draft
Initial draft - 2
2 parents bb36184 + fe2cff6 commit 8748802

File tree

10 files changed

+222
-20
lines changed

10 files changed

+222
-20
lines changed

CPU/vector_initialize.h

Lines changed: 66 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,66 @@
1+
#ifndef ITEMS_H
2+
#define ITEMS_H
3+
4+
#include <vector>
5+
#include <cmath>
6+
#include <omp.h>
7+
8+
/**
 * A dense vector of doubles with the similarity/distance helpers used by the
 * brute-force and HNSW search code.
 *
 * All comparison helpers are const and take a const reference, so they can be
 * called on (and with) const items. When the two vectors differ in length only
 * the common prefix is compared, which avoids reading past the shorter one.
 */
struct Item {
    // Takes ownership of the caller's copy by swapping buffers instead of
    // copying the elements a second time.
    Item(std::vector<double> _values) : values() { values.swap(_values); }

    std::vector<double> values;

    /**
     * Cosine of the angle between this vector and `other`.
     * Returns 0.0 when either vector has zero magnitude.
     */
    double cosine_similarity(const Item& other) const {
        const size_t count = shared_length(other);
        double dot_product = 0.0;
        double magnitude_this = 0.0;
        double magnitude_other = 0.0;

        for (size_t i = 0; i < count; ++i) {
            const double a = values[i];
            const double b = other.values[i];
            dot_product += a * b;
            magnitude_this += a * a;
            magnitude_other += b * b;
        }

        magnitude_this = std::sqrt(magnitude_this);
        magnitude_other = std::sqrt(magnitude_other);

        if (magnitude_this == 0 || magnitude_other == 0) {
            return 0.0;
        }

        return dot_product / (magnitude_this * magnitude_other);
    }

    /**
     * Squared Euclidean distance to `other` (no square root is taken).
     */
    double dist(const Item& other) const {
        const size_t count = shared_length(other);
        double result = 0.0;
        for (size_t i = 0; i < count; ++i) {
            const double diff = values[i] - other.values[i];
            result += diff * diff;
        }
        return result;
    }

    /**
     * Scales the vector in place to unit length.
     * A zero vector is left unchanged.
     */
    void normalize() {
        double sum = 0.0;
        for (double val : values) {
            sum += val * val;
        }

        const double magnitude = std::sqrt(sum);
        if (magnitude > 0.0) {
            for (double& val : values) {
                val /= magnitude;
            }
        }
    }

    /**
     * Plain dot product with `other`. This equals the cosine similarity only
     * when both vectors have already been passed through normalize().
     */
    double cosine_similarity_with_normalisation(const Item& other) const {
        const size_t count = shared_length(other);
        double dot_product = 0.0;
        for (size_t i = 0; i < count; ++i) {
            dot_product += values[i] * other.values[i];
        }

        return dot_product;
    }

private:
    // Number of leading components present in both vectors.
    size_t shared_length(const Item& other) const {
        return values.size() < other.values.size() ? values.size() : other.values.size();
    }
};
65+
66+
#endif

GPU/ann.cu

Lines changed: 112 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,112 @@
1+
#include <cstring>
2+
#include <iostream>
3+
#include <memory>
4+
#include <string>
5+
#include <vector>
6+
#include <cmath>
7+
#include <cuda_runtime.h>
8+
9+
/**
 * For each query, finds a base point whose cosine similarity with the query
 * exceeds target_similarity, preferring the most similar one.
 *
 * Launch layout: blockIdx.y selects the query; threads along x each handle one
 * base point (threadIdx.x + blockIdx.x * blockDim.x). Any blockDim.x works.
 * Dynamic shared memory required: blockDim.x * (sizeof(float) + sizeof(int)).
 *
 * points      row-major [n x dimensions] base vectors.
 * queries     row-major [num_queries x dimensions] query vectors.
 * max_cosine  per-query running maximum; must be initialised by the caller to
 *             a value below every candidate (the host code uses -1.0f).
 * max_index   per-query result; written only when some point exceeds
 *             target_similarity, so initialise it to a sentinel if "no match"
 *             must be detectable.
 *
 * Each block reduces its candidates to one best (value, index) pair, and only
 * thread 0 publishes it. max_cosine is raised atomically, so it ends as the
 * true maximum over all candidates. max_index is written by each block that
 * raised max_cosine; if two blocks raise it concurrently the index may belong
 * to the earlier raise, but it always names a point above the threshold.
 */
__global__ void findNearestNeighborCosine(float *points, float *queries, float *max_cosine, int *max_index, int n, int num_queries, int dimensions, float target_similarity) {
    extern __shared__ char shared[];
    float *s_cosine = (float*)shared;
    int *s_index = (int*)(shared + blockDim.x * sizeof(float));

    const unsigned int lane = threadIdx.x;
    const int tid = threadIdx.x + blockIdx.x * blockDim.x;
    const int qid = blockIdx.y;

    float cosine_similarity = 0.0f;
    int candidate = -1;  // -1 marks "this thread has no point above the threshold"

    if (tid < n && qid < num_queries) {
        // size_t offsets: tid * dimensions can exceed INT_MAX for large inputs.
        const size_t point_base = (size_t)tid * (size_t)dimensions;
        const size_t query_base = (size_t)qid * (size_t)dimensions;

        float dot_product = 0.0f, query_magnitude = 0.0f, point_magnitude = 0.0f;
        for (int d = 0; d < dimensions; ++d) {
            const float q = queries[query_base + d];
            const float p = points[point_base + d];
            dot_product += q * p;
            query_magnitude += q * q;
            point_magnitude += p * p;
        }
        query_magnitude = sqrtf(query_magnitude);
        point_magnitude = sqrtf(point_magnitude);

        // Zero-length vectors keep similarity 0 instead of dividing by zero.
        if (query_magnitude > 0.0f && point_magnitude > 0.0f) {
            cosine_similarity = dot_product / (query_magnitude * point_magnitude);
        }
        if (cosine_similarity > target_similarity) {
            candidate = tid;
        }
    }

    // Every thread (including out-of-range ones) stores a slot and reaches the
    // barriers below, so __syncthreads() is never executed divergently.
    s_cosine[lane] = cosine_similarity;
    s_index[lane] = candidate;
    __syncthreads();

    // Tree reduction that also handles block sizes that are not powers of two:
    // the upper part of the active range is folded onto the lower part.
    for (unsigned int active = blockDim.x; active > 1; ) {
        const unsigned int half = (active + 1) / 2;
        if (lane + half < active) {
            const int other_index = s_index[lane + half];
            const float other_cosine = s_cosine[lane + half];
            const int my_index = s_index[lane];
            const float my_cosine = s_cosine[lane];
            // Prefer a valid candidate, then higher similarity, then lower
            // index so the block result is deterministic.
            const bool take_other = other_index >= 0 &&
                (my_index < 0 || other_cosine > my_cosine ||
                 (other_cosine == my_cosine && other_index < my_index));
            if (take_other) {
                s_cosine[lane] = other_cosine;
                s_index[lane] = other_index;
            }
        }
        __syncthreads();
        active = half;
    }

    if (lane == 0 && s_index[0] >= 0) {
        const float best = s_cosine[0];
        // Float atomic-max built from atomicCAS on the value's bit pattern.
        int *cosine_bits = reinterpret_cast<int*>(&max_cosine[qid]);
        int observed = *cosine_bits;
        while (best > __int_as_float(observed)) {
            const int previous = atomicCAS(cosine_bits, observed, __float_as_int(best));
            if (previous == observed) {
                max_index[qid] = s_index[0];
                break;
            }
            observed = previous;
        }
    }
}
41+
42+
43+
/**
 * Reads a row x col matrix of whitespace-separated floats from `fin`.
 *
 * The result always has `row` rows of `col` entries (non-positive counts are
 * treated as 0), so callers may index it without further checks. If `fin` is
 * null or the stream runs out of parsable numbers, a message is written to
 * std::cerr, reading stops, and the entries not yet read stay 0.0f.
 */
std::vector<std::vector<float>> read_matrix(FILE* fin, int row, int col) {
    const size_t rows = row > 0 ? static_cast<size_t>(row) : 0;
    const size_t cols = col > 0 ? static_cast<size_t>(col) : 0;
    std::vector<std::vector<float>> ret(rows, std::vector<float>(cols, 0.0f));

    if (fin == nullptr) {
        std::cerr << "read_matrix: input stream is null" << std::endl;
        return ret;
    }

    for (size_t i = 0; i < rows; ++i) {
        for (size_t j = 0; j < cols; ++j) {
            float value = 0.0f;
            // fscanf returns the number of converted items; anything other
            // than 1 means EOF or a token that is not a number.
            if (fscanf(fin, "%f", &value) != 1) {
                std::cerr << "read_matrix: could not read element (" << i << ", " << j
                          << "); remaining entries left as 0" << std::endl;
                return ret;
            }
            ret[i][j] = value;
        }
    }
    return ret;
}
56+
57+
/**
 * Usage: <program> <input_file> <output_file>
 *
 * The input starts with three integers "d n m" (dimensions, number of base
 * points, number of queries), followed by the n base vectors and then the m
 * query vectors. For every query one line is written to the output file: the
 * index reported by findNearestNeighborCosine, or -1 when no base point
 * exceeded the similarity threshold.
 *
 * Returns 0 on success and 1 on any argument, file, parse, or CUDA error.
 */
int main(int argc, char* argv[]) {
    if (argc < 3) {
        std::cerr << "usage: " << (argc > 0 ? argv[0] : "ann")
                  << " <input_file> <output_file>" << std::endl;
        return 1;
    }

    FILE* fin = fopen(argv[1], "r");
    if (fin == nullptr) {
        std::cerr << "cannot open input file: " << argv[1] << std::endl;
        return 1;
    }
    FILE* fout = fopen(argv[2], "w");
    if (fout == nullptr) {
        std::cerr << "cannot open output file: " << argv[2] << std::endl;
        fclose(fin);
        return 1;
    }

    int n = 0, d = 0, m = 0;
    float target_similarity = 0.9f;
    if (fscanf(fin, "%d%d%d", &d, &n, &m) != 3 || d < 0 || n < 0 || m < 0) {
        std::cerr << "invalid header: expected three non-negative integers \"d n m\"" << std::endl;
        fclose(fin);
        fclose(fout);
        return 1;
    }

    std::vector<std::vector<float>> base = read_matrix(fin, n, d);
    std::vector<std::vector<float>> query = read_matrix(fin, m, d);
    fclose(fin);

    // Nothing to search (or nothing to search with): every query is a miss.
    if (n == 0 || m == 0 || d == 0) {
        for (int i = 0; i < m; ++i) {
            fprintf(fout, "%d\n", -1);
        }
        fclose(fout);
        return 0;
    }

    // Flatten to row-major buffers; size_t keeps n * d from overflowing int.
    const size_t dims = static_cast<size_t>(d);
    std::vector<float> flat_base(static_cast<size_t>(n) * dims);
    std::vector<float> flat_query(static_cast<size_t>(m) * dims);
    for (int i = 0; i < n; ++i)
        memcpy(flat_base.data() + i * dims, base[i].data(), dims * sizeof(float));
    for (int i = 0; i < m; ++i)
        memcpy(flat_query.data() + i * dims, query[i].data(), dims * sizeof(float));

    const size_t base_bytes = flat_base.size() * sizeof(float);
    const size_t query_bytes = flat_query.size() * sizeof(float);
    const size_t cosine_bytes = static_cast<size_t>(m) * sizeof(float);
    const size_t index_bytes = static_cast<size_t>(m) * sizeof(int);

    // The running maximum starts at -1.0f, the lowest possible cosine.
    std::vector<float> max_cosine_host(m, -1.0f);
    std::vector<int> max_index_host(m, -1);

    float *d_base = nullptr, *d_query = nullptr, *d_max_cosine = nullptr;
    int *d_max_index = nullptr;

    // Reports a failed CUDA call on std::cerr and turns it into a bool so the
    // steps below can be chained with short-circuit &&.
    auto succeeded = [](cudaError_t status, const char* what) -> bool {
        if (status == cudaSuccess) {
            return true;
        }
        std::cerr << what << " failed: " << cudaGetErrorString(status) << std::endl;
        return false;
    };

    bool ok = succeeded(cudaMalloc(&d_base, base_bytes), "cudaMalloc(base)")
           && succeeded(cudaMalloc(&d_query, query_bytes), "cudaMalloc(query)")
           && succeeded(cudaMalloc(&d_max_cosine, cosine_bytes), "cudaMalloc(max_cosine)")
           && succeeded(cudaMalloc(&d_max_index, index_bytes), "cudaMalloc(max_index)")
           && succeeded(cudaMemcpy(d_base, flat_base.data(), base_bytes, cudaMemcpyHostToDevice), "cudaMemcpy(base)")
           && succeeded(cudaMemcpy(d_query, flat_query.data(), query_bytes, cudaMemcpyHostToDevice), "cudaMemcpy(query)")
           && succeeded(cudaMemcpy(d_max_cosine, max_cosine_host.data(), cosine_bytes, cudaMemcpyHostToDevice), "cudaMemcpy(max_cosine)")
           // Byte value 0xFF makes every int -1, the "no match" sentinel.
           && succeeded(cudaMemset(d_max_index, 0xFF, index_bytes), "cudaMemset(max_index)");

    dim3 threadsPerBlock(256);
    int sharedMemSize = threadsPerBlock.x * (sizeof(float) + sizeof(int));

    // gridDim.y is limited to 65535, so queries are processed in batches.
    const int kMaxGridY = 65535;
    for (int first = 0; ok && first < m; first += kMaxGridY) {
        const int batch = (m - first < kMaxGridY) ? (m - first) : kMaxGridY;
        dim3 blocksPerGrid((n + threadsPerBlock.x - 1) / threadsPerBlock.x, batch);
        findNearestNeighborCosine<<<blocksPerGrid, threadsPerBlock, sharedMemSize>>>(
            d_base, d_query + first * dims, d_max_cosine + first, d_max_index + first,
            n, batch, d, target_similarity);
        // Launch-configuration errors are only visible through cudaGetLastError.
        ok = succeeded(cudaGetLastError(), "findNearestNeighborCosine launch");
    }

    // The blocking copy also waits for the kernels and surfaces their errors.
    ok = ok && succeeded(cudaMemcpy(max_index_host.data(), d_max_index, index_bytes, cudaMemcpyDeviceToHost), "cudaMemcpy(max_index)");

    if (ok) {
        for (int i = 0; i < m; ++i) {
            fprintf(fout, "%d\n", max_index_host[i]);
        }
    }

    cudaFree(d_base);
    cudaFree(d_query);
    cudaFree(d_max_cosine);
    cudaFree(d_max_index);
    fclose(fout);

    return ok ? 0 : 1;
}

compile.sh

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,3 @@
1-
g++ -c main.cpp -o main.o
2-
g++ -c hnsw.cpp -o hnsw.o
1+
g++ -c -fopenmp main.cpp -o main.o
2+
g++ -c -fopenmp hnsw_implementation/hnsw.cpp -o hnsw.o
33
g++ main.o hnsw.o -o my_program

hnsw_implementation/hnsw.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77
#include <set>
88
#include <unordered_set>
99
#include <vector>
10+
#include <omp.h>
1011
using namespace std;
1112

1213
vector<int> HNSWGraph::searchLayer(Item& q, int ep, int ef, int lc) {

hnsw_implementation/hnsw.h

Lines changed: 3 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -5,19 +5,11 @@
55
#include <vector>
66
#include <unordered_map>
77
#include <iostream>
8+
#include <omp.h>
9+
10+
#include "../CPU/vector_initialize.h"
811
using namespace std;
912

10-
struct Item {
11-
Item(vector<double> _values):values(_values) {}
12-
vector<double> values;
13-
double dist(Item& other) {
14-
double result = 0.0;
15-
for (int i = 0; i < values.size(); i++){
16-
result += (values[i] - other.values[i]) * (values[i] - other.values[i]);
17-
}
18-
return result;
19-
}
20-
};
2113

2214
struct HNSWGraph {
2315
HNSWGraph(int _M, int _MMax, int _MMax0, int _efConstruction, int _ml):M(_M),MMax(_MMax),MMax0(_MMax0),efConstruction(_efConstruction),ml(_ml){

main.cpp

Lines changed: 35 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,4 @@
1-
#include "hnsw.h"
2-
1+
#include "hnsw_implementation/hnsw.h"
32
#include <algorithm>
43
#include <ctime>
54
#include <iostream>
@@ -9,6 +8,7 @@
98
#include <memory>
109
#include <string>
1110
#include <sstream>
11+
#include <omp.h>
1212
using namespace std;
1313

1414
void readInputFromFile(const string& filename, int& D, int& N, int& M, vector<Item>& base, vector<Item>& queries) {
@@ -85,7 +85,9 @@ int main(int argc, char* argv[]) {
8585
myHNSWGraph.Insert(base[i]);
8686
}
8787

88-
double total_brute_force_time = 0.0;
88+
double total_euclidean_time = 0.0;
89+
double total_cosine_time = 0.0;
90+
double total_cosine_normalised_time = 0.0;
8991
double total_hnsw_time = 0.0;
9092

9193
int numHits = 0;
@@ -105,9 +107,18 @@ int main(int argc, char* argv[]) {
105107
distPairs.emplace_back(query.dist(base[j]), j);
106108
}
107109
sort(distPairs.begin(), distPairs.end());
108-
total_brute_force_time += double(clock() - begin_time) / CLOCKS_PER_SEC;
110+
total_euclidean_time += double(clock() - begin_time) / CLOCKS_PER_SEC;
109111

110112
begin_time = clock();
113+
114+
for (int j = 0; j < N; ++j) {
115+
if (j == i) continue;
116+
double cos_sim = query.cosine_similarity(base[j]);
117+
}
118+
total_cosine_time += double(clock() - begin_time) / CLOCKS_PER_SEC;
119+
120+
begin_time = clock();
121+
111122
vector<int> knns = myHNSWGraph.KNNSearch(query, K);
112123
for (size_t idx = 0; idx < knns.size(); ++idx) {
113124
outfile << knns[idx];
@@ -120,8 +131,27 @@ int main(int argc, char* argv[]) {
120131

121132
if (knns[0] == distPairs[0].second) numHits++;
122133
}
134+
for (Item& item : base) {
135+
item.normalize();
136+
}
137+
138+
for (Item& item : queries) {
139+
item.normalize();
140+
}
141+
for (int i = 0; i < M; ++i) {
142+
Item query = queries[i];
143+
clock_t begin_time = clock();
144+
for (int j = 0; j < N; ++j) {
145+
if (j == i) continue;
146+
double cos_sim_normalized = query.cosine_similarity_with_normalisation(base[j]);
147+
}
148+
total_cosine_normalised_time += double(clock() - begin_time) / CLOCKS_PER_SEC;
149+
}
123150
outfile.close();
124-
cout << numHits << " " << total_brute_force_time / M << " " << total_hnsw_time / M << endl;
151+
cout << "Total euclidean time: " << total_euclidean_time << endl;
152+
cout << "Total HNSW time: " << total_hnsw_time << endl;
153+
cout << "Total cosine similarity time: " << total_cosine_time << endl;
154+
cout << "Total cosine similarity with normalization time: " << total_cosine_normalised_time << endl;
125155

126156
return 0;
127157
}

main1.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
1-
#include "hnsw.h"
2-
1+
#include "hnsw_implementation/hnsw.h"
2+
#include "./CPU/vector_initialize.h"
33
#include <algorithm>
44
#include <ctime>
55
#include <iostream>

sample.py

Whitespace-only changes.

sample_inputs/input_generator.cc

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -71,6 +71,7 @@ int main(){
7171
dump_func(1,2,10,5,0.9);
7272
dump_func(2,4,50,10,0.9);
7373
dump_func(3,4,1000,100,0.9);
74+
dump_func(4,5,10000,100,0.9);
7475
return 0;
7576
}
7677

0 commit comments

Comments
 (0)