Skip to content

Commit c71084e

Browse files
committed
Cuda Cosine Working
1 parent 66efaaa commit c71084e

File tree

5 files changed

+20
-190
lines changed

5 files changed

+20
-190
lines changed

GPU/ann.cu

Lines changed: 0 additions & 169 deletions
This file was deleted.

GPU/nn.cu

Lines changed: 16 additions & 19 deletions
Original file line number | Diff line number | Diff line change
@@ -2,11 +2,12 @@
22
#include <iostream>
33
#include <memory>
44
#include <string>
5+
#include <ctime>
56
#include <vector>
67
#include <cmath>
78
#include <cuda_runtime.h>
89

9-
__global__ void findNearestNeighborCosine(float *points, float *queries, float *max_cosine, int *max_index, int n, int num_queries, int dimensions, float target_similarity) {
10+
__global__ void findNearestNeighborCosine(float *points, float *queries, float *max_cosine, int n, int num_queries, int dimensions) {
1011
extern __shared__ char shared[];
1112
float *s_cosine = (float*)shared;
1213
int *s_index = (int*)(shared + blockDim.x * sizeof(float));
@@ -33,8 +34,6 @@ __global__ void findNearestNeighborCosine(float *points, float *queries, float *
3334

3435
s_cosine[threadIdx.x] = cosine_similarity;
3536
s_index[threadIdx.x] = tid;
36-
if(cosine_similarity > target_similarity)
37-
max_index[qid] = tid;
3837
__syncthreads();
3938
}
4039
}
@@ -55,11 +54,13 @@ std::vector<std::vector<float>> read_matrix(FILE* fin, int row, int col) {
5554

5655
int main(int argc, char* argv[]) {
5756
FILE* fin = fopen(argv[1], "r");
58-
FILE* fout = fopen(argv[2], "w");
5957

6058
int n = 0, d = 0, m = 0;
61-
float target_similarity = 0;
62-
fscanf(fin, "%d%d%d%f", &d, &n, &m, &target_similarity);
59+
fscanf(fin, "%d%d%d", &d, &n, &m);
60+
61+
double total_cosine_GPU_time = 0.0;
62+
63+
clock_t start_time, end_time;
6364

6465
std::vector<std::vector<float>> base = read_matrix(fin, n, d);
6566
std::vector<std::vector<float>> query = read_matrix(fin, m, d);
@@ -73,13 +74,12 @@ int main(int argc, char* argv[]) {
7374

7475

7576
float* d_base, * d_query, *d_max_cosine;
76-
int *d_max_index;
7777

78-
78+
start_time = clock();
79+
7980
cudaMalloc(&d_base, n * d * sizeof(float));
8081
cudaMalloc(&d_query, m * d * sizeof(float));
8182
cudaMalloc(&d_max_cosine, m * sizeof(float));
82-
cudaMalloc(&d_max_index, m * sizeof(int));
8383

8484

8585
float *max_cosine_host = new float[m];
@@ -98,23 +98,20 @@ int main(int argc, char* argv[]) {
9898

9999

100100
int sharedMemSize = threadsPerBlock.x * (sizeof(float) + sizeof(int));
101-
findNearestNeighborCosine<<<blocksPerGrid, threadsPerBlock, sharedMemSize>>>(d_base, d_query, d_max_cosine, d_max_index, n, m, d, target_similarity);
101+
findNearestNeighborCosine<<<blocksPerGrid, threadsPerBlock, sharedMemSize>>>(d_base, d_query, d_max_cosine, n, m, d);
102102

103103

104-
int *max_index_host = new int[m];
105104
cudaMemcpy(max_cosine_host, d_max_cosine, m * sizeof(float), cudaMemcpyDeviceToHost);
106-
cudaMemcpy(max_index_host, d_max_index, m * sizeof(int), cudaMemcpyDeviceToHost);
107-
108-
109-
for (int i = 0; i < m; ++i) {
110-
fprintf(fout, "%d\n", max_index_host[i]);
111-
}
112-
113105

114106
cudaFree(d_base);
115107
cudaFree(d_query);
116108
cudaFree(d_max_cosine);
117-
cudaFree(d_max_index);
109+
110+
end_time = clock(); // Record the ending time
111+
112+
total_cosine_GPU_time = static_cast<double>(end_time - start_time) / CLOCKS_PER_SEC;
113+
114+
std::cout << "Total cosine similarity with GPU: " << total_cosine_GPU_time << " seconds." << std::endl;
118115

119116
return 0;
120117
}

Project Document.docx

-215 Bytes
Binary file not shown.

compile.sh

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1,4 +1,4 @@
11
g++ -c -fopenmp main.cpp -o main.o
22
g++ -c -fopenmp hnsw_implementation/hnsw.cpp -o hnsw.o
33
g++ -fopenmp main.o hnsw.o -o my_program
4-
nvcc GPU/ann.cu -o nn -O3 -arch=sm_60
4+
nvcc GPU/nn.cu -o nn -O3 -arch=sm_60

run.sh

Lines changed: 3 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1 +1,3 @@
1-
./my_program $1 $2 $3
1+
./my_program $1 $2 $3
2+
cd GPU
3+
./nn ../$1

0 commit comments

Comments
 (0)