Merge pull request #3 from CSCI-739/initial-draft

adish29 · web-flow · commit 874880240bb2 · 2023-11-28T15:08:15.000-05:00
Initial draft - 2
diff --git a/CPU/vector_initialize.h b/CPU/vector_initialize.h
@@ -0,0 +1,66 @@
+#ifndef ITEMS_H
+#define ITEMS_H
+
+#include <vector>
+#include <cmath>
+#include <omp.h>
+
+// A dense vector of doubles together with the distance / similarity measures
+// used to compare two such vectors.
+//
+// Precondition shared by every two-argument method: `other.values` is indexed
+// for each i < values.size() and no size check is performed, so `other` must
+// hold at least as many components as `*this`.
+//
+// All comparison methods are const and take `const Item&`, so they can be
+// called on (and with) const objects and temporaries.
+struct Item {
+    // Stores a copy of `_values` as the components of this item.
+    Item(std::vector<double> _values) : values(_values) {}
+    std::vector<double> values;
+
+    // Returns dot(this, other) / (|this| * |other|).
+    // Returns 0.0 when either vector has zero magnitude (this also covers
+    // an empty `values`), which avoids a division by zero.
+    double cosine_similarity(const Item& other) const {
+        double dot_product = 0.0;
+        double magnitude_this = 0.0;
+        double magnitude_other = 0.0;
+
+        // Single pass: accumulate the dot product and both squared norms.
+        for (size_t i = 0; i < values.size(); ++i) {
+            dot_product += values[i] * other.values[i];
+            magnitude_this += values[i] * values[i];
+            magnitude_other += other.values[i] * other.values[i];
+        }
+
+        magnitude_this = std::sqrt(magnitude_this);
+        magnitude_other = std::sqrt(magnitude_other);
+
+        if (magnitude_this == 0 || magnitude_other == 0) {
+            return 0.0;
+        }
+
+        return dot_product / (magnitude_this * magnitude_other);
+    }
+
+    // Returns the SQUARED Euclidean distance to `other` (sum of squared
+    // component differences). No square root is taken.
+    double dist(const Item& other) const {
+        double result = 0.0;
+        for (size_t i = 0; i < values.size(); i++) {
+            const double diff = values[i] - other.values[i];
+            result += diff * diff;
+        }
+        return result;
+    }
+
+    // Scales `values` in place to unit Euclidean length.
+    // A vector whose magnitude is not > 0 (all zeros, or empty) is left as is.
+    void normalize() {
+        double sum = 0.0;
+        for (double val : values) {
+            sum += val * val;
+        }
+
+        const double magnitude = std::sqrt(sum);
+        if (magnitude > 0.0) {
+            for (double& val : values) {
+                val /= magnitude;
+            }
+        }
+    }
+
+    // Returns the plain dot product of the two vectors. This equals the
+    // cosine similarity only if both items were passed through normalize()
+    // beforehand; the method itself does not normalise anything.
+    double cosine_similarity_with_normalisation(const Item& other) const {
+        double dot_product = 0.0;
+        // The OpenMP reduction below is intentionally left disabled.
+        // #pragma omp parallel for reduction(+:dot_product)
+        for (size_t i = 0; i < values.size(); ++i) {
+            dot_product += values[i] * other.values[i];
+        }
+
+        return dot_product;
+    }
+};
+
+#endif 
diff --git a/GPU/ann.cu b/GPU/ann.cu
@@ -0,0 +1,112 @@
+#include <cmath>
+#include <cstdio>
+#include <cstring>
+#include <iostream>
+#include <memory>
+#include <string>
+#include <vector>
+
+#include <cuda_runtime.h>
+
+// Cosine-similarity threshold search: one thread per (query, base point) pair.
+//
+// Launch layout:
+//   - blockIdx.x * blockDim.x + threadIdx.x selects the base point (0..n-1);
+//   - blockIdx.y selects the query (0..num_queries-1).
+//   Threads outside either range exit immediately, so the grid may be larger
+//   than the data. No shared memory is used; a dynamic shared-memory size
+//   passed at launch is simply ignored.
+//
+// Parameters:
+//   points            n * dimensions floats, point p at points[p * dimensions]
+//   queries           num_queries * dimensions floats, same row layout
+//   max_cosine        accepted for interface compatibility; never read or written
+//   max_index         one int per query, see "Output"
+//   target_similarity a point qualifies when its cosine similarity is strictly
+//                     greater than this value
+//
+// Output:
+//   max_index[q] receives the index of a qualifying point. When several points
+//   qualify, which one ends up stored is unspecified (the last atomic exchange
+//   wins). When no point qualifies, max_index[q] is NOT touched, so the caller
+//   must pre-fill it with a sentinel.
+//   A pair whose query or point has zero magnitude has similarity 0.
+__global__ void findNearestNeighborCosine(const float* __restrict__ points, const float* __restrict__ queries, float *max_cosine, int *max_index, int n, int num_queries, int dimensions, float target_similarity) {
+    (void)max_cosine;
+
+    const int tid = threadIdx.x + blockIdx.x * blockDim.x;
+    const int qid = blockIdx.y;
+
+    // Safe to return early: the kernel contains no block-wide barrier.
+    if (tid >= n || qid >= num_queries) {
+        return;
+    }
+
+    // Row offsets are computed in size_t so n * dimensions may exceed INT_MAX.
+    const float* point = points + static_cast<size_t>(tid) * dimensions;
+    const float* query = queries + static_cast<size_t>(qid) * dimensions;
+
+    float dot_product = 0.0f, query_sq = 0.0f, point_sq = 0.0f;
+    for (int d = 0; d < dimensions; ++d) {
+        const float q = query[d];
+        const float p = point[d];
+        dot_product += q * p;
+        query_sq += q * q;
+        point_sq += p * p;
+    }
+    const float query_magnitude = sqrtf(query_sq);
+    const float point_magnitude = sqrtf(point_sq);
+
+    float cosine_similarity = 0.0f;
+    if (query_magnitude > 0.0f && point_magnitude > 0.0f) {
+        cosine_similarity = dot_product / (query_magnitude * point_magnitude);
+    }
+
+    // Many threads (in different blocks) may qualify for the same query; an
+    // atomic exchange keeps the concurrent updates well defined.
+    if (cosine_similarity > target_similarity) {
+        atomicExch(&max_index[qid], tid);
+    }
+}
+
+
+// Reads a row x col matrix of whitespace-separated floats from `fin`.
+//
+// Parameters:
+//   fin  open text stream positioned at the first value (may be NULL)
+//   row  number of rows to read; values <= 0 yield an empty result
+//   col  number of values per row; values <= 0 yield empty rows
+//
+// Returns a vector of `row` vectors, each holding exactly `col` floats.
+// The shape is guaranteed even on bad input because callers copy `col` floats
+// out of every row: if `fin` is NULL, or the stream ends / contains a token
+// that is not a number, a message is written to stderr, reading stops, and
+// every entry that was not read stays 0.
+std::vector<std::vector<float>> read_matrix(FILE* fin, int row, int col) {
+    const size_t rows = row > 0 ? static_cast<size_t>(row) : 0;
+    const size_t cols = col > 0 ? static_cast<size_t>(col) : 0;
+
+    // Pre-sized and zero-filled: avoids per-row copies and defines the value
+    // of anything we fail to read.
+    std::vector<std::vector<float>> ret(rows, std::vector<float>(cols, 0.0f));
+
+    if (fin == nullptr) {
+        fprintf(stderr, "read_matrix: input stream is NULL; returning zeros\n");
+        return ret;
+    }
+
+    for (size_t i = 0; i < rows; ++i) {
+        for (size_t j = 0; j < cols; ++j) {
+            // fscanf leaves the target untouched on failure, so it stays 0.
+            if (fscanf(fin, "%f", &ret[i][j]) != 1) {
+                fprintf(stderr,
+                        "read_matrix: input ended or was malformed at row %zu, column %zu; remaining entries set to 0\n",
+                        i, j);
+                return ret;
+            }
+        }
+    }
+    return ret;
+}
+
+// Entry point: for every query vector, reports the index of a base vector
+// whose cosine similarity with the query exceeds a fixed threshold of 0.9.
+//
+// Usage: <program> <input-file> <output-file>
+//
+// Input format (whitespace separated): `d n m`, then n base vectors of d
+// floats, then m query vectors of d floats.
+// Output: one line per query with the reported base index, or -1 when the
+// kernel reported no match for that query.
+//
+// Returns 0 on success, 1 on a usage, file or CUDA error (details on stderr).
+int main(int argc, char* argv[]) {
+    if (argc < 3) {
+        fprintf(stderr, "usage: %s <input-file> <output-file>\n", argc > 0 ? argv[0] : "ann");
+        return 1;
+    }
+
+    FILE* fin = fopen(argv[1], "r");
+    if (fin == nullptr) {
+        fprintf(stderr, "cannot open input file %s\n", argv[1]);
+        return 1;
+    }
+
+    int n = 0, d = 0, m = 0;
+    const float target_similarity = 0.9f;
+    if (fscanf(fin, "%d%d%d", &d, &n, &m) != 3 || d <= 0 || n < 0 || m < 0) {
+        fprintf(stderr, "invalid header in %s (expected: d n m)\n", argv[1]);
+        fclose(fin);
+        return 1;
+    }
+
+    std::vector<std::vector<float>> base = read_matrix(fin, n, d);
+    std::vector<std::vector<float>> query = read_matrix(fin, m, d);
+    fclose(fin);
+
+    // Flatten to row-major buffers; sizes are computed in size_t so that
+    // n * d cannot overflow int.
+    const size_t dims = static_cast<size_t>(d);
+    std::vector<float> flat_base;
+    flat_base.reserve(static_cast<size_t>(n) * dims);
+    for (const std::vector<float>& row : base)
+        flat_base.insert(flat_base.end(), row.begin(), row.end());
+    std::vector<float> flat_query;
+    flat_query.reserve(static_cast<size_t>(m) * dims);
+    for (const std::vector<float>& row : query)
+        flat_query.insert(flat_query.end(), row.begin(), row.end());
+
+    // -1 marks "no match"; the kernel only overwrites entries it has a hit for.
+    std::vector<int> max_index_host(static_cast<size_t>(m), -1);
+
+    // With no base points or no queries there is nothing to launch (and a
+    // grid dimension of 0 would be an invalid configuration).
+    if (n > 0 && m > 0) {
+        std::vector<float> max_cosine_host(static_cast<size_t>(m), -1.0f);
+
+        float *d_base = nullptr, *d_query = nullptr, *d_max_cosine = nullptr;
+        int* d_max_index = nullptr;
+
+        // Prints a diagnostic and yields false when a CUDA call failed.
+        auto cudaOk = [](cudaError_t err, const char* what) -> bool {
+            if (err == cudaSuccess) return true;
+            fprintf(stderr, "CUDA error during %s: %s\n", what, cudaGetErrorString(err));
+            return false;
+        };
+
+        const size_t base_bytes = flat_base.size() * sizeof(float);
+        const size_t query_bytes = flat_query.size() * sizeof(float);
+        const size_t cosine_bytes = max_cosine_host.size() * sizeof(float);
+        const size_t index_bytes = max_index_host.size() * sizeof(int);
+
+        // Short-circuit chain: the first failure stops all further CUDA work.
+        bool ok = cudaOk(cudaMalloc(&d_base, base_bytes), "cudaMalloc(base)")
+               && cudaOk(cudaMalloc(&d_query, query_bytes), "cudaMalloc(query)")
+               && cudaOk(cudaMalloc(&d_max_cosine, cosine_bytes), "cudaMalloc(max_cosine)")
+               && cudaOk(cudaMalloc(&d_max_index, index_bytes), "cudaMalloc(max_index)")
+               && cudaOk(cudaMemcpy(d_base, flat_base.data(), base_bytes, cudaMemcpyHostToDevice), "copy base to device")
+               && cudaOk(cudaMemcpy(d_query, flat_query.data(), query_bytes, cudaMemcpyHostToDevice), "copy queries to device")
+               && cudaOk(cudaMemcpy(d_max_cosine, max_cosine_host.data(), cosine_bytes, cudaMemcpyHostToDevice), "initialise max_cosine")
+               && cudaOk(cudaMemcpy(d_max_index, max_index_host.data(), index_bytes, cudaMemcpyHostToDevice), "initialise max_index");
+
+        const dim3 threadsPerBlock(256);
+        const int sharedMemSize = threadsPerBlock.x * (sizeof(float) + sizeof(int));
+        // gridDim.y is limited to 65535, so large query sets are processed in
+        // slices; each launch sees its slice as queries 0..count-1.
+        const int maxQueriesPerLaunch = 65535;
+        for (int q0 = 0; ok && q0 < m; q0 += maxQueriesPerLaunch) {
+            const int count = (m - q0 < maxQueriesPerLaunch) ? (m - q0) : maxQueriesPerLaunch;
+            const dim3 blocksPerGrid((n + threadsPerBlock.x - 1) / threadsPerBlock.x, count);
+            findNearestNeighborCosine<<<blocksPerGrid, threadsPerBlock, sharedMemSize>>>(
+                d_base, d_query + static_cast<size_t>(q0) * dims, d_max_cosine + q0, d_max_index + q0,
+                n, count, d, target_similarity);
+            ok = cudaOk(cudaGetLastError(), "kernel launch");
+        }
+
+        // Blocking copy: waits for the kernels and surfaces execution errors.
+        ok = ok && cudaOk(cudaMemcpy(max_index_host.data(), d_max_index, index_bytes, cudaMemcpyDeviceToHost), "copy results to host");
+
+        // cudaFree(nullptr) is a no-op, so this is safe after a partial setup.
+        cudaFree(d_base);
+        cudaFree(d_query);
+        cudaFree(d_max_cosine);
+        cudaFree(d_max_index);
+
+        if (!ok) {
+            return 1;
+        }
+    }
+
+    // Opened only now so a failed run does not truncate an existing file.
+    FILE* fout = fopen(argv[2], "w");
+    if (fout == nullptr) {
+        fprintf(stderr, "cannot open output file %s\n", argv[2]);
+        return 1;
+    }
+    for (int idx : max_index_host) {
+        fprintf(fout, "%d\n", idx);
+    }
+    fclose(fout);
+
+    return 0;
+}
diff --git a/compile.sh b/compile.sh
@@ -1,3 +1,3 @@
-g++ -c main.cpp -o main.o
-g++ -c hnsw.cpp -o hnsw.o
+g++ -c -fopenmp main.cpp -o main.o
+g++ -c -fopenmp hnsw_implementation/hnsw.cpp -o hnsw.o
 g++ main.o hnsw.o -o my_program
diff --git a/hnsw_implementation/hnsw.cpp b/hnsw_implementation/hnsw.cpp
@@ -7,6 +7,7 @@
 #include <set>
 #include <unordered_set>
 #include <vector>
+#include <omp.h>
 using namespace std;
 
 vector<int> HNSWGraph::searchLayer(Item& q, int ep, int ef, int lc) {
diff --git a/hnsw_implementation/hnsw.h b/hnsw_implementation/hnsw.h
@@ -5,19 +5,11 @@
 #include <vector>
 #include <unordered_map>
 #include <iostream>
+#include <omp.h>
+
+#include "../CPU/vector_initialize.h"
 using namespace std;
 
-struct Item {
-	Item(vector<double> _values):values(_values) {}
-	vector<double> values;
-	double dist(Item& other) {
-		double result = 0.0;
-		for (int i = 0; i < values.size(); i++){ 
-            result += (values[i] - other.values[i]) * (values[i] - other.values[i]);
-        }
-		return result;
-	}
-};
 
 struct HNSWGraph {
 	HNSWGraph(int _M, int _MMax, int _MMax0, int _efConstruction, int _ml):M(_M),MMax(_MMax),MMax0(_MMax0),efConstruction(_efConstruction),ml(_ml){
diff --git a/hnsw_implementation/hnsw_py.cpp b/hnsw_implementation/hnsw_py.cpp
diff --git a/main.cpp b/main.cpp
@@ -1,5 +1,4 @@
-#include "hnsw.h"
-
+#include "hnsw_implementation/hnsw.h"
 #include <algorithm>
 #include <ctime>
 #include <iostream>
@@ -9,6 +8,7 @@
 #include <memory>
 #include <string>
 #include <sstream>
+#include <omp.h>
 using namespace std;
 
 void readInputFromFile(const string& filename, int& D, int& N, int& M, vector<Item>& base, vector<Item>& queries) {
@@ -85,7 +85,9 @@ int main(int argc, char* argv[]) {
         myHNSWGraph.Insert(base[i]);
     }
 
-    double total_brute_force_time = 0.0;
+    double total_euclidean_time = 0.0;
+    double total_cosine_time = 0.0;
+    double total_cosine_normalised_time = 0.0;
     double total_hnsw_time = 0.0;
 
     int numHits = 0;
@@ -105,9 +107,18 @@ int main(int argc, char* argv[]) {
             distPairs.emplace_back(query.dist(base[j]), j);
         }
         sort(distPairs.begin(), distPairs.end());
-        total_brute_force_time += double(clock() - begin_time) / CLOCKS_PER_SEC;
+        total_euclidean_time += double(clock() - begin_time) / CLOCKS_PER_SEC;
 
         begin_time = clock();
+
+        for (int j = 0; j < N; ++j) {
+            if (j == i) continue;
+            double cos_sim = query.cosine_similarity(base[j]);
+        }
+        total_cosine_time += double(clock() - begin_time) / CLOCKS_PER_SEC;
+
+        begin_time = clock();
+
         vector<int> knns = myHNSWGraph.KNNSearch(query, K);
         for (size_t idx = 0; idx < knns.size(); ++idx) {
             outfile << knns[idx];
@@ -120,8 +131,27 @@ int main(int argc, char* argv[]) {
 
         if (knns[0] == distPairs[0].second) numHits++;
     }
+    for (Item& item : base) {
+        item.normalize();
+    }
+
+    for (Item& item : queries) {
+        item.normalize();
+    }
+    for (int i = 0; i < M; ++i) {
+        Item query = queries[i];
+        clock_t begin_time = clock();
+        for (int j = 0; j < N; ++j) {
+            if (j == i) continue;
+            double cos_sim_normalized = query.cosine_similarity_with_normalisation(base[j]);
+        }
+        total_cosine_normalised_time += double(clock() - begin_time) / CLOCKS_PER_SEC;
+    }
     outfile.close();
-    cout << numHits << " " << total_brute_force_time / M << " " << total_hnsw_time / M << endl;
+    cout << "Total euclidean time: " << total_euclidean_time << endl;
+    cout << "Total HNSW time: " << total_hnsw_time << endl;
+    cout << "Total cosine similarity time: " << total_cosine_time << endl;
+    cout << "Total cosine similarity with normalization time: " << total_cosine_normalised_time << endl;
 
 	return 0;
 }
diff --git a/main1.cpp b/main1.cpp
@@ -1,5 +1,5 @@
-#include "hnsw.h"
-
+#include "hnsw_implementation/hnsw.h"
+#include "./CPU/vector_initialize.h"
 #include <algorithm>
 #include <ctime>
 #include <iostream>
diff --git a/sample.py b/sample.py
diff --git a/sample_inputs/input_generator.cc b/sample_inputs/input_generator.cc
@@ -71,6 +71,7 @@ int main(){
   dump_func(1,2,10,5,0.9);
   dump_func(2,4,50,10,0.9);
   dump_func(3,4,1000,100,0.9);
+  dump_func(4,5,10000,100,0.9);
   return 0;
 }
 

Original file line number	Diff line number	Diff line change
`@@ -71,6 +71,7 @@ int main(){`
`71`	`71`	`dump_func(1,2,10,5,0.9);`
`72`	`72`	`dump_func(2,4,50,10,0.9);`
`73`	`73`	`dump_func(3,4,1000,100,0.9);`
	`74`	`+ dump_func(4,5,10000,100,0.9);`
`74`	`75`	`return 0;`
`75`	`76`	`}`
`76`	`77`