@@ -47,7 +47,9 @@ void Handle::set_2D(size_t y, size_t x){
47
47
checkCudaErrors (cudaMallocHost ((void **)&pin_output,
48
48
total_size * sizeof (float )));
49
49
50
- checkCudaErrors (cudaMallocHost ((void **)&coords,
50
+ checkCudaErrors (cudaMalloc ((void **)&coords,
51
+ 2 * total_size * sizeof (float )));
52
+ checkCudaErrors (cudaMallocHost ((void **)&pin_coords,
51
53
2 * total_size * sizeof (float )));
52
54
53
55
dim3 threads (min (total_size, (long )512 ), 1 , 1 );
@@ -81,9 +83,11 @@ void Handle::set_3D(size_t z, size_t y, size_t x){
81
83
checkCudaErrors (cudaMallocHost ((void **)&pin_output,
82
84
total_size * sizeof (float )));
83
85
84
- checkCudaErrors (cudaMallocHost ((void **)&coords,
86
+ checkCudaErrors (cudaMalloc ((void **)&coords,
85
87
3 * total_size * sizeof (float )));
86
-
88
+ checkCudaErrors (cudaMallocHost ((void **)&pin_coords,
89
+ 3 * total_size * sizeof (float )));
90
+
87
91
dim3 threads (min (total_size, (long )512 ), 1 , 1 );
88
92
dim3 blocks (total_size/512 + 1 , 1 , 1 );
89
93
set_coords_3D<<<blocks, threads>>> (coords, dim_z, dim_y, dim_x);
@@ -110,19 +114,14 @@ void Handle::copy_output(float* ret){
110
114
/**
 * Copy the coordinate buffer `coords` back to the host.
 *
 * The data is first transferred into the pre-allocated host staging buffer
 * `pin_coords` and then copied into the caller's buffer.
 *
 * @param output Host buffer to fill. It must have room for
 *               3 * total_size floats when `is_3D` is set, and
 *               2 * total_size floats otherwise.
 */
void Handle::check_coords (float * output){
    // Number of floats stored per element: 3 in 3D mode, 2 otherwise.
    const size_t components = is_3D ? 3 : 2;
    const size_t bytes = components * total_size * sizeof (float);

    // Use the blocking copy on purpose: cudaMemcpyAsync into pinned host
    // memory may return before the transfer has finished, which would let
    // the memcpy below read stale or partially written data.
    checkCudaErrors (cudaMemcpy (pin_coords, coords, bytes,
                                 cudaMemcpyDeviceToHost));

    memcpy (output, pin_coords, bytes);
}
128
127
0 commit comments