
Commit 2b1f9a4

Add all files (oneapi-src#1436)
1 parent f23efea commit 2b1f9a4


115 files changed

+19117
-0
lines changed

Lines changed: 307 additions & 0 deletions
@@ -0,0 +1,307 @@
/*
 * Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *  * Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 *  * Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *  * Neither the name of NVIDIA CORPORATION nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#pragma once

#include <sycl/sycl.hpp>
#include <dpct/dpct.hpp>
#include <cmath>
#include <functional>
#include <iostream>
#include <random>
#include <stdexcept>
#include <string>
#include <dpct/lib_common_utils.hpp>

#include <complex>

// CUDA API error checking
/*
DPCT1001:1: The statement could not be removed.
*/
/*
DPCT1000:2: Error handling if-stmt was detected but could not be rewritten.
*/
#define CUDA_CHECK(err)                                                        \
    do {                                                                       \
        int err_ = (err);                                                      \
        if (err_ != 0) {                                                       \
            std::printf("CUDA error %d at %s:%d\n", err_, __FILE__, __LINE__); \
            throw std::runtime_error("CUDA error");                            \
        }                                                                      \
    } while (0)

// cublas API error checking
#define CUBLAS_CHECK(err)                                                      \
    do {                                                                       \
        int err_ = (err);                                                      \
        if (err_ != 0) {                                                       \
            std::printf("cublas error %d at %s:%d\n", err_, __FILE__,          \
                        __LINE__);                                             \
            throw std::runtime_error("cublas error");                          \
        }                                                                      \
    } while (0)

// memory alignment
#define ALIGN_TO(A, B) (((A + B - 1) / B) * B)

// device memory pitch alignment
static const size_t device_alignment = 32;
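As a quick illustration of how the helpers above are meant to be used: CUDA_CHECK and CUBLAS_CHECK wrap any expression that evaluates to an integer status where 0 means success, and ALIGN_TO rounds a byte count up to a multiple of its second argument, for example a row pitch up to device_alignment. The sketch below is hypothetical (the function name alignment_example is not part of this header).

// Usage sketch (hypothetical, not called anywhere in the samples):
// CUDA_CHECK expects an int status, 0 meaning success;
// ALIGN_TO rounds a size up to a multiple of its second argument.
inline void alignment_example() {
    size_t row_bytes = 100 * sizeof(float);                 // 400 bytes
    size_t pitch = ALIGN_TO(row_bytes, device_alignment);   // rounded up to 416
    std::printf("padded pitch = %zu bytes\n", pitch);

    CUDA_CHECK(0);   // a zero status passes through without throwing
}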
// type traits
template <typename T> struct traits;

template <> struct traits<float> {
    // scalar type
    typedef float T;
    typedef T S;

    static constexpr T zero = 0.f;
    static constexpr dpct::library_data_t cuda_data_type =
        dpct::library_data_t::real_float;

    inline static S abs(T val) { return fabs(val); }

    template <typename RNG> inline static T rand(RNG &gen) { return (S)gen(); }

    inline static T add(T a, T b) { return a + b; }

    inline static T mul(T v, double f) { return v * f; }
};

template <> struct traits<double> {
    // scalar type
    typedef double T;
    typedef T S;

    static constexpr T zero = 0.;
    static constexpr dpct::library_data_t cuda_data_type =
        dpct::library_data_t::real_double;

    inline static S abs(T val) { return fabs(val); }

    template <typename RNG> inline static T rand(RNG &gen) { return (S)gen(); }

    inline static T add(T a, T b) { return a + b; }

    inline static T mul(T v, double f) { return v * f; }
};

template <> struct traits<sycl::float2> {
    // scalar type
    typedef float S;
    typedef sycl::float2 T;

    static constexpr T zero = {0.f, 0.f};
    static constexpr dpct::library_data_t cuda_data_type =
        dpct::library_data_t::complex_float;

    inline static S abs(T val) { return dpct::cabs<float>(val); }

    template <typename RNG> inline static T rand(RNG &gen) {
        return sycl::float2((S)gen(), (S)gen());
    }

    inline static T add(T a, T b) { return a + b; }
    inline static T add(T a, S b) { return a + sycl::float2(b, 0.f); }

    inline static T mul(T v, double f) {
        return sycl::float2(v.x() * f, v.y() * f);
    }
};

template <> struct traits<sycl::double2> {
    // scalar type
    typedef double S;
    typedef sycl::double2 T;

    static constexpr T zero = {0., 0.};
    static constexpr dpct::library_data_t cuda_data_type =
        dpct::library_data_t::complex_double;

    inline static S abs(T val) { return dpct::cabs<double>(val); }

    template <typename RNG> inline static T rand(RNG &gen) {
        return sycl::double2((S)gen(), (S)gen());
    }

    inline static T add(T a, T b) { return a + b; }
    inline static T add(T a, S b) { return a + sycl::double2(b, 0.); }

    inline static T mul(T v, double f) {
        return sycl::double2(v.x() * f, v.y() * f);
    }
};
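The four traits specializations give the rest of the utilities one generic interface over real and complex element types: a zero constant, the matching dpct::library_data_t tag, plus abs, rand, add and mul. As a sketch of the pattern, here is a small generic routine written only against that interface (sum_of_abs is a hypothetical name, not part of this header).

// Hypothetical generic helper: sum of |A[i]| for any supported element type T,
// using only the traits<T> interface defined above.
template <typename T>
typename traits<T>::S sum_of_abs(const T *A, int n) {
    auto s = traits<typename traits<T>::S>::zero;   // scalar accumulator
    for (int i = 0; i < n; ++i)
        s += traits<T>::abs(A[i]);                  // abs maps complex -> real
    return s;
}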
template <typename T> void print_matrix(const int &m, const int &n, const T *A, const int &lda);

template <> void print_matrix(const int &m, const int &n, const float *A, const int &lda) {
    for (int i = 0; i < m; i++) {
        for (int j = 0; j < n; j++) {
            std::printf("%0.2f ", A[j * lda + i]);
        }
        std::printf("\n");
    }
}

template <> void print_matrix(const int &m, const int &n, const double *A, const int &lda) {
    for (int i = 0; i < m; i++) {
        for (int j = 0; j < n; j++) {
            std::printf("%0.2f ", A[j * lda + i]);
        }
        std::printf("\n");
    }
}

template <>
void print_matrix(const int &m, const int &n, const sycl::float2 *A,
                  const int &lda) {
    for (int i = 0; i < m; i++) {
        for (int j = 0; j < n; j++) {
            std::printf("%0.2f + %0.2fj ", A[j * lda + i].x(),
                        A[j * lda + i].y());
        }
        std::printf("\n");
    }
}

template <>
void print_matrix(const int &m, const int &n, const sycl::double2 *A,
                  const int &lda) {
    for (int i = 0; i < m; i++) {
        for (int j = 0; j < n; j++) {
            std::printf("%0.2f + %0.2fj ", A[j * lda + i].x(),
                        A[j * lda + i].y());
        }
        std::printf("\n");
    }
}
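Note that every specialization indexes A[j * lda + i], i.e. the matrix is assumed to be stored column-major with leading dimension lda, matching the cuBLAS/cuSOLVER convention. A small illustrative call (the values and the function name print_matrix_example are made up for this sketch):

// Hypothetical example: a 2x2 column-major matrix, columns stored contiguously.
inline void print_matrix_example() {
    const int m = 2, n = 2, lda = 2;
    const float A[4] = {1.f, 3.f,    // column 0
                        2.f, 4.f};   // column 1
    print_matrix(m, n, A, lda);      // prints "1.00 2.00" then "3.00 4.00"
}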
template <typename T> void print_vector(const int &m, const T *A);

template <> void print_vector(const int &m, const float *A) {
    for (int i = 0; i < m; i++) {
        std::printf("%0.2f ", A[i]);
    }
    std::printf("\n");
}

template <> void print_vector(const int &m, const double *A) {
    for (int i = 0; i < m; i++) {
        std::printf("%0.2f ", A[i]);
    }
    std::printf("\n");
}

template <> void print_vector(const int &m, const sycl::float2 *A) {
    for (int i = 0; i < m; i++) {
        std::printf("%0.2f + %0.2fj ", A[i].x(), A[i].y());
    }
    std::printf("\n");
}

template <> void print_vector(const int &m, const sycl::double2 *A) {
    for (int i = 0; i < m; i++) {
        std::printf("%0.2f + %0.2fj ", A[i].x(), A[i].y());
    }
    std::printf("\n");
}

template <typename T> void generate_random_matrix(int m, int n, T **A, int *lda) {
    std::random_device rd;
    std::mt19937 gen(rd());
    std::uniform_real_distribution<typename traits<T>::S> dis(-1.0, 1.0);
    auto rand_gen = std::bind(dis, gen);

    *lda = n;

    size_t matrix_mem_size = static_cast<size_t>(*lda * m * sizeof(T));
    // suppress gcc 7 size warning
    if (matrix_mem_size <= PTRDIFF_MAX)
        *A = (T *)malloc(matrix_mem_size);
    else
        throw std::runtime_error("Memory allocation size is too large");

    if (*A == NULL)
        throw std::runtime_error("Unable to allocate host matrix");

    // fill the matrix with random values
    for (int i = 0; i < m; ++i) {
        for (int j = 0; j < n; ++j) {
            T *A_row = (*A) + *lda * i;
            A_row[j] = traits<T>::rand(rand_gen);
        }
    }
}

// Makes matrix A of size mxn and leading dimension lda diagonally dominant
template <typename T> void make_diag_dominant_matrix(int m, int n, T *A, int lda) {
    for (int i = 0; i < std::min(m, n); ++i) {
        T *A_row = A + lda * i;
        auto row_sum = traits<typename traits<T>::S>::zero;
        for (int j = 0; j < n; ++j) {
            row_sum += traits<T>::abs(A_row[j]);
        }
        A_row[i] = traits<T>::add(A_row[i], row_sum);
    }
}
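Together these two helpers build the well-conditioned test inputs used by the samples: generate_random_matrix allocates the host matrix with malloc, sets *lda = n and fills it with values drawn uniformly from [-1, 1), and make_diag_dominant_matrix then adds each row's absolute sum onto the diagonal entry so that factorization routines get a safely non-singular input. A minimal host-side sketch (sizes arbitrary, function name hypothetical):

// Hypothetical host-side setup of a small diagonally dominant test matrix.
inline void test_matrix_example() {
    const int m = 3, n = 3;
    float *A = nullptr;
    int lda = 0;

    generate_random_matrix(m, n, &A, &lda);   // allocates A and sets lda = n
    make_diag_dominant_matrix(m, n, A, lda);  // boosts each diagonal entry
    print_matrix(m, n, A, lda);

    free(A);   // generate_random_matrix allocated A with malloc
}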
// Returns cudaDataType value as defined in library_types.h for the string
// containing type name
dpct::library_data_t get_cuda_library_type(std::string type_string) {
    if (type_string.compare("CUDA_R_16F") == 0)
        return dpct::library_data_t::real_half;
    else if (type_string.compare("CUDA_C_16F") == 0)
        return dpct::library_data_t::complex_half;
    else if (type_string.compare("CUDA_R_32F") == 0)
        return dpct::library_data_t::real_float;
    else if (type_string.compare("CUDA_C_32F") == 0)
        return dpct::library_data_t::complex_float;
    else if (type_string.compare("CUDA_R_64F") == 0)
        return dpct::library_data_t::real_double;
    else if (type_string.compare("CUDA_C_64F") == 0)
        return dpct::library_data_t::complex_double;
    else if (type_string.compare("CUDA_R_8I") == 0)
        return dpct::library_data_t::real_int8;
    else if (type_string.compare("CUDA_C_8I") == 0)
        return dpct::library_data_t::complex_int8;
    else if (type_string.compare("CUDA_R_8U") == 0)
        return dpct::library_data_t::real_uint8;
    else if (type_string.compare("CUDA_C_8U") == 0)
        return dpct::library_data_t::complex_uint8;
    else if (type_string.compare("CUDA_R_32I") == 0)
        return dpct::library_data_t::real_int32;
    else if (type_string.compare("CUDA_C_32I") == 0)
        return dpct::library_data_t::complex_int32;
    else if (type_string.compare("CUDA_R_32U") == 0)
        return dpct::library_data_t::real_uint32;
    else if (type_string.compare("CUDA_C_32U") == 0)
        return dpct::library_data_t::complex_uint32;
    else
        throw std::runtime_error("Unknown CUDA datatype");
}
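get_cuda_library_type maps the textual cudaDataType names (CUDA_R_32F, CUDA_C_64F, and so on) onto the corresponding dpct::library_data_t values and throws on anything it does not recognize, so the migrated samples can keep accepting the same type strings as the original CUDA versions. A brief usage sketch (hypothetical function name, not part of this header):

// Hypothetical example: parse a data-type name, e.g. taken from a command line.
inline void library_type_example() {
    dpct::library_data_t t = get_cuda_library_type("CUDA_R_32F");
    if (t == dpct::library_data_t::real_float)
        std::printf("CUDA_R_32F maps to dpct::library_data_t::real_float\n");

    // Any name not in the list above, e.g. get_cuda_library_type("not-a-type"),
    // throws std::runtime_error("Unknown CUDA datatype").
}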
