|
1 |
| -//============================================================== |
2 |
| -// Vector Add is the equivalent of a Hello, World! sample for data parallel |
3 |
| -// programs. Building and running the sample verifies that your development |
4 |
| -// environment is setup correctly and demonstrates the use of the core features |
5 |
| -// of DPC++. This sample runs on both CPU and GPU (or FPGA). When run, it |
6 |
| -// computes on both the CPU and offload device, then compares results. If the |
7 |
| -// code executes on both CPU and offload device, the device name and a success |
8 |
| -// message are displayed. And, your development environment is setup correctly! |
9 |
| -// |
10 |
| -// For comprehensive instructions regarding DPC++ Programming, go to |
11 |
| -// https://software.intel.com/en-us/oneapi-programming-guide and search based on |
12 |
| -// relevant terms noted in the comments. |
13 |
| -// |
14 |
| -// DPC++ material used in the code sample: |
15 |
| -// • A one dimensional array of data. |
16 |
| -// • A device queue, buffer, accessor, and kernel. |
17 |
| -//============================================================== |
18 |
| -// Copyright © 2020 Intel Corporation |
19 |
| -// |
20 |
| -// SPDX-License-Identifier: MIT |
21 |
| -// ============================================================= |
22 |
| -#include <CL/sycl.hpp> |
23 |
| -#include <array> |
24 |
| -#include <iostream> |
25 |
| -#include "dpc_common.hpp" |
26 |
| -#if FPGA || FPGA_EMULATOR |
27 |
| -#include <CL/sycl/intel/fpga_extensions.hpp> |
28 |
| -#endif |
29 |
| - |
30 |
| -using namespace sycl; |
31 |
| - |
32 |
| -// Array type and data size for this example. |
33 |
| -constexpr size_t array_size = 10000; |
34 |
| -typedef std::array<int, array_size> IntArray; |
35 |
| - |
36 |
| -//************************************ |
37 |
| -// Vector add in DPC++ on device: returns sum in 4th parameter "sum_parallel". |
38 |
| -//************************************ |
39 |
| -void VectorAdd(queue &q, const IntArray &a_array, const IntArray &b_array, |
40 |
| - IntArray &sum_parallel) { |
41 |
| - // Create the range object for the arrays managed by the buffer. |
42 |
| - range<1> num_items{a_array.size()}; |
43 |
| - |
44 |
| - // Create buffers that hold the data shared between the host and the devices. |
45 |
| - // The buffer destructor is responsible to copy the data back to host when it |
46 |
| - // goes out of scope. |
47 |
| - buffer a_buf(a_array); |
48 |
| - buffer b_buf(b_array); |
49 |
| - buffer sum_buf(sum_parallel.data(), num_items); |
50 |
| - |
51 |
| - // Submit a command group to the queue by a lambda function that contains the |
52 |
| - // data access permission and device computation (kernel). |
53 |
| - q.submit([&](handler &h) { |
54 |
| - // Create an accessor for each buffer with access permission: read, write or |
55 |
| - // read/write. The accessor is a mean to access the memory in the buffer. |
56 |
| - auto a = a_buf.get_access<access::mode::read>(h); |
57 |
| - auto b = b_buf.get_access<access::mode::read>(h); |
58 |
| - |
59 |
| - // The sum_accessor is used to store (with write permission) the sum data. |
60 |
| - auto sum = sum_buf.get_access<access::mode::write>(h); |
61 |
| - |
62 |
| - // Use parallel_for to run vector addition in parallel on device. This |
63 |
| - // executes the kernel. |
64 |
| - // 1st parameter is the number of work items. |
65 |
| - // 2nd parameter is the kernel, a lambda that specifies what to do per |
66 |
| - // work item. The parameter of the lambda is the work item id. |
67 |
| - // DPC++ supports unnamed lambda kernel by default. |
68 |
| - h.parallel_for(num_items, [=](id<1> i) { sum[i] = a[i] + b[i]; }); |
69 |
| - }); |
70 |
| -} |
71 |
| - |
72 |
| -//************************************ |
73 |
| -// Initialize the array from 0 to array_size - 1 |
74 |
| -//************************************ |
75 |
| -void InitializeArray(IntArray &a) { |
76 |
| - for (size_t i = 0; i < a.size(); i++) a[i] = i; |
77 |
| -} |
78 |
| - |
79 |
| -//************************************ |
80 |
| -// Demonstrate vector add both in sequential on CPU and in parallel on device. |
81 |
| -//************************************ |
82 |
| -int main() { |
83 |
| - // Create device selector for the device of your interest. |
84 |
| -#if FPGA_EMULATOR |
85 |
| - // DPC++ extension: FPGA emulator selector on systems without FPGA card. |
86 |
| - intel::fpga_emulator_selector d_selector; |
87 |
| -#elif FPGA |
88 |
| - // DPC++ extension: FPGA selector on systems with FPGA card. |
89 |
| - intel::fpga_selector d_selector; |
90 |
| -#else |
91 |
| - // The default device selector will select the most performant device. |
92 |
| - default_selector d_selector; |
93 |
| -#endif |
94 |
| - |
95 |
| - // Create array objects with "array_size" to store the input and output data. |
96 |
| - IntArray a, b, sum_sequential, sum_parallel; |
97 |
| - |
98 |
| - // Initialize input arrays with values from 0 to array_size - 1 |
99 |
| - InitializeArray(a); |
100 |
| - InitializeArray(b); |
101 |
| - |
102 |
| - try { |
103 |
| - queue q(d_selector, dpc::exception_handler); |
104 |
| - |
105 |
| - // Print out the device information used for the kernel code. |
106 |
| - std::cout << "Running on device: " |
107 |
| - << q.get_device().get_info<info::device::name>() << "\n"; |
108 |
| - std::cout << "Vector size: " << a.size() << "\n"; |
109 |
| - |
110 |
| - // Vector addition in DPC++ |
111 |
| - VectorAdd(q, a, b, sum_parallel); |
112 |
| - } catch (exception const &e) { |
113 |
| - std::cout << "An exception is caught for vector add.\n"; |
114 |
| - std::terminate(); |
115 |
| - } |
116 |
| - |
117 |
| - // Compute the sum of two arrays in sequential for validation. |
118 |
| - for (size_t i = 0; i < sum_sequential.size(); i++) |
119 |
| - sum_sequential[i] = a[i] + b[i]; |
120 |
| - |
121 |
| - // Verify that the two arrays are equal. |
122 |
| - for (size_t i = 0; i < sum_sequential.size(); i++) { |
123 |
| - if (sum_parallel[i] != sum_sequential[i]) { |
124 |
| - std::cout << "Vector add failed on device.\n"; |
125 |
| - return -1; |
126 |
| - } |
127 |
| - } |
128 |
| - |
129 |
| - int indices[]{0, 1, 2, (a.size() - 1)}; |
130 |
| - constexpr size_t indices_size = sizeof(indices) / sizeof(int); |
131 |
| - |
132 |
| - // Print out the result of vector add. |
133 |
| - for (int i = 0; i < indices_size; i++) { |
134 |
| - int j = indices[i]; |
135 |
| - if (i == indices_size - 1) std::cout << "...\n"; |
136 |
| - std::cout << "[" << j << "]: " << a[j] << " + " << b[j] << " = " |
137 |
| - << sum_parallel[j] << "\n"; |
138 |
| - } |
139 |
| - |
140 |
| - std::cout << "Vector add successfully completed on device.\n"; |
141 |
| - return 0; |
142 |
| -} |
| 1 | +//============================================================== |
| 2 | +// Vector Add is the equivalent of a Hello, World! sample for data parallel |
| 3 | +// programs. Building and running the sample verifies that your development |
| 4 | +// environment is setup correctly and demonstrates the use of the core features |
| 5 | +// of DPC++. This sample runs on both CPU and GPU (or FPGA). When run, it |
| 6 | +// computes on both the CPU and offload device, then compares results. If the |
| 7 | +// code executes on both CPU and offload device, the device name and a success |
| 8 | +// message are displayed. And, your development environment is setup correctly! |
| 9 | +// |
| 10 | +// For comprehensive instructions regarding DPC++ Programming, go to |
| 11 | +// https://software.intel.com/en-us/oneapi-programming-guide and search based on |
| 12 | +// relevant terms noted in the comments. |
| 13 | +// |
| 14 | +// DPC++ material used in the code sample: |
| 15 | +// • A one dimensional array of data. |
| 16 | +// • A device queue, buffer, accessor, and kernel. |
| 17 | +//============================================================== |
| 18 | +// Copyright © 2020 Intel Corporation |
| 19 | +// |
| 20 | +// SPDX-License-Identifier: MIT |
| 21 | +// ============================================================= |
| 22 | +#include <CL/sycl.hpp> |
| 23 | +#include <array> |
| 24 | +#include <iostream> |
| 25 | +#if FPGA || FPGA_EMULATOR |
| 26 | +#include <CL/sycl/intel/fpga_extensions.hpp> |
| 27 | +#endif |
| 28 | + |
| 29 | +using namespace sycl; |
| 30 | + |
// Array type and data size for this example.
constexpr size_t array_size = 10000;
// Alias for the host-side data arrays; `using` is the modern C++ form of typedef.
using IntArray = std::array<int, array_size>;
| 34 | + |
| 35 | +// this exception handler with catch async exceptions |
| 36 | +static auto exception_handler = [](cl::sycl::exception_list eList) { |
| 37 | + for (std::exception_ptr const &e : eList) { |
| 38 | + try { |
| 39 | + std::rethrow_exception(e); |
| 40 | + } |
| 41 | + catch (std::exception const &e) { |
| 42 | +#if _DEBUG |
| 43 | + std::cout << "Failure" << std::endl; |
| 44 | +#endif |
| 45 | + std::terminate(); |
| 46 | + } |
| 47 | + } |
| 48 | +}; |
| 49 | + |
| 50 | +//************************************ |
| 51 | +// Vector add in DPC++ on device: returns sum in 4th parameter "sum_parallel". |
| 52 | +//************************************ |
| 53 | +void VectorAdd(queue &q, const IntArray &a_array, const IntArray &b_array, |
| 54 | + IntArray &sum_parallel) { |
| 55 | + // Create the range object for the arrays managed by the buffer. |
| 56 | + range<1> num_items{a_array.size()}; |
| 57 | + |
| 58 | + // Create buffers that hold the data shared between the host and the devices. |
| 59 | + // The buffer destructor is responsible to copy the data back to host when it |
| 60 | + // goes out of scope. |
| 61 | + buffer a_buf(a_array); |
| 62 | + buffer b_buf(b_array); |
| 63 | + buffer sum_buf(sum_parallel.data(), num_items); |
| 64 | + |
| 65 | + // Submit a command group to the queue by a lambda function that contains the |
| 66 | + // data access permission and device computation (kernel). |
| 67 | + q.submit([&](handler &h) { |
| 68 | + // Create an accessor for each buffer with access permission: read, write or |
| 69 | + // read/write. The accessor is a mean to access the memory in the buffer. |
| 70 | + auto a = a_buf.get_access<access::mode::read>(h); |
| 71 | + auto b = b_buf.get_access<access::mode::read>(h); |
| 72 | + |
| 73 | + // The sum_accessor is used to store (with write permission) the sum data. |
| 74 | + auto sum = sum_buf.get_access<access::mode::write>(h); |
| 75 | + |
| 76 | + // Use parallel_for to run vector addition in parallel on device. This |
| 77 | + // executes the kernel. |
| 78 | + // 1st parameter is the number of work items. |
| 79 | + // 2nd parameter is the kernel, a lambda that specifies what to do per |
| 80 | + // work item. The parameter of the lambda is the work item id. |
| 81 | + // DPC++ supports unnamed lambda kernel by default. |
| 82 | + h.parallel_for(num_items, [=](id<1> i) { sum[i] = a[i] + b[i]; }); |
| 83 | + }); |
| 84 | +} |
| 85 | + |
| 86 | +//************************************ |
| 87 | +// Initialize the array from 0 to array_size - 1 |
| 88 | +//************************************ |
| 89 | +void InitializeArray(IntArray &a) { |
| 90 | + for (size_t i = 0; i < a.size(); i++) a[i] = i; |
| 91 | +} |
| 92 | + |
| 93 | +//************************************ |
| 94 | +// Demonstrate vector add both in sequential on CPU and in parallel on device. |
| 95 | +//************************************ |
| 96 | +int main() { |
| 97 | + // Create device selector for the device of your interest. |
| 98 | +#if FPGA_EMULATOR |
| 99 | + // DPC++ extension: FPGA emulator selector on systems without FPGA card. |
| 100 | + intel::fpga_emulator_selector d_selector; |
| 101 | +#elif FPGA |
| 102 | + // DPC++ extension: FPGA selector on systems with FPGA card. |
| 103 | + intel::fpga_selector d_selector; |
| 104 | +#else |
| 105 | + // The default device selector will select the most performant device. |
| 106 | + default_selector d_selector; |
| 107 | +#endif |
| 108 | + |
| 109 | + // Create array objects with "array_size" to store the input and output data. |
| 110 | + IntArray a, b, sum_sequential, sum_parallel; |
| 111 | + |
| 112 | + // Initialize input arrays with values from 0 to array_size - 1 |
| 113 | + InitializeArray(a); |
| 114 | + InitializeArray(b); |
| 115 | + |
| 116 | + try { |
| 117 | + queue q(d_selector, exception_handler); |
| 118 | + |
| 119 | + // Print out the device information used for the kernel code. |
| 120 | + std::cout << "Running on device: " |
| 121 | + << q.get_device().get_info<info::device::name>() << "\n"; |
| 122 | + std::cout << "Vector size: " << a.size() << "\n"; |
| 123 | + |
| 124 | + // Vector addition in DPC++ |
| 125 | + VectorAdd(q, a, b, sum_parallel); |
| 126 | + } catch (exception const &e) { |
| 127 | + std::cout << "An exception is caught for vector add.\n"; |
| 128 | + std::terminate(); |
| 129 | + } |
| 130 | + |
| 131 | + // Compute the sum of two arrays in sequential for validation. |
| 132 | + for (size_t i = 0; i < sum_sequential.size(); i++) |
| 133 | + sum_sequential[i] = a[i] + b[i]; |
| 134 | + |
| 135 | + // Verify that the two arrays are equal. |
| 136 | + for (size_t i = 0; i < sum_sequential.size(); i++) { |
| 137 | + if (sum_parallel[i] != sum_sequential[i]) { |
| 138 | + std::cout << "Vector add failed on device.\n"; |
| 139 | + return -1; |
| 140 | + } |
| 141 | + } |
| 142 | + |
| 143 | + int indices[]{0, 1, 2, (a.size() - 1)}; |
| 144 | + constexpr size_t indices_size = sizeof(indices) / sizeof(int); |
| 145 | + |
| 146 | + // Print out the result of vector add. |
| 147 | + for (int i = 0; i < indices_size; i++) { |
| 148 | + int j = indices[i]; |
| 149 | + if (i == indices_size - 1) std::cout << "...\n"; |
| 150 | + std::cout << "[" << j << "]: " << a[j] << " + " << b[j] << " = " |
| 151 | + << sum_parallel[j] << "\n"; |
| 152 | + } |
| 153 | + |
| 154 | + std::cout << "Vector add successfully completed on device.\n"; |
| 155 | + return 0; |
| 156 | +} |
0 commit comments