
Commit 01ff93c

authored
removed dpc_common.hpp dependency, updated linux makefiles to remove … (oneapi-src#43)
* removed dpc_common.hpp dependency, updated linux makefiles to remove '-r' in 'rm -rf'
  Signed-off-by: Jenn500 <[email protected]>

* updated Makefile Makefile.fpga Makefile.win Makefile.win.fpga clean command
  Signed-off-by: Jenn500 <[email protected]>

* updated .vcxproj WindowSDK with WindowsSDKVersion.Replace
  Signed-off-by: Jenn500 <[email protected]>
1 parent 1ba5da7 commit 01ff93c

File tree

8 files changed: +177 −212 lines changed


DirectProgramming/DPC++/DenseLinearAlgebra/vector-add/Makefile

Lines changed: 1 addition & 1 deletion

@@ -22,4 +22,4 @@ run_usm:
 	./$(USM_EXE_NAME)
 
 clean:
-	rm -rf $(BUFFER_EXE_NAME) $(USM_EXE_NAME)
+	rm -f $(BUFFER_EXE_NAME) $(USM_EXE_NAME)

DirectProgramming/DPC++/DenseLinearAlgebra/vector-add/Makefile.win

Lines changed: 1 addition & 1 deletion

@@ -22,4 +22,4 @@ run_usm:
 	$(USM_EXE_NAME)
 
 clean:
-	del /f $(EXE_NAME) $(USM_EXE_NAME)
+	del /F /Q *.ilk *.pdb $(EXE_NAME) $(USM_EXE_NAME)

DirectProgramming/DPC++/DenseLinearAlgebra/vector-add/Makefile.win.fpga

Lines changed: 1 addition & 1 deletion

@@ -23,4 +23,4 @@ run_usm:
 	@echo USM is not supported for FPGAs, yet
 
 clean:
-	del /f *.o *.d *.out *.mon *.emu *.aocr *.aoco *.prj *.fpga_emu *.fpga_emu_buffers *.a $(EXE) $(USM_EXE)
+	del /F /S /Q *.ilk *.pdb *.o *.d *.out *.mon *.emu *.aocr *.aoco *.prj *.fpga_emu *.fpga_emu_buffers *.a $(EXE) $(USM_EXE)

DirectProgramming/DPC++/DenseLinearAlgebra/vector-add/src/dpc_common.hpp

Lines changed: 0 additions & 63 deletions
This file was deleted.
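
The deleted dpc_common.hpp is what supplied the exception handler the sample previously pulled in (referenced as dpc::exception_handler in the old source below); the commit replaces it with a handler defined directly in the sample source. Here is a minimal, self-contained sketch of that async-handler pattern, assuming a SYCL 1.2.1-era DPC++ compiler like the sample targets; the no-op kernel and the wait_and_throw() call are illustrative additions, not part of the commit.

#include <CL/sycl.hpp>
#include <exception>
#include <iostream>

// Async exception handler pattern: SYCL reports errors raised during
// asynchronous execution through a handler passed at queue construction,
// not through an ordinary try/catch around the submit call.
static auto exception_handler = [](cl::sycl::exception_list eList) {
  for (std::exception_ptr const &e : eList) {
    try {
      std::rethrow_exception(e);
    } catch (std::exception const &e) {
      std::cout << "Async exception: " << e.what() << "\n";
      std::terminate();
    }
  }
};

int main() {
  // The handler runs when the queue processes asynchronous errors,
  // e.g. during wait_and_throw().
  cl::sycl::queue q(cl::sycl::default_selector{}, exception_handler);
  q.submit([&](cl::sycl::handler &h) { h.single_task<class noop>([] {}); });
  q.wait_and_throw();
  return 0;
}

Passing the handler at queue construction matters because kernel-side errors surface asynchronously; without a handler they can go unnoticed.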
Lines changed: 156 additions & 142 deletions
@@ -1,142 +1,156 @@
-//==============================================================
-// Vector Add is the equivalent of a Hello, World! sample for data parallel
-// programs. Building and running the sample verifies that your development
-// environment is setup correctly and demonstrates the use of the core features
-// of DPC++. This sample runs on both CPU and GPU (or FPGA). When run, it
-// computes on both the CPU and offload device, then compares results. If the
-// code executes on both CPU and offload device, the device name and a success
-// message are displayed. And, your development environment is setup correctly!
-//
-// For comprehensive instructions regarding DPC++ Programming, go to
-// https://software.intel.com/en-us/oneapi-programming-guide and search based on
-// relevant terms noted in the comments.
-//
-// DPC++ material used in the code sample:
-// • A one dimensional array of data.
-// • A device queue, buffer, accessor, and kernel.
-//==============================================================
-// Copyright © 2020 Intel Corporation
-//
-// SPDX-License-Identifier: MIT
-// =============================================================
-#include <CL/sycl.hpp>
-#include <array>
-#include <iostream>
-#include "dpc_common.hpp"
-#if FPGA || FPGA_EMULATOR
-#include <CL/sycl/intel/fpga_extensions.hpp>
-#endif
-
-using namespace sycl;
-
-// Array type and data size for this example.
-constexpr size_t array_size = 10000;
-typedef std::array<int, array_size> IntArray;
-
-//************************************
-// Vector add in DPC++ on device: returns sum in 4th parameter "sum_parallel".
-//************************************
-void VectorAdd(queue &q, const IntArray &a_array, const IntArray &b_array,
-               IntArray &sum_parallel) {
-  // Create the range object for the arrays managed by the buffer.
-  range<1> num_items{a_array.size()};
-
-  // Create buffers that hold the data shared between the host and the devices.
-  // The buffer destructor is responsible to copy the data back to host when it
-  // goes out of scope.
-  buffer a_buf(a_array);
-  buffer b_buf(b_array);
-  buffer sum_buf(sum_parallel.data(), num_items);
-
-  // Submit a command group to the queue by a lambda function that contains the
-  // data access permission and device computation (kernel).
-  q.submit([&](handler &h) {
-    // Create an accessor for each buffer with access permission: read, write or
-    // read/write. The accessor is a mean to access the memory in the buffer.
-    auto a = a_buf.get_access<access::mode::read>(h);
-    auto b = b_buf.get_access<access::mode::read>(h);
-
-    // The sum_accessor is used to store (with write permission) the sum data.
-    auto sum = sum_buf.get_access<access::mode::write>(h);
-
-    // Use parallel_for to run vector addition in parallel on device. This
-    // executes the kernel.
-    // 1st parameter is the number of work items.
-    // 2nd parameter is the kernel, a lambda that specifies what to do per
-    // work item. The parameter of the lambda is the work item id.
-    // DPC++ supports unnamed lambda kernel by default.
-    h.parallel_for(num_items, [=](id<1> i) { sum[i] = a[i] + b[i]; });
-  });
-}
-
-//************************************
-// Initialize the array from 0 to array_size - 1
-//************************************
-void InitializeArray(IntArray &a) {
-  for (size_t i = 0; i < a.size(); i++) a[i] = i;
-}
-
-//************************************
-// Demonstrate vector add both in sequential on CPU and in parallel on device.
-//************************************
-int main() {
-  // Create device selector for the device of your interest.
-#if FPGA_EMULATOR
-  // DPC++ extension: FPGA emulator selector on systems without FPGA card.
-  intel::fpga_emulator_selector d_selector;
-#elif FPGA
-  // DPC++ extension: FPGA selector on systems with FPGA card.
-  intel::fpga_selector d_selector;
-#else
-  // The default device selector will select the most performant device.
-  default_selector d_selector;
-#endif
-
-  // Create array objects with "array_size" to store the input and output data.
-  IntArray a, b, sum_sequential, sum_parallel;
-
-  // Initialize input arrays with values from 0 to array_size - 1
-  InitializeArray(a);
-  InitializeArray(b);
-
-  try {
-    queue q(d_selector, dpc::exception_handler);
-
-    // Print out the device information used for the kernel code.
-    std::cout << "Running on device: "
-              << q.get_device().get_info<info::device::name>() << "\n";
-    std::cout << "Vector size: " << a.size() << "\n";
-
-    // Vector addition in DPC++
-    VectorAdd(q, a, b, sum_parallel);
-  } catch (exception const &e) {
-    std::cout << "An exception is caught for vector add.\n";
-    std::terminate();
-  }
-
-  // Compute the sum of two arrays in sequential for validation.
-  for (size_t i = 0; i < sum_sequential.size(); i++)
-    sum_sequential[i] = a[i] + b[i];
-
-  // Verify that the two arrays are equal.
-  for (size_t i = 0; i < sum_sequential.size(); i++) {
-    if (sum_parallel[i] != sum_sequential[i]) {
-      std::cout << "Vector add failed on device.\n";
-      return -1;
-    }
-  }
-
-  int indices[]{0, 1, 2, (a.size() - 1)};
-  constexpr size_t indices_size = sizeof(indices) / sizeof(int);
-
-  // Print out the result of vector add.
-  for (int i = 0; i < indices_size; i++) {
-    int j = indices[i];
-    if (i == indices_size - 1) std::cout << "...\n";
-    std::cout << "[" << j << "]: " << a[j] << " + " << b[j] << " = "
-              << sum_parallel[j] << "\n";
-  }
-
-  std::cout << "Vector add successfully completed on device.\n";
-  return 0;
-}
+//==============================================================
+// Vector Add is the equivalent of a Hello, World! sample for data parallel
+// programs. Building and running the sample verifies that your development
+// environment is setup correctly and demonstrates the use of the core features
+// of DPC++. This sample runs on both CPU and GPU (or FPGA). When run, it
+// computes on both the CPU and offload device, then compares results. If the
+// code executes on both CPU and offload device, the device name and a success
+// message are displayed. And, your development environment is setup correctly!
+//
+// For comprehensive instructions regarding DPC++ Programming, go to
+// https://software.intel.com/en-us/oneapi-programming-guide and search based on
+// relevant terms noted in the comments.
+//
+// DPC++ material used in the code sample:
+// • A one dimensional array of data.
+// • A device queue, buffer, accessor, and kernel.
+//==============================================================
+// Copyright © 2020 Intel Corporation
+//
+// SPDX-License-Identifier: MIT
+// =============================================================
+#include <CL/sycl.hpp>
+#include <array>
+#include <iostream>
+#if FPGA || FPGA_EMULATOR
+#include <CL/sycl/intel/fpga_extensions.hpp>
+#endif
+
+using namespace sycl;
+
+// Array type and data size for this example.
+constexpr size_t array_size = 10000;
+typedef std::array<int, array_size> IntArray;
+
+// this exception handler with catch async exceptions
+static auto exception_handler = [](cl::sycl::exception_list eList) {
+  for (std::exception_ptr const &e : eList) {
+    try {
+      std::rethrow_exception(e);
+    }
+    catch (std::exception const &e) {
+#if _DEBUG
+      std::cout << "Failure" << std::endl;
+#endif
+      std::terminate();
+    }
+  }
+};
+
+//************************************
+// Vector add in DPC++ on device: returns sum in 4th parameter "sum_parallel".
+//************************************
+void VectorAdd(queue &q, const IntArray &a_array, const IntArray &b_array,
+               IntArray &sum_parallel) {
+  // Create the range object for the arrays managed by the buffer.
+  range<1> num_items{a_array.size()};
+
+  // Create buffers that hold the data shared between the host and the devices.
+  // The buffer destructor is responsible to copy the data back to host when it
+  // goes out of scope.
+  buffer a_buf(a_array);
+  buffer b_buf(b_array);
+  buffer sum_buf(sum_parallel.data(), num_items);
+
+  // Submit a command group to the queue by a lambda function that contains the
+  // data access permission and device computation (kernel).
+  q.submit([&](handler &h) {
+    // Create an accessor for each buffer with access permission: read, write or
+    // read/write. The accessor is a mean to access the memory in the buffer.
+    auto a = a_buf.get_access<access::mode::read>(h);
+    auto b = b_buf.get_access<access::mode::read>(h);
+
+    // The sum_accessor is used to store (with write permission) the sum data.
+    auto sum = sum_buf.get_access<access::mode::write>(h);
+
+    // Use parallel_for to run vector addition in parallel on device. This
+    // executes the kernel.
+    // 1st parameter is the number of work items.
+    // 2nd parameter is the kernel, a lambda that specifies what to do per
+    // work item. The parameter of the lambda is the work item id.
+    // DPC++ supports unnamed lambda kernel by default.
+    h.parallel_for(num_items, [=](id<1> i) { sum[i] = a[i] + b[i]; });
+  });
+}
+
+//************************************
+// Initialize the array from 0 to array_size - 1
+//************************************
+void InitializeArray(IntArray &a) {
+  for (size_t i = 0; i < a.size(); i++) a[i] = i;
+}
+
+//************************************
+// Demonstrate vector add both in sequential on CPU and in parallel on device.
+//************************************
+int main() {
+  // Create device selector for the device of your interest.
+#if FPGA_EMULATOR
+  // DPC++ extension: FPGA emulator selector on systems without FPGA card.
+  intel::fpga_emulator_selector d_selector;
+#elif FPGA
+  // DPC++ extension: FPGA selector on systems with FPGA card.
+  intel::fpga_selector d_selector;
+#else
+  // The default device selector will select the most performant device.
+  default_selector d_selector;
+#endif
+
+  // Create array objects with "array_size" to store the input and output data.
+  IntArray a, b, sum_sequential, sum_parallel;
+
+  // Initialize input arrays with values from 0 to array_size - 1
+  InitializeArray(a);
+  InitializeArray(b);
+
+  try {
+    queue q(d_selector, exception_handler);
+
+    // Print out the device information used for the kernel code.
+    std::cout << "Running on device: "
+              << q.get_device().get_info<info::device::name>() << "\n";
+    std::cout << "Vector size: " << a.size() << "\n";
+
+    // Vector addition in DPC++
+    VectorAdd(q, a, b, sum_parallel);
+  } catch (exception const &e) {
+    std::cout << "An exception is caught for vector add.\n";
+    std::terminate();
+  }
+
+  // Compute the sum of two arrays in sequential for validation.
+  for (size_t i = 0; i < sum_sequential.size(); i++)
+    sum_sequential[i] = a[i] + b[i];
+
+  // Verify that the two arrays are equal.
+  for (size_t i = 0; i < sum_sequential.size(); i++) {
+    if (sum_parallel[i] != sum_sequential[i]) {
+      std::cout << "Vector add failed on device.\n";
+      return -1;
+    }
+  }
+
+  int indices[]{0, 1, 2, (a.size() - 1)};
+  constexpr size_t indices_size = sizeof(indices) / sizeof(int);
+
+  // Print out the result of vector add.
+  for (int i = 0; i < indices_size; i++) {
+    int j = indices[i];
+    if (i == indices_size - 1) std::cout << "...\n";
+    std::cout << "[" << j << "]: " << a[j] << " + " << b[j] << " = "
+              << sum_parallel[j] << "\n";
+  }
+
+  std::cout << "Vector add successfully completed on device.\n";
+  return 0;
+}
