Skip to content

Commit 6925dd9

Browse files
authored
FPGA gzip area fix (oneapi-src#639)
* Fix segfaults in the GZIP design without adding predication to the LSU in the kernel (the predication caused an area increase).
* GZIP seed update.
* Seed update for the low-latency version.
1 parent cb67a41 commit 6925dd9

File tree

5 files changed

+23
-16
lines changed

5 files changed

+23
-16
lines changed

DirectProgramming/DPC++FPGA/ReferenceDesigns/gzip/src/CMakeLists.txt

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -35,13 +35,13 @@ endif()
3535
if(FPGA_BOARD MATCHES ".*a10.*")
3636
# A10 parameters
3737
set(NUM_ENGINES 1)
38-
set(LL_SEED "-Xsseed=1")
39-
set(HIGH_BW_SEED "-Xsseed=6")
38+
set(LL_SEED "-Xsseed=4")
39+
set(HIGH_BW_SEED "-Xsseed=4")
4040
set(NUM_REORDER "")
4141
elseif(FPGA_BOARD MATCHES ".*s10.*")
4242
# S10 parameters
4343
set(NUM_ENGINES 2)
44-
set(LL_SEED "-Xsseed=2")
44+
set(LL_SEED "-Xsseed=6")
4545
set(HIGH_BW_SEED "-Xsseed=2")
4646
set(NUM_REORDER "-Xsnum-reorder=6")
4747
elseif(FPGA_BOARD MATCHES ".*agilex.*")

DirectProgramming/DPC++FPGA/ReferenceDesigns/gzip/src/gzip.cpp

Lines changed: 9 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -277,6 +277,10 @@ int CompressFile(queue &q, std::string &input_file, std::vector<std::string> out
277277

278278
int buffers_count = iterations;
279279

280+
// padding for the input and output buffers to deal with granularity of
281+
// kernel reads and writes
282+
constexpr size_t kInOutPadding = 16 * kVec;
283+
280284
// Create an array of kernel info structures and create buffers for kernel
281285
// input/output. The buffers are re-used between iterations, but enough
282286
// disjoint buffers are created to support double-buffering.
@@ -292,9 +296,10 @@ int CompressFile(queue &q, std::string &input_file, std::vector<std::string> out
292296
kinfo[eng][i].file_size = isz;
293297
// Allocating slightly larger buffers (+ 16 * kVec) to account for
294298
// granularity of kernel writes
295-
int outputSize = kinfo[eng][i].file_size + 16 * kVec < kMinBufferSize
296-
? kMinBufferSize
297-
: kinfo[eng][i].file_size + 16 * kVec;
299+
int outputSize =
300+
((isz + kInOutPadding) < kMinBufferSize) ? kMinBufferSize
301+
: (isz + kInOutPadding);
302+
const size_t input_alloc_size = isz + kInOutPadding;
298303

299304
// Pre-pin buffer using malloc_host() to improve DMA bandwidth.
300305
if (i >= 3) {
@@ -327,7 +332,7 @@ int CompressFile(queue &q, std::string &input_file, std::vector<std::string> out
327332
: new buffer<unsigned, 1>(kMinBufferSize);
328333
kinfo[eng][i].pibuf = i >= 3
329334
? kinfo[eng][i - 3].pibuf
330-
: new buffer<char, 1>(kinfo[eng][i].file_size);
335+
: new buffer<char, 1>(input_alloc_size);
331336
kinfo[eng][i].pobuf =
332337
i >= 3 ? kinfo[eng][i - 3].pobuf : new buffer<char, 1>(outputSize);
333338
kinfo[eng][i].pobuf_decompress = (char *)malloc(kinfo[eng][i].file_size);

DirectProgramming/DPC++FPGA/ReferenceDesigns/gzip/src/gzip_ll.cpp

Lines changed: 8 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -306,14 +306,20 @@ int CompressFile(queue &q, std::string &input_file,
306306
}
307307
}
308308

309+
// padding for the input and output buffers to deal with granularity of
310+
// kernel reads and writes
311+
constexpr size_t kInOutPadding = 16 * kVec;
312+
309313
// This loop allocates host-side USM buffers, to be accessed by the kernel.
310314
for (size_t eng = 0; eng < kNumEngines; eng++) {
311315
for (int i = 0; i < buffers_count; i++) {
312316
kinfo[eng][i].input_size = isz;
313317
// Allocating slightly larger buffers (+ 16 * kVec) to account for
314318
// granularity of kernel writes
315319
kinfo[eng][i].output_size =
316-
isz + 16 * kVec < kMinBufferSize ? kMinBufferSize : isz + 16 * kVec;
320+
((isz + kInOutPadding) < kMinBufferSize) ? kMinBufferSize
321+
: (isz + kInOutPadding);
322+
const size_t input_alloc_size = isz + kInOutPadding;
317323

318324
kinfo[eng][i].last_block = true;
319325
kinfo[eng][i].pref_buffer = pinbuf;
@@ -351,7 +357,7 @@ int CompressFile(queue &q, std::string &input_file,
351357
// since the buffers get subsequently reused.
352358
for (int b = 0; b < BATCH_SIZE; b++) {
353359
kinfo[eng][i].pibuf_ptr_array[b] =
354-
alloc_char.allocate(kinfo[eng][i].input_size * sizeof(char));
360+
alloc_char.allocate(input_alloc_size * sizeof(char));
355361
kinfo[eng][i].pobuf_ptr_array[b] =
356362
alloc_char.allocate(kinfo[eng][i].output_size * sizeof(char));
357363
memset(kinfo[eng][i].pobuf_ptr_array[b], 0,

DirectProgramming/DPC++FPGA/ReferenceDesigns/gzip/src/gzipkernel.cpp

Lines changed: 2 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -2063,14 +2063,11 @@ void SubmitGzipTasksSingleEngine(
20632063

20642064
// load in new data
20652065
struct LzInput in;
2066+
Unroller<0, kVec>::step([&](int i) { in.data[i] = acc_pibuf[inpos++]; });
20662067
Unroller<0, kVec>::step([&](int i) {
2067-
// prevent out-of-bounds reads
2068-
in.data[i] = (inpos < accessor_isz) ? acc_pibuf[inpos++] : 0;
2068+
current_window[i + kVec] = in.data[i];
20692069
});
20702070

2071-
Unroller<0, kVec>::step(
2072-
[&](int i) { current_window[i + kVec] = in.data[i]; });
2073-
20742071
do {
20752072
//-----------------------------
20762073
// Prepare current window

DirectProgramming/DPC++FPGA/ReferenceDesigns/gzip/src/gzipkernel_ll.cpp

Lines changed: 1 addition & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -2193,8 +2193,7 @@ event SubmitLZReduction(queue &q, size_t block_size, bool last_block,
21932193

21942194
// load in new data
21952195
Unroller<0, kVec>::step([&](int i) {
2196-
// guarding against out-of-bounds accesses
2197-
in.data[i] = (inpos < accessor_isz) ? acc_pibuf[inpos++] : 0;
2196+
in.data[i] = acc_pibuf[inpos++];
21982197
input_data.arr[16 * (int)crc_ch_load_upper + i] = in.data[i];
21992198
});
22002199

0 commit comments

Comments
 (0)