Skip to content

Commit 119d364

Browse files
committed
Implement eventsWait, etc by waiting on signal events
1 parent 3d6e38f commit 119d364

7 files changed

+360
-212
lines changed

unified-runtime/source/adapters/level_zero/v2/command_buffer.cpp

Lines changed: 1 addition & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -68,8 +68,7 @@ ur_exp_command_buffer_handle_t_::ur_exp_command_buffer_handle_t_(
6868
const ur_exp_command_buffer_desc_t *desc)
6969
: commandListManager(
7070
context, device,
71-
std::forward<v2::raii::command_list_unique_handle>(commandList),
72-
v2::EVENT_FLAGS_COUNTER, nullptr),
71+
std::forward<v2::raii::command_list_unique_handle>(commandList)),
7372
isUpdatable(desc ? desc->isUpdatable : false), context(context),
7473
device(device) {}
7574

unified-runtime/source/adapters/level_zero/v2/command_list_manager.cpp

Lines changed: 37 additions & 61 deletions
Original file line number | Diff line number | Diff line change
@@ -18,10 +18,8 @@
1818

1919
ur_command_list_manager::ur_command_list_manager(
2020
ur_context_handle_t context, ur_device_handle_t device,
21-
v2::raii::command_list_unique_handle &&commandList, v2::event_flags_t flags,
22-
ur_queue_t_ *queue)
23-
: hContext(context), hDevice(device), queue(queue),
24-
eventPool(context->getEventPoolCache().borrow(device->Id.value(), flags)),
21+
v2::raii::command_list_unique_handle &&commandList)
22+
: hContext(context), hDevice(device),
2523
zeCommandList(std::move(commandList)) {
2624
UR_CALL_THROWS(ur::level_zero::urContextRetain(context));
2725
UR_CALL_THROWS(ur::level_zero::urDeviceRetain(device));
@@ -35,11 +33,9 @@ ur_command_list_manager::~ur_command_list_manager() {
3533
ur_result_t ur_command_list_manager::appendGenericFillUnlocked(
3634
ur_mem_buffer_t *dst, size_t offset, size_t patternSize,
3735
const void *pPattern, size_t size, uint32_t numEventsInWaitList,
38-
const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent,
39-
ur_command_t commandType) {
40-
41-
auto zeSignalEvent = getSignalEvent(phEvent, commandType);
36+
const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) {
4237

38+
auto zeSignalEvent = getSignalEvent(phEvent);
4339
auto waitListView = getWaitListView(phEventWaitList, numEventsInWaitList);
4440

4541
auto pDst = ur_cast<char *>(dst->getDevicePtr(
@@ -79,10 +75,8 @@ ur_result_t ur_command_list_manager::appendGenericFillUnlocked(
7975
ur_result_t ur_command_list_manager::appendGenericCopyUnlocked(
8076
ur_mem_buffer_t *src, ur_mem_buffer_t *dst, bool blocking, size_t srcOffset,
8177
size_t dstOffset, size_t size, uint32_t numEventsInWaitList,
82-
const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent,
83-
ur_command_t commandType) {
84-
auto zeSignalEvent = getSignalEvent(phEvent, commandType);
85-
78+
const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) {
79+
auto zeSignalEvent = getSignalEvent(phEvent);
8680
auto waitListView = getWaitListView(phEventWaitList, numEventsInWaitList);
8781

8882
auto pSrc = ur_cast<char *>(src->getDevicePtr(
@@ -119,13 +113,11 @@ ur_result_t ur_command_list_manager::appendRegionCopyUnlocked(
119113
ur_rect_offset_t srcOrigin, ur_rect_offset_t dstOrigin,
120114
ur_rect_region_t region, size_t srcRowPitch, size_t srcSlicePitch,
121115
size_t dstRowPitch, size_t dstSlicePitch, uint32_t numEventsInWaitList,
122-
const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent,
123-
ur_command_t commandType) {
116+
const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) {
124117
auto zeParams = ur2zeRegionParams(srcOrigin, dstOrigin, region, srcRowPitch,
125118
dstRowPitch, srcSlicePitch, dstSlicePitch);
126119

127-
auto zeSignalEvent = getSignalEvent(phEvent, commandType);
128-
120+
auto zeSignalEvent = getSignalEvent(phEvent);
129121
auto waitListView = getWaitListView(phEventWaitList, numEventsInWaitList);
130122

131123
auto pSrc = ur_cast<char *>(src->getDevicePtr(
@@ -175,11 +167,8 @@ wait_list_view ur_command_list_manager::getWaitListView(
175167
}
176168

177169
ze_event_handle_t
178-
ur_command_list_manager::getSignalEvent(ur_event_handle_t *hUserEvent,
179-
ur_command_t commandType) {
170+
ur_command_list_manager::getSignalEvent(ur_event_handle_t *hUserEvent) {
180171
if (hUserEvent) {
181-
*hUserEvent = eventPool->allocate();
182-
(*hUserEvent)->resetQueueAndCommand(queue, commandType);
183172
return (*hUserEvent)->getZeEvent();
184173
} else {
185174
return nullptr;
@@ -209,8 +198,7 @@ ur_result_t ur_command_list_manager::enqueueKernelLaunch(
209198
zeThreadGroupDimensions, WG, workDim,
210199
pGlobalWorkSize, pLocalWorkSize));
211200

212-
auto zeSignalEvent = getSignalEvent(phEvent, UR_COMMAND_KERNEL_LAUNCH);
213-
201+
auto zeSignalEvent = getSignalEvent(phEvent);
214202
auto waitListView = getWaitListView(phEventWaitList, numEventsInWaitList);
215203

216204
auto memoryMigrate = [&](void *src, void *dst, size_t size) {
@@ -251,8 +239,7 @@ ur_result_t ur_command_list_manager::enqueueUSMMemcpy(
251239
ur_event_handle_t *phEvent) {
252240
TRACK_SCOPE_LATENCY("ur_command_list_manager::enqueueUSMMemcpy");
253241

254-
auto zeSignalEvent = getSignalEvent(phEvent, UR_COMMAND_USM_MEMCPY);
255-
242+
auto zeSignalEvent = getSignalEvent(phEvent);
256243
auto [pWaitEvents, numWaitEvents] =
257244
getWaitListView(phEventWaitList, numEventsInWaitList);
258245

@@ -280,7 +267,7 @@ ur_result_t ur_command_list_manager::enqueueMemBufferFill(
280267

281268
return appendGenericFillUnlocked(hBuffer, offset, patternSize, pPattern, size,
282269
numEventsInWaitList, phEventWaitList,
283-
phEvent, UR_COMMAND_MEM_BUFFER_FILL);
270+
phEvent);
284271
}
285272

286273
ur_result_t ur_command_list_manager::enqueueUSMFill(
@@ -292,7 +279,7 @@ ur_result_t ur_command_list_manager::enqueueUSMFill(
292279
ur_usm_handle_t dstHandle(hContext, size, pMem);
293280
return appendGenericFillUnlocked(&dstHandle, 0, patternSize, pPattern, size,
294281
numEventsInWaitList, phEventWaitList,
295-
phEvent, UR_COMMAND_USM_FILL);
282+
phEvent);
296283
}
297284

298285
ur_result_t ur_command_list_manager::enqueueUSMPrefetch(
@@ -301,8 +288,7 @@ ur_result_t ur_command_list_manager::enqueueUSMPrefetch(
301288
ur_event_handle_t *phEvent) {
302289
TRACK_SCOPE_LATENCY("ur_command_list_manager::enqueueUSMPrefetch");
303290

304-
auto zeSignalEvent = getSignalEvent(phEvent, UR_COMMAND_USM_PREFETCH);
305-
291+
auto zeSignalEvent = getSignalEvent(phEvent);
306292
auto [pWaitEvents, numWaitEvents] =
307293
getWaitListView(phEventWaitList, numEventsInWaitList);
308294

@@ -329,8 +315,7 @@ ur_command_list_manager::enqueueUSMAdvise(const void *pMem, size_t size,
329315

330316
auto zeAdvice = ur_cast<ze_memory_advice_t>(advice);
331317

332-
auto zeSignalEvent = getSignalEvent(phEvent, UR_COMMAND_USM_ADVISE);
333-
318+
auto zeSignalEvent = getSignalEvent(phEvent);
334319
auto [pWaitEvents, numWaitEvents] = getWaitListView(nullptr, 0);
335320

336321
if (pWaitEvents) {
@@ -363,7 +348,7 @@ ur_result_t ur_command_list_manager::enqueueMemBufferRead(
363348

364349
return appendGenericCopyUnlocked(hBuffer, &dstHandle, blockingRead, offset, 0,
365350
size, numEventsInWaitList, phEventWaitList,
366-
phEvent, UR_COMMAND_MEM_BUFFER_READ);
351+
phEvent);
367352
}
368353

369354
ur_result_t ur_command_list_manager::enqueueMemBufferWrite(
@@ -379,9 +364,9 @@ ur_result_t ur_command_list_manager::enqueueMemBufferWrite(
379364

380365
std::scoped_lock<ur_shared_mutex> lock(hBuffer->getMutex());
381366

382-
return appendGenericCopyUnlocked(
383-
&srcHandle, hBuffer, blockingWrite, 0, offset, size, numEventsInWaitList,
384-
phEventWaitList, phEvent, UR_COMMAND_MEM_BUFFER_WRITE);
367+
return appendGenericCopyUnlocked(&srcHandle, hBuffer, blockingWrite, 0,
368+
offset, size, numEventsInWaitList,
369+
phEventWaitList, phEvent);
385370
}
386371

387372
ur_result_t ur_command_list_manager::enqueueMemBufferCopy(
@@ -403,8 +388,7 @@ ur_result_t ur_command_list_manager::enqueueMemBufferCopy(
403388

404389
return appendGenericCopyUnlocked(hBufferSrc, hBufferDst, false, srcOffset,
405390
dstOffset, size, numEventsInWaitList,
406-
phEventWaitList, phEvent,
407-
UR_COMMAND_MEM_BUFFER_COPY);
391+
phEventWaitList, phEvent);
408392
}
409393

410394
ur_result_t ur_command_list_manager::enqueueMemBufferReadRect(
@@ -423,8 +407,7 @@ ur_result_t ur_command_list_manager::enqueueMemBufferReadRect(
423407
return appendRegionCopyUnlocked(
424408
hBuffer, &dstHandle, blockingRead, bufferOrigin, hostOrigin, region,
425409
bufferRowPitch, bufferSlicePitch, hostRowPitch, hostSlicePitch,
426-
numEventsInWaitList, phEventWaitList, phEvent,
427-
UR_COMMAND_MEM_BUFFER_READ_RECT);
410+
numEventsInWaitList, phEventWaitList, phEvent);
428411
}
429412

430413
ur_result_t ur_command_list_manager::enqueueMemBufferWriteRect(
@@ -443,8 +426,7 @@ ur_result_t ur_command_list_manager::enqueueMemBufferWriteRect(
443426
return appendRegionCopyUnlocked(
444427
&srcHandle, hBuffer, blockingWrite, hostOrigin, bufferOrigin, region,
445428
hostRowPitch, hostSlicePitch, bufferRowPitch, bufferSlicePitch,
446-
numEventsInWaitList, phEventWaitList, phEvent,
447-
UR_COMMAND_MEM_BUFFER_WRITE_RECT);
429+
numEventsInWaitList, phEventWaitList, phEvent);
448430
}
449431

450432
ur_result_t ur_command_list_manager::enqueueMemBufferCopyRect(
@@ -464,7 +446,7 @@ ur_result_t ur_command_list_manager::enqueueMemBufferCopyRect(
464446
return appendRegionCopyUnlocked(
465447
hBufferSrc, hBufferDst, false, srcOrigin, dstOrigin, region, srcRowPitch,
466448
srcSlicePitch, dstRowPitch, dstSlicePitch, numEventsInWaitList,
467-
phEventWaitList, phEvent, UR_COMMAND_MEM_BUFFER_COPY_RECT);
449+
phEventWaitList, phEvent);
468450
}
469451

470452
ur_result_t ur_command_list_manager::enqueueUSMMemcpy2D(
@@ -479,10 +461,9 @@ ur_result_t ur_command_list_manager::enqueueUSMMemcpy2D(
479461
ur_usm_handle_t srcHandle(hContext, 0, pSrc);
480462
ur_usm_handle_t dstHandle(hContext, 0, pDst);
481463

482-
return appendRegionCopyUnlocked(&srcHandle, &dstHandle, blocking, zeroOffset,
483-
zeroOffset, region, srcPitch, 0, dstPitch, 0,
484-
numEventsInWaitList, phEventWaitList, phEvent,
485-
UR_COMMAND_MEM_BUFFER_COPY_RECT);
464+
return appendRegionCopyUnlocked(
465+
&srcHandle, &dstHandle, blocking, zeroOffset, zeroOffset, region,
466+
srcPitch, 0, dstPitch, 0, numEventsInWaitList, phEventWaitList, phEvent);
486467
}
487468

488469
ur_result_t ur_command_list_manager::enqueueCooperativeKernelLaunchExp(
@@ -509,8 +490,7 @@ ur_result_t ur_command_list_manager::enqueueCooperativeKernelLaunchExp(
509490
zeThreadGroupDimensions, WG, workDim,
510491
pGlobalWorkSize, pLocalWorkSize));
511492

512-
auto zeSignalEvent = getSignalEvent(phEvent, UR_COMMAND_KERNEL_LAUNCH);
513-
493+
auto zeSignalEvent = getSignalEvent(phEvent);
514494
auto waitListView = getWaitListView(phEventWaitList, numEventsInWaitList);
515495

516496
auto memoryMigrate = [&](void *src, void *dst, size_t size) {
@@ -555,7 +535,7 @@ ur_result_t ur_command_list_manager::enqueueTimestampRecordingExp(
555535
if (!phEvent) {
556536
return UR_RESULT_ERROR_INVALID_NULL_HANDLE;
557537
}
558-
getSignalEvent(phEvent, UR_COMMAND_TIMESTAMP_RECORDING_EXP);
538+
559539
auto [pWaitEvents, numWaitEvents] =
560540
getWaitListView(phEventWaitList, numEventsInWaitList);
561541

@@ -582,8 +562,7 @@ ur_result_t ur_command_list_manager::enqueueGenericCommandListsExp(
582562
ur_event_handle_t additionalWaitEvent) {
583563
TRACK_SCOPE_LATENCY("ur_command_list_manager::enqueueGenericCommandListsExp");
584564

585-
auto zeSignalEvent = getSignalEvent(phEvent, callerCommand);
586-
565+
auto zeSignalEvent = getSignalEvent(phEvent);
587566
auto [pWaitEvents, numWaitEvents] = getWaitListView(
588567
phEventWaitList, numEventsInWaitList, additionalWaitEvent);
589568

@@ -627,7 +606,7 @@ ur_result_t ur_command_list_manager::enqueueMemImageRead(
627606

628607
auto hImage = hMem->getImage();
629608

630-
auto zeSignalEvent = getSignalEvent(phEvent, UR_COMMAND_MEM_IMAGE_READ);
609+
auto zeSignalEvent = getSignalEvent(phEvent);
631610
auto waitListView = getWaitListView(phEventWaitList, numEventsInWaitList);
632611

633612
auto [zeImage, zeRegion] =
@@ -653,7 +632,7 @@ ur_result_t ur_command_list_manager::enqueueMemImageWrite(
653632

654633
auto hImage = hMem->getImage();
655634

656-
auto zeSignalEvent = getSignalEvent(phEvent, UR_COMMAND_MEM_IMAGE_WRITE);
635+
auto zeSignalEvent = getSignalEvent(phEvent);
657636
auto waitListView = getWaitListView(phEventWaitList, numEventsInWaitList);
658637

659638
auto [zeImage, zeRegion] =
@@ -680,7 +659,7 @@ ur_result_t ur_command_list_manager::enqueueMemImageCopy(
680659
auto hImageSrc = hSrc->getImage();
681660
auto hImageDst = hDst->getImage();
682661

683-
auto zeSignalEvent = getSignalEvent(phEvent, UR_COMMAND_MEM_IMAGE_COPY);
662+
auto zeSignalEvent = getSignalEvent(phEvent);
684663
auto waitListView = getWaitListView(phEventWaitList, numEventsInWaitList);
685664

686665
auto desc = ur_mem_image_t::getCopyRegions(*hImageSrc, *hImageDst, srcOrigin,
@@ -708,8 +687,7 @@ ur_result_t ur_command_list_manager::enqueueMemBufferMap(
708687

709688
std::scoped_lock<ur_shared_mutex> lock(hBuffer->getMutex());
710689

711-
auto zeSignalEvent = getSignalEvent(phEvent, UR_COMMAND_MEM_BUFFER_MAP);
712-
690+
auto zeSignalEvent = getSignalEvent(phEvent);
713691
auto waitListView = getWaitListView(phEventWaitList, numEventsInWaitList);
714692

715693
auto pDst = ur_cast<char *>(hBuffer->mapHostPtr(
@@ -746,8 +724,7 @@ ur_result_t ur_command_list_manager::enqueueMemUnmap(
746724

747725
auto hBuffer = hMem->getBuffer();
748726

749-
auto zeSignalEvent = getSignalEvent(phEvent, UR_COMMAND_MEM_UNMAP);
750-
727+
auto zeSignalEvent = getSignalEvent(phEvent);
751728
auto waitListView = getWaitListView(phEventWaitList, numEventsInWaitList);
752729

753730
// TODO: currently unmapHostPtr deallocates memory immediately,
@@ -890,7 +867,7 @@ ur_result_t ur_command_list_manager::bindlessImagesImageCopyExp(
890867
ur_exp_image_copy_flags_t imageCopyFlags, uint32_t numEventsInWaitList,
891868
const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) {
892869

893-
auto zeSignalEvent = getSignalEvent(phEvent, UR_COMMAND_MEM_IMAGE_COPY);
870+
auto zeSignalEvent = getSignalEvent(phEvent);
894871
auto waitListView = getWaitListView(phEventWaitList, numEventsInWaitList);
895872

896873
return bindlessImagesHandleCopyFlags(
@@ -952,7 +929,7 @@ ur_result_t ur_command_list_manager::enqueueEventsWait(
952929
ur_event_handle_t *phEvent) {
953930
TRACK_SCOPE_LATENCY("ur_command_list_manager::enqueueEventsWait");
954931

955-
auto zeSignalEvent = getSignalEvent(phEvent, UR_COMMAND_EVENTS_WAIT);
932+
auto zeSignalEvent = getSignalEvent(phEvent);
956933
auto [pWaitEvents, numWaitEvents] =
957934
getWaitListView(phEventWaitList, numEventsInWaitList);
958935

@@ -974,8 +951,7 @@ ur_result_t ur_command_list_manager::enqueueEventsWaitWithBarrier(
974951
ur_event_handle_t *phEvent) {
975952
TRACK_SCOPE_LATENCY("ur_command_list_manager::enqueueEventsWaitWithBarrier");
976953

977-
auto zeSignalEvent =
978-
getSignalEvent(phEvent, UR_COMMAND_EVENTS_WAIT_WITH_BARRIER);
954+
auto zeSignalEvent = getSignalEvent(phEvent);
979955
auto [pWaitEvents, numWaitEvents] =
980956
getWaitListView(phEventWaitList, numEventsInWaitList);
981957

unified-runtime/source/adapters/level_zero/v2/command_list_manager.hpp

Lines changed: 11 additions & 18 deletions
Original file line number | Diff line number | Diff line change
@@ -37,8 +37,7 @@ struct wait_list_view {
3737
struct ur_command_list_manager {
3838
ur_command_list_manager(ur_context_handle_t context,
3939
ur_device_handle_t device,
40-
v2::raii::command_list_unique_handle &&commandList,
41-
v2::event_flags_t flags, ur_queue_t_ *queue);
40+
v2::raii::command_list_unique_handle &&commandList);
4241
ur_command_list_manager(const ur_command_list_manager &src) = delete;
4342
ur_command_list_manager(ur_command_list_manager &&src) = default;
4443

@@ -64,8 +63,7 @@ struct ur_command_list_manager {
6463
wait_list_view
6564
getWaitListView(const ur_event_handle_t *phWaitEvents, uint32_t numWaitEvents,
6665
ur_event_handle_t additionalWaitEvent = nullptr);
67-
ze_event_handle_t getSignalEvent(ur_event_handle_t *hUserEvent,
68-
ur_command_t commandType);
66+
ze_event_handle_t getSignalEvent(ur_event_handle_t *hUserEvent);
6967

7068
/************ Generic queue methods *************/
7169
ur_result_t enqueueEventsWait(uint32_t numEventsInWaitList,
@@ -262,35 +260,30 @@ struct ur_command_list_manager {
262260
uint32_t, const ur_event_handle_t *,
263261
ur_event_handle_t *);
264262

265-
protected:
263+
private:
266264
ur_result_t appendGenericFillUnlocked(
267265
ur_mem_buffer_t *hBuffer, size_t offset, size_t patternSize,
268266
const void *pPattern, size_t size, uint32_t numEventsInWaitList,
269-
const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent,
270-
ur_command_t commandType);
267+
const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent);
271268

272-
ur_result_t appendGenericCopyUnlocked(
273-
ur_mem_buffer_t *src, ur_mem_buffer_t *dst, bool blocking,
274-
size_t srcOffset, size_t dstOffset, size_t size,
275-
uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList,
276-
ur_event_handle_t *phEvent, ur_command_t commandType);
269+
ur_result_t
270+
appendGenericCopyUnlocked(ur_mem_buffer_t *src, ur_mem_buffer_t *dst,
271+
bool blocking, size_t srcOffset, size_t dstOffset,
272+
size_t size, uint32_t numEventsInWaitList,
273+
const ur_event_handle_t *phEventWaitList,
274+
ur_event_handle_t *phEvent);
277275

278276
ur_result_t appendRegionCopyUnlocked(
279277
ur_mem_buffer_t *src, ur_mem_buffer_t *dst, bool blocking,
280278
ur_rect_offset_t srcOrigin, ur_rect_offset_t dstOrigin,
281279
ur_rect_region_t region, size_t srcRowPitch, size_t srcSlicePitch,
282280
size_t dstRowPitch, size_t dstSlicePitch, uint32_t numEventsInWaitList,
283-
const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent,
284-
ur_command_t commandType);
281+
const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent);
285282

286283
ur_context_handle_t hContext;
287284
ur_device_handle_t hDevice;
288-
ur_queue_t_ *queue;
289285

290286
std::vector<ur_kernel_handle_t> submittedKernels;
291-
292-
v2::raii::cache_borrowed_event_pool eventPool;
293287
v2::raii::command_list_unique_handle zeCommandList;
294-
295288
std::vector<ze_event_handle_t> waitList;
296289
};

0 commit comments

Comments
 (0)