18
18
19
19
ur_command_list_manager::ur_command_list_manager (
20
20
ur_context_handle_t context, ur_device_handle_t device,
21
- v2::raii::command_list_unique_handle &&commandList, v2::event_flags_t flags,
22
- ur_queue_t_ *queue)
23
- : hContext(context), hDevice(device), queue(queue),
24
- eventPool(context->getEventPoolCache ().borrow(device->Id.value(), flags)),
21
+ v2::raii::command_list_unique_handle &&commandList)
22
+ : hContext(context), hDevice(device),
25
23
zeCommandList(std::move(commandList)) {
26
24
UR_CALL_THROWS (ur::level_zero::urContextRetain (context));
27
25
UR_CALL_THROWS (ur::level_zero::urDeviceRetain (device));
@@ -35,11 +33,9 @@ ur_command_list_manager::~ur_command_list_manager() {
35
33
ur_result_t ur_command_list_manager::appendGenericFillUnlocked (
36
34
ur_mem_buffer_t *dst, size_t offset, size_t patternSize,
37
35
const void *pPattern, size_t size, uint32_t numEventsInWaitList,
38
- const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent,
39
- ur_command_t commandType) {
40
-
41
- auto zeSignalEvent = getSignalEvent (phEvent, commandType);
36
+ const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) {
42
37
38
+ auto zeSignalEvent = getSignalEvent (phEvent);
43
39
auto waitListView = getWaitListView (phEventWaitList, numEventsInWaitList);
44
40
45
41
auto pDst = ur_cast<char *>(dst->getDevicePtr (
@@ -79,10 +75,8 @@ ur_result_t ur_command_list_manager::appendGenericFillUnlocked(
79
75
ur_result_t ur_command_list_manager::appendGenericCopyUnlocked (
80
76
ur_mem_buffer_t *src, ur_mem_buffer_t *dst, bool blocking, size_t srcOffset,
81
77
size_t dstOffset, size_t size, uint32_t numEventsInWaitList,
82
- const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent,
83
- ur_command_t commandType) {
84
- auto zeSignalEvent = getSignalEvent (phEvent, commandType);
85
-
78
+ const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) {
79
+ auto zeSignalEvent = getSignalEvent (phEvent);
86
80
auto waitListView = getWaitListView (phEventWaitList, numEventsInWaitList);
87
81
88
82
auto pSrc = ur_cast<char *>(src->getDevicePtr (
@@ -119,13 +113,11 @@ ur_result_t ur_command_list_manager::appendRegionCopyUnlocked(
119
113
ur_rect_offset_t srcOrigin, ur_rect_offset_t dstOrigin,
120
114
ur_rect_region_t region, size_t srcRowPitch, size_t srcSlicePitch,
121
115
size_t dstRowPitch, size_t dstSlicePitch, uint32_t numEventsInWaitList,
122
- const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent,
123
- ur_command_t commandType) {
116
+ const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) {
124
117
auto zeParams = ur2zeRegionParams (srcOrigin, dstOrigin, region, srcRowPitch,
125
118
dstRowPitch, srcSlicePitch, dstSlicePitch);
126
119
127
- auto zeSignalEvent = getSignalEvent (phEvent, commandType);
128
-
120
+ auto zeSignalEvent = getSignalEvent (phEvent);
129
121
auto waitListView = getWaitListView (phEventWaitList, numEventsInWaitList);
130
122
131
123
auto pSrc = ur_cast<char *>(src->getDevicePtr (
@@ -175,11 +167,8 @@ wait_list_view ur_command_list_manager::getWaitListView(
175
167
}
176
168
177
169
ze_event_handle_t
178
- ur_command_list_manager::getSignalEvent (ur_event_handle_t *hUserEvent,
179
- ur_command_t commandType) {
170
+ ur_command_list_manager::getSignalEvent (ur_event_handle_t *hUserEvent) {
180
171
if (hUserEvent) {
181
- *hUserEvent = eventPool->allocate ();
182
- (*hUserEvent)->resetQueueAndCommand (queue, commandType);
183
172
return (*hUserEvent)->getZeEvent ();
184
173
} else {
185
174
return nullptr ;
@@ -209,8 +198,7 @@ ur_result_t ur_command_list_manager::enqueueKernelLaunch(
209
198
zeThreadGroupDimensions, WG, workDim,
210
199
pGlobalWorkSize, pLocalWorkSize));
211
200
212
- auto zeSignalEvent = getSignalEvent (phEvent, UR_COMMAND_KERNEL_LAUNCH);
213
-
201
+ auto zeSignalEvent = getSignalEvent (phEvent);
214
202
auto waitListView = getWaitListView (phEventWaitList, numEventsInWaitList);
215
203
216
204
auto memoryMigrate = [&](void *src, void *dst, size_t size) {
@@ -251,8 +239,7 @@ ur_result_t ur_command_list_manager::enqueueUSMMemcpy(
251
239
ur_event_handle_t *phEvent) {
252
240
TRACK_SCOPE_LATENCY (" ur_command_list_manager::enqueueUSMMemcpy" );
253
241
254
- auto zeSignalEvent = getSignalEvent (phEvent, UR_COMMAND_USM_MEMCPY);
255
-
242
+ auto zeSignalEvent = getSignalEvent (phEvent);
256
243
auto [pWaitEvents, numWaitEvents] =
257
244
getWaitListView (phEventWaitList, numEventsInWaitList);
258
245
@@ -280,7 +267,7 @@ ur_result_t ur_command_list_manager::enqueueMemBufferFill(
280
267
281
268
return appendGenericFillUnlocked (hBuffer, offset, patternSize, pPattern, size,
282
269
numEventsInWaitList, phEventWaitList,
283
- phEvent, UR_COMMAND_MEM_BUFFER_FILL );
270
+ phEvent);
284
271
}
285
272
286
273
ur_result_t ur_command_list_manager::enqueueUSMFill (
@@ -292,7 +279,7 @@ ur_result_t ur_command_list_manager::enqueueUSMFill(
292
279
ur_usm_handle_t dstHandle (hContext, size, pMem);
293
280
return appendGenericFillUnlocked (&dstHandle, 0 , patternSize, pPattern, size,
294
281
numEventsInWaitList, phEventWaitList,
295
- phEvent, UR_COMMAND_USM_FILL );
282
+ phEvent);
296
283
}
297
284
298
285
ur_result_t ur_command_list_manager::enqueueUSMPrefetch (
@@ -301,8 +288,7 @@ ur_result_t ur_command_list_manager::enqueueUSMPrefetch(
301
288
ur_event_handle_t *phEvent) {
302
289
TRACK_SCOPE_LATENCY (" ur_command_list_manager::enqueueUSMPrefetch" );
303
290
304
- auto zeSignalEvent = getSignalEvent (phEvent, UR_COMMAND_USM_PREFETCH);
305
-
291
+ auto zeSignalEvent = getSignalEvent (phEvent);
306
292
auto [pWaitEvents, numWaitEvents] =
307
293
getWaitListView (phEventWaitList, numEventsInWaitList);
308
294
@@ -329,8 +315,7 @@ ur_command_list_manager::enqueueUSMAdvise(const void *pMem, size_t size,
329
315
330
316
auto zeAdvice = ur_cast<ze_memory_advice_t >(advice);
331
317
332
- auto zeSignalEvent = getSignalEvent (phEvent, UR_COMMAND_USM_ADVISE);
333
-
318
+ auto zeSignalEvent = getSignalEvent (phEvent);
334
319
auto [pWaitEvents, numWaitEvents] = getWaitListView (nullptr , 0 );
335
320
336
321
if (pWaitEvents) {
@@ -363,7 +348,7 @@ ur_result_t ur_command_list_manager::enqueueMemBufferRead(
363
348
364
349
return appendGenericCopyUnlocked (hBuffer, &dstHandle, blockingRead, offset, 0 ,
365
350
size, numEventsInWaitList, phEventWaitList,
366
- phEvent, UR_COMMAND_MEM_BUFFER_READ );
351
+ phEvent);
367
352
}
368
353
369
354
ur_result_t ur_command_list_manager::enqueueMemBufferWrite (
@@ -379,9 +364,9 @@ ur_result_t ur_command_list_manager::enqueueMemBufferWrite(
379
364
380
365
std::scoped_lock<ur_shared_mutex> lock (hBuffer->getMutex ());
381
366
382
- return appendGenericCopyUnlocked (
383
- &srcHandle, hBuffer, blockingWrite, 0 , offset, size, numEventsInWaitList,
384
- phEventWaitList, phEvent, UR_COMMAND_MEM_BUFFER_WRITE );
367
+ return appendGenericCopyUnlocked (&srcHandle, hBuffer, blockingWrite, 0 ,
368
+ offset, size, numEventsInWaitList,
369
+ phEventWaitList, phEvent);
385
370
}
386
371
387
372
ur_result_t ur_command_list_manager::enqueueMemBufferCopy (
@@ -403,8 +388,7 @@ ur_result_t ur_command_list_manager::enqueueMemBufferCopy(
403
388
404
389
return appendGenericCopyUnlocked (hBufferSrc, hBufferDst, false , srcOffset,
405
390
dstOffset, size, numEventsInWaitList,
406
- phEventWaitList, phEvent,
407
- UR_COMMAND_MEM_BUFFER_COPY);
391
+ phEventWaitList, phEvent);
408
392
}
409
393
410
394
ur_result_t ur_command_list_manager::enqueueMemBufferReadRect (
@@ -423,8 +407,7 @@ ur_result_t ur_command_list_manager::enqueueMemBufferReadRect(
423
407
return appendRegionCopyUnlocked (
424
408
hBuffer, &dstHandle, blockingRead, bufferOrigin, hostOrigin, region,
425
409
bufferRowPitch, bufferSlicePitch, hostRowPitch, hostSlicePitch,
426
- numEventsInWaitList, phEventWaitList, phEvent,
427
- UR_COMMAND_MEM_BUFFER_READ_RECT);
410
+ numEventsInWaitList, phEventWaitList, phEvent);
428
411
}
429
412
430
413
ur_result_t ur_command_list_manager::enqueueMemBufferWriteRect (
@@ -443,8 +426,7 @@ ur_result_t ur_command_list_manager::enqueueMemBufferWriteRect(
443
426
return appendRegionCopyUnlocked (
444
427
&srcHandle, hBuffer, blockingWrite, hostOrigin, bufferOrigin, region,
445
428
hostRowPitch, hostSlicePitch, bufferRowPitch, bufferSlicePitch,
446
- numEventsInWaitList, phEventWaitList, phEvent,
447
- UR_COMMAND_MEM_BUFFER_WRITE_RECT);
429
+ numEventsInWaitList, phEventWaitList, phEvent);
448
430
}
449
431
450
432
ur_result_t ur_command_list_manager::enqueueMemBufferCopyRect (
@@ -464,7 +446,7 @@ ur_result_t ur_command_list_manager::enqueueMemBufferCopyRect(
464
446
return appendRegionCopyUnlocked (
465
447
hBufferSrc, hBufferDst, false , srcOrigin, dstOrigin, region, srcRowPitch,
466
448
srcSlicePitch, dstRowPitch, dstSlicePitch, numEventsInWaitList,
467
- phEventWaitList, phEvent, UR_COMMAND_MEM_BUFFER_COPY_RECT );
449
+ phEventWaitList, phEvent);
468
450
}
469
451
470
452
ur_result_t ur_command_list_manager::enqueueUSMMemcpy2D (
@@ -479,10 +461,9 @@ ur_result_t ur_command_list_manager::enqueueUSMMemcpy2D(
479
461
ur_usm_handle_t srcHandle (hContext, 0 , pSrc);
480
462
ur_usm_handle_t dstHandle (hContext, 0 , pDst);
481
463
482
- return appendRegionCopyUnlocked (&srcHandle, &dstHandle, blocking, zeroOffset,
483
- zeroOffset, region, srcPitch, 0 , dstPitch, 0 ,
484
- numEventsInWaitList, phEventWaitList, phEvent,
485
- UR_COMMAND_MEM_BUFFER_COPY_RECT);
464
+ return appendRegionCopyUnlocked (
465
+ &srcHandle, &dstHandle, blocking, zeroOffset, zeroOffset, region,
466
+ srcPitch, 0 , dstPitch, 0 , numEventsInWaitList, phEventWaitList, phEvent);
486
467
}
487
468
488
469
ur_result_t ur_command_list_manager::enqueueCooperativeKernelLaunchExp (
@@ -509,8 +490,7 @@ ur_result_t ur_command_list_manager::enqueueCooperativeKernelLaunchExp(
509
490
zeThreadGroupDimensions, WG, workDim,
510
491
pGlobalWorkSize, pLocalWorkSize));
511
492
512
- auto zeSignalEvent = getSignalEvent (phEvent, UR_COMMAND_KERNEL_LAUNCH);
513
-
493
+ auto zeSignalEvent = getSignalEvent (phEvent);
514
494
auto waitListView = getWaitListView (phEventWaitList, numEventsInWaitList);
515
495
516
496
auto memoryMigrate = [&](void *src, void *dst, size_t size) {
@@ -555,7 +535,7 @@ ur_result_t ur_command_list_manager::enqueueTimestampRecordingExp(
555
535
if (!phEvent) {
556
536
return UR_RESULT_ERROR_INVALID_NULL_HANDLE;
557
537
}
558
- getSignalEvent (phEvent, UR_COMMAND_TIMESTAMP_RECORDING_EXP);
538
+
559
539
auto [pWaitEvents, numWaitEvents] =
560
540
getWaitListView (phEventWaitList, numEventsInWaitList);
561
541
@@ -582,8 +562,7 @@ ur_result_t ur_command_list_manager::enqueueGenericCommandListsExp(
582
562
ur_event_handle_t additionalWaitEvent) {
583
563
TRACK_SCOPE_LATENCY (" ur_command_list_manager::enqueueGenericCommandListsExp" );
584
564
585
- auto zeSignalEvent = getSignalEvent (phEvent, callerCommand);
586
-
565
+ auto zeSignalEvent = getSignalEvent (phEvent);
587
566
auto [pWaitEvents, numWaitEvents] = getWaitListView (
588
567
phEventWaitList, numEventsInWaitList, additionalWaitEvent);
589
568
@@ -627,7 +606,7 @@ ur_result_t ur_command_list_manager::enqueueMemImageRead(
627
606
628
607
auto hImage = hMem->getImage ();
629
608
630
- auto zeSignalEvent = getSignalEvent (phEvent, UR_COMMAND_MEM_IMAGE_READ );
609
+ auto zeSignalEvent = getSignalEvent (phEvent);
631
610
auto waitListView = getWaitListView (phEventWaitList, numEventsInWaitList);
632
611
633
612
auto [zeImage, zeRegion] =
@@ -653,7 +632,7 @@ ur_result_t ur_command_list_manager::enqueueMemImageWrite(
653
632
654
633
auto hImage = hMem->getImage ();
655
634
656
- auto zeSignalEvent = getSignalEvent (phEvent, UR_COMMAND_MEM_IMAGE_WRITE );
635
+ auto zeSignalEvent = getSignalEvent (phEvent);
657
636
auto waitListView = getWaitListView (phEventWaitList, numEventsInWaitList);
658
637
659
638
auto [zeImage, zeRegion] =
@@ -680,7 +659,7 @@ ur_result_t ur_command_list_manager::enqueueMemImageCopy(
680
659
auto hImageSrc = hSrc->getImage ();
681
660
auto hImageDst = hDst->getImage ();
682
661
683
- auto zeSignalEvent = getSignalEvent (phEvent, UR_COMMAND_MEM_IMAGE_COPY );
662
+ auto zeSignalEvent = getSignalEvent (phEvent);
684
663
auto waitListView = getWaitListView (phEventWaitList, numEventsInWaitList);
685
664
686
665
auto desc = ur_mem_image_t::getCopyRegions (*hImageSrc, *hImageDst, srcOrigin,
@@ -708,8 +687,7 @@ ur_result_t ur_command_list_manager::enqueueMemBufferMap(
708
687
709
688
std::scoped_lock<ur_shared_mutex> lock (hBuffer->getMutex ());
710
689
711
- auto zeSignalEvent = getSignalEvent (phEvent, UR_COMMAND_MEM_BUFFER_MAP);
712
-
690
+ auto zeSignalEvent = getSignalEvent (phEvent);
713
691
auto waitListView = getWaitListView (phEventWaitList, numEventsInWaitList);
714
692
715
693
auto pDst = ur_cast<char *>(hBuffer->mapHostPtr (
@@ -746,8 +724,7 @@ ur_result_t ur_command_list_manager::enqueueMemUnmap(
746
724
747
725
auto hBuffer = hMem->getBuffer ();
748
726
749
- auto zeSignalEvent = getSignalEvent (phEvent, UR_COMMAND_MEM_UNMAP);
750
-
727
+ auto zeSignalEvent = getSignalEvent (phEvent);
751
728
auto waitListView = getWaitListView (phEventWaitList, numEventsInWaitList);
752
729
753
730
// TODO: currently unmapHostPtr deallocates memory immediately,
@@ -890,7 +867,7 @@ ur_result_t ur_command_list_manager::bindlessImagesImageCopyExp(
890
867
ur_exp_image_copy_flags_t imageCopyFlags, uint32_t numEventsInWaitList,
891
868
const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) {
892
869
893
- auto zeSignalEvent = getSignalEvent (phEvent, UR_COMMAND_MEM_IMAGE_COPY );
870
+ auto zeSignalEvent = getSignalEvent (phEvent);
894
871
auto waitListView = getWaitListView (phEventWaitList, numEventsInWaitList);
895
872
896
873
return bindlessImagesHandleCopyFlags (
@@ -952,7 +929,7 @@ ur_result_t ur_command_list_manager::enqueueEventsWait(
952
929
ur_event_handle_t *phEvent) {
953
930
TRACK_SCOPE_LATENCY (" ur_command_list_manager::enqueueEventsWait" );
954
931
955
- auto zeSignalEvent = getSignalEvent (phEvent, UR_COMMAND_EVENTS_WAIT );
932
+ auto zeSignalEvent = getSignalEvent (phEvent);
956
933
auto [pWaitEvents, numWaitEvents] =
957
934
getWaitListView (phEventWaitList, numEventsInWaitList);
958
935
@@ -974,8 +951,7 @@ ur_result_t ur_command_list_manager::enqueueEventsWaitWithBarrier(
974
951
ur_event_handle_t *phEvent) {
975
952
TRACK_SCOPE_LATENCY (" ur_command_list_manager::enqueueEventsWaitWithBarrier" );
976
953
977
- auto zeSignalEvent =
978
- getSignalEvent (phEvent, UR_COMMAND_EVENTS_WAIT_WITH_BARRIER);
954
+ auto zeSignalEvent = getSignalEvent (phEvent);
979
955
auto [pWaitEvents, numWaitEvents] =
980
956
getWaitListView (phEventWaitList, numEventsInWaitList);
981
957
0 commit comments