Skip to content

Commit 167bf96

Browse files
Andrey Grodzovsky authored and alexdeucher committed
drm/sched: Set error to s_fence if HW job submission failed.
Problem: When run_job fails and the HW fence returned is NULL, we still signal the s_fence to avoid hangs, but the user has no way of knowing whether the actual HW job was run and finished. Fix: Allow .run_job implementations to return ERR_PTR in the fence pointer returned, and then set this error for the s_fence->finished fence so that whoever waits on this fence can inspect the signaled fence for an error. Signed-off-by: Andrey Grodzovsky <[email protected]> Reviewed-by: Christian König <[email protected]> Signed-off-by: Alex Deucher <[email protected]>
1 parent 47661f6 commit 167bf96

File tree

1 file changed

+16
-3
lines changed

1 file changed

+16
-3
lines changed

drivers/gpu/drm/scheduler/sched_main.c

Lines changed: 16 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -479,6 +479,7 @@ void drm_sched_resubmit_jobs(struct drm_gpu_scheduler *sched)
479479
struct drm_sched_job *s_job, *tmp;
480480
uint64_t guilty_context;
481481
bool found_guilty = false;
482+
struct dma_fence *fence;
482483

483484
list_for_each_entry_safe(s_job, tmp, &sched->ring_mirror_list, node) {
484485
struct drm_sched_fence *s_fence = s_job->s_fence;
@@ -492,7 +493,16 @@ void drm_sched_resubmit_jobs(struct drm_gpu_scheduler *sched)
492493
dma_fence_set_error(&s_fence->finished, -ECANCELED);
493494

494495
dma_fence_put(s_job->s_fence->parent);
495-
s_job->s_fence->parent = sched->ops->run_job(s_job);
496+
fence = sched->ops->run_job(s_job);
497+
498+
if (IS_ERR_OR_NULL(fence)) {
499+
s_job->s_fence->parent = NULL;
500+
dma_fence_set_error(&s_fence->finished, PTR_ERR(fence));
501+
} else {
502+
s_job->s_fence->parent = fence;
503+
}
504+
505+
496506
}
497507
}
498508
EXPORT_SYMBOL(drm_sched_resubmit_jobs);
@@ -720,7 +730,7 @@ static int drm_sched_main(void *param)
720730
fence = sched->ops->run_job(sched_job);
721731
drm_sched_fence_scheduled(s_fence);
722732

723-
if (fence) {
733+
if (!IS_ERR_OR_NULL(fence)) {
724734
s_fence->parent = dma_fence_get(fence);
725735
r = dma_fence_add_callback(fence, &sched_job->cb,
726736
drm_sched_process_job);
@@ -730,8 +740,11 @@ static int drm_sched_main(void *param)
730740
DRM_ERROR("fence add callback failed (%d)\n",
731741
r);
732742
dma_fence_put(fence);
733-
} else
743+
} else {
744+
745+
dma_fence_set_error(&s_fence->finished, PTR_ERR(fence));
734746
drm_sched_process_job(NULL, &sched_job->cb);
747+
}
735748

736749
wake_up(&sched->job_scheduled);
737750
}

0 commit comments

Comments
 (0)