diff options
Diffstat (limited to 'drivers/gpu/drm/scheduler/sched_main.c')
-rw-r--r-- | drivers/gpu/drm/scheduler/sched_main.c | 117 |
1 files changed, 89 insertions, 28 deletions
diff --git a/drivers/gpu/drm/scheduler/sched_main.c b/drivers/gpu/drm/scheduler/sched_main.c index c1058eece16b..71ce6215956f 100644 --- a/drivers/gpu/drm/scheduler/sched_main.c +++ b/drivers/gpu/drm/scheduler/sched_main.c @@ -47,8 +47,10 @@ #include <linux/kthread.h> #include <linux/wait.h> #include <linux/sched.h> +#include <linux/completion.h> #include <uapi/linux/sched/types.h> -#include <drm/drmP.h> + +#include <drm/drm_print.h> #include <drm/gpu_scheduler.h> #include <drm/spsc_queue.h> @@ -90,6 +92,7 @@ void drm_sched_rq_add_entity(struct drm_sched_rq *rq, if (!list_empty(&entity->list)) return; spin_lock(&rq->lock); + atomic_inc(&rq->sched->score); list_add_tail(&entity->list, &rq->entities); spin_unlock(&rq->lock); } @@ -108,6 +111,7 @@ void drm_sched_rq_remove_entity(struct drm_sched_rq *rq, if (list_empty(&entity->list)) return; spin_lock(&rq->lock); + atomic_dec(&rq->sched->score); list_del_init(&entity->list); if (rq->current_entity == entity) rq->current_entity = NULL; @@ -133,6 +137,7 @@ drm_sched_rq_select_entity(struct drm_sched_rq *rq) list_for_each_entry_continue(entity, &rq->entities, list) { if (drm_sched_entity_is_ready(entity)) { rq->current_entity = entity; + reinit_completion(&entity->entity_idle); spin_unlock(&rq->lock); return entity; } @@ -143,6 +148,7 @@ drm_sched_rq_select_entity(struct drm_sched_rq *rq) if (drm_sched_entity_is_ready(entity)) { rq->current_entity = entity; + reinit_completion(&entity->entity_idle); spin_unlock(&rq->lock); return entity; } @@ -283,10 +289,21 @@ static void drm_sched_job_timedout(struct work_struct *work) unsigned long flags; sched = container_of(work, struct drm_gpu_scheduler, work_tdr.work); + + /* Protects against concurrent deletion in drm_sched_get_cleanup_job */ + spin_lock_irqsave(&sched->job_list_lock, flags); job = list_first_entry_or_null(&sched->ring_mirror_list, struct drm_sched_job, node); if (job) { + /* + * Remove the bad job so it cannot be freed by concurrent + * drm_sched_cleanup_jobs. It will be reinserted back after sched->thread + * is parked at which point it's safe. + */ + list_del_init(&job->node); + spin_unlock_irqrestore(&sched->job_list_lock, flags); + job->sched->ops->timedout_job(job); /* @@ -297,6 +314,8 @@ static void drm_sched_job_timedout(struct work_struct *work) job->sched->ops->free_job(job); sched->free_guilty = false; } + } else { + spin_unlock_irqrestore(&sched->job_list_lock, flags); } spin_lock_irqsave(&sched->job_list_lock, flags); @@ -369,6 +388,20 @@ void drm_sched_stop(struct drm_gpu_scheduler *sched, struct drm_sched_job *bad) kthread_park(sched->thread); /* + * Reinsert back the bad job here - now it's safe as + * drm_sched_get_cleanup_job cannot race against us and release the + * bad job at this point - we parked (waited for) any in progress + * (earlier) cleanups and drm_sched_get_cleanup_job will not be called + * now until the scheduler thread is unparked. + */ + if (bad && bad->sched == sched) + /* + * Add at the head of the queue to reflect it was the earliest + * job extracted. + */ + list_add(&bad->node, &sched->ring_mirror_list); + + /* * Iterate the job list from later to earlier one and either deactive * their HW callbacks or remove them from mirror list if they already * signaled. @@ -478,6 +511,7 @@ void drm_sched_resubmit_jobs(struct drm_gpu_scheduler *sched) struct drm_sched_job *s_job, *tmp; uint64_t guilty_context; bool found_guilty = false; + struct dma_fence *fence; list_for_each_entry_safe(s_job, tmp, &sched->ring_mirror_list, node) { struct drm_sched_fence *s_fence = s_job->s_fence; @@ -491,7 +525,18 @@ void drm_sched_resubmit_jobs(struct drm_gpu_scheduler *sched) dma_fence_set_error(&s_fence->finished, -ECANCELED); dma_fence_put(s_job->s_fence->parent); - s_job->s_fence->parent = sched->ops->run_job(s_job); + fence = sched->ops->run_job(s_job); + + if (IS_ERR_OR_NULL(fence)) { + if (IS_ERR(fence)) + dma_fence_set_error(&s_fence->finished, PTR_ERR(fence)); + + s_job->s_fence->parent = NULL; + } else { + s_job->s_fence->parent = fence; + } + + } } EXPORT_SYMBOL(drm_sched_resubmit_jobs); @@ -612,7 +657,7 @@ static void drm_sched_process_job(struct dma_fence *f, struct dma_fence_cb *cb) struct drm_gpu_scheduler *sched = s_fence->sched; atomic_dec(&sched->hw_rq_count); - atomic_dec(&sched->num_jobs); + atomic_dec(&sched->score); trace_drm_sched_process_job(s_fence); @@ -621,43 +666,45 @@ static void drm_sched_process_job(struct dma_fence *f, struct dma_fence_cb *cb) } /** - * drm_sched_cleanup_jobs - destroy finished jobs + * drm_sched_get_cleanup_job - fetch the next finished job to be destroyed * * @sched: scheduler instance * - * Remove all finished jobs from the mirror list and destroy them. + * Returns the next finished job from the mirror list (if there is one) + * ready for it to be destroyed. */ -static void drm_sched_cleanup_jobs(struct drm_gpu_scheduler *sched) +static struct drm_sched_job * +drm_sched_get_cleanup_job(struct drm_gpu_scheduler *sched) { + struct drm_sched_job *job; unsigned long flags; - /* Don't destroy jobs while the timeout worker is running */ - if (sched->timeout != MAX_SCHEDULE_TIMEOUT && - !cancel_delayed_work(&sched->work_tdr)) - return; - + /* + * Don't destroy jobs while the timeout worker is running OR thread + * is being parked and hence assumed to not touch ring_mirror_list + */ + if ((sched->timeout != MAX_SCHEDULE_TIMEOUT && + !cancel_delayed_work(&sched->work_tdr)) || + __kthread_should_park(sched->thread)) + return NULL; - while (!list_empty(&sched->ring_mirror_list)) { - struct drm_sched_job *job; + spin_lock_irqsave(&sched->job_list_lock, flags); - job = list_first_entry(&sched->ring_mirror_list, + job = list_first_entry_or_null(&sched->ring_mirror_list, struct drm_sched_job, node); - if (!dma_fence_is_signaled(&job->s_fence->finished)) - break; - spin_lock_irqsave(&sched->job_list_lock, flags); + if (job && dma_fence_is_signaled(&job->s_fence->finished)) { /* remove job from ring_mirror_list */ list_del_init(&job->node); - spin_unlock_irqrestore(&sched->job_list_lock, flags); - - sched->ops->free_job(job); + } else { + job = NULL; + /* queue timeout for next job */ + drm_sched_start_timeout(sched); } - /* queue timeout for next job */ - spin_lock_irqsave(&sched->job_list_lock, flags); - drm_sched_start_timeout(sched); spin_unlock_irqrestore(&sched->job_list_lock, flags); + return job; } /** @@ -697,17 +744,27 @@ static int drm_sched_main(void *param) struct drm_sched_fence *s_fence; struct drm_sched_job *sched_job; struct dma_fence *fence; + struct drm_sched_job *cleanup_job = NULL; wait_event_interruptible(sched->wake_up_worker, - (drm_sched_cleanup_jobs(sched), + (cleanup_job = drm_sched_get_cleanup_job(sched)) || (!drm_sched_blocked(sched) && (entity = drm_sched_select_entity(sched))) || - kthread_should_stop())); + kthread_should_stop()); + + if (cleanup_job) { + sched->ops->free_job(cleanup_job); + /* queue timeout for next job */ + drm_sched_start_timeout(sched); + } if (!entity) continue; sched_job = drm_sched_entity_pop_job(entity); + + complete(&entity->entity_idle); + if (!sched_job) continue; @@ -719,7 +776,7 @@ static int drm_sched_main(void *param) fence = sched->ops->run_job(sched_job); drm_sched_fence_scheduled(s_fence); - if (fence) { + if (!IS_ERR_OR_NULL(fence)) { s_fence->parent = dma_fence_get(fence); r = dma_fence_add_callback(fence, &sched_job->cb, drm_sched_process_job); @@ -729,8 +786,12 @@ static int drm_sched_main(void *param) DRM_ERROR("fence add callback failed (%d)\n", r); dma_fence_put(fence); - } else + } else { + if (IS_ERR(fence)) + dma_fence_set_error(&s_fence->finished, PTR_ERR(fence)); + drm_sched_process_job(NULL, &sched_job->cb); + } wake_up(&sched->job_scheduled); } @@ -771,7 +832,7 @@ int drm_sched_init(struct drm_gpu_scheduler *sched, spin_lock_init(&sched->job_list_lock); atomic_set(&sched->hw_rq_count, 0); INIT_DELAYED_WORK(&sched->work_tdr, drm_sched_job_timedout); - atomic_set(&sched->num_jobs, 0); + atomic_set(&sched->score, 0); atomic64_set(&sched->job_id_count, 0); /* Each scheduler will run on a seperate kernel thread */ |