diff options
Diffstat (limited to 'drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c')
-rw-r--r-- | drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c | 219 |
1 files changed, 218 insertions, 1 deletions
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c index b7d06395d592..b3b6dab71638 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c @@ -21,10 +21,11 @@ * */ +#include <linux/ratelimit.h> +#include <linux/printk.h> #include <linux/slab.h> #include <linux/list.h> #include <linux/types.h> -#include <linux/printk.h> #include <linux/bitops.h> #include <linux/sched.h> #include "kfd_priv.h" @@ -180,6 +181,14 @@ static int create_queue_nocpsch(struct device_queue_manager *dqm, goto out_unlock; } q->properties.vmid = qpd->vmid; + /* + * Eviction state logic: we only mark active queues as evicted + * to avoid the overhead of restoring inactive queues later + */ + if (qpd->evicted) + q->properties.is_evicted = (q->properties.queue_size > 0 && + q->properties.queue_percent > 0 && + q->properties.queue_address != 0); q->properties.tba_addr = qpd->tba_addr; q->properties.tma_addr = qpd->tma_addr; @@ -377,15 +386,29 @@ static int update_queue(struct device_queue_manager *dqm, struct queue *q) { int retval; struct mqd_manager *mqd; + struct kfd_process_device *pdd; bool prev_active = false; mutex_lock(&dqm->lock); + pdd = kfd_get_process_device_data(q->device, q->process); + if (!pdd) { + retval = -ENODEV; + goto out_unlock; + } mqd = dqm->ops.get_mqd_manager(dqm, get_mqd_type_from_queue_type(q->properties.type)); if (!mqd) { retval = -ENOMEM; goto out_unlock; } + /* + * Eviction state logic: we only mark active queues as evicted + * to avoid the overhead of restoring inactive queues later + */ + if (pdd->qpd.evicted) + q->properties.is_evicted = (q->properties.queue_size > 0 && + q->properties.queue_percent > 0 && + q->properties.queue_address != 0); /* Save previous activity state for counters */ prev_active = q->properties.is_active; @@ -457,6 +480,187 @@ static struct mqd_manager *get_mqd_manager( return mqd; } +static int evict_process_queues_nocpsch(struct device_queue_manager *dqm, + struct qcm_process_device *qpd) +{ + struct queue *q; + struct mqd_manager *mqd; + struct kfd_process_device *pdd; + int retval = 0; + + mutex_lock(&dqm->lock); + if (qpd->evicted++ > 0) /* already evicted, do nothing */ + goto out; + + pdd = qpd_to_pdd(qpd); + pr_info_ratelimited("Evicting PASID %u queues\n", + pdd->process->pasid); + + /* unactivate all active queues on the qpd */ + list_for_each_entry(q, &qpd->queues_list, list) { + if (!q->properties.is_active) + continue; + mqd = dqm->ops.get_mqd_manager(dqm, + get_mqd_type_from_queue_type(q->properties.type)); + if (!mqd) { /* should not be here */ + pr_err("Cannot evict queue, mqd mgr is NULL\n"); + retval = -ENOMEM; + goto out; + } + q->properties.is_evicted = true; + q->properties.is_active = false; + retval = mqd->destroy_mqd(mqd, q->mqd, + KFD_PREEMPT_TYPE_WAVEFRONT_DRAIN, + KFD_UNMAP_LATENCY_MS, q->pipe, q->queue); + if (retval) + goto out; + dqm->queue_count--; + } + +out: + mutex_unlock(&dqm->lock); + return retval; +} + +static int evict_process_queues_cpsch(struct device_queue_manager *dqm, + struct qcm_process_device *qpd) +{ + struct queue *q; + struct kfd_process_device *pdd; + int retval = 0; + + mutex_lock(&dqm->lock); + if (qpd->evicted++ > 0) /* already evicted, do nothing */ + goto out; + + pdd = qpd_to_pdd(qpd); + pr_info_ratelimited("Evicting PASID %u queues\n", + pdd->process->pasid); + + /* unactivate all active queues on the qpd */ + list_for_each_entry(q, &qpd->queues_list, list) { + if (!q->properties.is_active) + continue; + q->properties.is_evicted = true; + q->properties.is_active = false; + dqm->queue_count--; + } + retval = execute_queues_cpsch(dqm, + qpd->is_debug ? + KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES : + KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0); + +out: + mutex_unlock(&dqm->lock); + return retval; +} + +static int restore_process_queues_nocpsch(struct device_queue_manager *dqm, + struct qcm_process_device *qpd) +{ + struct queue *q; + struct mqd_manager *mqd; + struct kfd_process_device *pdd; + uint32_t pd_base; + int retval = 0; + + pdd = qpd_to_pdd(qpd); + /* Retrieve PD base */ + pd_base = dqm->dev->kfd2kgd->get_process_page_dir(pdd->vm); + + mutex_lock(&dqm->lock); + if (WARN_ON_ONCE(!qpd->evicted)) /* already restored, do nothing */ + goto out; + if (qpd->evicted > 1) { /* ref count still > 0, decrement & quit */ + qpd->evicted--; + goto out; + } + + pr_info_ratelimited("Restoring PASID %u queues\n", + pdd->process->pasid); + + /* Update PD Base in QPD */ + qpd->page_table_base = pd_base; + pr_debug("Updated PD address to 0x%08x\n", pd_base); + + if (!list_empty(&qpd->queues_list)) { + dqm->dev->kfd2kgd->set_vm_context_page_table_base( + dqm->dev->kgd, + qpd->vmid, + qpd->page_table_base); + kfd_flush_tlb(pdd); + } + + /* activate all active queues on the qpd */ + list_for_each_entry(q, &qpd->queues_list, list) { + if (!q->properties.is_evicted) + continue; + mqd = dqm->ops.get_mqd_manager(dqm, + get_mqd_type_from_queue_type(q->properties.type)); + if (!mqd) { /* should not be here */ + pr_err("Cannot restore queue, mqd mgr is NULL\n"); + retval = -ENOMEM; + goto out; + } + q->properties.is_evicted = false; + q->properties.is_active = true; + retval = mqd->load_mqd(mqd, q->mqd, q->pipe, + q->queue, &q->properties, + q->process->mm); + if (retval) + goto out; + dqm->queue_count++; + } + qpd->evicted = 0; +out: + mutex_unlock(&dqm->lock); + return retval; +} + +static int restore_process_queues_cpsch(struct device_queue_manager *dqm, + struct qcm_process_device *qpd) +{ + struct queue *q; + struct kfd_process_device *pdd; + uint32_t pd_base; + int retval = 0; + + pdd = qpd_to_pdd(qpd); + /* Retrieve PD base */ + pd_base = dqm->dev->kfd2kgd->get_process_page_dir(pdd->vm); + + mutex_lock(&dqm->lock); + if (WARN_ON_ONCE(!qpd->evicted)) /* already restored, do nothing */ + goto out; + if (qpd->evicted > 1) { /* ref count still > 0, decrement & quit */ + qpd->evicted--; + goto out; + } + + pr_info_ratelimited("Restoring PASID %u queues\n", + pdd->process->pasid); + + /* Update PD Base in QPD */ + qpd->page_table_base = pd_base; + pr_debug("Updated PD address to 0x%08x\n", pd_base); + + /* activate all active queues on the qpd */ + list_for_each_entry(q, &qpd->queues_list, list) { + if (!q->properties.is_evicted) + continue; + q->properties.is_evicted = false; + q->properties.is_active = true; + dqm->queue_count++; + } + retval = execute_queues_cpsch(dqm, + KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0); + if (!retval) + qpd->evicted = 0; +out: + mutex_unlock(&dqm->lock); + return retval; +} + static int register_process(struct device_queue_manager *dqm, struct qcm_process_device *qpd) { @@ -853,6 +1057,14 @@ static int create_queue_cpsch(struct device_queue_manager *dqm, struct queue *q, retval = -ENOMEM; goto out; } + /* + * Eviction state logic: we only mark active queues as evicted + * to avoid the overhead of restoring inactive queues later + */ + if (qpd->evicted) + q->properties.is_evicted = (q->properties.queue_size > 0 && + q->properties.queue_percent > 0 && + q->properties.queue_address != 0); dqm->asic_ops.init_sdma_vm(dqm, q, qpd); @@ -1291,6 +1503,8 @@ struct device_queue_manager *device_queue_manager_init(struct kfd_dev *dev) dqm->ops.set_cache_memory_policy = set_cache_memory_policy; dqm->ops.set_trap_handler = set_trap_handler; dqm->ops.process_termination = process_termination_cpsch; + dqm->ops.evict_process_queues = evict_process_queues_cpsch; + dqm->ops.restore_process_queues = restore_process_queues_cpsch; break; case KFD_SCHED_POLICY_NO_HWS: /* initialize dqm for no cp scheduling */ @@ -1307,6 +1521,9 @@ struct device_queue_manager *device_queue_manager_init(struct kfd_dev *dev) dqm->ops.set_cache_memory_policy = set_cache_memory_policy; dqm->ops.set_trap_handler = set_trap_handler; dqm->ops.process_termination = process_termination_nocpsch; + dqm->ops.evict_process_queues = evict_process_queues_nocpsch; + dqm->ops.restore_process_queues = + restore_process_queues_nocpsch; break; default: pr_err("Invalid scheduling policy %d\n", dqm->sched_policy); |