summary | refs | log | tree | commit | diff | stats
path: root/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c')
-rw-r--r-- drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c 219
1 files changed, 218 insertions, 1 deletions
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
index b7d06395d592..b3b6dab71638 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
@@ -21,10 +21,11 @@
*
*/
+#include <linux/ratelimit.h>
+#include <linux/printk.h>
#include <linux/slab.h>
#include <linux/list.h>
#include <linux/types.h>
-#include <linux/printk.h>
#include <linux/bitops.h>
#include <linux/sched.h>
#include "kfd_priv.h"
@@ -180,6 +181,14 @@ static int create_queue_nocpsch(struct device_queue_manager *dqm,
goto out_unlock;
}
q->properties.vmid = qpd->vmid;
+ /*
+ * Eviction state logic: we only mark active queues as evicted
+ * to avoid the overhead of restoring inactive queues later
+ */
+ if (qpd->evicted)
+ q->properties.is_evicted = (q->properties.queue_size > 0 &&
+ q->properties.queue_percent > 0 &&
+ q->properties.queue_address != 0);
q->properties.tba_addr = qpd->tba_addr;
q->properties.tma_addr = qpd->tma_addr;
@@ -377,15 +386,29 @@ static int update_queue(struct device_queue_manager *dqm, struct queue *q)
{
int retval;
struct mqd_manager *mqd;
+ struct kfd_process_device *pdd;
bool prev_active = false;
mutex_lock(&dqm->lock);
+ pdd = kfd_get_process_device_data(q->device, q->process);
+ if (!pdd) {
+ retval = -ENODEV;
+ goto out_unlock;
+ }
mqd = dqm->ops.get_mqd_manager(dqm,
get_mqd_type_from_queue_type(q->properties.type));
if (!mqd) {
retval = -ENOMEM;
goto out_unlock;
}
+ /*
+ * Eviction state logic: we only mark active queues as evicted
+ * to avoid the overhead of restoring inactive queues later
+ */
+ if (pdd->qpd.evicted)
+ q->properties.is_evicted = (q->properties.queue_size > 0 &&
+ q->properties.queue_percent > 0 &&
+ q->properties.queue_address != 0);
/* Save previous activity state for counters */
prev_active = q->properties.is_active;
@@ -457,6 +480,187 @@ static struct mqd_manager *get_mqd_manager(
return mqd;
}
+static int evict_process_queues_nocpsch(struct device_queue_manager *dqm,
+ struct qcm_process_device *qpd)
+{ /*
+ * Evict the active queues of @qpd (variant installed for
+ * KFD_SCHED_POLICY_NO_HWS): every active queue is flagged
+ * evicted and inactive and passed to its MQD manager's destroy_mqd().
+ * The whole body runs under dqm->lock.
+ *
+ * qpd->evicted is a nesting counter: only the call that finds it at 0
+ * touches the queues; later calls just increment it and return 0.
+ *
+ * Returns 0 on success, -ENOMEM if no MQD manager is found for a
+ * queue type, or the error returned by destroy_mqd().
+ *
+ * NOTE(review): both error exits leave qpd->evicted incremented and
+ * skip the remaining queues - confirm callers can cope with a
+ * partially evicted process.
+ */
+ struct queue *q;
+ struct mqd_manager *mqd;
+ struct kfd_process_device *pdd;
+ int retval = 0;
+
+ mutex_lock(&dqm->lock);
+ if (qpd->evicted++ > 0) /* already evicted: only the count is bumped */
+ goto out;
+
+ pdd = qpd_to_pdd(qpd); /* used here only for the PASID in the log line */
+ pr_info_ratelimited("Evicting PASID %u queues\n",
+ pdd->process->pasid);
+
+ /* deactivate all active queues on the qpd */
+ list_for_each_entry(q, &qpd->queues_list, list) {
+ if (!q->properties.is_active)
+ continue; /* inactive queues are left unflagged */
+ mqd = dqm->ops.get_mqd_manager(dqm,
+ get_mqd_type_from_queue_type(q->properties.type));
+ if (!mqd) { /* should not be here */
+ pr_err("Cannot evict queue, mqd mgr is NULL\n");
+ retval = -ENOMEM;
+ goto out;
+ }
+ q->properties.is_evicted = true; /* the restore path selects queues by this flag */
+ q->properties.is_active = false;
+ retval = mqd->destroy_mqd(mqd, q->mqd,
+ KFD_PREEMPT_TYPE_WAVEFRONT_DRAIN,
+ KFD_UNMAP_LATENCY_MS, q->pipe, q->queue);
+ if (retval) /* NOTE(review): flags are already flipped but queue_count is not decremented on this exit */
+ goto out;
+ dqm->queue_count--; /* counted only after destroy_mqd() succeeded */
+ }
+
+out:
+ mutex_unlock(&dqm->lock);
+ return retval;
+}
+
+static int evict_process_queues_cpsch(struct device_queue_manager *dqm,
+ struct qcm_process_device *qpd)
+{ /*
+ * Evict the active queues of @qpd (variant installed next to
+ * process_termination_cpsch): every active queue is flagged evicted
+ * and inactive, then execute_queues_cpsch() is called once for the
+ * whole batch. The whole body runs under dqm->lock.
+ *
+ * qpd->evicted is a nesting counter: only the call that finds it at 0
+ * touches the queues; later calls just increment it and return 0.
+ *
+ * Returns 0 when already evicted, otherwise the result of
+ * execute_queues_cpsch().
+ *
+ * NOTE(review): if execute_queues_cpsch() fails, the flags, the
+ * queue_count and qpd->evicted stay modified - confirm this is the
+ * intended state after an error.
+ */
+ struct queue *q;
+ struct kfd_process_device *pdd;
+ int retval = 0;
+
+ mutex_lock(&dqm->lock);
+ if (qpd->evicted++ > 0) /* already evicted: only the count is bumped */
+ goto out;
+
+ pdd = qpd_to_pdd(qpd); /* used here only for the PASID in the log line */
+ pr_info_ratelimited("Evicting PASID %u queues\n",
+ pdd->process->pasid);
+
+ /* deactivate all active queues on the qpd */
+ list_for_each_entry(q, &qpd->queues_list, list) {
+ if (!q->properties.is_active)
+ continue; /* inactive queues are left unflagged */
+ q->properties.is_evicted = true; /* the restore path selects queues by this flag */
+ q->properties.is_active = false;
+ dqm->queue_count--;
+ }
+ retval = execute_queues_cpsch(dqm, /* one call covers every queue flagged above */
+ qpd->is_debug ? /* debug processes use the ALL_QUEUES filter */
+ KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES :
+ KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0);
+
+out:
+ mutex_unlock(&dqm->lock);
+ return retval;
+}
+
+static int restore_process_queues_nocpsch(struct device_queue_manager *dqm,
+ struct qcm_process_device *qpd)
+{ /*
+ * Undo one eviction of @qpd (variant installed for
+ * KFD_SCHED_POLICY_NO_HWS). While qpd->evicted is above 1 the call
+ * only decrements it; the call that finds it at 1 refreshes the page
+ * table base and reloads every queue flagged is_evicted through its
+ * MQD manager's load_mqd().
+ *
+ * Returns 0 on success (including the "nothing to do" cases),
+ * -ENOMEM if no MQD manager is found for a queue type, or the error
+ * returned by load_mqd().
+ *
+ * NOTE(review): on either error exit qpd->evicted stays at 1 and
+ * queues already reloaded stay active - confirm that a retry of the
+ * restore is what callers are expected to do.
+ */
+ struct queue *q;
+ struct mqd_manager *mqd;
+ struct kfd_process_device *pdd;
+ uint32_t pd_base;
+ int retval = 0;
+
+ pdd = qpd_to_pdd(qpd);
+ /* Retrieve the page directory base; done before dqm->lock is taken */
+ pd_base = dqm->dev->kfd2kgd->get_process_page_dir(pdd->vm);
+
+ mutex_lock(&dqm->lock);
+ if (WARN_ON_ONCE(!qpd->evicted)) /* not evicted: warn once and return 0 */
+ goto out;
+ if (qpd->evicted > 1) { /* ref count still > 0, decrement & quit */
+ qpd->evicted--;
+ goto out;
+ }
+
+ pr_info_ratelimited("Restoring PASID %u queues\n",
+ pdd->process->pasid);
+
+ /* Update PD Base in QPD */
+ qpd->page_table_base = pd_base;
+ pr_debug("Updated PD address to 0x%08x\n", pd_base);
+
+ if (!list_empty(&qpd->queues_list)) { /* skipped when the process has no queues */
+ dqm->dev->kfd2kgd->set_vm_context_page_table_base(
+ dqm->dev->kgd,
+ qpd->vmid,
+ qpd->page_table_base);
+ kfd_flush_tlb(pdd);
+ }
+
+ /* re-activate the queues that were flagged as evicted on the qpd */
+ list_for_each_entry(q, &qpd->queues_list, list) {
+ if (!q->properties.is_evicted)
+ continue; /* queues not flagged as evicted are left alone */
+ mqd = dqm->ops.get_mqd_manager(dqm,
+ get_mqd_type_from_queue_type(q->properties.type));
+ if (!mqd) { /* should not be here */
+ pr_err("Cannot restore queue, mqd mgr is NULL\n");
+ retval = -ENOMEM;
+ goto out;
+ }
+ q->properties.is_evicted = false;
+ q->properties.is_active = true;
+ retval = mqd->load_mqd(mqd, q->mqd, q->pipe,
+ q->queue, &q->properties,
+ q->process->mm);
+ if (retval) /* NOTE(review): flags are already flipped but queue_count is not incremented on this exit */
+ goto out;
+ dqm->queue_count++; /* counted only after load_mqd() succeeded */
+ }
+ qpd->evicted = 0; /* reached only when the loop finished without error */
+out:
+ mutex_unlock(&dqm->lock);
+ return retval;
+}
+
+static int restore_process_queues_cpsch(struct device_queue_manager *dqm,
+ struct qcm_process_device *qpd)
+{ /*
+ * Undo one eviction of @qpd (variant installed next to
+ * process_termination_cpsch). While qpd->evicted is above 1 the call
+ * only decrements it; the call that finds it at 1 stores the new page
+ * table base in the qpd, flags every evicted queue active again and
+ * calls execute_queues_cpsch() once for the whole batch.
+ *
+ * Returns 0 in the "nothing to do" cases, otherwise the result of
+ * execute_queues_cpsch().
+ *
+ * NOTE(review): if execute_queues_cpsch() fails, the queues are
+ * already flagged active and counted while qpd->evicted stays at 1 -
+ * confirm this is the intended state after an error.
+ */
+ struct queue *q;
+ struct kfd_process_device *pdd;
+ uint32_t pd_base;
+ int retval = 0;
+
+ pdd = qpd_to_pdd(qpd);
+ /* Retrieve the page directory base; done before dqm->lock is taken */
+ pd_base = dqm->dev->kfd2kgd->get_process_page_dir(pdd->vm);
+
+ mutex_lock(&dqm->lock);
+ if (WARN_ON_ONCE(!qpd->evicted)) /* not evicted: warn once and return 0 */
+ goto out;
+ if (qpd->evicted > 1) { /* ref count still > 0, decrement & quit */
+ qpd->evicted--;
+ goto out;
+ }
+
+ pr_info_ratelimited("Restoring PASID %u queues\n",
+ pdd->process->pasid);
+
+ /* Update PD Base in QPD */
+ qpd->page_table_base = pd_base;
+ pr_debug("Updated PD address to 0x%08x\n", pd_base);
+
+ /* re-activate the queues that were flagged as evicted on the qpd */
+ list_for_each_entry(q, &qpd->queues_list, list) {
+ if (!q->properties.is_evicted)
+ continue; /* queues not flagged as evicted are left alone */
+ q->properties.is_evicted = false;
+ q->properties.is_active = true;
+ dqm->queue_count++;
+ }
+ retval = execute_queues_cpsch(dqm, /* one call covers every queue flagged above */
+ KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0);
+ if (!retval)
+ qpd->evicted = 0; /* the eviction count is cleared only on success */
+out:
+ mutex_unlock(&dqm->lock);
+ return retval;
+}
+
static int register_process(struct device_queue_manager *dqm,
struct qcm_process_device *qpd)
{
@@ -853,6 +1057,14 @@ static int create_queue_cpsch(struct device_queue_manager *dqm, struct queue *q,
retval = -ENOMEM;
goto out;
}
+ /*
+ * Eviction state logic: we only mark active queues as evicted
+ * to avoid the overhead of restoring inactive queues later
+ */
+ if (qpd->evicted)
+ q->properties.is_evicted = (q->properties.queue_size > 0 &&
+ q->properties.queue_percent > 0 &&
+ q->properties.queue_address != 0);
dqm->asic_ops.init_sdma_vm(dqm, q, qpd);
@@ -1291,6 +1503,8 @@ struct device_queue_manager *device_queue_manager_init(struct kfd_dev *dev)
dqm->ops.set_cache_memory_policy = set_cache_memory_policy;
dqm->ops.set_trap_handler = set_trap_handler;
dqm->ops.process_termination = process_termination_cpsch;
+ dqm->ops.evict_process_queues = evict_process_queues_cpsch;
+ dqm->ops.restore_process_queues = restore_process_queues_cpsch;
break;
case KFD_SCHED_POLICY_NO_HWS:
/* initialize dqm for no cp scheduling */
@@ -1307,6 +1521,9 @@ struct device_queue_manager *device_queue_manager_init(struct kfd_dev *dev)
dqm->ops.set_cache_memory_policy = set_cache_memory_policy;
dqm->ops.set_trap_handler = set_trap_handler;
dqm->ops.process_termination = process_termination_nocpsch;
+ dqm->ops.evict_process_queues = evict_process_queues_nocpsch;
+ dqm->ops.restore_process_queues =
+ restore_process_queues_nocpsch;
break;
default:
pr_err("Invalid scheduling policy %d\n", dqm->sched_policy);
OpenPOWER on IntegriCloud