summaryrefslogtreecommitdiffstats
path: root/drivers/gpu/drm/amd/amdkfd/kfd_process.c
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/gpu/drm/amd/amdkfd/kfd_process.c')
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_process.c146
1 files changed, 69 insertions, 77 deletions
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
index 8f1076c0c88a..25b90f70aecd 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
@@ -32,6 +32,7 @@
#include <linux/mman.h>
#include <linux/file.h>
#include "amdgpu_amdkfd.h"
+#include "amdgpu.h"
struct mm_struct;
@@ -62,8 +63,8 @@ static struct workqueue_struct *kfd_restore_wq;
static struct kfd_process *find_process(const struct task_struct *thread);
static void kfd_process_ref_release(struct kref *ref);
-static struct kfd_process *create_process(const struct task_struct *thread,
- struct file *filep);
+static struct kfd_process *create_process(const struct task_struct *thread);
+static int kfd_process_init_cwsr_apu(struct kfd_process *p, struct file *filep);
static void evict_process_worker(struct work_struct *work);
static void restore_process_worker(struct work_struct *work);
@@ -289,7 +290,15 @@ struct kfd_process *kfd_create_process(struct file *filep)
if (process) {
pr_debug("Process already found\n");
} else {
- process = create_process(thread, filep);
+ process = create_process(thread);
+ if (IS_ERR(process))
+ goto out;
+
+ ret = kfd_process_init_cwsr_apu(process, filep);
+ if (ret) {
+ process = ERR_PTR(ret);
+ goto out;
+ }
if (!procfs.kobj)
goto out;
@@ -316,6 +325,8 @@ struct kfd_process *kfd_create_process(struct file *filep)
(int)process->lead_thread->pid);
}
out:
+ if (!IS_ERR(process))
+ kref_get(&process->ref);
mutex_unlock(&kfd_processes_mutex);
return process;
@@ -408,7 +419,7 @@ static void kfd_process_destroy_pdds(struct kfd_process *p)
list_for_each_entry_safe(pdd, temp, &p->per_device_data,
per_device_list) {
- pr_debug("Releasing pdd (topology id %d) for process (pasid %d)\n",
+ pr_debug("Releasing pdd (topology id %d) for process (pasid 0x%x)\n",
pdd->dev->id, p->pasid);
if (pdd->drm_file) {
@@ -478,11 +489,9 @@ static void kfd_process_ref_release(struct kref *ref)
queue_work(kfd_process_wq, &p->release_work);
}
-static void kfd_process_destroy_delayed(struct rcu_head *rcu)
+static void kfd_process_free_notifier(struct mmu_notifier *mn)
{
- struct kfd_process *p = container_of(rcu, struct kfd_process, rcu);
-
- kfd_unref_process(p);
+ kfd_unref_process(container_of(mn, struct kfd_process, mmu_notifier));
}
static void kfd_process_notifier_release(struct mmu_notifier *mn,
@@ -534,12 +543,12 @@ static void kfd_process_notifier_release(struct mmu_notifier *mn,
mutex_unlock(&p->mutex);
- mmu_notifier_unregister_no_release(&p->mmu_notifier, mm);
- mmu_notifier_call_srcu(&p->rcu, &kfd_process_destroy_delayed);
+ mmu_notifier_put(&p->mmu_notifier);
}
static const struct mmu_notifier_ops kfd_process_mmu_notifier_ops = {
.release = kfd_process_notifier_release,
+ .free_notifier = kfd_process_free_notifier,
};
static int kfd_process_init_cwsr_apu(struct kfd_process *p, struct file *filep)
@@ -554,8 +563,7 @@ static int kfd_process_init_cwsr_apu(struct kfd_process *p, struct file *filep)
if (!dev->cwsr_enabled || qpd->cwsr_kaddr || qpd->cwsr_base)
continue;
- offset = (KFD_MMAP_TYPE_RESERVED_MEM | KFD_MMAP_GPU_ID(dev->id))
- << PAGE_SHIFT;
+ offset = KFD_MMAP_TYPE_RESERVED_MEM | KFD_MMAP_GPU_ID(dev->id);
qpd->tba_addr = (int64_t)vm_mmap(filep, 0,
KFD_CWSR_TBA_TMA_SIZE, PROT_READ | PROT_EXEC,
MAP_SHARED, offset);
@@ -609,81 +617,69 @@ static int kfd_process_device_init_cwsr_dgpu(struct kfd_process_device *pdd)
return 0;
}
-static struct kfd_process *create_process(const struct task_struct *thread,
- struct file *filep)
+/*
+ * On return the kfd_process is fully operational and will be freed when the
+ * mm is released
+ */
+static struct kfd_process *create_process(const struct task_struct *thread)
{
struct kfd_process *process;
int err = -ENOMEM;
process = kzalloc(sizeof(*process), GFP_KERNEL);
-
if (!process)
goto err_alloc_process;
- process->pasid = kfd_pasid_alloc();
- if (process->pasid == 0)
- goto err_alloc_pasid;
-
- if (kfd_alloc_process_doorbells(process) < 0)
- goto err_alloc_doorbells;
-
kref_init(&process->ref);
-
mutex_init(&process->mutex);
-
process->mm = thread->mm;
-
- /* register notifier */
- process->mmu_notifier.ops = &kfd_process_mmu_notifier_ops;
- err = mmu_notifier_register(&process->mmu_notifier, process->mm);
- if (err)
- goto err_mmu_notifier;
-
- hash_add_rcu(kfd_processes_table, &process->kfd_processes,
- (uintptr_t)process->mm);
-
process->lead_thread = thread->group_leader;
- get_task_struct(process->lead_thread);
-
INIT_LIST_HEAD(&process->per_device_data);
-
+ INIT_DELAYED_WORK(&process->eviction_work, evict_process_worker);
+ INIT_DELAYED_WORK(&process->restore_work, restore_process_worker);
+ process->last_restore_timestamp = get_jiffies_64();
kfd_event_init_process(process);
+ process->is_32bit_user_mode = in_compat_syscall();
+
+ process->pasid = kfd_pasid_alloc();
+ if (process->pasid == 0)
+ goto err_alloc_pasid;
+
+ if (kfd_alloc_process_doorbells(process) < 0)
+ goto err_alloc_doorbells;
err = pqm_init(&process->pqm, process);
if (err != 0)
goto err_process_pqm_init;
/* init process apertures*/
- process->is_32bit_user_mode = in_compat_syscall();
err = kfd_init_apertures(process);
if (err != 0)
goto err_init_apertures;
- INIT_DELAYED_WORK(&process->eviction_work, evict_process_worker);
- INIT_DELAYED_WORK(&process->restore_work, restore_process_worker);
- process->last_restore_timestamp = get_jiffies_64();
-
- err = kfd_process_init_cwsr_apu(process, filep);
+ /* Must be last, have to use release destruction after this */
+ process->mmu_notifier.ops = &kfd_process_mmu_notifier_ops;
+ err = mmu_notifier_register(&process->mmu_notifier, process->mm);
if (err)
- goto err_init_cwsr;
+ goto err_register_notifier;
+
+ get_task_struct(process->lead_thread);
+ hash_add_rcu(kfd_processes_table, &process->kfd_processes,
+ (uintptr_t)process->mm);
return process;
-err_init_cwsr:
+err_register_notifier:
kfd_process_free_outstanding_kfd_bos(process);
kfd_process_destroy_pdds(process);
err_init_apertures:
pqm_uninit(&process->pqm);
err_process_pqm_init:
- hash_del_rcu(&process->kfd_processes);
- synchronize_rcu();
- mmu_notifier_unregister_no_release(&process->mmu_notifier, process->mm);
-err_mmu_notifier:
- mutex_destroy(&process->mutex);
kfd_free_process_doorbells(process);
err_alloc_doorbells:
kfd_pasid_free(process->pasid);
err_alloc_pasid:
+ mutex_destroy(&process->mutex);
kfree(process);
err_alloc_process:
return ERR_PTR(err);
@@ -693,6 +689,8 @@ static int init_doorbell_bitmap(struct qcm_process_device *qpd,
struct kfd_dev *dev)
{
unsigned int i;
+ int range_start = dev->shared_resources.non_cp_doorbells_start;
+ int range_end = dev->shared_resources.non_cp_doorbells_end;
if (!KFD_IS_SOC15(dev->device_info->asic_family))
return 0;
@@ -704,14 +702,16 @@ static int init_doorbell_bitmap(struct qcm_process_device *qpd,
return -ENOMEM;
/* Mask out doorbells reserved for SDMA, IH, and VCN on SOC15. */
+ pr_debug("reserved doorbell 0x%03x - 0x%03x\n", range_start, range_end);
+ pr_debug("reserved doorbell 0x%03x - 0x%03x\n",
+ range_start + KFD_QUEUE_DOORBELL_MIRROR_OFFSET,
+ range_end + KFD_QUEUE_DOORBELL_MIRROR_OFFSET);
+
for (i = 0; i < KFD_MAX_NUM_OF_QUEUES_PER_PROCESS / 2; i++) {
- if (i >= dev->shared_resources.non_cp_doorbells_start
- && i <= dev->shared_resources.non_cp_doorbells_end) {
+ if (i >= range_start && i <= range_end) {
set_bit(i, qpd->doorbell_bitmap);
set_bit(i + KFD_QUEUE_DOORBELL_MIRROR_OFFSET,
qpd->doorbell_bitmap);
- pr_debug("reserved doorbell 0x%03x and 0x%03x\n", i,
- i + KFD_QUEUE_DOORBELL_MIRROR_OFFSET);
}
}
@@ -801,6 +801,8 @@ int kfd_process_device_init_vm(struct kfd_process_device *pdd,
return ret;
}
+ amdgpu_vm_set_task_info(pdd->vm);
+
ret = kfd_process_device_reserve_ib_mem(pdd);
if (ret)
goto err_reserve_ib_mem;
@@ -1024,7 +1026,7 @@ static void evict_process_worker(struct work_struct *work)
*/
flush_delayed_work(&p->restore_work);
- pr_debug("Started evicting pasid %d\n", p->pasid);
+ pr_debug("Started evicting pasid 0x%x\n", p->pasid);
ret = kfd_process_evict_queues(p);
if (!ret) {
dma_fence_signal(p->ef);
@@ -1033,16 +1035,15 @@ static void evict_process_worker(struct work_struct *work)
queue_delayed_work(kfd_restore_wq, &p->restore_work,
msecs_to_jiffies(PROCESS_RESTORE_TIME_MS));
- pr_debug("Finished evicting pasid %d\n", p->pasid);
+ pr_debug("Finished evicting pasid 0x%x\n", p->pasid);
} else
- pr_err("Failed to evict queues of pasid %d\n", p->pasid);
+ pr_err("Failed to evict queues of pasid 0x%x\n", p->pasid);
}
static void restore_process_worker(struct work_struct *work)
{
struct delayed_work *dwork;
struct kfd_process *p;
- struct kfd_process_device *pdd;
int ret = 0;
dwork = to_delayed_work(work);
@@ -1051,17 +1052,7 @@ static void restore_process_worker(struct work_struct *work)
* lifetime of this thread, kfd_process p will be valid
*/
p = container_of(dwork, struct kfd_process, restore_work);
-
- /* Call restore_process_bos on the first KGD device. This function
- * takes care of restoring the whole process including other devices.
- * Restore can fail if enough memory is not available. If so,
- * reschedule again.
- */
- pdd = list_first_entry(&p->per_device_data,
- struct kfd_process_device,
- per_device_list);
-
- pr_debug("Started restoring pasid %d\n", p->pasid);
+ pr_debug("Started restoring pasid 0x%x\n", p->pasid);
/* Setting last_restore_timestamp before successful restoration.
* Otherwise this would have to be set by KGD (restore_process_bos)
@@ -1077,7 +1068,7 @@ static void restore_process_worker(struct work_struct *work)
ret = amdgpu_amdkfd_gpuvm_restore_process_bos(p->kgd_process_info,
&p->ef);
if (ret) {
- pr_debug("Failed to restore BOs of pasid %d, retry after %d ms\n",
+ pr_debug("Failed to restore BOs of pasid 0x%x, retry after %d ms\n",
p->pasid, PROCESS_BACK_OFF_TIME_MS);
ret = queue_delayed_work(kfd_restore_wq, &p->restore_work,
msecs_to_jiffies(PROCESS_BACK_OFF_TIME_MS));
@@ -1087,9 +1078,9 @@ static void restore_process_worker(struct work_struct *work)
ret = kfd_process_restore_queues(p);
if (!ret)
- pr_debug("Finished restoring pasid %d\n", p->pasid);
+ pr_debug("Finished restoring pasid 0x%x\n", p->pasid);
else
- pr_err("Failed to restore queues of pasid %d\n", p->pasid);
+ pr_err("Failed to restore queues of pasid 0x%x\n", p->pasid);
}
void kfd_suspend_all_processes(void)
@@ -1103,7 +1094,7 @@ void kfd_suspend_all_processes(void)
cancel_delayed_work_sync(&p->restore_work);
if (kfd_process_evict_queues(p))
- pr_err("Failed to suspend process %d\n", p->pasid);
+ pr_err("Failed to suspend process 0x%x\n", p->pasid);
dma_fence_signal(p->ef);
dma_fence_put(p->ef);
p->ef = NULL;
@@ -1162,16 +1153,17 @@ int kfd_reserved_mem_mmap(struct kfd_dev *dev, struct kfd_process *process,
void kfd_flush_tlb(struct kfd_process_device *pdd)
{
struct kfd_dev *dev = pdd->dev;
- const struct kfd2kgd_calls *f2g = dev->kfd2kgd;
if (dev->dqm->sched_policy == KFD_SCHED_POLICY_NO_HWS) {
/* Nothing to flush until a VMID is assigned, which
* only happens when the first queue is created.
*/
if (pdd->qpd.vmid)
- f2g->invalidate_tlbs_vmid(dev->kgd, pdd->qpd.vmid);
+ amdgpu_amdkfd_flush_gpu_tlb_vmid(dev->kgd,
+ pdd->qpd.vmid);
} else {
- f2g->invalidate_tlbs(dev->kgd, pdd->process->pasid);
+ amdgpu_amdkfd_flush_gpu_tlb_pasid(dev->kgd,
+ pdd->process->pasid);
}
}
@@ -1186,7 +1178,7 @@ int kfd_debugfs_mqds_by_process(struct seq_file *m, void *data)
int idx = srcu_read_lock(&kfd_processes_srcu);
hash_for_each_rcu(kfd_processes_table, temp, p, kfd_processes) {
- seq_printf(m, "Process %d PASID %d:\n",
+ seq_printf(m, "Process %d PASID 0x%x:\n",
p->lead_thread->tgid, p->pasid);
mutex_lock(&p->mutex);
OpenPOWER on IntegriCloud