| author | Dave Airlie <airlied@redhat.com> | 2018-03-14 11:06:38 +1000 |
|---|---|---|
| committer | Dave Airlie <airlied@redhat.com> | 2018-03-14 11:06:38 +1000 |
| commit | 6fa7324ac5489ad43c4b6351355b869bc5458bef (patch) | |
| tree | 97de1061f074d0a76c83d8cb364c67094a33a0fa /drivers/gpu/drm/amd/amdgpu | |
| parent | 0b8eeac5c6ca6dcb19cce04bf8910006ac73dbd3 (diff) | |
| parent | a11024457d348672b26b3d4581ed19c793399b48 (diff) | |
| download | blackbird-op-linux-6fa7324ac5489ad43c4b6351355b869bc5458bef.tar.gz / blackbird-op-linux-6fa7324ac5489ad43c4b6351355b869bc5458bef.zip | |
Merge tag 'drm-amdkfd-next-2018-03-11' of git://people.freedesktop.org/~gabbayo/linux into drm-next
Major points for this pull request:
- Add dGPU support for amdkfd initialization code and queue handling. This is not yet complete dGPU support, since the GPUVM part (the piece still under debate) is not included here.
- Enable PCIe atomics for dGPU if present
- Various adjustments to the amdgpu<-->amdkfd interface for dGPUs; a caller-side sketch of the new GPUVM functions follows the commit list below
- Refactor IOMMUv2 code to allow loading amdkfd without IOMMUv2 in the system
- Add HSA process eviction code for handling system memory pressure (see the eviction-fence sketch after this list)
- Various fixes and small changes
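
The eviction work in this pull revolves around a per-process dma_fence that amdgpu attaches to every BO a KFD process owns: TTM cannot move such a BO until signaling is enabled on the fence, and enabling signaling first quiesces the process's user queues. Below is a condensed, illustrative sketch of that mechanism, modeled on the new amdgpu_amdkfd_fence.c further down in the diff; the kfd_eviction_fence_* names, the dropped sequence-number handling, and the shared driver/timeline name are simplifications rather than the merged code.

```c
/*
 * Condensed sketch of the KFD eviction fence added by this pull (see
 * amdgpu_amdkfd_fence.c in the diff below). One fence per KFD process is
 * attached to every BO the process owns; before TTM may evict such a BO,
 * signaling must be enabled on the fence, which queues work to quiesce the
 * process's user queues. Simplified, not the code as merged.
 */
#include <linux/dma-fence.h>
#include <linux/sched/mm.h>
#include <linux/slab.h>
#include <linux/spinlock.h>
#include "amdgpu_amdkfd.h"	/* declares the kgd2kfd call table */

struct kfd_eviction_fence {
	struct dma_fence base;
	struct mm_struct *mm;	/* identifies the owning KFD process */
	spinlock_t lock;
};

static const struct dma_fence_ops kfd_eviction_fence_ops;

/* Create the per-process fence; the caller attaches it to the process's BOs. */
static struct kfd_eviction_fence *kfd_eviction_fence_create(u64 context,
							     struct mm_struct *mm)
{
	struct kfd_eviction_fence *f = kzalloc(sizeof(*f), GFP_KERNEL);

	if (!f)
		return NULL;

	mmgrab(mm);		/* dropped again when the fence is released */
	f->mm = mm;
	spin_lock_init(&f->lock);
	dma_fence_init(&f->base, &kfd_eviction_fence_ops, &f->lock, context, 0);
	return f;
}

static const char *kfd_eviction_fence_get_name(struct dma_fence *fence)
{
	return "kfd_eviction_fence";
}

/*
 * Called by the fence core when an eviction job wants to wait on the fence.
 * kgd2kfd->schedule_evict_and_restore_process() queues the worker that
 * evicts the process's queues and later signals the fence. Returning true
 * keeps the fence pending until that worker signals it; returning false
 * lets the core signal it immediately and the eviction proceed.
 */
static bool kfd_eviction_fence_enable_signaling(struct dma_fence *fence)
{
	struct kfd_eviction_fence *f =
		container_of(fence, struct kfd_eviction_fence, base);

	if (dma_fence_is_signaled(fence))
		return true;

	if (!kgd2kfd->schedule_evict_and_restore_process(f->mm, fence))
		return true;

	return false;
}

static const struct dma_fence_ops kfd_eviction_fence_ops = {
	.get_driver_name	= kfd_eviction_fence_get_name,
	.get_timeline_name	= kfd_eviction_fence_get_name,
	.enable_signaling	= kfd_eviction_fence_enable_signaling,
	.wait			= dma_fence_default_wait,
};
```

In the series itself, amdgpu_amdkfd_gpuvm.c creates one such fence per process in amdgpu_amdkfd_gpuvm_create_process_vm() and temporarily removes and re-adds it around page-table updates so that KFD's own VM maintenance does not trigger an eviction.
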
* tag 'drm-amdkfd-next-2018-03-11' of git://people.freedesktop.org/~gabbayo/linux: (24 commits)
uapi: Fix type used in ioctl parameter structures
drm/amdkfd: Implement KFD process eviction/restore
drm/amdkfd: Add GPUVM virtual address space to PDD
drm/amdkfd: Remove unaligned memory access
drm/amdkfd: Centralize IOMMUv2 code and make it conditional
drm/amdgpu: Add submit IB function for KFD
drm/amdgpu: Add GPUVM memory management functions for KFD
drm/amdgpu: add amdgpu_sync_clone
drm/amdgpu: Update kgd2kfd_shared_resources for dGPU support
drm/amdgpu: Add KFD eviction fence
drm/amdgpu: Remove unused kfd2kgd interface
drm/amdgpu: Fix wrong mask in get_atc_vmid_pasid_mapping_pasid
drm/amdgpu: Fix header file dependencies
drm/amdgpu: Replace kgd_mem with amdgpu_bo for kernel pinned gtt mem
drm/amdgpu: remove useless BUG_ONs
drm/amdgpu: Enable KFD initialization on dGPUs
drm/amdkfd: Add dGPU device IDs and device info
drm/amdkfd: Add dGPU support to kernel_queue_init
drm/amdkfd: Add dGPU support to the MQD manager
drm/amdkfd: Add dGPU support to the device queue manager
...
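
Before the diffstat and diff, here is a hypothetical caller-side sketch of the GPUVM memory-management interface that amdgpu_amdkfd.h now exports to amdkfd, showing the intended call order: create a per-process VM, allocate and map a buffer, sync the page-table updates, then tear down in reverse. The function signatures and ALLOC_MEM_FLAGS_* values are taken from the new header in the diff below; the kfd_gpuvm_smoke_test wrapper and the writable GTT allocation are illustrative assumptions.

```c
/*
 * Hypothetical walk through the GPUVM interface exported by amdgpu_amdkfd.h.
 * Signatures and ALLOC_MEM_FLAGS_* come from the new header in the diff
 * below; the wrapper name and the GTT/WRITABLE choice are illustrative only.
 */
#include <linux/dma-fence.h>
#include "amdgpu_amdkfd.h"

static int kfd_gpuvm_smoke_test(struct kgd_dev *kgd, uint64_t va, uint64_t size)
{
	void *vm = NULL, *process_info = NULL;
	struct dma_fence *ef = NULL;	/* per-process eviction fence */
	struct kgd_mem *mem = NULL;
	int r;

	/* One VM (page directory) per KFD process per GPU. */
	r = amdgpu_amdkfd_gpuvm_create_process_vm(kgd, &vm, &process_info, &ef);
	if (r)
		return r;

	/* Create the backing BO for the given virtual address range. */
	r = amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(kgd, va, size, vm, &mem,
						    NULL,
						    ALLOC_MEM_FLAGS_GTT |
						    ALLOC_MEM_FLAGS_WRITABLE);
	if (r)
		goto out_destroy_vm;

	/* Validate the BO and write the PTEs for this process's VM. */
	r = amdgpu_amdkfd_gpuvm_map_memory_to_gpu(kgd, mem, vm);
	if (r)
		goto out_free;

	/* Wait for the PT/PD updates before user queues may touch the VA. */
	r = amdgpu_amdkfd_gpuvm_sync_memory(kgd, mem, true);

	amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu(kgd, mem, vm);
out_free:
	/* Would fail with -EBUSY if the BO were still mapped somewhere. */
	amdgpu_amdkfd_gpuvm_free_memory_of_gpu(kgd, mem);
out_destroy_vm:
	amdgpu_amdkfd_gpuvm_destroy_process_vm(kgd, vm);
	dma_fence_put(ef);	/* drop the reference returned through ef */
	return r;
}
```

In the merged series the real caller is the amdkfd driver, which reaches these functions through the kfd2kgd function table wired up in amdgpu_amdkfd_gfx_v7.c and amdgpu_amdkfd_gfx_v8.c.
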
Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu')
| Mode | Path | Lines changed |
|------|------|---------------|
| -rw-r--r-- | drivers/gpu/drm/amd/amdgpu/Makefile | 2 |
| -rw-r--r-- | drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c | 132 |
| -rw-r--r-- | drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h | 112 |
| -rw-r--r-- | drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_fence.c | 179 |
| -rw-r--r-- | drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c | 80 |
| -rw-r--r-- | drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c | 82 |
| -rw-r--r-- | drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c | 1506 |
| -rw-r--r-- | drivers/gpu/drm/amd/amdgpu/amdgpu_object.c | 4 |
| -rw-r--r-- | drivers/gpu/drm/amd/amdgpu/amdgpu_object.h | 2 |
| -rw-r--r-- | drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h | 6 |
| -rw-r--r-- | drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c | 56 |
| -rw-r--r-- | drivers/gpu/drm/amd/amdgpu/amdgpu_sync.h | 1 |
| -rw-r--r-- | drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 25 |
| -rw-r--r-- | drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h | 1 |
14 files changed, 2124 insertions, 64 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/Makefile b/drivers/gpu/drm/amd/amdgpu/Makefile index 353c937d947d..8522c2ea1f3e 100644 --- a/drivers/gpu/drm/amd/amdgpu/Makefile +++ b/drivers/gpu/drm/amd/amdgpu/Makefile @@ -129,6 +129,8 @@ amdgpu-y += \ # add amdkfd interfaces amdgpu-y += \ amdgpu_amdkfd.o \ + amdgpu_amdkfd_fence.o \ + amdgpu_amdkfd_gpuvm.o \ amdgpu_amdkfd_gfx_v8.o # add cgs diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c index 450426dbed92..8a23aa8f9c73 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c @@ -30,6 +30,8 @@ const struct kgd2kfd_calls *kgd2kfd; bool (*kgd2kfd_init_p)(unsigned int, const struct kgd2kfd_calls**); +static const unsigned int compute_vmid_bitmap = 0xFF00; + int amdgpu_amdkfd_init(void) { int ret; @@ -56,6 +58,7 @@ int amdgpu_amdkfd_init(void) #else ret = -ENOENT; #endif + amdgpu_amdkfd_gpuvm_init_mem_limits(); return ret; } @@ -78,10 +81,15 @@ void amdgpu_amdkfd_device_probe(struct amdgpu_device *adev) switch (adev->asic_type) { #ifdef CONFIG_DRM_AMDGPU_CIK case CHIP_KAVERI: + case CHIP_HAWAII: kfd2kgd = amdgpu_amdkfd_gfx_7_get_functions(); break; #endif case CHIP_CARRIZO: + case CHIP_TONGA: + case CHIP_FIJI: + case CHIP_POLARIS10: + case CHIP_POLARIS11: kfd2kgd = amdgpu_amdkfd_gfx_8_0_get_functions(); break; default: @@ -132,9 +140,13 @@ void amdgpu_amdkfd_device_init(struct amdgpu_device *adev) int last_valid_bit; if (adev->kfd) { struct kgd2kfd_shared_resources gpu_resources = { - .compute_vmid_bitmap = 0xFF00, + .compute_vmid_bitmap = compute_vmid_bitmap, .num_pipe_per_mec = adev->gfx.mec.num_pipe_per_mec, - .num_queue_per_pipe = adev->gfx.mec.num_queue_per_pipe + .num_queue_per_pipe = adev->gfx.mec.num_queue_per_pipe, + .gpuvm_size = min(adev->vm_manager.max_pfn + << AMDGPU_GPU_PAGE_SHIFT, + AMDGPU_VA_HOLE_START), + .drm_render_minor = adev->ddev->render->index }; /* this is going to have a few of the MSBs set that we need to @@ -204,19 +216,13 @@ int alloc_gtt_mem(struct kgd_dev *kgd, size_t size, void **cpu_ptr) { struct amdgpu_device *adev = (struct amdgpu_device *)kgd; - struct kgd_mem **mem = (struct kgd_mem **) mem_obj; + struct amdgpu_bo *bo = NULL; int r; - - BUG_ON(kgd == NULL); - BUG_ON(gpu_addr == NULL); - BUG_ON(cpu_ptr == NULL); - - *mem = kmalloc(sizeof(struct kgd_mem), GFP_KERNEL); - if ((*mem) == NULL) - return -ENOMEM; + uint64_t gpu_addr_tmp = 0; + void *cpu_ptr_tmp = NULL; r = amdgpu_bo_create(adev, size, PAGE_SIZE, true, AMDGPU_GEM_DOMAIN_GTT, - AMDGPU_GEM_CREATE_CPU_GTT_USWC, NULL, NULL, &(*mem)->bo); + AMDGPU_GEM_CREATE_CPU_GTT_USWC, NULL, NULL, &bo); if (r) { dev_err(adev->dev, "failed to allocate BO for amdkfd (%d)\n", r); @@ -224,54 +230,53 @@ int alloc_gtt_mem(struct kgd_dev *kgd, size_t size, } /* map the buffer */ - r = amdgpu_bo_reserve((*mem)->bo, true); + r = amdgpu_bo_reserve(bo, true); if (r) { dev_err(adev->dev, "(%d) failed to reserve bo for amdkfd\n", r); goto allocate_mem_reserve_bo_failed; } - r = amdgpu_bo_pin((*mem)->bo, AMDGPU_GEM_DOMAIN_GTT, - &(*mem)->gpu_addr); + r = amdgpu_bo_pin(bo, AMDGPU_GEM_DOMAIN_GTT, + &gpu_addr_tmp); if (r) { dev_err(adev->dev, "(%d) failed to pin bo for amdkfd\n", r); goto allocate_mem_pin_bo_failed; } - *gpu_addr = (*mem)->gpu_addr; - r = amdgpu_bo_kmap((*mem)->bo, &(*mem)->cpu_ptr); + r = amdgpu_bo_kmap(bo, &cpu_ptr_tmp); if (r) { dev_err(adev->dev, "(%d) failed to map bo to kernel for amdkfd\n", r); goto allocate_mem_kmap_bo_failed; } - *cpu_ptr = (*mem)->cpu_ptr; - 
amdgpu_bo_unreserve((*mem)->bo); + *mem_obj = bo; + *gpu_addr = gpu_addr_tmp; + *cpu_ptr = cpu_ptr_tmp; + + amdgpu_bo_unreserve(bo); return 0; allocate_mem_kmap_bo_failed: - amdgpu_bo_unpin((*mem)->bo); + amdgpu_bo_unpin(bo); allocate_mem_pin_bo_failed: - amdgpu_bo_unreserve((*mem)->bo); + amdgpu_bo_unreserve(bo); allocate_mem_reserve_bo_failed: - amdgpu_bo_unref(&(*mem)->bo); + amdgpu_bo_unref(&bo); return r; } void free_gtt_mem(struct kgd_dev *kgd, void *mem_obj) { - struct kgd_mem *mem = (struct kgd_mem *) mem_obj; + struct amdgpu_bo *bo = (struct amdgpu_bo *) mem_obj; - BUG_ON(mem == NULL); - - amdgpu_bo_reserve(mem->bo, true); - amdgpu_bo_kunmap(mem->bo); - amdgpu_bo_unpin(mem->bo); - amdgpu_bo_unreserve(mem->bo); - amdgpu_bo_unref(&(mem->bo)); - kfree(mem); + amdgpu_bo_reserve(bo, true); + amdgpu_bo_kunmap(bo); + amdgpu_bo_unpin(bo); + amdgpu_bo_unreserve(bo); + amdgpu_bo_unref(&(bo)); } void get_local_mem_info(struct kgd_dev *kgd, @@ -361,3 +366,68 @@ uint64_t amdgpu_amdkfd_get_vram_usage(struct kgd_dev *kgd) return amdgpu_vram_mgr_usage(&adev->mman.bdev.man[TTM_PL_VRAM]); } + +int amdgpu_amdkfd_submit_ib(struct kgd_dev *kgd, enum kgd_engine_type engine, + uint32_t vmid, uint64_t gpu_addr, + uint32_t *ib_cmd, uint32_t ib_len) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)kgd; + struct amdgpu_job *job; + struct amdgpu_ib *ib; + struct amdgpu_ring *ring; + struct dma_fence *f = NULL; + int ret; + + switch (engine) { + case KGD_ENGINE_MEC1: + ring = &adev->gfx.compute_ring[0]; + break; + case KGD_ENGINE_SDMA1: + ring = &adev->sdma.instance[0].ring; + break; + case KGD_ENGINE_SDMA2: + ring = &adev->sdma.instance[1].ring; + break; + default: + pr_err("Invalid engine in IB submission: %d\n", engine); + ret = -EINVAL; + goto err; + } + + ret = amdgpu_job_alloc(adev, 1, &job, NULL); + if (ret) + goto err; + + ib = &job->ibs[0]; + memset(ib, 0, sizeof(struct amdgpu_ib)); + + ib->gpu_addr = gpu_addr; + ib->ptr = ib_cmd; + ib->length_dw = ib_len; + /* This works for NO_HWS. 
TODO: need to handle without knowing VMID */ + job->vmid = vmid; + + ret = amdgpu_ib_schedule(ring, 1, ib, job, &f); + if (ret) { + DRM_ERROR("amdgpu: failed to schedule IB.\n"); + goto err_ib_sched; + } + + ret = dma_fence_wait(f, false); + +err_ib_sched: + dma_fence_put(f); + amdgpu_job_free(job); +err: + return ret; +} + +bool amdgpu_amdkfd_is_kfd_vmid(struct amdgpu_device *adev, u32 vmid) +{ + if (adev->kfd) { + if ((1 << vmid) & compute_vmid_bitmap) + return true; + } + + return false; +} diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h index 2a519f9062ee..d7509b706b26 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h @@ -28,13 +28,89 @@ #include <linux/types.h> #include <linux/mmu_context.h> #include <kgd_kfd_interface.h> +#include <drm/ttm/ttm_execbuf_util.h> +#include "amdgpu_sync.h" +#include "amdgpu_vm.h" + +extern const struct kgd2kfd_calls *kgd2kfd; struct amdgpu_device; +struct kfd_bo_va_list { + struct list_head bo_list; + struct amdgpu_bo_va *bo_va; + void *kgd_dev; + bool is_mapped; + uint64_t va; + uint64_t pte_flags; +}; + struct kgd_mem { + struct mutex lock; struct amdgpu_bo *bo; - uint64_t gpu_addr; - void *cpu_ptr; + struct list_head bo_va_list; + /* protected by amdkfd_process_info.lock */ + struct ttm_validate_buffer validate_list; + struct ttm_validate_buffer resv_list; + uint32_t domain; + unsigned int mapped_to_gpu_memory; + uint64_t va; + + uint32_t mapping_flags; + + struct amdkfd_process_info *process_info; + + struct amdgpu_sync sync; + + bool aql_queue; +}; + +/* KFD Memory Eviction */ +struct amdgpu_amdkfd_fence { + struct dma_fence base; + struct mm_struct *mm; + spinlock_t lock; + char timeline_name[TASK_COMM_LEN]; +}; + +struct amdgpu_amdkfd_fence *amdgpu_amdkfd_fence_create(u64 context, + struct mm_struct *mm); +bool amdkfd_fence_check_mm(struct dma_fence *f, struct mm_struct *mm); +struct amdgpu_amdkfd_fence *to_amdgpu_amdkfd_fence(struct dma_fence *f); + +struct amdkfd_process_info { + /* List head of all VMs that belong to a KFD process */ + struct list_head vm_list_head; + /* List head for all KFD BOs that belong to a KFD process. */ + struct list_head kfd_bo_list; + /* Lock to protect kfd_bo_list */ + struct mutex lock; + + /* Number of VMs */ + unsigned int n_vms; + /* Eviction Fence */ + struct amdgpu_amdkfd_fence *eviction_fence; +}; + +/* struct amdkfd_vm - + * For Memory Eviction KGD requires a mechanism to keep track of all KFD BOs + * belonging to a KFD process. All the VMs belonging to the same process point + * to the same amdkfd_process_info. + */ +struct amdkfd_vm { + /* Keep base as the first parameter for pointer compatibility between + * amdkfd_vm and amdgpu_vm. 
+ */ + struct amdgpu_vm base; + + /* List node in amdkfd_process_info.vm_list_head*/ + struct list_head vm_list_node; + + struct amdgpu_device *adev; + /* Points to the KFD process VM info*/ + struct amdkfd_process_info *process_info; + + uint64_t pd_phys_addr; }; int amdgpu_amdkfd_init(void); @@ -48,9 +124,15 @@ void amdgpu_amdkfd_device_probe(struct amdgpu_device *adev); void amdgpu_amdkfd_device_init(struct amdgpu_device *adev); void amdgpu_amdkfd_device_fini(struct amdgpu_device *adev); +int amdgpu_amdkfd_submit_ib(struct kgd_dev *kgd, enum kgd_engine_type engine, + uint32_t vmid, uint64_t gpu_addr, + uint32_t *ib_cmd, uint32_t ib_len); + struct kfd2kgd_calls *amdgpu_amdkfd_gfx_7_get_functions(void); struct kfd2kgd_calls *amdgpu_amdkfd_gfx_8_0_get_functions(void); +bool amdgpu_amdkfd_is_kfd_vmid(struct amdgpu_device *adev, u32 vmid); + /* Shared API */ int alloc_gtt_mem(struct kgd_dev *kgd, size_t size, void **mem_obj, uint64_t *gpu_addr, @@ -79,4 +161,30 @@ uint64_t amdgpu_amdkfd_get_vram_usage(struct kgd_dev *kgd); valid; \ }) +/* GPUVM API */ +int amdgpu_amdkfd_gpuvm_create_process_vm(struct kgd_dev *kgd, void **vm, + void **process_info, + struct dma_fence **ef); +void amdgpu_amdkfd_gpuvm_destroy_process_vm(struct kgd_dev *kgd, void *vm); +uint32_t amdgpu_amdkfd_gpuvm_get_process_page_dir(void *vm); +int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu( + struct kgd_dev *kgd, uint64_t va, uint64_t size, + void *vm, struct kgd_mem **mem, + uint64_t *offset, uint32_t flags); +int amdgpu_amdkfd_gpuvm_free_memory_of_gpu( + struct kgd_dev *kgd, struct kgd_mem *mem); +int amdgpu_amdkfd_gpuvm_map_memory_to_gpu( + struct kgd_dev *kgd, struct kgd_mem *mem, void *vm); +int amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu( + struct kgd_dev *kgd, struct kgd_mem *mem, void *vm); +int amdgpu_amdkfd_gpuvm_sync_memory( + struct kgd_dev *kgd, struct kgd_mem *mem, bool intr); +int amdgpu_amdkfd_gpuvm_map_gtt_bo_to_kernel(struct kgd_dev *kgd, + struct kgd_mem *mem, void **kptr, uint64_t *size); +int amdgpu_amdkfd_gpuvm_restore_process_bos(void *process_info, + struct dma_fence **ef); + +void amdgpu_amdkfd_gpuvm_init_mem_limits(void); +void amdgpu_amdkfd_unreserve_system_memory_limit(struct amdgpu_bo *bo); + #endif /* AMDGPU_AMDKFD_H_INCLUDED */ diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_fence.c new file mode 100644 index 000000000000..2c14025e5e76 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_fence.c @@ -0,0 +1,179 @@ +/* + * Copyright 2016-2018 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +#include <linux/dma-fence.h> +#include <linux/spinlock.h> +#include <linux/atomic.h> +#include <linux/stacktrace.h> +#include <linux/sched.h> +#include <linux/slab.h> +#include <linux/sched/mm.h> +#include "amdgpu_amdkfd.h" + +static const struct dma_fence_ops amdkfd_fence_ops; +static atomic_t fence_seq = ATOMIC_INIT(0); + +/* Eviction Fence + * Fence helper functions to deal with KFD memory eviction. + * Big Idea - Since KFD submissions are done by user queues, a BO cannot be + * evicted unless all the user queues for that process are evicted. + * + * All the BOs in a process share an eviction fence. When process X wants + * to map VRAM memory but TTM can't find enough space, TTM will attempt to + * evict BOs from its LRU list. TTM checks if the BO is valuable to evict + * by calling ttm_bo_driver->eviction_valuable(). + * + * ttm_bo_driver->eviction_valuable() - will return false if the BO belongs + * to process X. Otherwise, it will return true to indicate BO can be + * evicted by TTM. + * + * If ttm_bo_driver->eviction_valuable returns true, then TTM will continue + * the evcition process for that BO by calling ttm_bo_evict --> amdgpu_bo_move + * --> amdgpu_copy_buffer(). This sets up job in GPU scheduler. + * + * GPU Scheduler (amd_sched_main) - sets up a cb (fence_add_callback) to + * nofity when the BO is free to move. fence_add_callback --> enable_signaling + * --> amdgpu_amdkfd_fence.enable_signaling + * + * amdgpu_amdkfd_fence.enable_signaling - Start a work item that will quiesce + * user queues and signal fence. The work item will also start another delayed + * work item to restore BOs + */ + +struct amdgpu_amdkfd_fence *amdgpu_amdkfd_fence_create(u64 context, + struct mm_struct *mm) +{ + struct amdgpu_amdkfd_fence *fence; + + fence = kzalloc(sizeof(*fence), GFP_KERNEL); + if (fence == NULL) + return NULL; + + /* This reference gets released in amdkfd_fence_release */ + mmgrab(mm); + fence->mm = mm; + get_task_comm(fence->timeline_name, current); + spin_lock_init(&fence->lock); + + dma_fence_init(&fence->base, &amdkfd_fence_ops, &fence->lock, + context, atomic_inc_return(&fence_seq)); + + return fence; +} + +struct amdgpu_amdkfd_fence *to_amdgpu_amdkfd_fence(struct dma_fence *f) +{ + struct amdgpu_amdkfd_fence *fence; + + if (!f) + return NULL; + + fence = container_of(f, struct amdgpu_amdkfd_fence, base); + if (fence && f->ops == &amdkfd_fence_ops) + return fence; + + return NULL; +} + +static const char *amdkfd_fence_get_driver_name(struct dma_fence *f) +{ + return "amdgpu_amdkfd_fence"; +} + +static const char *amdkfd_fence_get_timeline_name(struct dma_fence *f) +{ + struct amdgpu_amdkfd_fence *fence = to_amdgpu_amdkfd_fence(f); + + return fence->timeline_name; +} + +/** + * amdkfd_fence_enable_signaling - This gets called when TTM wants to evict + * a KFD BO and schedules a job to move the BO. + * If fence is already signaled return true. + * If fence is not signaled schedule a evict KFD process work item. 
+ */ +static bool amdkfd_fence_enable_signaling(struct dma_fence *f) +{ + struct amdgpu_amdkfd_fence *fence = to_amdgpu_amdkfd_fence(f); + + if (!fence) + return false; + + if (dma_fence_is_signaled(f)) + return true; + + if (!kgd2kfd->schedule_evict_and_restore_process(fence->mm, f)) + return true; + + return false; +} + +/** + * amdkfd_fence_release - callback that fence can be freed + * + * @fence: fence + * + * This function is called when the reference count becomes zero. + * Drops the mm_struct reference and RCU schedules freeing up the fence. + */ +static void amdkfd_fence_release(struct dma_fence *f) +{ + struct amdgpu_amdkfd_fence *fence = to_amdgpu_amdkfd_fence(f); + + /* Unconditionally signal the fence. The process is getting + * terminated. + */ + if (WARN_ON(!fence)) + return; /* Not an amdgpu_amdkfd_fence */ + + mmdrop(fence->mm); + kfree_rcu(f, rcu); +} + +/** + * amdkfd_fence_check_mm - Check if @mm is same as that of the fence @f + * if same return TRUE else return FALSE. + * + * @f: [IN] fence + * @mm: [IN] mm that needs to be verified + */ +bool amdkfd_fence_check_mm(struct dma_fence *f, struct mm_struct *mm) +{ + struct amdgpu_amdkfd_fence *fence = to_amdgpu_amdkfd_fence(f); + + if (!fence) + return false; + else if (fence->mm == mm) + return true; + + return false; +} + +static const struct dma_fence_ops amdkfd_fence_ops = { + .get_driver_name = amdkfd_fence_get_driver_name, + .get_timeline_name = amdkfd_fence_get_timeline_name, + .enable_signaling = amdkfd_fence_enable_signaling, + .signaled = NULL, + .wait = dma_fence_default_wait, + .release = amdkfd_fence_release, +}; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c index a9e6aea0e5f8..7485c376b90e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c @@ -139,11 +139,14 @@ static uint32_t kgd_address_watch_get_offset(struct kgd_dev *kgd, static bool get_atc_vmid_pasid_mapping_valid(struct kgd_dev *kgd, uint8_t vmid); static uint16_t get_atc_vmid_pasid_mapping_pasid(struct kgd_dev *kgd, uint8_t vmid); -static void write_vmid_invalidate_request(struct kgd_dev *kgd, uint8_t vmid); static uint16_t get_fw_version(struct kgd_dev *kgd, enum kgd_engine_type type); static void set_scratch_backing_va(struct kgd_dev *kgd, uint64_t va, uint32_t vmid); +static void set_vm_context_page_table_base(struct kgd_dev *kgd, uint32_t vmid, + uint32_t page_table_base); +static int invalidate_tlbs(struct kgd_dev *kgd, uint16_t pasid); +static int invalidate_tlbs_vmid(struct kgd_dev *kgd, uint16_t vmid); /* Because of REG_GET_FIELD() being used, we put this function in the * asic specific file. 
@@ -196,12 +199,25 @@ static const struct kfd2kgd_calls kfd2kgd = { .address_watch_get_offset = kgd_address_watch_get_offset, .get_atc_vmid_pasid_mapping_pasid = get_atc_vmid_pasid_mapping_pasid, .get_atc_vmid_pasid_mapping_valid = get_atc_vmid_pasid_mapping_valid, - .write_vmid_invalidate_request = write_vmid_invalidate_request, .get_fw_version = get_fw_version, .set_scratch_backing_va = set_scratch_backing_va, .get_tile_config = get_tile_config, .get_cu_info = get_cu_info, - .get_vram_usage = amdgpu_amdkfd_get_vram_usage + .get_vram_usage = amdgpu_amdkfd_get_vram_usage, + .create_process_vm = amdgpu_amdkfd_gpuvm_create_process_vm, + .destroy_process_vm = amdgpu_amdkfd_gpuvm_destroy_process_vm, + .get_process_page_dir = amdgpu_amdkfd_gpuvm_get_process_page_dir, + .set_vm_context_page_table_base = set_vm_context_page_table_base, + .alloc_memory_of_gpu = amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu, + .free_memory_of_gpu = amdgpu_amdkfd_gpuvm_free_memory_of_gpu, + .map_memory_to_gpu = amdgpu_amdkfd_gpuvm_map_memory_to_gpu, + .unmap_memory_to_gpu = amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu, + .sync_memory = amdgpu_amdkfd_gpuvm_sync_memory, + .map_gtt_bo_to_kernel = amdgpu_amdkfd_gpuvm_map_gtt_bo_to_kernel, + .restore_process_bos = amdgpu_amdkfd_gpuvm_restore_process_bos, + .invalidate_tlbs = invalidate_tlbs, + .invalidate_tlbs_vmid = invalidate_tlbs_vmid, + .submit_ib = amdgpu_amdkfd_submit_ib, }; struct kfd2kgd_calls *amdgpu_amdkfd_gfx_7_get_functions(void) @@ -787,14 +803,7 @@ static uint16_t get_atc_vmid_pasid_mapping_pasid(struct kgd_dev *kgd, struct amdgpu_device *adev = (struct amdgpu_device *) kgd; reg = RREG32(mmATC_VMID0_PASID_MAPPING + vmid); - return reg & ATC_VMID0_PASID_MAPPING__VALID_MASK; -} - -static void write_vmid_invalidate_request(struct kgd_dev *kgd, uint8_t vmid) -{ - struct amdgpu_device *adev = (struct amdgpu_device *) kgd; - - WREG32(mmVM_INVALIDATE_REQUEST, 1 << vmid); + return reg & ATC_VMID0_PASID_MAPPING__PASID_MASK; } static void set_scratch_backing_va(struct kgd_dev *kgd, @@ -812,8 +821,6 @@ static uint16_t get_fw_version(struct kgd_dev *kgd, enum kgd_engine_type type) struct amdgpu_device *adev = (struct amdgpu_device *) kgd; const union amdgpu_firmware_header *hdr; - BUG_ON(kgd == NULL); - switch (type) { case KGD_ENGINE_PFP: hdr = (const union amdgpu_firmware_header *) @@ -866,3 +873,50 @@ static uint16_t get_fw_version(struct kgd_dev *kgd, enum kgd_engine_type type) return hdr->common.ucode_version; } +static void set_vm_context_page_table_base(struct kgd_dev *kgd, uint32_t vmid, + uint32_t page_table_base) +{ + struct amdgpu_device *adev = get_amdgpu_device(kgd); + + if (!amdgpu_amdkfd_is_kfd_vmid(adev, vmid)) { + pr_err("trying to set page table base for wrong VMID\n"); + return; + } + WREG32(mmVM_CONTEXT8_PAGE_TABLE_BASE_ADDR + vmid - 8, page_table_base); +} + +static int invalidate_tlbs(struct kgd_dev *kgd, uint16_t pasid) +{ + struct amdgpu_device *adev = (struct amdgpu_device *) kgd; + int vmid; + unsigned int tmp; + + for (vmid = 0; vmid < 16; vmid++) { + if (!amdgpu_amdkfd_is_kfd_vmid(adev, vmid)) + continue; + + tmp = RREG32(mmATC_VMID0_PASID_MAPPING + vmid); + if ((tmp & ATC_VMID0_PASID_MAPPING__VALID_MASK) && + (tmp & ATC_VMID0_PASID_MAPPING__PASID_MASK) == pasid) { + WREG32(mmVM_INVALIDATE_REQUEST, 1 << vmid); + RREG32(mmVM_INVALIDATE_RESPONSE); + break; + } + } + + return 0; +} + +static int invalidate_tlbs_vmid(struct kgd_dev *kgd, uint16_t vmid) +{ + struct amdgpu_device *adev = (struct amdgpu_device *) kgd; + + if 
(!amdgpu_amdkfd_is_kfd_vmid(adev, vmid)) { + pr_err("non kfd vmid\n"); + return 0; + } + + WREG32(mmVM_INVALIDATE_REQUEST, 1 << vmid); + RREG32(mmVM_INVALIDATE_RESPONSE); + return 0; +} diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c index b127259d7d85..7be453494423 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c @@ -81,7 +81,6 @@ static int kgd_hqd_destroy(struct kgd_dev *kgd, void *mqd, uint32_t queue_id); static int kgd_hqd_sdma_destroy(struct kgd_dev *kgd, void *mqd, unsigned int utimeout); -static void write_vmid_invalidate_request(struct kgd_dev *kgd, uint8_t vmid); static int kgd_address_watch_disable(struct kgd_dev *kgd); static int kgd_address_watch_execute(struct kgd_dev *kgd, unsigned int watch_point_id, @@ -99,10 +98,13 @@ static bool get_atc_vmid_pasid_mapping_valid(struct kgd_dev *kgd, uint8_t vmid); static uint16_t get_atc_vmid_pasid_mapping_pasid(struct kgd_dev *kgd, uint8_t vmid); -static void write_vmid_invalidate_request(struct kgd_dev *kgd, uint8_t vmid); static uint16_t get_fw_version(struct kgd_dev *kgd, enum kgd_engine_type type); static void set_scratch_backing_va(struct kgd_dev *kgd, uint64_t va, uint32_t vmid); +static void set_vm_context_page_table_base(struct kgd_dev *kgd, uint32_t vmid, + uint32_t page_table_base); +static int invalidate_tlbs(struct kgd_dev *kgd, uint16_t pasid); +static int invalidate_tlbs_vmid(struct kgd_dev *kgd, uint16_t vmid); /* Because of REG_GET_FIELD() being used, we put this function in the * asic specific file. @@ -157,12 +159,25 @@ static const struct kfd2kgd_calls kfd2kgd = { get_atc_vmid_pasid_mapping_pasid, .get_atc_vmid_pasid_mapping_valid = get_atc_vmid_pasid_mapping_valid, - .write_vmid_invalidate_request = write_vmid_invalidate_request, .get_fw_version = get_fw_version, .set_scratch_backing_va = set_scratch_backing_va, .get_tile_config = get_tile_config, .get_cu_info = get_cu_info, - .get_vram_usage = amdgpu_amdkfd_get_vram_usage + .get_vram_usage = amdgpu_amdkfd_get_vram_usage, + .create_process_vm = amdgpu_amdkfd_gpuvm_create_process_vm, + .destroy_process_vm = amdgpu_amdkfd_gpuvm_destroy_process_vm, + .get_process_page_dir = amdgpu_amdkfd_gpuvm_get_process_page_dir, + .set_vm_context_page_table_base = set_vm_context_page_table_base, + .alloc_memory_of_gpu = amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu, + .free_memory_of_gpu = amdgpu_amdkfd_gpuvm_free_memory_of_gpu, + .map_memory_to_gpu = amdgpu_amdkfd_gpuvm_map_memory_to_gpu, + .unmap_memory_to_gpu = amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu, + .sync_memory = amdgpu_amdkfd_gpuvm_sync_memory, + .map_gtt_bo_to_kernel = amdgpu_amdkfd_gpuvm_map_gtt_bo_to_kernel, + .restore_process_bos = amdgpu_amdkfd_gpuvm_restore_process_bos, + .invalidate_tlbs = invalidate_tlbs, + .invalidate_tlbs_vmid = invalidate_tlbs_vmid, + .submit_ib = amdgpu_amdkfd_submit_ib, }; struct kfd2kgd_calls *amdgpu_amdkfd_gfx_8_0_get_functions(void) @@ -704,14 +719,7 @@ static uint16_t get_atc_vmid_pasid_mapping_pasid(struct kgd_dev *kgd, struct amdgpu_device *adev = (struct amdgpu_device *) kgd; reg = RREG32(mmATC_VMID0_PASID_MAPPING + vmid); - return reg & ATC_VMID0_PASID_MAPPING__VALID_MASK; -} - -static void write_vmid_invalidate_request(struct kgd_dev *kgd, uint8_t vmid) -{ - struct amdgpu_device *adev = (struct amdgpu_device *) kgd; - - WREG32(mmVM_INVALIDATE_REQUEST, 1 << vmid); + return reg & ATC_VMID0_PASID_MAPPING__PASID_MASK; } static int 
kgd_address_watch_disable(struct kgd_dev *kgd) @@ -775,8 +783,6 @@ static uint16_t get_fw_version(struct kgd_dev *kgd, enum kgd_engine_type type) struct amdgpu_device *adev = (struct amdgpu_device *) kgd; const union amdgpu_firmware_header *hdr; - BUG_ON(kgd == NULL); - switch (type) { case KGD_ENGINE_PFP: hdr = (const union amdgpu_firmware_header *) @@ -828,3 +834,51 @@ static uint16_t get_fw_version(struct kgd_dev *kgd, enum kgd_engine_type type) /* Only 12 bit in use*/ return hdr->common.ucode_version; } + +static void set_vm_context_page_table_base(struct kgd_dev *kgd, uint32_t vmid, + uint32_t page_table_base) +{ + struct amdgpu_device *adev = get_amdgpu_device(kgd); + + if (!amdgpu_amdkfd_is_kfd_vmid(adev, vmid)) { + pr_err("trying to set page table base for wrong VMID\n"); + return; + } + WREG32(mmVM_CONTEXT8_PAGE_TABLE_BASE_ADDR + vmid - 8, page_table_base); +} + +static int invalidate_tlbs(struct kgd_dev *kgd, uint16_t pasid) +{ + struct amdgpu_device *adev = (struct amdgpu_device *) kgd; + int vmid; + unsigned int tmp; + + for (vmid = 0; vmid < 16; vmid++) { + if (!amdgpu_amdkfd_is_kfd_vmid(adev, vmid)) + continue; + + tmp = RREG32(mmATC_VMID0_PASID_MAPPING + vmid); + if ((tmp & ATC_VMID0_PASID_MAPPING__VALID_MASK) && + (tmp & ATC_VMID0_PASID_MAPPING__PASID_MASK) == pasid) { + WREG32(mmVM_INVALIDATE_REQUEST, 1 << vmid); + RREG32(mmVM_INVALIDATE_RESPONSE); + break; + } + } + + return 0; +} + +static int invalidate_tlbs_vmid(struct kgd_dev *kgd, uint16_t vmid) +{ + struct amdgpu_device *adev = (struct amdgpu_device *) kgd; + + if (!amdgpu_amdkfd_is_kfd_vmid(adev, vmid)) { + pr_err("non kfd vmid %d\n", vmid); + return -EINVAL; + } + + WREG32(mmVM_INVALIDATE_REQUEST, 1 << vmid); + RREG32(mmVM_INVALIDATE_RESPONSE); + return 0; +} diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c new file mode 100644 index 000000000000..e0371a9967b9 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c @@ -0,0 +1,1506 @@ +/* + * Copyright 2014-2018 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +#define pr_fmt(fmt) "kfd2kgd: " fmt + +#include <linux/list.h> +#include <drm/drmP.h> +#include "amdgpu_object.h" +#include "amdgpu_vm.h" +#include "amdgpu_amdkfd.h" + +/* Special VM and GART address alignment needed for VI pre-Fiji due to + * a HW bug. 
+ */ +#define VI_BO_SIZE_ALIGN (0x8000) + +/* Impose limit on how much memory KFD can use */ +static struct { + uint64_t max_system_mem_limit; + int64_t system_mem_used; + spinlock_t mem_limit_lock; +} kfd_mem_limit; + +/* Struct used for amdgpu_amdkfd_bo_validate */ +struct amdgpu_vm_parser { + uint32_t domain; + bool wait; +}; + +static const char * const domain_bit_to_string[] = { + "CPU", + "GTT", + "VRAM", + "GDS", + "GWS", + "OA" +}; + +#define domain_string(domain) domain_bit_to_string[ffs(domain)-1] + + + +static inline struct amdgpu_device *get_amdgpu_device(struct kgd_dev *kgd) +{ + return (struct amdgpu_device *)kgd; +} + +static bool check_if_add_bo_to_vm(struct amdgpu_vm *avm, + struct kgd_mem *mem) +{ + struct kfd_bo_va_list *entry; + + list_for_each_entry(entry, &mem->bo_va_list, bo_list) + if (entry->bo_va->base.vm == avm) + return false; + + return true; +} + +/* Set memory usage limits. Current, limits are + * System (kernel) memory - 3/8th System RAM + */ +void amdgpu_amdkfd_gpuvm_init_mem_limits(void) +{ + struct sysinfo si; + uint64_t mem; + + si_meminfo(&si); + mem = si.totalram - si.totalhigh; + mem *= si.mem_unit; + + spin_lock_init(&kfd_mem_limit.mem_limit_lock); + kfd_mem_limit.max_system_mem_limit = (mem >> 1) - (mem >> 3); + pr_debug("Kernel memory limit %lluM\n", + (kfd_mem_limit.max_system_mem_limit >> 20)); +} + +static int amdgpu_amdkfd_reserve_system_mem_limit(struct amdgpu_device *adev, + uint64_t size, u32 domain) +{ + size_t acc_size; + int ret = 0; + + acc_size = ttm_bo_dma_acc_size(&adev->mman.bdev, size, + sizeof(struct amdgpu_bo)); + + spin_lock(&kfd_mem_limit.mem_limit_lock); + if (domain == AMDGPU_GEM_DOMAIN_GTT) { + if (kfd_mem_limit.system_mem_used + (acc_size + size) > + kfd_mem_limit.max_system_mem_limit) { + ret = -ENOMEM; + goto err_no_mem; + } + kfd_mem_limit.system_mem_used += (acc_size + size); + } +err_no_mem: + spin_unlock(&kfd_mem_limit.mem_limit_lock); + return ret; +} + +static void unreserve_system_mem_limit(struct amdgpu_device *adev, + uint64_t size, u32 domain) +{ + size_t acc_size; + + acc_size = ttm_bo_dma_acc_size(&adev->mman.bdev, size, + sizeof(struct amdgpu_bo)); + + spin_lock(&kfd_mem_limit.mem_limit_lock); + if (domain == AMDGPU_GEM_DOMAIN_GTT) + kfd_mem_limit.system_mem_used -= (acc_size + size); + WARN_ONCE(kfd_mem_limit.system_mem_used < 0, + "kfd system memory accounting unbalanced"); + + spin_unlock(&kfd_mem_limit.mem_limit_lock); +} + +void amdgpu_amdkfd_unreserve_system_memory_limit(struct amdgpu_bo *bo) +{ + spin_lock(&kfd_mem_limit.mem_limit_lock); + + if (bo->preferred_domains == AMDGPU_GEM_DOMAIN_GTT) { + kfd_mem_limit.system_mem_used -= + (bo->tbo.acc_size + amdgpu_bo_size(bo)); + } + WARN_ONCE(kfd_mem_limit.system_mem_used < 0, + "kfd system memory accounting unbalanced"); + + spin_unlock(&kfd_mem_limit.mem_limit_lock); +} + + +/* amdgpu_amdkfd_remove_eviction_fence - Removes eviction fence(s) from BO's + * reservation object. + * + * @bo: [IN] Remove eviction fence(s) from this BO + * @ef: [IN] If ef is specified, then this eviction fence is removed if it + * is present in the shared list. + * @ef_list: [OUT] Returns list of eviction fences. These fences are removed + * from BO's reservation object shared list. + * @ef_count: [OUT] Number of fences in ef_list. + * + * NOTE: If called with ef_list, then amdgpu_amdkfd_add_eviction_fence must be + * called to restore the eviction fences and to avoid memory leak. This is + * useful for shared BOs. + * NOTE: Must be called with BO reserved i.e. 
bo->tbo.resv->lock held. + */ +static int amdgpu_amdkfd_remove_eviction_fence(struct amdgpu_bo *bo, + struct amdgpu_amdkfd_fence *ef, + struct amdgpu_amdkfd_fence ***ef_list, + unsigned int *ef_count) +{ + struct reservation_object_list *fobj; + struct reservation_object *resv; + unsigned int i = 0, j = 0, k = 0, shared_count; + unsigned int count = 0; + struct amdgpu_amdkfd_fence **fence_list; + + if (!ef && !ef_list) + return -EINVAL; + + if (ef_list) { + *ef_list = NULL; + *ef_count = 0; + } + + resv = bo->tbo.resv; + fobj = reservation_object_get_list(resv); + + if (!fobj) + return 0; + + preempt_disable(); + write_seqcount_begin(&resv->seq); + + /* Go through all the shared fences in the resevation object. If + * ef is specified and it exists in the list, remove it and reduce the + * count. If ef is not specified, then get the count of eviction fences + * present. + */ + shared_count = fobj->shared_count; + for (i = 0; i < shared_count; ++i) { + struct dma_fence *f; + + f = rcu_dereference_protected(fobj->shared[i], + reservation_object_held(resv)); + + if (ef) { + if (f->context == ef->base.context) { + dma_fence_put(f); + fobj->shared_count--; + } else { + RCU_INIT_POINTER(fobj->shared[j++], f); + } + } else if (to_amdgpu_amdkfd_fence(f)) + count++; + } + write_seqcount_end(&resv->seq); + preempt_enable(); + + if (ef || !count) + return 0; + + /* Alloc memory for count number of eviction fence pointers. Fill the + * ef_list array and ef_count + */ + fence_list = kcalloc(count, sizeof(struct amdgpu_amdkfd_fence *), + GFP_KERNEL); + if (!fence_list) + return -ENOMEM; + + preempt_disable(); + write_seqcount_begin(&resv->seq); + + j = 0; + for (i = 0; i < shared_count; ++i) { + struct dma_fence *f; + struct amdgpu_amdkfd_fence *efence; + + f = rcu_dereference_protected(fobj->shared[i], + reservation_object_held(resv)); + + efence = to_amdgpu_amdkfd_fence(f); + if (efence) { + fence_list[k++] = efence; + fobj->shared_count--; + } else { + RCU_INIT_POINTER(fobj->shared[j++], f); + } + } + + write_seqcount_end(&resv->seq); + preempt_enable(); + + *ef_list = fence_list; + *ef_count = k; + + return 0; +} + +/* amdgpu_amdkfd_add_eviction_fence - Adds eviction fence(s) back into BO's + * reservation object. + * + * @bo: [IN] Add eviction fences to this BO + * @ef_list: [IN] List of eviction fences to be added + * @ef_count: [IN] Number of fences in ef_list. + * + * NOTE: Must call amdgpu_amdkfd_remove_eviction_fence before calling this + * function. + */ +static void amdgpu_amdkfd_add_eviction_fence(struct amdgpu_bo *bo, + struct amdgpu_amdkfd_fence **ef_list, + unsigned int ef_count) +{ + int i; + + if (!ef_list || !ef_count) + return; + + for (i = 0; i < ef_count; i++) { + amdgpu_bo_fence(bo, &ef_list[i]->base, true); + /* Re-adding the fence takes an additional reference. Drop that + * reference. 
+ */ + dma_fence_put(&ef_list[i]->base); + } + + kfree(ef_list); +} + +static int amdgpu_amdkfd_bo_validate(struct amdgpu_bo *bo, uint32_t domain, + bool wait) +{ + struct ttm_operation_ctx ctx = { false, false }; + int ret; + + if (WARN(amdgpu_ttm_tt_get_usermm(bo->tbo.ttm), + "Called with userptr BO")) + return -EINVAL; + + amdgpu_ttm_placement_from_domain(bo, domain); + + ret = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx); + if (ret) + goto validate_fail; + if (wait) { + struct amdgpu_amdkfd_fence **ef_list; + unsigned int ef_count; + + ret = amdgpu_amdkfd_remove_eviction_fence(bo, NULL, &ef_list, + &ef_count); + if (ret) + goto validate_fail; + + ttm_bo_wait(&bo->tbo, false, false); + amdgpu_amdkfd_add_eviction_fence(bo, ef_list, ef_count); + } + +validate_fail: + return ret; +} + +static int amdgpu_amdkfd_validate(void *param, struct amdgpu_bo *bo) +{ + struct amdgpu_vm_parser *p = param; + + return amdgpu_amdkfd_bo_validate(bo, p->domain, p->wait); +} + +/* vm_validate_pt_pd_bos - Validate page table and directory BOs + * + * Page directories are not updated here because huge page handling + * during page table updates can invalidate page directory entries + * again. Page directories are only updated after updating page + * tables. + */ +static int vm_validate_pt_pd_bos(struct amdkfd_vm *vm) +{ + struct amdgpu_bo *pd = vm->base.root.base.bo; + struct amdgpu_device *adev = amdgpu_ttm_adev(pd->tbo.bdev); + struct amdgpu_vm_parser param; + uint64_t addr, flags = AMDGPU_PTE_VALID; + int ret; + + param.domain = AMDGPU_GEM_DOMAIN_VRAM; + param.wait = false; + + ret = amdgpu_vm_validate_pt_bos(adev, &vm->base, amdgpu_amdkfd_validate, + ¶m); + if (ret) { + pr_err("amdgpu: failed to validate PT BOs\n"); + return ret; + } + + ret = amdgpu_amdkfd_validate(¶m, pd); + if (ret) { + pr_err("amdgpu: failed to validate PD\n"); + return ret; + } + + addr = amdgpu_bo_gpu_offset(vm->base.root.base.bo); + amdgpu_gmc_get_vm_pde(adev, -1, &addr, &flags); + vm->pd_phys_addr = addr; + + if (vm->base.use_cpu_for_update) { + ret = amdgpu_bo_kmap(pd, NULL); + if (ret) { + pr_err("amdgpu: failed to kmap PD, ret=%d\n", ret); + return ret; + } + } + + return 0; +} + +static int sync_vm_fence(struct amdgpu_device *adev, struct amdgpu_sync *sync, + struct dma_fence *f) +{ + int ret = amdgpu_sync_fence(adev, sync, f, false); + + /* Sync objects can't handle multiple GPUs (contexts) updating + * sync->last_vm_update. Fortunately we don't need it for + * KFD's purposes, so we can just drop that fence. + */ + if (sync->last_vm_update) { + dma_fence_put(sync->last_vm_update); + sync->last_vm_update = NULL; + } + + return ret; +} + +static int vm_update_pds(struct amdgpu_vm *vm, struct amdgpu_sync *sync) +{ + struct amdgpu_bo *pd = vm->root.base.bo; + struct amdgpu_device *adev = amdgpu_ttm_adev(pd->tbo.bdev); + int ret; + + ret = amdgpu_vm_update_directories(adev, vm); + if (ret) + return ret; + + return sync_vm_fence(adev, sync, vm->last_update); +} + +/* add_bo_to_vm - Add a BO to a VM + * + * Everything that needs to bo done only once when a BO is first added + * to a VM. It can later be mapped and unmapped many times without + * repeating these steps. + * + * 1. Allocate and initialize BO VA entry data structure + * 2. Add BO to the VM + * 3. Determine ASIC-specific PTE flags + * 4. Alloc page tables and directories if needed + * 4a. 
Validate new page tables and directories + */ +static int add_bo_to_vm(struct amdgpu_device *adev, struct kgd_mem *mem, + struct amdgpu_vm *avm, bool is_aql, + struct kfd_bo_va_list **p_bo_va_entry) +{ + int ret; + struct kfd_bo_va_list *bo_va_entry; + struct amdkfd_vm *kvm = container_of(avm, + struct amdkfd_vm, base); + struct amdgpu_bo *pd = avm->root.base.bo; + struct amdgpu_bo *bo = mem->bo; + uint64_t va = mem->va; + struct list_head *list_bo_va = &mem->bo_va_list; + unsigned long bo_size = bo->tbo.mem.size; + + if (!va) { + pr_err("Invalid VA when adding BO to VM\n"); + return -EINVAL; + } + + if (is_aql) + va += bo_size; + + bo_va_entry = kzalloc(sizeof(*bo_va_entry), GFP_KERNEL); + if (!bo_va_entry) + return -ENOMEM; + + pr_debug("\t add VA 0x%llx - 0x%llx to vm %p\n", va, + va + bo_size, avm); + + /* Add BO to VM internal data structures*/ + bo_va_entry->bo_va = amdgpu_vm_bo_add(adev, avm, bo); + if (!bo_va_entry->bo_va) { + ret = -EINVAL; + pr_err("Failed to add BO object to VM. ret == %d\n", + ret); + goto err_vmadd; + } + + bo_va_entry->va = va; + bo_va_entry->pte_flags = amdgpu_gmc_get_pte_flags(adev, + mem->mapping_flags); + bo_va_entry->kgd_dev = (void *)adev; + list_add(&bo_va_entry->bo_list, list_bo_va); + + if (p_bo_va_entry) + *p_bo_va_entry = bo_va_entry; + + /* Allocate new page tables if needed and validate + * them. Clearing of new page tables and validate need to wait + * on move fences. We don't want that to trigger the eviction + * fence, so remove it temporarily. + */ + amdgpu_amdkfd_remove_eviction_fence(pd, + kvm->process_info->eviction_fence, + NULL, NULL); + + ret = amdgpu_vm_alloc_pts(adev, avm, va, amdgpu_bo_size(bo)); + if (ret) { + pr_err("Failed to allocate pts, err=%d\n", ret); + goto err_alloc_pts; + } + + ret = vm_validate_pt_pd_bos(kvm); + if (ret) { + pr_err("validate_pt_pd_bos() failed\n"); + goto err_alloc_pts; + } + + /* Add the eviction fence back */ + amdgpu_bo_fence(pd, &kvm->process_info->eviction_fence->base, true); + + return 0; + +err_alloc_pts: + amdgpu_bo_fence(pd, &kvm->process_info->eviction_fence->base, true); + amdgpu_vm_bo_rmv(adev, bo_va_entry->bo_va); + list_del(&bo_va_entry->bo_list); +err_vmadd: + kfree(bo_va_entry); + return ret; +} + +static void remove_bo_from_vm(struct amdgpu_device *adev, + struct kfd_bo_va_list *entry, unsigned long size) +{ + pr_debug("\t remove VA 0x%llx - 0x%llx in entry %p\n", + entry->va, + entry->va + size, entry); + amdgpu_vm_bo_rmv(adev, entry->bo_va); + list_del(&entry->bo_list); + kfree(entry); +} + +static void add_kgd_mem_to_kfd_bo_list(struct kgd_mem *mem, + struct amdkfd_process_info *process_info) +{ + struct ttm_validate_buffer *entry = &mem->validate_list; + struct amdgpu_bo *bo = mem->bo; + + INIT_LIST_HEAD(&entry->head); + entry->shared = true; + entry->bo = &bo->tbo; + mutex_lock(&process_info->lock); + list_add_tail(&entry->head, &process_info->kfd_bo_list); + mutex_unlock(&process_info->lock); +} + +/* Reserving a BO and its page table BOs must happen atomically to + * avoid deadlocks. Some operations update multiple VMs at once. Track + * all the reservation info in a context structure. Optionally a sync + * object can track VM updates. 
+ */ +struct bo_vm_reservation_context { + struct amdgpu_bo_list_entry kfd_bo; /* BO list entry for the KFD BO */ + unsigned int n_vms; /* Number of VMs reserved */ + struct amdgpu_bo_list_entry *vm_pd; /* Array of VM BO list entries */ + struct ww_acquire_ctx ticket; /* Reservation ticket */ + struct list_head list, duplicates; /* BO lists */ + struct amdgpu_sync *sync; /* Pointer to sync object */ + bool reserved; /* Whether BOs are reserved */ +}; + +enum bo_vm_match { + BO_VM_NOT_MAPPED = 0, /* Match VMs where a BO is not mapped */ + BO_VM_MAPPED, /* Match VMs where a BO is mapped */ + BO_VM_ALL, /* Match all VMs a BO was added to */ +}; + +/** + * reserve_bo_and_vm - reserve a BO and a VM unconditionally. + * @mem: KFD BO structure. + * @vm: the VM to reserve. + * @ctx: the struct that will be used in unreserve_bo_and_vms(). + */ +static int reserve_bo_and_vm(struct kgd_mem *mem, + struct amdgpu_vm *vm, + struct bo_vm_reservation_context *ctx) +{ + struct amdgpu_bo *bo = mem->bo; + int ret; + + WARN_ON(!vm); + + ctx->reserved = false; + ctx->n_vms = 1; + ctx->sync = &mem->sync; + + INIT_LIST_HEAD(&ctx->list); + INIT_LIST_HEAD(&ctx->duplicates); + + ctx->vm_pd = kcalloc(ctx->n_vms, sizeof(*ctx->vm_pd), GFP_KERNEL); + if (!ctx->vm_pd) + return -ENOMEM; + + ctx->kfd_bo.robj = bo; + ctx->kfd_bo.priority = 0; + ctx->kfd_bo.tv.bo = &bo->tbo; + ctx->kfd_bo.tv.shared = true; + ctx->kfd_bo.user_pages = NULL; + list_add(&ctx->kfd_bo.tv.head, &ctx->list); + + amdgpu_vm_get_pd_bo(vm, &ctx->list, &ctx->vm_pd[0]); + + ret = ttm_eu_reserve_buffers(&ctx->ticket, &ctx->list, + false, &ctx->duplicates); + if (!ret) + ctx->reserved = true; + else { + pr_err("Failed to reserve buffers in ttm\n"); + kfree(ctx->vm_pd); + ctx->vm_pd = NULL; + } + + return ret; +} + +/** + * reserve_bo_and_cond_vms - reserve a BO and some VMs conditionally + * @mem: KFD BO structure. + * @vm: the VM to reserve. If NULL, then all VMs associated with the BO + * is used. Otherwise, a single VM associated with the BO. + * @map_type: the mapping status that will be used to filter the VMs. + * @ctx: the struct that will be used in unreserve_bo_and_vms(). + * + * Returns 0 for success, negative for failure. 
+ */ +static int reserve_bo_and_cond_vms(struct kgd_mem *mem, + struct amdgpu_vm *vm, enum bo_vm_match map_type, + struct bo_vm_reservation_context *ctx) +{ + struct amdgpu_bo *bo = mem->bo; + struct kfd_bo_va_list *entry; + unsigned int i; + int ret; + + ctx->reserved = false; + ctx->n_vms = 0; + ctx->vm_pd = NULL; + ctx->sync = &mem->sync; + + INIT_LIST_HEAD(&ctx->list); + INIT_LIST_HEAD(&ctx->duplicates); + + list_for_each_entry(entry, &mem->bo_va_list, bo_list) { + if ((vm && vm != entry->bo_va->base.vm) || + (entry->is_mapped != map_type + && map_type != BO_VM_ALL)) + continue; + + ctx->n_vms++; + } + + if (ctx->n_vms != 0) { + ctx->vm_pd = kcalloc(ctx->n_vms, sizeof(*ctx->vm_pd), + GFP_KERNEL); + if (!ctx->vm_pd) + return -ENOMEM; + } + + ctx->kfd_bo.robj = bo; + ctx->kfd_bo.priority = 0; + ctx->kfd_bo.tv.bo = &bo->tbo; + ctx->kfd_bo.tv.shared = true; + ctx->kfd_bo.user_pages = NULL; + list_add(&ctx->kfd_bo.tv.head, &ctx->list); + + i = 0; + list_for_each_entry(entry, &mem->bo_va_list, bo_list) { + if ((vm && vm != entry->bo_va->base.vm) || + (entry->is_mapped != map_type + && map_type != BO_VM_ALL)) + continue; + + amdgpu_vm_get_pd_bo(entry->bo_va->base.vm, &ctx->list, + &ctx->vm_pd[i]); + i++; + } + + ret = ttm_eu_reserve_buffers(&ctx->ticket, &ctx->list, + false, &ctx->duplicates); + if (!ret) + ctx->reserved = true; + else + pr_err("Failed to reserve buffers in ttm.\n"); + + if (ret) { + kfree(ctx->vm_pd); + ctx->vm_pd = NULL; + } + + return ret; +} + +/** + * unreserve_bo_and_vms - Unreserve BO and VMs from a reservation context + * @ctx: Reservation context to unreserve + * @wait: Optionally wait for a sync object representing pending VM updates + * @intr: Whether the wait is interruptible + * + * Also frees any resources allocated in + * reserve_bo_and_(cond_)vm(s). Returns the status from + * amdgpu_sync_wait. + */ +static int unreserve_bo_and_vms(struct bo_vm_reservation_context *ctx, + bool wait, bool intr) +{ + int ret = 0; + + if (wait) + ret = amdgpu_sync_wait(ctx->sync, intr); + + if (ctx->reserved) + ttm_eu_backoff_reservation(&ctx->ticket, &ctx->list); + kfree(ctx->vm_pd); + + ctx->sync = NULL; + + ctx->reserved = false; + ctx->vm_pd = NULL; + + return ret; +} + +static int unmap_bo_from_gpuvm(struct amdgpu_device *adev, + struct kfd_bo_va_list *entry, + struct amdgpu_sync *sync) +{ + struct amdgpu_bo_va *bo_va = entry->bo_va; + struct amdgpu_vm *vm = bo_va->base.vm; + struct amdkfd_vm *kvm = container_of(vm, struct amdkfd_vm, base); + struct amdgpu_bo *pd = vm->root.base.bo; + + /* Remove eviction fence from PD (and thereby from PTs too as + * they share the resv. object). Otherwise during PT update + * job (see amdgpu_vm_bo_update_mapping), eviction fence would + * get added to job->sync object and job execution would + * trigger the eviction fence. 
+ */ + amdgpu_amdkfd_remove_eviction_fence(pd, + kvm->process_info->eviction_fence, + NULL, NULL); + amdgpu_vm_bo_unmap(adev, bo_va, entry->va); + + amdgpu_vm_clear_freed(adev, vm, &bo_va->last_pt_update); + + /* Add the eviction fence back */ + amdgpu_bo_fence(pd, &kvm->process_info->eviction_fence->base, true); + + sync_vm_fence(adev, sync, bo_va->last_pt_update); + + return 0; +} + +static int update_gpuvm_pte(struct amdgpu_device *adev, + struct kfd_bo_va_list *entry, + struct amdgpu_sync *sync) +{ + int ret; + struct amdgpu_vm *vm; + struct amdgpu_bo_va *bo_va; + struct amdgpu_bo *bo; + + bo_va = entry->bo_va; + vm = bo_va->base.vm; + bo = bo_va->base.bo; + + /* Update the page tables */ + ret = amdgpu_vm_bo_update(adev, bo_va, false); + if (ret) { + pr_err("amdgpu_vm_bo_update failed\n"); + return ret; + } + + return sync_vm_fence(adev, sync, bo_va->last_pt_update); +} + +static int map_bo_to_gpuvm(struct amdgpu_device *adev, + struct kfd_bo_va_list *entry, struct amdgpu_sync *sync) +{ + int ret; + + /* Set virtual address for the allocation */ + ret = amdgpu_vm_bo_map(adev, entry->bo_va, entry->va, 0, + amdgpu_bo_size(entry->bo_va->base.bo), + entry->pte_flags); + if (ret) { + pr_err("Failed to map VA 0x%llx in vm. ret %d\n", + entry->va, ret); + return ret; + } + + ret = update_gpuvm_pte(adev, entry, sync); + if (ret) { + pr_err("update_gpuvm_pte() failed\n"); + goto update_gpuvm_pte_failed; + } + + return 0; + +update_gpuvm_pte_failed: + unmap_bo_from_gpuvm(adev, entry, sync); + return ret; +} + +static int process_validate_vms(struct amdkfd_process_info *process_info) +{ + struct amdkfd_vm *peer_vm; + int ret; + + list_for_each_entry(peer_vm, &process_info->vm_list_head, + vm_list_node) { + ret = vm_validate_pt_pd_bos(peer_vm); + if (ret) + return ret; + } + + return 0; +} + +static int process_update_pds(struct amdkfd_process_info *process_info, + struct amdgpu_sync *sync) +{ + struct amdkfd_vm *peer_vm; + int ret; + + list_for_each_entry(peer_vm, &process_info->vm_list_head, + vm_list_node) { + ret = vm_update_pds(&peer_vm->base, sync); + if (ret) + return ret; + } + + return 0; +} + +int amdgpu_amdkfd_gpuvm_create_process_vm(struct kgd_dev *kgd, void **vm, + void **process_info, + struct dma_fence **ef) +{ + int ret; + struct amdkfd_vm *new_vm; + struct amdkfd_process_info *info; + struct amdgpu_device *adev = get_amdgpu_device(kgd); + + new_vm = kzalloc(sizeof(*new_vm), GFP_KERNEL); + if (!new_vm) + return -ENOMEM; + + /* Initialize the VM context, allocate the page directory and zero it */ + ret = amdgpu_vm_init(adev, &new_vm->base, AMDGPU_VM_CONTEXT_COMPUTE, 0); + if (ret) { + pr_err("Failed init vm ret %d\n", ret); + goto vm_init_fail; + } + new_vm->adev = adev; + + if (!*process_info) { + info = kzalloc(sizeof(*info), GFP_KERNEL); + if (!info) { + ret = -ENOMEM; + goto alloc_process_info_fail; + } + + mutex_init(&info->lock); + INIT_LIST_HEAD(&info->vm_list_head); + INIT_LIST_HEAD(&info->kfd_bo_list); + + info->eviction_fence = + amdgpu_amdkfd_fence_create(dma_fence_context_alloc(1), + current->mm); + if (!info->eviction_fence) { + pr_err("Failed to create eviction fence\n"); + goto create_evict_fence_fail; + } + + *process_info = info; + *ef = dma_fence_get(&info->eviction_fence->base); + } + + new_vm->process_info = *process_info; + + mutex_lock(&new_vm->process_info->lock); + list_add_tail(&new_vm->vm_list_node, + &(new_vm->process_info->vm_list_head)); + new_vm->process_info->n_vms++; + mutex_unlock(&new_vm->process_info->lock); + + *vm = (void *) new_vm; + + 
pr_debug("Created process vm %p\n", *vm); + + return ret; + +create_evict_fence_fail: + mutex_destroy(&info->lock); + kfree(info); +alloc_process_info_fail: + amdgpu_vm_fini(adev, &new_vm->base); +vm_init_fail: + kfree(new_vm); + return ret; + +} + +void amdgpu_amdkfd_gpuvm_destroy_process_vm(struct kgd_dev *kgd, void *vm) +{ + struct amdgpu_device *adev = get_amdgpu_device(kgd); + struct amdkfd_vm *kfd_vm = (struct amdkfd_vm *) vm; + struct amdgpu_vm *avm = &kfd_vm->base; + struct amdgpu_bo *pd; + struct amdkfd_process_info *process_info; + + if (WARN_ON(!kgd || !vm)) + return; + + pr_debug("Destroying process vm %p\n", vm); + /* Release eviction fence from PD */ + pd = avm->root.base.bo; + amdgpu_bo_reserve(pd, false); + amdgpu_bo_fence(pd, NULL, false); + amdgpu_bo_unreserve(pd); + + process_info = kfd_vm->process_info; + + mutex_lock(&process_info->lock); + process_info->n_vms--; + list_del(&kfd_vm->vm_list_node); + mutex_unlock(&process_info->lock); + + /* Release per-process resources */ + if (!process_info->n_vms) { + WARN_ON(!list_empty(&process_info->kfd_bo_list)); + + dma_fence_put(&process_info->eviction_fence->base); + mutex_destroy(&process_info->lock); + kfree(process_info); + } + + /* Release the VM context */ + amdgpu_vm_fini(adev, avm); + kfree(vm); +} + +uint32_t amdgpu_amdkfd_gpuvm_get_process_page_dir(void *vm) +{ + struct amdkfd_vm *avm = (struct amdkfd_vm *)vm; + + return avm->pd_phys_addr >> AMDGPU_GPU_PAGE_SHIFT; +} + +int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu( + struct kgd_dev *kgd, uint64_t va, uint64_t size, + void *vm, struct kgd_mem **mem, + uint64_t *offset, uint32_t flags) +{ + struct amdgpu_device *adev = get_amdgpu_device(kgd); + struct amdkfd_vm *kfd_vm = (struct amdkfd_vm *)vm; + struct amdgpu_bo *bo; + int byte_align; + u32 alloc_domain; + u64 alloc_flags; + uint32_t mapping_flags; + int ret; + + /* + * Check on which domain to allocate BO + */ + if (flags & ALLOC_MEM_FLAGS_VRAM) { + alloc_domain = AMDGPU_GEM_DOMAIN_VRAM; + alloc_flags = AMDGPU_GEM_CREATE_VRAM_CLEARED; + alloc_flags |= (flags & ALLOC_MEM_FLAGS_PUBLIC) ? + AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED : + AMDGPU_GEM_CREATE_NO_CPU_ACCESS; + } else if (flags & ALLOC_MEM_FLAGS_GTT) { + alloc_domain = AMDGPU_GEM_DOMAIN_GTT; + alloc_flags = 0; + } else { + return -EINVAL; + } + + *mem = kzalloc(sizeof(struct kgd_mem), GFP_KERNEL); + if (!*mem) + return -ENOMEM; + INIT_LIST_HEAD(&(*mem)->bo_va_list); + mutex_init(&(*mem)->lock); + (*mem)->aql_queue = !!(flags & ALLOC_MEM_FLAGS_AQL_QUEUE_MEM); + + /* Workaround for AQL queue wraparound bug. Map the same + * memory twice. That means we only actually allocate half + * the memory. + */ + if ((*mem)->aql_queue) + size = size >> 1; + + /* Workaround for TLB bug on older VI chips */ + byte_align = (adev->family == AMDGPU_FAMILY_VI && + adev->asic_type != CHIP_FIJI && + adev->asic_type != CHIP_POLARIS10 && + adev->asic_type != CHIP_POLARIS11) ? 
+ VI_BO_SIZE_ALIGN : 1; + + mapping_flags = AMDGPU_VM_PAGE_READABLE; + if (flags & ALLOC_MEM_FLAGS_WRITABLE) + mapping_flags |= AMDGPU_VM_PAGE_WRITEABLE; + if (flags & ALLOC_MEM_FLAGS_EXECUTABLE) + mapping_flags |= AMDGPU_VM_PAGE_EXECUTABLE; + if (flags & ALLOC_MEM_FLAGS_COHERENT) + mapping_flags |= AMDGPU_VM_MTYPE_UC; + else + mapping_flags |= AMDGPU_VM_MTYPE_NC; + (*mem)->mapping_flags = mapping_flags; + + amdgpu_sync_create(&(*mem)->sync); + + ret = amdgpu_amdkfd_reserve_system_mem_limit(adev, size, alloc_domain); + if (ret) { + pr_debug("Insufficient system memory\n"); + goto err_reserve_system_mem; + } + + pr_debug("\tcreate BO VA 0x%llx size 0x%llx domain %s\n", + va, size, domain_string(alloc_domain)); + + ret = amdgpu_bo_create(adev, size, byte_align, false, + alloc_domain, alloc_flags, NULL, NULL, &bo); + if (ret) { + pr_debug("Failed to create BO on domain %s. ret %d\n", + domain_string(alloc_domain), ret); + goto err_bo_create; + } + bo->kfd_bo = *mem; + (*mem)->bo = bo; + + (*mem)->va = va; + (*mem)->domain = alloc_domain; + (*mem)->mapped_to_gpu_memory = 0; + (*mem)->process_info = kfd_vm->process_info; + add_kgd_mem_to_kfd_bo_list(*mem, kfd_vm->process_info); + + if (offset) + *offset = amdgpu_bo_mmap_offset(bo); + + return 0; + +err_bo_create: + unreserve_system_mem_limit(adev, size, alloc_domain); +err_reserve_system_mem: + mutex_destroy(&(*mem)->lock); + kfree(*mem); + return ret; +} + +int amdgpu_amdkfd_gpuvm_free_memory_of_gpu( + struct kgd_dev *kgd, struct kgd_mem *mem) +{ + struct amdkfd_process_info *process_info = mem->process_info; + unsigned long bo_size = mem->bo->tbo.mem.size; + struct kfd_bo_va_list *entry, *tmp; + struct bo_vm_reservation_context ctx; + struct ttm_validate_buffer *bo_list_entry; + int ret; + + mutex_lock(&mem->lock); + + if (mem->mapped_to_gpu_memory > 0) { + pr_debug("BO VA 0x%llx size 0x%lx is still mapped.\n", + mem->va, bo_size); + mutex_unlock(&mem->lock); + return -EBUSY; + } + + mutex_unlock(&mem->lock); + /* lock is not needed after this, since mem is unused and will + * be freed anyway + */ + + /* Make sure restore workers don't access the BO any more */ + bo_list_entry = &mem->validate_list; + mutex_lock(&process_info->lock); + list_del(&bo_list_entry->head); + mutex_unlock(&process_info->lock); + + ret = reserve_bo_and_cond_vms(mem, NULL, BO_VM_ALL, &ctx); + if (unlikely(ret)) + return ret; + + /* The eviction fence should be removed by the last unmap. 
+ * TODO: Log an error condition if the bo still has the eviction fence + * attached + */ + amdgpu_amdkfd_remove_eviction_fence(mem->bo, + process_info->eviction_fence, + NULL, NULL); + pr_debug("Release VA 0x%llx - 0x%llx\n", mem->va, + mem->va + bo_size * (1 + mem->aql_queue)); + + /* Remove from VM internal data structures */ + list_for_each_entry_safe(entry, tmp, &mem->bo_va_list, bo_list) + remove_bo_from_vm((struct amdgpu_device *)entry->kgd_dev, + entry, bo_size); + + ret = unreserve_bo_and_vms(&ctx, false, false); + + /* Free the sync object */ + amdgpu_sync_free(&mem->sync); + + /* Free the BO*/ + amdgpu_bo_unref(&mem->bo); + mutex_destroy(&mem->lock); + kfree(mem); + + return ret; +} + +int amdgpu_amdkfd_gpuvm_map_memory_to_gpu( + struct kgd_dev *kgd, struct kgd_mem *mem, void *vm) +{ + struct amdgpu_device *adev = get_amdgpu_device(kgd); + struct amdkfd_vm *kfd_vm = (struct amdkfd_vm *)vm; + int ret; + struct amdgpu_bo *bo; + uint32_t domain; + struct kfd_bo_va_list *entry; + struct bo_vm_reservation_context ctx; + struct kfd_bo_va_list *bo_va_entry = NULL; + struct kfd_bo_va_list *bo_va_entry_aql = NULL; + unsigned long bo_size; + + /* Make sure restore is not running concurrently. + */ + mutex_lock(&mem->process_info->lock); + + mutex_lock(&mem->lock); + + bo = mem->bo; + + if (!bo) { + pr_err("Invalid BO when mapping memory to GPU\n"); + ret = -EINVAL; + goto out; + } + + domain = mem->domain; + bo_size = bo->tbo.mem.size; + + pr_debug("Map VA 0x%llx - 0x%llx to vm %p domain %s\n", + mem->va, + mem->va + bo_size * (1 + mem->aql_queue), + vm, domain_string(domain)); + + ret = reserve_bo_and_vm(mem, vm, &ctx); + if (unlikely(ret)) + goto out; + + if (check_if_add_bo_to_vm((struct amdgpu_vm *)vm, mem)) { + ret = add_bo_to_vm(adev, mem, (struct amdgpu_vm *)vm, false, + &bo_va_entry); + if (ret) + goto add_bo_to_vm_failed; + if (mem->aql_queue) { + ret = add_bo_to_vm(adev, mem, (struct amdgpu_vm *)vm, + true, &bo_va_entry_aql); + if (ret) + goto add_bo_to_vm_failed_aql; + } + } else { + ret = vm_validate_pt_pd_bos((struct amdkfd_vm *)vm); + if (unlikely(ret)) + goto add_bo_to_vm_failed; + } + + if (mem->mapped_to_gpu_memory == 0) { + /* Validate BO only once. The eviction fence gets added to BO + * the first time it is mapped. Validate will wait for all + * background evictions to complete. 
+ */ + ret = amdgpu_amdkfd_bo_validate(bo, domain, true); + if (ret) { + pr_debug("Validate failed\n"); + goto map_bo_to_gpuvm_failed; + } + } + + list_for_each_entry(entry, &mem->bo_va_list, bo_list) { + if (entry->bo_va->base.vm == vm && !entry->is_mapped) { + pr_debug("\t map VA 0x%llx - 0x%llx in entry %p\n", + entry->va, entry->va + bo_size, + entry); + + ret = map_bo_to_gpuvm(adev, entry, ctx.sync); + if (ret) { + pr_err("Failed to map radeon bo to gpuvm\n"); + goto map_bo_to_gpuvm_failed; + } + + ret = vm_update_pds(vm, ctx.sync); + if (ret) { + pr_err("Failed to update page directories\n"); + goto map_bo_to_gpuvm_failed; + } + + entry->is_mapped = true; + mem->mapped_to_gpu_memory++; + pr_debug("\t INC mapping count %d\n", + mem->mapped_to_gpu_memory); + } + } + + if (!amdgpu_ttm_tt_get_usermm(bo->tbo.ttm) && !bo->pin_count) + amdgpu_bo_fence(bo, + &kfd_vm->process_info->eviction_fence->base, + true); + ret = unreserve_bo_and_vms(&ctx, false, false); + + goto out; + +map_bo_to_gpuvm_failed: + if (bo_va_entry_aql) + remove_bo_from_vm(adev, bo_va_entry_aql, bo_size); +add_bo_to_vm_failed_aql: + if (bo_va_entry) + remove_bo_from_vm(adev, bo_va_entry, bo_size); +add_bo_to_vm_failed: + unreserve_bo_and_vms(&ctx, false, false); +out: + mutex_unlock(&mem->process_info->lock); + mutex_unlock(&mem->lock); + return ret; +} + +int amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu( + struct kgd_dev *kgd, struct kgd_mem *mem, void *vm) +{ + struct amdgpu_device *adev = get_amdgpu_device(kgd); + struct amdkfd_process_info *process_info = + ((struct amdkfd_vm *)vm)->process_info; + unsigned long bo_size = mem->bo->tbo.mem.size; + struct kfd_bo_va_list *entry; + struct bo_vm_reservation_context ctx; + int ret; + + mutex_lock(&mem->lock); + + ret = reserve_bo_and_cond_vms(mem, vm, BO_VM_MAPPED, &ctx); + if (unlikely(ret)) + goto out; + /* If no VMs were reserved, it means the BO wasn't actually mapped */ + if (ctx.n_vms == 0) { + ret = -EINVAL; + goto unreserve_out; + } + + ret = vm_validate_pt_pd_bos((struct amdkfd_vm *)vm); + if (unlikely(ret)) + goto unreserve_out; + + pr_debug("Unmap VA 0x%llx - 0x%llx from vm %p\n", + mem->va, + mem->va + bo_size * (1 + mem->aql_queue), + vm); + + list_for_each_entry(entry, &mem->bo_va_list, bo_list) { + if (entry->bo_va->base.vm == vm && entry->is_mapped) { + pr_debug("\t unmap VA 0x%llx - 0x%llx from entry %p\n", + entry->va, + entry->va + bo_size, + entry); + + ret = unmap_bo_from_gpuvm(adev, entry, ctx.sync); + if (ret == 0) { + entry->is_mapped = false; + } else { + pr_err("failed to unmap VA 0x%llx\n", + mem->va); + goto unreserve_out; + } + + mem->mapped_to_gpu_memory--; + pr_debug("\t DEC mapping count %d\n", + mem->mapped_to_gpu_memory); + } + } + + /* If BO is unmapped from all VMs, unfence it. It can be evicted if + * required. 
+ */ + if (mem->mapped_to_gpu_memory == 0 && + !amdgpu_ttm_tt_get_usermm(mem->bo->tbo.ttm) && !mem->bo->pin_count) + amdgpu_amdkfd_remove_eviction_fence(mem->bo, + process_info->eviction_fence, + NULL, NULL); + +unreserve_out: + unreserve_bo_and_vms(&ctx, false, false); +out: + mutex_unlock(&mem->lock); + return ret; +} + +int amdgpu_amdkfd_gpuvm_sync_memory( + struct kgd_dev *kgd, struct kgd_mem *mem, bool intr) +{ + struct amdgpu_sync sync; + int ret; + + amdgpu_sync_create(&sync); + + mutex_lock(&mem->lock); + amdgpu_sync_clone(&mem->sync, &sync); + mutex_unlock(&mem->lock); + + ret = amdgpu_sync_wait(&sync, intr); + amdgpu_sync_free(&sync); + return ret; +} + +int amdgpu_amdkfd_gpuvm_map_gtt_bo_to_kernel(struct kgd_dev *kgd, + struct kgd_mem *mem, void **kptr, uint64_t *size) +{ + int ret; + struct amdgpu_bo *bo = mem->bo; + + if (amdgpu_ttm_tt_get_usermm(bo->tbo.ttm)) { + pr_err("userptr can't be mapped to kernel\n"); + return -EINVAL; + } + + /* delete kgd_mem from kfd_bo_list to avoid re-validating + * this BO in BO's restoring after eviction. + */ + mutex_lock(&mem->process_info->lock); + + ret = amdgpu_bo_reserve(bo, true); + if (ret) { + pr_err("Failed to reserve bo. ret %d\n", ret); + goto bo_reserve_failed; + } + + ret = amdgpu_bo_pin(bo, AMDGPU_GEM_DOMAIN_GTT, NULL); + if (ret) { + pr_err("Failed to pin bo. ret %d\n", ret); + goto pin_failed; + } + + ret = amdgpu_bo_kmap(bo, kptr); + if (ret) { + pr_err("Failed to map bo to kernel. ret %d\n", ret); + goto kmap_failed; + } + + amdgpu_amdkfd_remove_eviction_fence( + bo, mem->process_info->eviction_fence, NULL, NULL); + list_del_init(&mem->validate_list.head); + + if (size) + *size = amdgpu_bo_size(bo); + + amdgpu_bo_unreserve(bo); + + mutex_unlock(&mem->process_info->lock); + return 0; + +kmap_failed: + amdgpu_bo_unpin(bo); +pin_failed: + amdgpu_bo_unreserve(bo); +bo_reserve_failed: + mutex_unlock(&mem->process_info->lock); + + return ret; +} + +/** amdgpu_amdkfd_gpuvm_restore_process_bos - Restore all BOs for the given + * KFD process identified by process_info + * + * @process_info: amdkfd_process_info of the KFD process + * + * After memory eviction, restore thread calls this function. The function + * should be called when the Process is still valid. BO restore involves - + * + * 1. Release old eviction fence and create new one + * 2. Get two copies of PD BO list from all the VMs. Keep one copy as pd_list. + * 3 Use the second PD list and kfd_bo_list to create a list (ctx.list) of + * BOs that need to be reserved. + * 4. Reserve all the BOs + * 5. Validate of PD and PT BOs. + * 6. Validate all KFD BOs using kfd_bo_list and Map them and add new fence + * 7. Add fence to all PD and PT BOs. + * 8. 
Unreserve all BOs + */ +int amdgpu_amdkfd_gpuvm_restore_process_bos(void *info, struct dma_fence **ef) +{ + struct amdgpu_bo_list_entry *pd_bo_list; + struct amdkfd_process_info *process_info = info; + struct amdkfd_vm *peer_vm; + struct kgd_mem *mem; + struct bo_vm_reservation_context ctx; + struct amdgpu_amdkfd_fence *new_fence; + int ret = 0, i; + struct list_head duplicate_save; + struct amdgpu_sync sync_obj; + + INIT_LIST_HEAD(&duplicate_save); + INIT_LIST_HEAD(&ctx.list); + INIT_LIST_HEAD(&ctx.duplicates); + + pd_bo_list = kcalloc(process_info->n_vms, + sizeof(struct amdgpu_bo_list_entry), + GFP_KERNEL); + if (!pd_bo_list) + return -ENOMEM; + + i = 0; + mutex_lock(&process_info->lock); + list_for_each_entry(peer_vm, &process_info->vm_list_head, + vm_list_node) + amdgpu_vm_get_pd_bo(&peer_vm->base, &ctx.list, + &pd_bo_list[i++]); + + /* Reserve all BOs and page tables/directory. Add all BOs from + * kfd_bo_list to ctx.list + */ + list_for_each_entry(mem, &process_info->kfd_bo_list, + validate_list.head) { + + list_add_tail(&mem->resv_list.head, &ctx.list); + mem->resv_list.bo = mem->validate_list.bo; + mem->resv_list.shared = mem->validate_list.shared; + } + + ret = ttm_eu_reserve_buffers(&ctx.ticket, &ctx.list, + false, &duplicate_save); + if (ret) { + pr_debug("Memory eviction: TTM Reserve Failed. Try again\n"); + goto ttm_reserve_fail; + } + + amdgpu_sync_create(&sync_obj); + + /* Validate PDs and PTs */ + ret = process_validate_vms(process_info); + if (ret) + goto validate_map_fail; + + /* Wait for PD/PTs validate to finish */ + /* FIXME: I think this isn't needed */ + list_for_each_entry(peer_vm, &process_info->vm_list_head, + vm_list_node) { + struct amdgpu_bo *bo = peer_vm->base.root.base.bo; + + ttm_bo_wait(&bo->tbo, false, false); + } + + /* Validate BOs and map them to GPUVM (update VM page tables). */ + list_for_each_entry(mem, &process_info->kfd_bo_list, + validate_list.head) { + + struct amdgpu_bo *bo = mem->bo; + uint32_t domain = mem->domain; + struct kfd_bo_va_list *bo_va_entry; + + ret = amdgpu_amdkfd_bo_validate(bo, domain, false); + if (ret) { + pr_debug("Memory eviction: Validate BOs failed. Try again\n"); + goto validate_map_fail; + } + + list_for_each_entry(bo_va_entry, &mem->bo_va_list, + bo_list) { + ret = update_gpuvm_pte((struct amdgpu_device *) + bo_va_entry->kgd_dev, + bo_va_entry, + &sync_obj); + if (ret) { + pr_debug("Memory eviction: update PTE failed. Try again\n"); + goto validate_map_fail; + } + } + } + + /* Update page directories */ + ret = process_update_pds(process_info, &sync_obj); + if (ret) { + pr_debug("Memory eviction: update PDs failed. Try again\n"); + goto validate_map_fail; + } + + amdgpu_sync_wait(&sync_obj, false); + + /* Release old eviction fence and create new one, because fence only + * goes from unsignaled to signaled, fence cannot be reused. + * Use context and mm from the old fence. 
+ */ + new_fence = amdgpu_amdkfd_fence_create( + process_info->eviction_fence->base.context, + process_info->eviction_fence->mm); + if (!new_fence) { + pr_err("Failed to create eviction fence\n"); + ret = -ENOMEM; + goto validate_map_fail; + } + dma_fence_put(&process_info->eviction_fence->base); + process_info->eviction_fence = new_fence; + *ef = dma_fence_get(&new_fence->base); + + /* Wait for validate to finish and attach new eviction fence */ + list_for_each_entry(mem, &process_info->kfd_bo_list, + validate_list.head) + ttm_bo_wait(&mem->bo->tbo, false, false); + list_for_each_entry(mem, &process_info->kfd_bo_list, + validate_list.head) + amdgpu_bo_fence(mem->bo, + &process_info->eviction_fence->base, true); + + /* Attach eviction fence to PD / PT BOs */ + list_for_each_entry(peer_vm, &process_info->vm_list_head, + vm_list_node) { + struct amdgpu_bo *bo = peer_vm->base.root.base.bo; + + amdgpu_bo_fence(bo, &process_info->eviction_fence->base, true); + } + +validate_map_fail: + ttm_eu_backoff_reservation(&ctx.ticket, &ctx.list); + amdgpu_sync_free(&sync_obj); +ttm_reserve_fail: + mutex_unlock(&process_info->lock); + kfree(pd_bo_list); + return ret; +} diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c index 216799ccb545..9157745fce14 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c @@ -36,6 +36,7 @@ #include <drm/drm_cache.h> #include "amdgpu.h" #include "amdgpu_trace.h" +#include "amdgpu_amdkfd.h" static bool amdgpu_need_backup(struct amdgpu_device *adev) { @@ -54,6 +55,9 @@ static void amdgpu_ttm_bo_destroy(struct ttm_buffer_object *tbo) struct amdgpu_device *adev = amdgpu_ttm_adev(tbo->bdev); struct amdgpu_bo *bo = ttm_to_amdgpu_bo(tbo); + if (bo->kfd_bo) + amdgpu_amdkfd_unreserve_system_memory_limit(bo); + amdgpu_bo_kunmap(bo); drm_gem_object_release(&bo->gem_base); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h index 1cef944ef98d..d4dbfe1f842e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h @@ -92,6 +92,8 @@ struct amdgpu_bo { struct list_head mn_list; struct list_head shadow_list; }; + + struct kgd_mem *kfd_bo; }; static inline struct amdgpu_bo *ttm_to_amdgpu_bo(struct ttm_buffer_object *tbo) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h index 1d0d250cbfdf..1a5911882657 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h @@ -26,6 +26,7 @@ #include <drm/amdgpu_drm.h> #include <drm/gpu_scheduler.h> +#include <drm/drm_print.h> /* max number of rings */ #define AMDGPU_MAX_RINGS 18 @@ -35,8 +36,9 @@ #define AMDGPU_MAX_UVD_ENC_RINGS 2 /* some special values for the owner field */ -#define AMDGPU_FENCE_OWNER_UNDEFINED ((void*)0ul) -#define AMDGPU_FENCE_OWNER_VM ((void*)1ul) +#define AMDGPU_FENCE_OWNER_UNDEFINED ((void *)0ul) +#define AMDGPU_FENCE_OWNER_VM ((void *)1ul) +#define AMDGPU_FENCE_OWNER_KFD ((void *)2ul) #define AMDGPU_FENCE_FLAG_64BIT (1 << 0) #define AMDGPU_FENCE_FLAG_INT (1 << 1) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c index df65c66dc956..2d6f5ec77a68 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c @@ -31,6 +31,7 @@ #include <drm/drmP.h> #include "amdgpu.h" #include "amdgpu_trace.h" +#include "amdgpu_amdkfd.h" struct amdgpu_sync_entry { struct hlist_node 
node; @@ -85,11 +86,20 @@ static bool amdgpu_sync_same_dev(struct amdgpu_device *adev, */ static void *amdgpu_sync_get_owner(struct dma_fence *f) { - struct drm_sched_fence *s_fence = to_drm_sched_fence(f); + struct drm_sched_fence *s_fence; + struct amdgpu_amdkfd_fence *kfd_fence; + + if (!f) + return AMDGPU_FENCE_OWNER_UNDEFINED; + s_fence = to_drm_sched_fence(f); if (s_fence) return s_fence->owner; + kfd_fence = to_amdgpu_amdkfd_fence(f); + if (kfd_fence) + return AMDGPU_FENCE_OWNER_KFD; + return AMDGPU_FENCE_OWNER_UNDEFINED; } @@ -204,11 +214,18 @@ int amdgpu_sync_resv(struct amdgpu_device *adev, for (i = 0; i < flist->shared_count; ++i) { f = rcu_dereference_protected(flist->shared[i], reservation_object_held(resv)); + /* We only want to trigger KFD eviction fences on + * evict or move jobs. Skip KFD fences otherwise. + */ + fence_owner = amdgpu_sync_get_owner(f); + if (fence_owner == AMDGPU_FENCE_OWNER_KFD && + owner != AMDGPU_FENCE_OWNER_UNDEFINED) + continue; + if (amdgpu_sync_same_dev(adev, f)) { /* VM updates are only interesting * for other VM updates and moves. */ - fence_owner = amdgpu_sync_get_owner(f); if ((owner != AMDGPU_FENCE_OWNER_UNDEFINED) && (fence_owner != AMDGPU_FENCE_OWNER_UNDEFINED) && ((owner == AMDGPU_FENCE_OWNER_VM) != @@ -305,6 +322,41 @@ struct dma_fence *amdgpu_sync_get_fence(struct amdgpu_sync *sync, bool *explicit return NULL; } +/** + * amdgpu_sync_clone - clone a sync object + * + * @source: sync object to clone + * @clone: pointer to destination sync object + * + * Adds references to all unsignaled fences in @source to @clone. Also + * removes signaled fences from @source while at it. + */ +int amdgpu_sync_clone(struct amdgpu_sync *source, struct amdgpu_sync *clone) +{ + struct amdgpu_sync_entry *e; + struct hlist_node *tmp; + struct dma_fence *f; + int i, r; + + hash_for_each_safe(source->fences, i, tmp, e, node) { + f = e->fence; + if (!dma_fence_is_signaled(f)) { + r = amdgpu_sync_fence(NULL, clone, f, e->explicit); + if (r) + return r; + } else { + hash_del(&e->node); + dma_fence_put(f); + kmem_cache_free(amdgpu_sync_slab, e); + } + } + + dma_fence_put(clone->last_vm_update); + clone->last_vm_update = dma_fence_get(source->last_vm_update); + + return 0; +} + int amdgpu_sync_wait(struct amdgpu_sync *sync, bool intr) { struct amdgpu_sync_entry *e; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.h index 7aba38d5c9df..10cf23a57f17 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.h @@ -50,6 +50,7 @@ int amdgpu_sync_resv(struct amdgpu_device *adev, struct dma_fence *amdgpu_sync_peek_fence(struct amdgpu_sync *sync, struct amdgpu_ring *ring); struct dma_fence *amdgpu_sync_get_fence(struct amdgpu_sync *sync, bool *explicit); +int amdgpu_sync_clone(struct amdgpu_sync *source, struct amdgpu_sync *clone); int amdgpu_sync_wait(struct amdgpu_sync *sync, bool intr); void amdgpu_sync_free(struct amdgpu_sync *sync); int amdgpu_sync_init(void); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c index 28c33d711bab..c2fae04d769a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c @@ -46,6 +46,7 @@ #include "amdgpu.h" #include "amdgpu_object.h" #include "amdgpu_trace.h" +#include "amdgpu_amdkfd.h" #include "bif/bif_4_1_d.h" #define DRM_FILE_PAGE_OFFSET (0x100000000ULL >> PAGE_SHIFT) @@ -258,6 +259,13 @@ static int amdgpu_verify_access(struct ttm_buffer_object *bo, struct file *filp) { 
struct amdgpu_bo *abo = ttm_to_amdgpu_bo(bo); + /* + * Don't verify access for KFD BOs. They don't have a GEM + * object associated with them. + */ + if (abo->kfd_bo) + return 0; + if (amdgpu_ttm_tt_get_usermm(bo->ttm)) return -EPERM; return drm_vma_node_verify_access(&abo->gem_base.vma_node, @@ -1171,6 +1179,23 @@ static bool amdgpu_ttm_bo_eviction_valuable(struct ttm_buffer_object *bo, { unsigned long num_pages = bo->mem.num_pages; struct drm_mm_node *node = bo->mem.mm_node; + struct reservation_object_list *flist; + struct dma_fence *f; + int i; + + /* If bo is a KFD BO, check if the bo belongs to the current process. + * If true, then return false as any KFD process needs all its BOs to + * be resident to run successfully + */ + flist = reservation_object_get_list(bo->resv); + if (flist) { + for (i = 0; i < flist->shared_count; ++i) { + f = rcu_dereference_protected(flist->shared[i], + reservation_object_held(bo->resv)); + if (amdkfd_fence_check_mm(f, current->mm)) + return false; + } + } switch (bo->mem.mem_type) { case TTM_PL_TT: diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h index fabf44b262be..e9841518343e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h @@ -28,6 +28,7 @@ #include <linux/kfifo.h> #include <linux/rbtree.h> #include <drm/gpu_scheduler.h> +#include <drm/drm_file.h> #include "amdgpu_sync.h" #include "amdgpu_ring.h" |
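Taken together, the new amdgpu_amdkfd_gpuvm_* entry points give amdkfd a full allocate/map/sync/unmap/free lifecycle for per-process GPUVM memory. The sketch below strings those calls together in the order the patch expects. It is illustrative only: kgd and vm are assumed to come from an already initialized KFD process (via the create_process_vm path whose tail appears above), the VA and size are caller-chosen, and the helper name is invented.

static int example_alloc_and_map(struct kgd_dev *kgd, void *vm,
				 uint64_t va, uint64_t size)
{
	struct kgd_mem *mem;
	uint64_t offset;
	int ret;

	/* Allocate a writable GTT BO in this process VM */
	ret = amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(kgd, va, size, vm,
						      &mem, &offset,
						      ALLOC_MEM_FLAGS_GTT |
						      ALLOC_MEM_FLAGS_WRITABLE);
	if (ret)
		return ret;

	/* Validate the BO and write the PTEs; bumps mapped_to_gpu_memory */
	ret = amdgpu_amdkfd_gpuvm_map_memory_to_gpu(kgd, mem, vm);
	if (ret)
		goto free_mem;

	/* Wait (interruptibly) for the PT/PD updates gathered in mem->sync */
	ret = amdgpu_amdkfd_gpuvm_sync_memory(kgd, mem, true);
	if (ret)
		goto unmap;

	return 0;

unmap:
	amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu(kgd, mem, vm);
free_mem:
	amdgpu_amdkfd_gpuvm_free_memory_of_gpu(kgd, mem);
	return ret;
}

The unwind order matters: free_memory_of_gpu returns -EBUSY while mapped_to_gpu_memory is non-zero, so a BO must be unmapped from every VM before it can be freed.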

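The eviction fence introduced by this series is single-shot: once TTM signals it to evict a KFD BO it can never return to the unsignaled state, which is why amdgpu_amdkfd_gpuvm_restore_process_bos ends by minting a replacement fence and handing it back through *ef. A KFD-side restore worker would therefore look roughly like the sketch below; the my_kfd_process structure and its field names are invented for illustration, and only restore_process_bos itself comes from this patch.

struct my_kfd_process {			/* hypothetical container */
	void *kgd_process_info;		/* opaque amdkfd_process_info */
	struct dma_fence *ef;		/* current eviction fence */
	struct delayed_work restore_work;
};

static void my_restore_worker(struct work_struct *work)
{
	struct my_kfd_process *p = container_of(work, struct my_kfd_process,
						restore_work.work);
	struct dma_fence *new_ef = NULL;

	if (amdgpu_amdkfd_gpuvm_restore_process_bos(p->kgd_process_info,
						    &new_ef)) {
		/* Reservation or validation failed; retry later */
		schedule_delayed_work(&p->restore_work, HZ);
		return;
	}

	/* Swap in the fresh fence; the old one has already signaled */
	dma_fence_put(p->ef);
	p->ef = new_ef;
}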

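amdgpu_sync_clone exists so that a caller can snapshot the unsignaled fences of a shared sync object while holding its lock, then wait on the snapshot with the lock dropped; amdgpu_amdkfd_gpuvm_sync_memory above is the first user. The same pattern reduced to its essentials, assuming some lock-protected struct amdgpu_sync shared between threads:

/* Snapshot-and-wait: clone under the lock, wait outside it, so other
 * threads can keep adding fences to the shared object while we sleep.
 */
static int wait_on_sync_snapshot(struct mutex *lock,
				 struct amdgpu_sync *shared, bool intr)
{
	struct amdgpu_sync snapshot;
	int ret;

	amdgpu_sync_create(&snapshot);

	mutex_lock(lock);
	ret = amdgpu_sync_clone(shared, &snapshot); /* refs unsignaled fences */
	mutex_unlock(lock);
	if (ret)
		goto out;

	ret = amdgpu_sync_wait(&snapshot, intr);    /* may block */
out:
	amdgpu_sync_free(&snapshot);
	return ret;
}

As a side effect, amdgpu_sync_clone also prunes already-signaled fences from the source object, so repeated snapshots keep the shared hash table from growing without bound.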