diff options
Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c')
-rw-r--r-- | drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 229 |
1 files changed, 134 insertions, 95 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index 264c5968a1d3..b6c011b83641 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -50,6 +50,9 @@ * SI supports 16. */ +/* Special value that no flush is necessary */ +#define AMDGPU_VM_NO_FLUSH (~0ll) + /** * amdgpu_vm_num_pde - return the number of page directory entries * @@ -92,6 +95,7 @@ void amdgpu_vm_get_pd_bo(struct amdgpu_vm *vm, entry->priority = 0; entry->tv.bo = &vm->page_directory->tbo; entry->tv.shared = true; + entry->user_pages = NULL; list_add(&entry->tv.head, validated); } @@ -157,50 +161,77 @@ void amdgpu_vm_move_pt_bos_in_lru(struct amdgpu_device *adev, * Allocate an id for the vm, adding fences to the sync obj as necessary. */ int amdgpu_vm_grab_id(struct amdgpu_vm *vm, struct amdgpu_ring *ring, - struct amdgpu_sync *sync, struct fence *fence) + struct amdgpu_sync *sync, struct fence *fence, + unsigned *vm_id, uint64_t *vm_pd_addr) { - struct amdgpu_vm_id *vm_id = &vm->ids[ring->idx]; + uint64_t pd_addr = amdgpu_bo_gpu_offset(vm->page_directory); struct amdgpu_device *adev = ring->adev; - struct amdgpu_vm_manager_id *id; + struct amdgpu_vm_id *id = &vm->ids[ring->idx]; + struct fence *updates = sync->last_vm_update; int r; mutex_lock(&adev->vm_manager.lock); /* check if the id is still valid */ - if (vm_id->id) { + if (id->mgr_id) { + struct fence *flushed = id->flushed_updates; + bool is_later; long owner; - id = &adev->vm_manager.ids[vm_id->id]; - owner = atomic_long_read(&id->owner); - if (owner == (long)vm) { - list_move_tail(&id->list, &adev->vm_manager.ids_lru); - trace_amdgpu_vm_grab_id(vm, vm_id->id, ring->idx); + if (!flushed) + is_later = true; + else if (!updates) + is_later = false; + else + is_later = fence_is_later(updates, flushed); + + owner = atomic_long_read(&id->mgr_id->owner); + if (!is_later && owner == (long)id && + pd_addr == id->pd_gpu_addr) { + + r = amdgpu_sync_fence(ring->adev, sync, + id->mgr_id->active); + if (r) { + mutex_unlock(&adev->vm_manager.lock); + return r; + } + + fence_put(id->mgr_id->active); + id->mgr_id->active = fence_get(fence); + + list_move_tail(&id->mgr_id->list, + &adev->vm_manager.ids_lru); - fence_put(id->active); - id->active = fence_get(fence); + *vm_id = id->mgr_id - adev->vm_manager.ids; + *vm_pd_addr = AMDGPU_VM_NO_FLUSH; + trace_amdgpu_vm_grab_id(vm, ring->idx, *vm_id, + *vm_pd_addr); mutex_unlock(&adev->vm_manager.lock); return 0; } } - /* we definately need to flush */ - vm_id->pd_gpu_addr = ~0ll; + id->mgr_id = list_first_entry(&adev->vm_manager.ids_lru, + struct amdgpu_vm_manager_id, + list); - id = list_first_entry(&adev->vm_manager.ids_lru, - struct amdgpu_vm_manager_id, - list); - list_move_tail(&id->list, &adev->vm_manager.ids_lru); - atomic_long_set(&id->owner, (long)vm); + r = amdgpu_sync_fence(ring->adev, sync, id->mgr_id->active); + if (!r) { + fence_put(id->mgr_id->active); + id->mgr_id->active = fence_get(fence); - vm_id->id = id - adev->vm_manager.ids; - trace_amdgpu_vm_grab_id(vm, vm_id->id, ring->idx); + fence_put(id->flushed_updates); + id->flushed_updates = fence_get(updates); - r = amdgpu_sync_fence(ring->adev, sync, id->active); + id->pd_gpu_addr = pd_addr; - if (!r) { - fence_put(id->active); - id->active = fence_get(fence); + list_move_tail(&id->mgr_id->list, &adev->vm_manager.ids_lru); + atomic_long_set(&id->mgr_id->owner, (long)id); + + *vm_id = id->mgr_id - adev->vm_manager.ids; + *vm_pd_addr = pd_addr; + trace_amdgpu_vm_grab_id(vm, ring->idx, *vm_id, *vm_pd_addr); } mutex_unlock(&adev->vm_manager.lock); @@ -211,39 +242,71 @@ int amdgpu_vm_grab_id(struct amdgpu_vm *vm, struct amdgpu_ring *ring, * amdgpu_vm_flush - hardware flush the vm * * @ring: ring to use for flush - * @vm: vm we want to flush - * @updates: last vm update that we waited for + * @vm_id: vmid number to use + * @pd_addr: address of the page directory * - * Flush the vm. + * Emit a VM flush when it is necessary. */ void amdgpu_vm_flush(struct amdgpu_ring *ring, - struct amdgpu_vm *vm, - struct fence *updates) + unsigned vm_id, uint64_t pd_addr, + uint32_t gds_base, uint32_t gds_size, + uint32_t gws_base, uint32_t gws_size, + uint32_t oa_base, uint32_t oa_size) { - uint64_t pd_addr = amdgpu_bo_gpu_offset(vm->page_directory); - struct amdgpu_vm_id *vm_id = &vm->ids[ring->idx]; - struct fence *flushed_updates = vm_id->flushed_updates; - bool is_later; - - if (!flushed_updates) - is_later = true; - else if (!updates) - is_later = false; - else - is_later = fence_is_later(updates, flushed_updates); + struct amdgpu_device *adev = ring->adev; + struct amdgpu_vm_manager_id *mgr_id = &adev->vm_manager.ids[vm_id]; + bool gds_switch_needed = ring->funcs->emit_gds_switch && ( + mgr_id->gds_base != gds_base || + mgr_id->gds_size != gds_size || + mgr_id->gws_base != gws_base || + mgr_id->gws_size != gws_size || + mgr_id->oa_base != oa_base || + mgr_id->oa_size != oa_size); + + if (ring->funcs->emit_pipeline_sync && ( + pd_addr != AMDGPU_VM_NO_FLUSH || gds_switch_needed)) + amdgpu_ring_emit_pipeline_sync(ring); + + if (pd_addr != AMDGPU_VM_NO_FLUSH) { + trace_amdgpu_vm_flush(pd_addr, ring->idx, vm_id); + amdgpu_ring_emit_vm_flush(ring, vm_id, pd_addr); + } - if (pd_addr != vm_id->pd_gpu_addr || is_later) { - trace_amdgpu_vm_flush(pd_addr, ring->idx, vm_id->id); - if (is_later) { - vm_id->flushed_updates = fence_get(updates); - fence_put(flushed_updates); - } - vm_id->pd_gpu_addr = pd_addr; - amdgpu_ring_emit_vm_flush(ring, vm_id->id, vm_id->pd_gpu_addr); + if (gds_switch_needed) { + mgr_id->gds_base = gds_base; + mgr_id->gds_size = gds_size; + mgr_id->gws_base = gws_base; + mgr_id->gws_size = gws_size; + mgr_id->oa_base = oa_base; + mgr_id->oa_size = oa_size; + amdgpu_ring_emit_gds_switch(ring, vm_id, + gds_base, gds_size, + gws_base, gws_size, + oa_base, oa_size); } } /** + * amdgpu_vm_reset_id - reset VMID to zero + * + * @adev: amdgpu device structure + * @vm_id: vmid number to use + * + * Reset saved GDW, GWS and OA to force switch on next flush. + */ +void amdgpu_vm_reset_id(struct amdgpu_device *adev, unsigned vm_id) +{ + struct amdgpu_vm_manager_id *mgr_id = &adev->vm_manager.ids[vm_id]; + + mgr_id->gds_base = 0; + mgr_id->gds_size = 0; + mgr_id->gws_base = 0; + mgr_id->gws_size = 0; + mgr_id->oa_base = 0; + mgr_id->oa_size = 0; +} + +/** * amdgpu_vm_bo_find - find the bo_va for a specific vm & bo * * @vm: requested vm @@ -804,7 +867,7 @@ static int amdgpu_vm_bo_split_mapping(struct amdgpu_device *adev, while (start != mapping->it.last + 1) { uint64_t last; - last = min((uint64_t)mapping->it.last, start + max_size); + last = min((uint64_t)mapping->it.last, start + max_size - 1); r = amdgpu_vm_bo_update_mapping(adev, gtt, gtt_flags, vm, start, last, flags, addr, fence); @@ -812,7 +875,7 @@ static int amdgpu_vm_bo_split_mapping(struct amdgpu_device *adev, return r; start = last + 1; - addr += max_size; + addr += max_size * AMDGPU_GPU_PAGE_SIZE; } return 0; @@ -908,22 +971,18 @@ int amdgpu_vm_clear_freed(struct amdgpu_device *adev, struct amdgpu_bo_va_mapping *mapping; int r; - spin_lock(&vm->freed_lock); while (!list_empty(&vm->freed)) { mapping = list_first_entry(&vm->freed, struct amdgpu_bo_va_mapping, list); list_del(&mapping->list); - spin_unlock(&vm->freed_lock); + r = amdgpu_vm_bo_split_mapping(adev, NULL, 0, vm, mapping, 0, NULL); kfree(mapping); if (r) return r; - spin_lock(&vm->freed_lock); } - spin_unlock(&vm->freed_lock); - return 0; } @@ -950,9 +1009,8 @@ int amdgpu_vm_clear_invalids(struct amdgpu_device *adev, bo_va = list_first_entry(&vm->invalidated, struct amdgpu_bo_va, vm_status); spin_unlock(&vm->status_lock); - mutex_lock(&bo_va->mutex); + r = amdgpu_vm_bo_update(adev, bo_va, NULL); - mutex_unlock(&bo_va->mutex); if (r) return r; @@ -996,7 +1054,7 @@ struct amdgpu_bo_va *amdgpu_vm_bo_add(struct amdgpu_device *adev, INIT_LIST_HEAD(&bo_va->valids); INIT_LIST_HEAD(&bo_va->invalids); INIT_LIST_HEAD(&bo_va->vm_status); - mutex_init(&bo_va->mutex); + list_add_tail(&bo_va->bo_list, &bo->va); return bo_va; @@ -1048,9 +1106,7 @@ int amdgpu_vm_bo_map(struct amdgpu_device *adev, saddr /= AMDGPU_GPU_PAGE_SIZE; eaddr /= AMDGPU_GPU_PAGE_SIZE; - spin_lock(&vm->it_lock); it = interval_tree_iter_first(&vm->va, saddr, eaddr); - spin_unlock(&vm->it_lock); if (it) { struct amdgpu_bo_va_mapping *tmp; tmp = container_of(it, struct amdgpu_bo_va_mapping, it); @@ -1074,13 +1130,8 @@ int amdgpu_vm_bo_map(struct amdgpu_device *adev, mapping->offset = offset; mapping->flags = flags; - mutex_lock(&bo_va->mutex); list_add(&mapping->list, &bo_va->invalids); - mutex_unlock(&bo_va->mutex); - spin_lock(&vm->it_lock); interval_tree_insert(&mapping->it, &vm->va); - spin_unlock(&vm->it_lock); - trace_amdgpu_vm_bo_map(bo_va, mapping); /* Make sure the page tables are allocated */ saddr >>= amdgpu_vm_block_size; @@ -1124,6 +1175,7 @@ int amdgpu_vm_bo_map(struct amdgpu_device *adev, entry->priority = 0; entry->tv.bo = &entry->robj->tbo; entry->tv.shared = true; + entry->user_pages = NULL; vm->page_tables[pt_idx].addr = 0; } @@ -1131,9 +1183,7 @@ int amdgpu_vm_bo_map(struct amdgpu_device *adev, error_free: list_del(&mapping->list); - spin_lock(&vm->it_lock); interval_tree_remove(&mapping->it, &vm->va); - spin_unlock(&vm->it_lock); trace_amdgpu_vm_bo_unmap(bo_va, mapping); kfree(mapping); @@ -1162,7 +1212,7 @@ int amdgpu_vm_bo_unmap(struct amdgpu_device *adev, bool valid = true; saddr /= AMDGPU_GPU_PAGE_SIZE; - mutex_lock(&bo_va->mutex); + list_for_each_entry(mapping, &bo_va->valids, list) { if (mapping->it.start == saddr) break; @@ -1176,25 +1226,18 @@ int amdgpu_vm_bo_unmap(struct amdgpu_device *adev, break; } - if (&mapping->list == &bo_va->invalids) { - mutex_unlock(&bo_va->mutex); + if (&mapping->list == &bo_va->invalids) return -ENOENT; - } } - mutex_unlock(&bo_va->mutex); + list_del(&mapping->list); - spin_lock(&vm->it_lock); interval_tree_remove(&mapping->it, &vm->va); - spin_unlock(&vm->it_lock); trace_amdgpu_vm_bo_unmap(bo_va, mapping); - if (valid) { - spin_lock(&vm->freed_lock); + if (valid) list_add(&mapping->list, &vm->freed); - spin_unlock(&vm->freed_lock); - } else { + else kfree(mapping); - } return 0; } @@ -1223,23 +1266,17 @@ void amdgpu_vm_bo_rmv(struct amdgpu_device *adev, list_for_each_entry_safe(mapping, next, &bo_va->valids, list) { list_del(&mapping->list); - spin_lock(&vm->it_lock); interval_tree_remove(&mapping->it, &vm->va); - spin_unlock(&vm->it_lock); trace_amdgpu_vm_bo_unmap(bo_va, mapping); - spin_lock(&vm->freed_lock); list_add(&mapping->list, &vm->freed); - spin_unlock(&vm->freed_lock); } list_for_each_entry_safe(mapping, next, &bo_va->invalids, list) { list_del(&mapping->list); - spin_lock(&vm->it_lock); interval_tree_remove(&mapping->it, &vm->va); - spin_unlock(&vm->it_lock); kfree(mapping); } + fence_put(bo_va->last_pt_update); - mutex_destroy(&bo_va->mutex); kfree(bo_va); } @@ -1284,7 +1321,7 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm) int i, r; for (i = 0; i < AMDGPU_MAX_RINGS; ++i) { - vm->ids[i].id = 0; + vm->ids[i].mgr_id = NULL; vm->ids[i].flushed_updates = NULL; } vm->va = RB_ROOT; @@ -1292,8 +1329,7 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm) INIT_LIST_HEAD(&vm->invalidated); INIT_LIST_HEAD(&vm->cleared); INIT_LIST_HEAD(&vm->freed); - spin_lock_init(&vm->it_lock); - spin_lock_init(&vm->freed_lock); + pd_size = amdgpu_vm_directory_size(adev); pd_entries = amdgpu_vm_num_pdes(adev); @@ -1380,14 +1416,15 @@ void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm) amdgpu_bo_unref(&vm->page_directory); fence_put(vm->page_directory_fence); + for (i = 0; i < AMDGPU_MAX_RINGS; ++i) { - unsigned id = vm->ids[i].id; + struct amdgpu_vm_id *id = &vm->ids[i]; - atomic_long_cmpxchg(&adev->vm_manager.ids[id].owner, - (long)vm, 0); - fence_put(vm->ids[i].flushed_updates); + if (id->mgr_id) + atomic_long_cmpxchg(&id->mgr_id->owner, + (long)id, 0); + fence_put(id->flushed_updates); } - } /** @@ -1404,9 +1441,11 @@ void amdgpu_vm_manager_init(struct amdgpu_device *adev) INIT_LIST_HEAD(&adev->vm_manager.ids_lru); /* skip over VMID 0, since it is the system VM */ - for (i = 1; i < adev->vm_manager.num_ids; ++i) + for (i = 1; i < adev->vm_manager.num_ids; ++i) { + amdgpu_vm_reset_id(adev, i); list_add_tail(&adev->vm_manager.ids[i].list, &adev->vm_manager.ids_lru); + } atomic_set(&adev->vm_manager.vm_pte_next_ring, 0); } |