1 files changed, 149 insertions, 87 deletions
diff --git a/drivers/gpu/drm/radeon/radeon_vm.c b/drivers/gpu/drm/radeon/radeon_vm.c
index dfde266529e2..cde48c42b30a 100644
--- a/drivers/gpu/drm/radeon/radeon_vm.c
+++ b/drivers/gpu/drm/radeon/radeon_vm.c
@@ -125,41 +125,37 @@ void radeon_vm_manager_fini(struct radeon_device *rdev)
  * Add the page directory to the list of BOs to
  * validate for command submission (cayman+).
  */
-struct radeon_cs_reloc *radeon_vm_get_bos(struct radeon_device *rdev,
+struct radeon_bo_list *radeon_vm_get_bos(struct radeon_device *rdev,
 					  struct radeon_vm *vm,
 					  struct list_head *head)
 {
-	struct radeon_cs_reloc *list;
+	struct radeon_bo_list *list;
 	unsigned i, idx;
 
 	list = drm_malloc_ab(vm->max_pde_used + 2,
-			     sizeof(struct radeon_cs_reloc));
+			     sizeof(struct radeon_bo_list));
 	if (!list)
 		return NULL;
 
 	/* add the vm page table to the list */
-	list[0].gobj = NULL;
 	list[0].robj = vm->page_directory;
 	list[0].prefered_domains = RADEON_GEM_DOMAIN_VRAM;
 	list[0].allowed_domains = RADEON_GEM_DOMAIN_VRAM;
 	list[0].tv.bo = &vm->page_directory->tbo;
-	list[0].tv.shared = false;
+	list[0].tv.shared = true;
 	list[0].tiling_flags = 0;
-	list[0].handle = 0;
 	list_add(&list[0].tv.head, head);
 
 	for (i = 0, idx = 1; i <= vm->max_pde_used; i++) {
 		if (!vm->page_tables[i].bo)
 			continue;
 
-		list[idx].gobj = NULL;
 		list[idx].robj = vm->page_tables[i].bo;
 		list[idx].prefered_domains = RADEON_GEM_DOMAIN_VRAM;
 		list[idx].allowed_domains = RADEON_GEM_DOMAIN_VRAM;
 		list[idx].tv.bo = &list[idx].robj->tbo;
-		list[idx].tv.shared = false;
+		list[idx].tv.shared = true;
 		list[idx].tiling_flags = 0;
-		list[idx].handle = 0;
 		list_add(&list[idx++].tv.head, head);
 	}
 
@@ -182,15 +178,18 @@ struct radeon_fence *radeon_vm_grab_id(struct radeon_device *rdev,
 				       struct radeon_vm *vm, int ring)
 {
 	struct radeon_fence *best[RADEON_NUM_RINGS] = {};
+	struct radeon_vm_id *vm_id = &vm->ids[ring];
+
 	unsigned choices[2] = {};
 	unsigned i;
 
 	/* check if the id is still valid */
-	if (vm->last_id_use && vm->last_id_use == rdev->vm_manager.active[vm->id])
+	if (vm_id->id && vm_id->last_id_use &&
+	    vm_id->last_id_use == rdev->vm_manager.active[vm_id->id])
 		return NULL;
 
 	/* we definately need to flush */
-	radeon_fence_unref(&vm->last_flush);
+	vm_id->pd_gpu_addr = ~0ll;
 
 	/* skip over VMID 0, since it is the system VM */
 	for (i = 1; i < rdev->vm_manager.nvm; ++i) {
@@ -198,8 +197,8 @@ struct radeon_fence *radeon_vm_grab_id(struct radeon_device *rdev,
 
 		if (fence == NULL) {
 			/* found a free one */
-			vm->id = i;
-			trace_radeon_vm_grab_id(vm->id, ring);
+			vm_id->id = i;
+			trace_radeon_vm_grab_id(i, ring);
 			return NULL;
 		}
 
@@ -211,8 +210,8 @@ struct radeon_fence *radeon_vm_grab_id(struct radeon_device *rdev,
 
 	for (i = 0; i < 2; ++i) {
 		if (choices[i]) {
-			vm->id = choices[i];
-			trace_radeon_vm_grab_id(vm->id, ring);
+			vm_id->id = choices[i];
+			trace_radeon_vm_grab_id(choices[i], ring);
 			return rdev->vm_manager.active[choices[i]];
 		}
 	}
@@ -228,6 +227,7 @@ struct radeon_fence *radeon_vm_grab_id(struct radeon_device *rdev,
  * @rdev: radeon_device pointer
  * @vm: vm we want to flush
  * @ring: ring to use for flush
+ * @updates: last vm update that is waited for
  *
  * Flush the vm (cayman+).
  *
@@ -235,15 +235,21 @@ struct radeon_fence *radeon_vm_grab_id(struct radeon_device *rdev,
  */
 void radeon_vm_flush(struct radeon_device *rdev,
 		     struct radeon_vm *vm,
-		     int ring)
+		     int ring, struct radeon_fence *updates)
 {
 	uint64_t pd_addr = radeon_bo_gpu_offset(vm->page_directory);
+	struct radeon_vm_id *vm_id = &vm->ids[ring];
+
+	if (pd_addr != vm_id->pd_gpu_addr || !vm_id->flushed_updates ||
+	    radeon_fence_is_earlier(vm_id->flushed_updates, updates)) {
+
+		trace_radeon_vm_flush(pd_addr, ring, vm->ids[ring].id);
+		radeon_fence_unref(&vm_id->flushed_updates);
+		vm_id->flushed_updates = radeon_fence_ref(updates);
+		vm_id->pd_gpu_addr = pd_addr;
+		radeon_ring_vm_flush(rdev, &rdev->ring[ring],
+				     vm_id->id, vm_id->pd_gpu_addr);
 
-	/* if we can't remember our last VM flush then flush now! */
-	if (!vm->last_flush || pd_addr != vm->pd_gpu_addr) {
-		trace_radeon_vm_flush(pd_addr, ring, vm->id);
-		vm->pd_gpu_addr = pd_addr;
-		radeon_ring_vm_flush(rdev, ring, vm);
 	}
 }
 
@@ -263,18 +269,13 @@ void radeon_vm_fence(struct radeon_device *rdev,
 		     struct radeon_vm *vm,
 		     struct radeon_fence *fence)
 {
-	radeon_fence_unref(&vm->fence);
-	vm->fence = radeon_fence_ref(fence);
-
-	radeon_fence_unref(&rdev->vm_manager.active[vm->id]);
-	rdev->vm_manager.active[vm->id] = radeon_fence_ref(fence);
+	unsigned vm_id = vm->ids[fence->ring].id;
 
-	radeon_fence_unref(&vm->last_id_use);
-	vm->last_id_use = radeon_fence_ref(fence);
+	radeon_fence_unref(&rdev->vm_manager.active[vm_id]);
+	rdev->vm_manager.active[vm_id] = radeon_fence_ref(fence);
 
-        /* we just flushed the VM, remember that */
-        if (!vm->last_flush)
-                vm->last_flush = radeon_fence_ref(fence);
+	radeon_fence_unref(&vm->ids[fence->ring].last_id_use);
+	vm->ids[fence->ring].last_id_use = radeon_fence_ref(fence);
 }
 
 /**
@@ -387,35 +388,25 @@ static void radeon_vm_set_pages(struct radeon_device *rdev,
 static int radeon_vm_clear_bo(struct radeon_device *rdev,
 			      struct radeon_bo *bo)
 {
-        struct ttm_validate_buffer tv;
-        struct ww_acquire_ctx ticket;
-        struct list_head head;
 	struct radeon_ib ib;
 	unsigned entries;
 	uint64_t addr;
 	int r;
 
-        memset(&tv, 0, sizeof(tv));
-        tv.bo = &bo->tbo;
-	tv.shared = false;
-
-        INIT_LIST_HEAD(&head);
-        list_add(&tv.head, &head);
-
-        r = ttm_eu_reserve_buffers(&ticket, &head, true);
-        if (r)
+	r = radeon_bo_reserve(bo, false);
+	if (r)
 		return r;
 
-        r = ttm_bo_validate(&bo->tbo, &bo->placement, true, false);
-        if (r)
-                goto error;
+	r = ttm_bo_validate(&bo->tbo, &bo->placement, true, false);
+	if (r)
+		goto error_unreserve;
 
 	addr = radeon_bo_gpu_offset(bo);
 	entries = radeon_bo_size(bo) / 8;
 
 	r = radeon_ib_get(rdev, R600_RING_TYPE_DMA_INDEX, &ib, NULL, 256);
 	if (r)
-                goto error;
+		goto error_unreserve;
 
 	ib.length_dw = 0;
 
@@ -425,15 +416,16 @@ static int radeon_vm_clear_bo(struct radeon_device *rdev,
 
 	r = radeon_ib_schedule(rdev, &ib, NULL, false);
 	if (r)
-                goto error;
+		goto error_free;
 
-	ttm_eu_fence_buffer_objects(&ticket, &head, &ib.fence->base);
-	radeon_ib_free(rdev, &ib);
+	ib.fence->is_vm_update = true;
+	radeon_bo_fence(bo, ib.fence, false);
 
-	return 0;
+error_free:
+	radeon_ib_free(rdev, &ib);
 
-error:
-	ttm_eu_backoff_reservation(&ticket, &head);
+error_unreserve:
+	radeon_bo_unreserve(bo);
 	return r;
 }
 
@@ -449,7 +441,7 @@ error:
  * Validate and set the offset requested within the vm address space.
  * Returns 0 for success, error for failure.
  *
- * Object has to be reserved!
+ * Object has to be reserved and gets unreserved by this function!
  */
 int radeon_vm_bo_set_addr(struct radeon_device *rdev,
 			  struct radeon_bo_va *bo_va,
@@ -495,7 +487,9 @@ int radeon_vm_bo_set_addr(struct radeon_device *rdev,
 			tmp->vm = vm;
 			tmp->addr = bo_va->addr;
 			tmp->bo = radeon_bo_ref(bo_va->bo);
+			spin_lock(&vm->status_lock);
 			list_add(&tmp->vm_status, &vm->freed);
+			spin_unlock(&vm->status_lock);
 		}
 
 		interval_tree_remove(&bo_va->it, &vm->va);
@@ -575,7 +569,7 @@ int radeon_vm_bo_set_addr(struct radeon_device *rdev,
 	}
 
 	mutex_unlock(&vm->mutex);
-	return radeon_bo_reserve(bo_va->bo, false);
+	return 0;
 }
 
 /**
@@ -699,17 +693,15 @@ int radeon_vm_update_page_directory(struct radeon_device *rdev,
 	if (ib.length_dw != 0) {
 		radeon_asic_vm_pad_ib(rdev, &ib);
 
-		radeon_semaphore_sync_resv(rdev, ib.semaphore, pd->tbo.resv, false);
-		radeon_semaphore_sync_fence(ib.semaphore, vm->last_id_use);
+		radeon_sync_resv(rdev, &ib.sync, pd->tbo.resv, true);
 		WARN_ON(ib.length_dw > ndw);
 		r = radeon_ib_schedule(rdev, &ib, NULL, false);
 		if (r) {
 			radeon_ib_free(rdev, &ib);
 			return r;
 		}
-		radeon_fence_unref(&vm->fence);
-		vm->fence = radeon_fence_ref(ib.fence);
-		radeon_fence_unref(&vm->last_flush);
+		ib.fence->is_vm_update = true;
+		radeon_bo_fence(pd, ib.fence, false);
 	}
 	radeon_ib_free(rdev, &ib);
 
@@ -808,11 +800,11 @@ static void radeon_vm_frag_ptes(struct radeon_device *rdev,
  *
  * Global and local mutex must be locked!
  */
-static void radeon_vm_update_ptes(struct radeon_device *rdev,
-				  struct radeon_vm *vm,
-				  struct radeon_ib *ib,
-				  uint64_t start, uint64_t end,
-				  uint64_t dst, uint32_t flags)
+static int radeon_vm_update_ptes(struct radeon_device *rdev,
+				 struct radeon_vm *vm,
+				 struct radeon_ib *ib,
+				 uint64_t start, uint64_t end,
+				 uint64_t dst, uint32_t flags)
 {
 	uint64_t mask = RADEON_VM_PTE_COUNT - 1;
 	uint64_t last_pte = ~0, last_dst = ~0;
@@ -825,8 +817,12 @@ static void radeon_vm_update_ptes(struct radeon_device *rdev,
 		struct radeon_bo *pt = vm->page_tables[pt_idx].bo;
 		unsigned nptes;
 		uint64_t pte;
+		int r;
 
-		radeon_semaphore_sync_resv(rdev, ib->semaphore, pt->tbo.resv, false);
+		radeon_sync_resv(rdev, &ib->sync, pt->tbo.resv, true);
+		r = reservation_object_reserve_shared(pt->tbo.resv);
+		if (r)
+			return r;
 
 		if ((addr & ~mask) == (end & ~mask))
 			nptes = end - addr;
@@ -860,6 +856,33 @@ static void radeon_vm_update_ptes(struct radeon_device *rdev,
 				    last_pte + 8 * count,
 				    last_dst, flags);
 	}
+
+	return 0;
+}
+
+/**
+ * radeon_vm_fence_pts - fence page tables after an update
+ *
+ * @vm: requested vm
+ * @start: start of GPU address range
+ * @end: end of GPU address range
+ * @fence: fence to use
+ *
+ * Fence the page tables in the range @start - @end (cayman+).
+ *
+ * Global and local mutex must be locked!
+ */
+static void radeon_vm_fence_pts(struct radeon_vm *vm,
+				uint64_t start, uint64_t end,
+				struct radeon_fence *fence)
+{
+	unsigned i;
+
+	start >>= radeon_vm_block_size;
+	end >>= radeon_vm_block_size;
+
+	for (i = start; i <= end; ++i)
+		radeon_bo_fence(vm->page_tables[i].bo, fence, true);
 }
 
 /**
@@ -892,7 +915,9 @@ int radeon_vm_bo_update(struct radeon_device *rdev,
 		return -EINVAL;
 	}
 
+	spin_lock(&vm->status_lock);
 	list_del_init(&bo_va->vm_status);
+	spin_unlock(&vm->status_lock);
 
 	bo_va->flags &= ~RADEON_VM_PAGE_VALID;
 	bo_va->flags &= ~RADEON_VM_PAGE_SYSTEM;
@@ -961,23 +986,34 @@ int radeon_vm_bo_update(struct radeon_device *rdev,
 		return r;
 	ib.length_dw = 0;
 
-	radeon_vm_update_ptes(rdev, vm, &ib, bo_va->it.start,
-			      bo_va->it.last + 1, addr,
-			      radeon_vm_page_flags(bo_va->flags));
+	if (!(bo_va->flags & RADEON_VM_PAGE_VALID)) {
+		unsigned i;
+
+		for (i = 0; i < RADEON_NUM_RINGS; ++i)
+			radeon_sync_fence(&ib.sync, vm->ids[i].last_id_use);
+	}
+
+	r = radeon_vm_update_ptes(rdev, vm, &ib, bo_va->it.start,
+				  bo_va->it.last + 1, addr,
+				  radeon_vm_page_flags(bo_va->flags));
+	if (r) {
+		radeon_ib_free(rdev, &ib);
+		return r;
+	}
 
 	radeon_asic_vm_pad_ib(rdev, &ib);
 	WARN_ON(ib.length_dw > ndw);
 
-	radeon_semaphore_sync_fence(ib.semaphore, vm->fence);
 	r = radeon_ib_schedule(rdev, &ib, NULL, false);
 	if (r) {
 		radeon_ib_free(rdev, &ib);
 		return r;
 	}
-	radeon_fence_unref(&vm->fence);
-	vm->fence = radeon_fence_ref(ib.fence);
+	ib.fence->is_vm_update = true;
+	radeon_vm_fence_pts(vm, bo_va->it.start, bo_va->it.last + 1, ib.fence);
+	radeon_fence_unref(&bo_va->last_pt_update);
+	bo_va->last_pt_update = radeon_fence_ref(ib.fence);
 	radeon_ib_free(rdev, &ib);
-	radeon_fence_unref(&vm->last_flush);
 
 	return 0;
 }
@@ -996,16 +1032,25 @@ int radeon_vm_bo_update(struct radeon_device *rdev,
 int radeon_vm_clear_freed(struct radeon_device *rdev,
 			  struct radeon_vm *vm)
 {
-	struct radeon_bo_va *bo_va, *tmp;
+	struct radeon_bo_va *bo_va;
 	int r;
 
-	list_for_each_entry_safe(bo_va, tmp, &vm->freed, vm_status) {
+	spin_lock(&vm->status_lock);
+	while (!list_empty(&vm->freed)) {
+		bo_va = list_first_entry(&vm->freed,
+			struct radeon_bo_va, vm_status);
+		spin_unlock(&vm->status_lock);
+
 		r = radeon_vm_bo_update(rdev, bo_va, NULL);
 		radeon_bo_unref(&bo_va->bo);
+		radeon_fence_unref(&bo_va->last_pt_update);
 		kfree(bo_va);
 		if (r)
 			return r;
+
+		spin_lock(&vm->status_lock);
 	}
+	spin_unlock(&vm->status_lock);
 	return 0;
 
 }
@@ -1024,14 +1069,23 @@ int radeon_vm_clear_freed(struct radeon_device *rdev,
 int radeon_vm_clear_invalids(struct radeon_device *rdev,
 			     struct radeon_vm *vm)
 {
-	struct radeon_bo_va *bo_va, *tmp;
+	struct radeon_bo_va *bo_va;
 	int r;
 
-	list_for_each_entry_safe(bo_va, tmp, &vm->invalidated, vm_status) {
+	spin_lock(&vm->status_lock);
+	while (!list_empty(&vm->invalidated)) {
+		bo_va = list_first_entry(&vm->invalidated,
+			struct radeon_bo_va, vm_status);
+		spin_unlock(&vm->status_lock);
+
 		r = radeon_vm_bo_update(rdev, bo_va, NULL);
 		if (r)
 			return r;
+
+		spin_lock(&vm->status_lock);
 	}
+	spin_unlock(&vm->status_lock);
+
 	return 0;
 }
 
@@ -1054,14 +1108,17 @@ void radeon_vm_bo_rmv(struct radeon_device *rdev,
 
 	mutex_lock(&vm->mutex);
 	interval_tree_remove(&bo_va->it, &vm->va);
+	spin_lock(&vm->status_lock);
 	list_del(&bo_va->vm_status);
 
 	if (bo_va->addr) {
 		bo_va->bo = radeon_bo_ref(bo_va->bo);
 		list_add(&bo_va->vm_status, &vm->freed);
 	} else {
+		radeon_fence_unref(&bo_va->last_pt_update);
 		kfree(bo_va);
 	}
+	spin_unlock(&vm->status_lock);
 
 	mutex_unlock(&vm->mutex);
 }
@@ -1082,10 +1139,10 @@ void radeon_vm_bo_invalidate(struct radeon_device *rdev,
 
 	list_for_each_entry(bo_va, &bo->va, bo_list) {
 		if (bo_va->addr) {
-			mutex_lock(&bo_va->vm->mutex);
+			spin_lock(&bo_va->vm->status_lock);
 			list_del(&bo_va->vm_status);
 			list_add(&bo_va->vm_status, &bo_va->vm->invalidated);
-			mutex_unlock(&bo_va->vm->mutex);
+			spin_unlock(&bo_va->vm->status_lock);
 		}
 	}
 }
@@ -1103,15 +1160,17 @@ int radeon_vm_init(struct radeon_device *rdev, struct radeon_vm *vm)
 	const unsigned align = min(RADEON_VM_PTB_ALIGN_SIZE,
 		RADEON_VM_PTE_COUNT * 8);
 	unsigned pd_size, pd_entries, pts_size;
-	int r;
+	int i, r;
 
-	vm->id = 0;
 	vm->ib_bo_va = NULL;
-	vm->fence = NULL;
-	vm->last_flush = NULL;
-	vm->last_id_use = NULL;
+	for (i = 0; i < RADEON_NUM_RINGS; ++i) {
+		vm->ids[i].id = 0;
+		vm->ids[i].flushed_updates = NULL;
+		vm->ids[i].last_id_use = NULL;
+	}
 	mutex_init(&vm->mutex);
 	vm->va = RB_ROOT;
+	spin_lock_init(&vm->status_lock);
 	INIT_LIST_HEAD(&vm->invalidated);
 	INIT_LIST_HEAD(&vm->freed);
 
@@ -1165,11 +1224,13 @@ void radeon_vm_fini(struct radeon_device *rdev, struct radeon_vm *vm)
 		if (!r) {
 			list_del_init(&bo_va->bo_list);
 			radeon_bo_unreserve(bo_va->bo);
+			radeon_fence_unref(&bo_va->last_pt_update);
 			kfree(bo_va);
 		}
 	}
 	list_for_each_entry_safe(bo_va, tmp, &vm->freed, vm_status) {
 		radeon_bo_unref(&bo_va->bo);
+		radeon_fence_unref(&bo_va->last_pt_update);
 		kfree(bo_va);
 	}
 
@@ -1179,9 +1240,10 @@ void radeon_vm_fini(struct radeon_device *rdev, struct radeon_vm *vm)
 
 	radeon_bo_unref(&vm->page_directory);
 
-	radeon_fence_unref(&vm->fence);
-	radeon_fence_unref(&vm->last_flush);
-	radeon_fence_unref(&vm->last_id_use);
+	for (i = 0; i < RADEON_NUM_RINGS; ++i) {
+		radeon_fence_unref(&vm->ids[i].flushed_updates);
+		radeon_fence_unref(&vm->ids[i].last_id_use);
+	}
 
 	mutex_destroy(&vm->mutex);
 }