6 files changed, 126 insertions, 53 deletions
diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c
index 22b57020790d..d79607a1187c 100644
--- a/drivers/gpu/drm/ttm/ttm_bo.c
+++ b/drivers/gpu/drm/ttm/ttm_bo.c
@@ -70,6 +70,7 @@ static inline int ttm_mem_type_from_place(const struct ttm_place *place,
 static void ttm_mem_type_debug(struct ttm_bo_device *bdev, int mem_type)
 {
 	struct ttm_mem_type_manager *man = &bdev->man[mem_type];
+	struct drm_printer p = drm_debug_printer(TTM_PFX);
 
 	pr_err("    has_type: %d\n", man->has_type);
 	pr_err("    use_type: %d\n", man->use_type);
@@ -79,7 +80,7 @@ static void ttm_mem_type_debug(struct ttm_bo_device *bdev, int mem_type)
 	pr_err("    available_caching: 0x%08X\n", man->available_caching);
 	pr_err("    default_caching: 0x%08X\n", man->default_caching);
 	if (mem_type != TTM_PL_SYSTEM)
-		(*man->func->debug)(man, TTM_PFX);
+		(*man->func->debug)(man, &p);
 }
 
 static void ttm_bo_mem_space_debug(struct ttm_buffer_object *bo,
@@ -108,8 +109,8 @@ static ssize_t ttm_bo_global_show(struct kobject *kobj,
 	struct ttm_bo_global *glob =
 		container_of(kobj, struct ttm_bo_global, kobj);
 
-	return snprintf(buffer, PAGE_SIZE, "%lu\n",
-			(unsigned long) atomic_read(&glob->bo_count));
+	return snprintf(buffer, PAGE_SIZE, "%d\n",
+				atomic_read(&glob->bo_count));
 }
 
 static struct attribute *ttm_bo_global_attrs[] = {
@@ -394,14 +395,33 @@ static void ttm_bo_cleanup_memtype_use(struct ttm_buffer_object *bo)
 	ww_mutex_unlock (&bo->resv->lock);
 }
 
+static int ttm_bo_individualize_resv(struct ttm_buffer_object *bo)
+{
+	int r;
+
+	if (bo->resv == &bo->ttm_resv)
+		return 0;
+
+	reservation_object_init(&bo->ttm_resv);
+	BUG_ON(!reservation_object_trylock(&bo->ttm_resv));
+
+	r = reservation_object_copy_fences(&bo->ttm_resv, bo->resv);
+	if (r) {
+		reservation_object_unlock(&bo->ttm_resv);
+		reservation_object_fini(&bo->ttm_resv);
+	}
+
+	return r;
+}
+
 static void ttm_bo_flush_all_fences(struct ttm_buffer_object *bo)
 {
 	struct reservation_object_list *fobj;
 	struct dma_fence *fence;
 	int i;
 
-	fobj = reservation_object_get_list(bo->resv);
-	fence = reservation_object_get_excl(bo->resv);
+	fobj = reservation_object_get_list(&bo->ttm_resv);
+	fence = reservation_object_get_excl(&bo->ttm_resv);
 	if (fence && !fence->ops->signaled)
 		dma_fence_enable_sw_signaling(fence);
 
@@ -420,18 +440,30 @@ static void ttm_bo_cleanup_refs_or_queue(struct ttm_buffer_object *bo)
 	struct ttm_bo_global *glob = bo->glob;
 	int ret;
 
+	ret = ttm_bo_individualize_resv(bo);
+	if (ret) {
+		/* Last resort, if we fail to allocate memory for the
+		 * fences block for the BO to become idle
+		 */
+		reservation_object_wait_timeout_rcu(bo->resv, true, false,
+						    30 * HZ);
+		spin_lock(&glob->lru_lock);
+		goto error;
+	}
+
 	spin_lock(&glob->lru_lock);
 	ret = __ttm_bo_reserve(bo, false, true, NULL);
-
 	if (!ret) {
-		if (!ttm_bo_wait(bo, false, true)) {
+		if (reservation_object_test_signaled_rcu(&bo->ttm_resv, true)) {
 			ttm_bo_del_from_lru(bo);
 			spin_unlock(&glob->lru_lock);
+			if (bo->resv != &bo->ttm_resv)
+				reservation_object_unlock(&bo->ttm_resv);
 			ttm_bo_cleanup_memtype_use(bo);
-
 			return;
-		} else
-			ttm_bo_flush_all_fences(bo);
+		}
+
+		ttm_bo_flush_all_fences(bo);
 
 		/*
 		 * Make NO_EVICT bos immediately available to
@@ -445,7 +477,10 @@ static void ttm_bo_cleanup_refs_or_queue(struct ttm_buffer_object *bo)
 
 		__ttm_bo_unreserve(bo);
 	}
+	if (bo->resv != &bo->ttm_resv)
+		reservation_object_unlock(&bo->ttm_resv);
 
+error:
 	kref_get(&bo->list_kref);
 	list_add_tail(&bo->ddestroy, &bdev->ddestroy);
 	spin_unlock(&glob->lru_lock);
@@ -471,17 +506,25 @@ static int ttm_bo_cleanup_refs_and_unlock(struct ttm_buffer_object *bo,
 					  bool no_wait_gpu)
 {
 	struct ttm_bo_global *glob = bo->glob;
+	struct reservation_object *resv;
 	int ret;
 
-	ret = ttm_bo_wait(bo, false, true);
+	if (unlikely(list_empty(&bo->ddestroy)))
+		resv = bo->resv;
+	else
+		resv = &bo->ttm_resv;
+
+	if (reservation_object_test_signaled_rcu(resv, true))
+		ret = 0;
+	else
+		ret = -EBUSY;
 
 	if (ret && !no_wait_gpu) {
 		long lret;
 		ww_mutex_unlock(&bo->resv->lock);
 		spin_unlock(&glob->lru_lock);
 
-		lret = reservation_object_wait_timeout_rcu(bo->resv,
-							   true,
+		lret = reservation_object_wait_timeout_rcu(resv, true,
 							   interruptible,
 							   30 * HZ);
 
@@ -505,13 +548,6 @@ static int ttm_bo_cleanup_refs_and_unlock(struct ttm_buffer_object *bo,
 			spin_unlock(&glob->lru_lock);
 			return 0;
 		}
-
-		/*
-		 * remove sync_obj with ttm_bo_wait, the wait should be
-		 * finished, and no new wait object should have been added.
-		 */
-		ret = ttm_bo_wait(bo, false, true);
-		WARN_ON(ret);
 	}
 
 	if (ret || unlikely(list_empty(&bo->ddestroy))) {
@@ -521,6 +557,8 @@ static int ttm_bo_cleanup_refs_and_unlock(struct ttm_buffer_object *bo,
 	}
 
 	ttm_bo_del_from_lru(bo);
+	if (!list_empty(&bo->ddestroy) && (bo->resv != &bo->ttm_resv))
+		reservation_object_fini(&bo->ttm_resv);
 	list_del_init(&bo->ddestroy);
 	kref_put(&bo->list_kref, ttm_bo_ref_bug);
 
diff --git a/drivers/gpu/drm/ttm/ttm_bo_manager.c b/drivers/gpu/drm/ttm/ttm_bo_manager.c
index 90a6c0b03afc..a7c232dc39cb 100644
--- a/drivers/gpu/drm/ttm/ttm_bo_manager.c
+++ b/drivers/gpu/drm/ttm/ttm_bo_manager.c
@@ -136,13 +136,12 @@ static int ttm_bo_man_takedown(struct ttm_mem_type_manager *man)
 }
 
 static void ttm_bo_man_debug(struct ttm_mem_type_manager *man,
-			     const char *prefix)
+			     struct drm_printer *printer)
 {
 	struct ttm_range_manager *rman = (struct ttm_range_manager *) man->priv;
-	struct drm_printer p = drm_debug_printer(prefix);
 
 	spin_lock(&rman->lock);
-	drm_mm_print(&rman->mm, &p);
+	drm_mm_print(&rman->mm, printer);
 	spin_unlock(&rman->lock);
 }
 
diff --git a/drivers/gpu/drm/ttm/ttm_bo_util.c b/drivers/gpu/drm/ttm/ttm_bo_util.c
index d0459b392e5e..78cb99be7146 100644
--- a/drivers/gpu/drm/ttm/ttm_bo_util.c
+++ b/drivers/gpu/drm/ttm/ttm_bo_util.c
@@ -469,6 +469,7 @@ static int ttm_buffer_object_transfer(struct ttm_buffer_object *bo,
 	 * TODO: Explicit member copy would probably be better here.
 	 */
 
+	atomic_inc(&bo->glob->bo_count);
 	INIT_LIST_HEAD(&fbo->ddestroy);
 	INIT_LIST_HEAD(&fbo->lru);
 	INIT_LIST_HEAD(&fbo->swap);
@@ -586,7 +587,6 @@ int ttm_bo_kmap(struct ttm_buffer_object *bo,
 	unsigned long offset, size;
 	int ret;
 
-	BUG_ON(!list_empty(&bo->swap));
 	map->virtual = NULL;
 	map->bo = bo;
 	if (num_pages > bo->num_pages)
diff --git a/drivers/gpu/drm/ttm/ttm_bo_vm.c b/drivers/gpu/drm/ttm/ttm_bo_vm.c
index a01e5c90fd87..c8ebb757e36b 100644
--- a/drivers/gpu/drm/ttm/ttm_bo_vm.c
+++ b/drivers/gpu/drm/ttm/ttm_bo_vm.c
@@ -39,6 +39,7 @@
 #include <linux/rbtree.h>
 #include <linux/module.h>
 #include <linux/uaccess.h>
+#include <linux/mem_encrypt.h>
 
 #define TTM_BO_VM_NUM_PREFAULT 16
 
@@ -230,9 +231,11 @@ static int ttm_bo_vm_fault(struct vm_fault *vmf)
 	 * first page.
 	 */
 	for (i = 0; i < TTM_BO_VM_NUM_PREFAULT; ++i) {
-		if (bo->mem.bus.is_iomem)
+		if (bo->mem.bus.is_iomem) {
+			/* Iomem should not be marked encrypted */
+			cvma.vm_page_prot = pgprot_decrypted(cvma.vm_page_prot);
 			pfn = bdev->driver->io_mem_pfn(bo, page_offset);
-		else {
+		} else {
 			page = ttm->pages[page_offset];
 			if (unlikely(!page && i == 0)) {
 				retval = VM_FAULT_OOM;
diff --git a/drivers/gpu/drm/ttm/ttm_page_alloc.c b/drivers/gpu/drm/ttm/ttm_page_alloc.c
index eeddc1e48409..052e1f102113 100644
--- a/drivers/gpu/drm/ttm/ttm_page_alloc.c
+++ b/drivers/gpu/drm/ttm/ttm_page_alloc.c
@@ -615,7 +615,7 @@ static void ttm_page_pool_fill_locked(struct ttm_page_pool *pool,
 		} else {
 			pr_err("Failed to fill pool (%p)\n", pool);
 			/* If we have any pages left put them to the pool. */
-			list_for_each_entry(p, &pool->list, lru) {
+			list_for_each_entry(p, &new_pages, lru) {
 				++cpages;
 			}
 			list_splice(&new_pages, &pool->list);
@@ -920,6 +920,49 @@ void ttm_pool_unpopulate(struct ttm_tt *ttm)
 }
 EXPORT_SYMBOL(ttm_pool_unpopulate);
 
+#if defined(CONFIG_SWIOTLB) || defined(CONFIG_INTEL_IOMMU)
+int ttm_populate_and_map_pages(struct device *dev, struct ttm_dma_tt *tt)
+{
+	unsigned i;
+	int r;
+
+	r = ttm_pool_populate(&tt->ttm);
+	if (r)
+		return r;
+
+	for (i = 0; i < tt->ttm.num_pages; i++) {
+		tt->dma_address[i] = dma_map_page(dev, tt->ttm.pages[i],
+						  0, PAGE_SIZE,
+						  DMA_BIDIRECTIONAL);
+		if (dma_mapping_error(dev, tt->dma_address[i])) {
+			while (i--) {
+				dma_unmap_page(dev, tt->dma_address[i],
+					       PAGE_SIZE, DMA_BIDIRECTIONAL);
+				tt->dma_address[i] = 0;
+			}
+			ttm_pool_unpopulate(&tt->ttm);
+			return -EFAULT;
+		}
+	}
+	return 0;
+}
+EXPORT_SYMBOL(ttm_populate_and_map_pages);
+
+void ttm_unmap_and_unpopulate_pages(struct device *dev, struct ttm_dma_tt *tt)
+{
+	unsigned i;
+	
+	for (i = 0; i < tt->ttm.num_pages; i++) {
+		if (tt->dma_address[i]) {
+			dma_unmap_page(dev, tt->dma_address[i],
+				       PAGE_SIZE, DMA_BIDIRECTIONAL);
+		}
+	}
+	ttm_pool_unpopulate(&tt->ttm);
+}
+EXPORT_SYMBOL(ttm_unmap_and_unpopulate_pages);
+#endif
+
 int ttm_page_alloc_debugfs(struct seq_file *m, void *data)
 {
 	struct ttm_page_pool *p;
diff --git a/drivers/gpu/drm/ttm/ttm_page_alloc_dma.c b/drivers/gpu/drm/ttm/ttm_page_alloc_dma.c
index 90ddbdca93bd..06bc14b55e66 100644
--- a/drivers/gpu/drm/ttm/ttm_page_alloc_dma.c
+++ b/drivers/gpu/drm/ttm/ttm_page_alloc_dma.c
@@ -60,22 +60,15 @@
 #define NUM_PAGES_TO_ALLOC		(PAGE_SIZE/sizeof(struct page *))
 #define SMALL_ALLOCATION		4
 #define FREE_ALL_PAGES			(~0U)
-/* times are in msecs */
-#define IS_UNDEFINED			(0)
-#define IS_WC				(1<<1)
-#define IS_UC				(1<<2)
-#define IS_CACHED			(1<<3)
-#define IS_DMA32			(1<<4)
 
 enum pool_type {
-	POOL_IS_UNDEFINED,
-	POOL_IS_WC = IS_WC,
-	POOL_IS_UC = IS_UC,
-	POOL_IS_CACHED = IS_CACHED,
-	POOL_IS_WC_DMA32 = IS_WC | IS_DMA32,
-	POOL_IS_UC_DMA32 = IS_UC | IS_DMA32,
-	POOL_IS_CACHED_DMA32 = IS_CACHED | IS_DMA32,
+	IS_UNDEFINED	= 0,
+	IS_WC		= 1 << 1,
+	IS_UC		= 1 << 2,
+	IS_CACHED	= 1 << 3,
+	IS_DMA32	= 1 << 4
 };
+
 /*
  * The pool structure. There are usually six pools:
  *  - generic (not restricted to DMA32):
@@ -86,11 +79,9 @@ enum pool_type {
  * The other ones can be shrunk by the shrinker API if neccessary.
  * @pools: The 'struct device->dma_pools' link.
  * @type: Type of the pool
- * @lock: Protects the inuse_list and free_list from concurrnet access. Must be
+ * @lock: Protects the free_list from concurrnet access. Must be
  * used with irqsave/irqrestore variants because pool allocator maybe called
  * from delayed work.
- * @inuse_list: Pool of pages that are in use. The order is very important and
- *   it is in the order that the TTM pages that are put back are in.
  * @free_list: Pool of pages that are free to be used. No order requirements.
  * @dev: The device that is associated with these pools.
  * @size: Size used during DMA allocation.
@@ -107,7 +98,6 @@ struct dma_pool {
 	struct list_head pools; /* The 'struct device->dma_pools link */
 	enum pool_type type;
 	spinlock_t lock;
-	struct list_head inuse_list;
 	struct list_head free_list;
 	struct device *dev;
 	unsigned size;
@@ -609,7 +599,6 @@ static struct dma_pool *ttm_dma_pool_init(struct device *dev, gfp_t flags,
 	sec_pool->pool =  pool;
 
 	INIT_LIST_HEAD(&pool->free_list);
-	INIT_LIST_HEAD(&pool->inuse_list);
 	INIT_LIST_HEAD(&pool->pools);
 	spin_lock_init(&pool->lock);
 	pool->dev = dev;
@@ -882,22 +871,23 @@ int ttm_dma_populate(struct ttm_dma_tt *ttm_dma, struct device *dev)
 	struct dma_pool *pool;
 	enum pool_type type;
 	unsigned i;
-	gfp_t gfp_flags;
 	int ret;
 
 	if (ttm->state != tt_unpopulated)
 		return 0;
 
 	type = ttm_to_type(ttm->page_flags, ttm->caching_state);
-	if (ttm->page_flags & TTM_PAGE_FLAG_DMA32)
-		gfp_flags = GFP_USER | GFP_DMA32;
-	else
-		gfp_flags = GFP_HIGHUSER;
-	if (ttm->page_flags & TTM_PAGE_FLAG_ZERO_ALLOC)
-		gfp_flags |= __GFP_ZERO;
-
 	pool = ttm_dma_find_pool(dev, type);
 	if (!pool) {
+		gfp_t gfp_flags;
+
+		if (ttm->page_flags & TTM_PAGE_FLAG_DMA32)
+			gfp_flags = GFP_USER | GFP_DMA32;
+		else
+			gfp_flags = GFP_HIGHUSER;
+		if (ttm->page_flags & TTM_PAGE_FLAG_ZERO_ALLOC)
+			gfp_flags |= __GFP_ZERO;
+
 		pool = ttm_dma_pool_init(dev, gfp_flags, type);
 		if (IS_ERR_OR_NULL(pool)) {
 			return -ENOMEM;