From d7f46fc4e7323887494db13f063a8e59861fefb0 Mon Sep 17 00:00:00 2001 From: Ben Widawsky Date: Fri, 6 Dec 2013 14:10:55 -0800 Subject: drm/i915: Make pin count per VMA Signed-off-by: Ben Widawsky Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_gem_execbuffer.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'drivers/gpu/drm/i915/i915_gem_execbuffer.c') diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c index 9282b4c411f6..a2d6eb5336e3 100644 --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c @@ -566,7 +566,7 @@ i915_gem_execbuffer_unreserve_vma(struct i915_vma *vma) i915_gem_object_unpin_fence(obj); if (entry->flags & __EXEC_OBJECT_HAS_PIN) - i915_gem_object_unpin(obj); + vma->pin_count--; entry->flags &= ~(__EXEC_OBJECT_HAS_FENCE | __EXEC_OBJECT_HAS_PIN); } @@ -923,7 +923,9 @@ i915_gem_execbuffer_move_to_active(struct list_head *vmas, if (obj->base.write_domain) { obj->dirty = 1; obj->last_write_seqno = intel_ring_get_seqno(ring); - if (obj->pin_count) /* check for potential scanout */ + /* check for potential scanout */ + if (i915_gem_obj_ggtt_bound(obj) && + i915_gem_obj_to_ggtt(obj)->pin_count) intel_mark_fb_busy(obj, ring); } -- cgit v1.2.3 From 6f65e29acad7499920cf1e49b675fac7cde24166 Mon Sep 17 00:00:00 2001 From: Ben Widawsky Date: Fri, 6 Dec 2013 14:10:56 -0800 Subject: drm/i915: Create bind/unbind abstraction for VMAs To sum up what goes on here, we abstract the vma binding, similarly to the previous object binding. This helps for distinguishing legacy binding, versus modern binding. To keep the code churn as minimal as possible, I am leaving in insert_entries(). It serves as the per platform pte writing basically. bind_vma and insert_entries do share a lot of similarities, and I did have designs to combine the two, but as mentioned already... too much churn in an already massive patchset. What follows are the 3 commits which existed discretely in the original submissions. Upon rebasing on Broadwell support, it became clear that separation was not good, and only made for more error prone code. Below are the 3 commit messages with all their history. drm/i915: Add bind/unbind object functions to VMA drm/i915: Use the new vm [un]bind functions drm/i915: reduce vm->insert_entries() usage drm/i915: Add bind/unbind object functions to VMA As we plumb the code with more VM information, it has become more obvious that the easiest way to deal with bind and unbind is to simply put the function pointers in the vm, and let those choose the correct way to handle the page table updates. This change allows many places in the code to simply be vm->bind, and not have to worry about distinguishing PPGTT vs GGTT. Notice that this patch has no impact on functionality. I've decided to save the actual change until the next patch because I think it's easier to review that way. I'm happy to squash the two, or let Daniel do it on merge. v2: Make ggtt handle the quirky aliasing ppgtt Add flags to bind object to support above Don't ever call bind/unbind directly for PPGTT until we have real, full PPGTT (use NULLs to assert this) Make sure we rebind the ggtt if there already is a ggtt binding. This happens on set cache levels. Use VMA for bind/unbind (Daniel, Ben) v3: Reorganize ggtt_vma_bind to be more concise and easier to read (Ville). Change logic in unbind to only unbind ggtt when there is a global mapping, and to remove a redundant check if the aliasing ppgtt exists. v4: Make the bind function a bit smarter about the cache levels to avoid unnecessary multiple remaps. "I accept it is a wart, I think unifying the pin_vma / bind_vma could be unified later" (Chris) Removed the git notes, and put version info here. (Daniel) v5: Update the comment to not suck (Chris) v6: Move bind/unbind to the VMA. It makes more sense in the VMA structure (always has, but I was previously lazy). With this change, it will allow us to keep a distinct insert_entries. Reviewed-by: Chris Wilson Signed-off-by: Ben Widawsky drm/i915: Use the new vm [un]bind functions Building on the last patch which created the new function pointers in the VM for bind/unbind, here we actually put those new function pointers to use. Split out as a separate patch to aid in review. I'm fine with squashing into the previous patch if people request it. v2: Updated to address the smart ggtt which can do aliasing as needed Make sure we bind to global gtt when mappable and fenceable. I thought we could get away without this initialy, but we cannot. v3: Make the global GTT binding explicitly use the ggtt VM for bind_vma(). While at it, use the new ggtt_vma helper (Chris) At this point the original mailing list thread diverges. ie. v4^: use target_obj instead of obj for gen6 relocate_entry vma->bind_vma() can be called safely during pin. So simply do that instead of the complicated conditionals. Don't restore PPGTT bound objects on resume path Bug fix in resume path for globally bound Bos Properly handle secure dispatch Rebased on vma bind/unbind conversion Signed-off-by: Ben Widawsky drm/i915: reduce vm->insert_entries() usage FKA: drm/i915: eliminate vm->insert_entries() With bind/unbind function pointers in place, we no longer need insert_entries. We could, and want, to remove clear_range, however it's not totally easy at this point. Since it's used in a couple of place still that don't only deal in objects: setup, ppgtt init, and restore gtt mappings. v2: Don't actually remove insert_entries, just limit its usage. It will be useful when we introduce gen8. It will always be called from the vma bind/unbind. Reviewed-by: Chris Wilson (v1) Signed-off-by: Ben Widawsky Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_drv.h | 103 ++++++++-------- drivers/gpu/drm/i915/i915_gem.c | 61 ++------- drivers/gpu/drm/i915/i915_gem_context.c | 8 +- drivers/gpu/drm/i915/i915_gem_execbuffer.c | 43 ++++--- drivers/gpu/drm/i915/i915_gem_gtt.c | 192 +++++++++++++++++++++++------ 5 files changed, 245 insertions(+), 162 deletions(-) (limited to 'drivers/gpu/drm/i915/i915_gem_execbuffer.c') diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index bf022c4a5773..9fe078b41674 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -523,6 +523,57 @@ enum i915_cache_level { typedef uint32_t gen6_gtt_pte_t; +/** + * A VMA represents a GEM BO that is bound into an address space. Therefore, a + * VMA's presence cannot be guaranteed before binding, or after unbinding the + * object into/from the address space. + * + * To make things as simple as possible (ie. no refcounting), a VMA's lifetime + * will always be <= an objects lifetime. So object refcounting should cover us. + */ +struct i915_vma { + struct drm_mm_node node; + struct drm_i915_gem_object *obj; + struct i915_address_space *vm; + + /** This object's place on the active/inactive lists */ + struct list_head mm_list; + + struct list_head vma_link; /* Link in the object's VMA list */ + + /** This vma's place in the batchbuffer or on the eviction list */ + struct list_head exec_list; + + /** + * Used for performing relocations during execbuffer insertion. + */ + struct hlist_node exec_node; + unsigned long exec_handle; + struct drm_i915_gem_exec_object2 *exec_entry; + + /** + * How many users have pinned this object in GTT space. The following + * users can each hold at most one reference: pwrite/pread, pin_ioctl + * (via user_pin_count), execbuffer (objects are not allowed multiple + * times for the same batchbuffer), and the framebuffer code. When + * switching/pageflipping, the framebuffer code has at most two buffers + * pinned per crtc. + * + * In the worst case this is 1 + 1 + 1 + 2*2 = 7. That would fit into 3 + * bits with absolutely no headroom. So use 4 bits. */ + unsigned int pin_count:4; +#define DRM_I915_GEM_OBJECT_MAX_PIN_COUNT 0xf + + /** Unmap an object from an address space. This usually consists of + * setting the valid PTE entries to a reserved scratch page. */ + void (*unbind_vma)(struct i915_vma *vma); + /* Map an object into an address space with the given cache flags. */ +#define GLOBAL_BIND (1<<0) + void (*bind_vma)(struct i915_vma *vma, + enum i915_cache_level cache_level, + u32 flags); +}; + struct i915_address_space { struct drm_mm mm; struct drm_device *dev; @@ -623,49 +674,6 @@ struct i915_hw_ppgtt { int (*enable)(struct drm_device *dev); }; -/** - * A VMA represents a GEM BO that is bound into an address space. Therefore, a - * VMA's presence cannot be guaranteed before binding, or after unbinding the - * object into/from the address space. - * - * To make things as simple as possible (ie. no refcounting), a VMA's lifetime - * will always be <= an objects lifetime. So object refcounting should cover us. - */ -struct i915_vma { - struct drm_mm_node node; - struct drm_i915_gem_object *obj; - struct i915_address_space *vm; - - /** This object's place on the active/inactive lists */ - struct list_head mm_list; - - struct list_head vma_link; /* Link in the object's VMA list */ - - /** This vma's place in the batchbuffer or on the eviction list */ - struct list_head exec_list; - - /** - * Used for performing relocations during execbuffer insertion. - */ - struct hlist_node exec_node; - unsigned long exec_handle; - struct drm_i915_gem_exec_object2 *exec_entry; - - /** - * How many users have pinned this object in GTT space. The following - * users can each hold at most one reference: pwrite/pread, pin_ioctl - * (via user_pin_count), execbuffer (objects are not allowed multiple - * times for the same batchbuffer), and the framebuffer code. When - * switching/pageflipping, the framebuffer code has at most two buffers - * pinned per crtc. - * - * In the worst case this is 1 + 1 + 1 + 2*2 = 7. That would fit into 3 - * bits with absolutely no headroom. So use 4 bits. - */ - unsigned int pin_count:4; -#define DRM_I915_GEM_OBJECT_MAX_PIN_COUNT 0xf -}; - struct i915_ctx_hang_stats { /* This context had batch pending when hang was declared */ unsigned batch_pending; @@ -2242,19 +2250,10 @@ int i915_gem_context_destroy_ioctl(struct drm_device *dev, void *data, /* i915_gem_gtt.c */ void i915_gem_cleanup_aliasing_ppgtt(struct drm_device *dev); -void i915_ppgtt_bind_object(struct i915_hw_ppgtt *ppgtt, - struct drm_i915_gem_object *obj, - enum i915_cache_level cache_level); -void i915_ppgtt_unbind_object(struct i915_hw_ppgtt *ppgtt, - struct drm_i915_gem_object *obj); - void i915_check_and_clear_faults(struct drm_device *dev); void i915_gem_suspend_gtt_mappings(struct drm_device *dev); void i915_gem_restore_gtt_mappings(struct drm_device *dev); int __must_check i915_gem_gtt_prepare_object(struct drm_i915_gem_object *obj); -void i915_gem_gtt_bind_object(struct drm_i915_gem_object *obj, - enum i915_cache_level cache_level); -void i915_gem_gtt_unbind_object(struct drm_i915_gem_object *obj); void i915_gem_gtt_finish_object(struct drm_i915_gem_object *obj); void i915_gem_init_global_gtt(struct drm_device *dev); void i915_gem_setup_global_gtt(struct drm_device *dev, unsigned long start, diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 6dc96bceed19..2b92e89c5369 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -2743,12 +2743,8 @@ int i915_vma_unbind(struct i915_vma *vma) trace_i915_vma_unbind(vma); - if (obj->has_global_gtt_mapping) - i915_gem_gtt_unbind_object(obj); - if (obj->has_aliasing_ppgtt_mapping) { - i915_ppgtt_unbind_object(dev_priv->mm.aliasing_ppgtt, obj); - obj->has_aliasing_ppgtt_mapping = 0; - } + vma->unbind_vma(vma); + i915_gem_gtt_finish_object(obj); list_del(&vma->mm_list); @@ -3479,7 +3475,6 @@ int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj, enum i915_cache_level cache_level) { struct drm_device *dev = obj->base.dev; - drm_i915_private_t *dev_priv = dev->dev_private; struct i915_vma *vma; int ret; @@ -3518,11 +3513,8 @@ int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj, return ret; } - if (obj->has_global_gtt_mapping) - i915_gem_gtt_bind_object(obj, cache_level); - if (obj->has_aliasing_ppgtt_mapping) - i915_ppgtt_bind_object(dev_priv->mm.aliasing_ppgtt, - obj, cache_level); + list_for_each_entry(vma, &obj->vma_list, vma_link) + vma->bind_vma(vma, cache_level, 0); } list_for_each_entry(vma, &obj->vma_list, vma_link) @@ -3850,6 +3842,7 @@ i915_gem_object_pin(struct drm_i915_gem_object *obj, bool map_and_fenceable, bool nonblocking) { + const u32 flags = map_and_fenceable ? GLOBAL_BIND : 0; struct i915_vma *vma; int ret; @@ -3878,20 +3871,17 @@ i915_gem_object_pin(struct drm_i915_gem_object *obj, } if (!i915_gem_obj_bound(obj, vm)) { - struct drm_i915_private *dev_priv = obj->base.dev->dev_private; - ret = i915_gem_object_bind_to_vm(obj, vm, alignment, map_and_fenceable, nonblocking); if (ret) return ret; - if (!dev_priv->mm.aliasing_ppgtt) - i915_gem_gtt_bind_object(obj, obj->cache_level); } - if (!obj->has_global_gtt_mapping && map_and_fenceable) - i915_gem_gtt_bind_object(obj, obj->cache_level); + vma = i915_gem_obj_to_vma(obj, vm); + + vma->bind_vma(vma, obj->cache_level, flags); i915_gem_obj_to_vma(obj, vm)->pin_count++; obj->pin_mappable |= map_and_fenceable; @@ -4235,41 +4225,6 @@ struct i915_vma *i915_gem_obj_to_vma(struct drm_i915_gem_object *obj, return NULL; } -static struct i915_vma *__i915_gem_vma_create(struct drm_i915_gem_object *obj, - struct i915_address_space *vm) -{ - struct i915_vma *vma = kzalloc(sizeof(*vma), GFP_KERNEL); - if (vma == NULL) - return ERR_PTR(-ENOMEM); - - INIT_LIST_HEAD(&vma->vma_link); - INIT_LIST_HEAD(&vma->mm_list); - INIT_LIST_HEAD(&vma->exec_list); - vma->vm = vm; - vma->obj = obj; - - /* Keep GGTT vmas first to make debug easier */ - if (i915_is_ggtt(vm)) - list_add(&vma->vma_link, &obj->vma_list); - else - list_add_tail(&vma->vma_link, &obj->vma_list); - - return vma; -} - -struct i915_vma * -i915_gem_obj_lookup_or_create_vma(struct drm_i915_gem_object *obj, - struct i915_address_space *vm) -{ - struct i915_vma *vma; - - vma = i915_gem_obj_to_vma(obj, vm); - if (!vma) - vma = __i915_gem_vma_create(obj, vm); - - return vma; -} - void i915_gem_vma_destroy(struct i915_vma *vma) { WARN_ON(vma->node.allocated); diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c index b06199175d41..0640ab8b9958 100644 --- a/drivers/gpu/drm/i915/i915_gem_context.c +++ b/drivers/gpu/drm/i915/i915_gem_context.c @@ -408,6 +408,7 @@ mi_set_context(struct intel_ring_buffer *ring, static int do_switch(struct i915_hw_context *to) { struct intel_ring_buffer *ring = to->ring; + struct drm_i915_private *dev_priv = ring->dev->dev_private; struct i915_hw_context *from = ring->last_context; u32 hw_flags = 0; int ret, i; @@ -432,8 +433,11 @@ static int do_switch(struct i915_hw_context *to) return ret; } - if (!to->obj->has_global_gtt_mapping) - i915_gem_gtt_bind_object(to->obj, to->obj->cache_level); + if (!to->obj->has_global_gtt_mapping) { + struct i915_vma *vma = i915_gem_obj_to_vma(to->obj, + &dev_priv->gtt.base); + vma->bind_vma(vma, to->obj->cache_level, GLOBAL_BIND); + } if (!to->is_initialized || is_default_context(to)) hw_flags |= MI_RESTORE_INHIBIT; diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c index a2d6eb5336e3..c093a2b6f06d 100644 --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c @@ -88,6 +88,7 @@ eb_lookup_vmas(struct eb_vmas *eb, struct i915_address_space *vm, struct drm_file *file) { + struct drm_i915_private *dev_priv = vm->dev->dev_private; struct drm_i915_gem_object *obj; struct list_head objects; int i, ret = 0; @@ -122,6 +123,15 @@ eb_lookup_vmas(struct eb_vmas *eb, i = 0; list_for_each_entry(obj, &objects, obj_exec_link) { struct i915_vma *vma; + struct i915_address_space *bind_vm = vm; + + /* If we have secure dispatch, or the userspace assures us that + * they know what they're doing, use the GGTT VM. + */ + if (exec[i].flags & EXEC_OBJECT_NEEDS_GTT || + ((args->flags & I915_EXEC_SECURE) && + (i == (args->buffer_count - 1)))) + bind_vm = &dev_priv->gtt.base; /* * NOTE: We can leak any vmas created here when something fails @@ -131,7 +141,7 @@ eb_lookup_vmas(struct eb_vmas *eb, * from the (obj, vm) we don't run the risk of creating * duplicated vmas for the same vm. */ - vma = i915_gem_obj_lookup_or_create_vma(obj, vm); + vma = i915_gem_obj_lookup_or_create_vma(obj, bind_vm); if (IS_ERR(vma)) { DRM_DEBUG("Failed to lookup VMA\n"); ret = PTR_ERR(vma); @@ -315,8 +325,8 @@ i915_gem_execbuffer_relocate_entry(struct drm_i915_gem_object *obj, if (unlikely(IS_GEN6(dev) && reloc->write_domain == I915_GEM_DOMAIN_INSTRUCTION && !target_i915_obj->has_global_gtt_mapping)) { - i915_gem_gtt_bind_object(target_i915_obj, - target_i915_obj->cache_level); + struct i915_vma *vma = i915_gem_obj_to_vma(target_i915_obj, vm); + vma->bind_vma(vma, target_i915_obj->cache_level, GLOBAL_BIND); } /* Validate that the target is in a valid r/w GPU domain */ @@ -493,11 +503,12 @@ i915_gem_execbuffer_reserve_vma(struct i915_vma *vma, struct intel_ring_buffer *ring, bool *need_reloc) { - struct drm_i915_private *dev_priv = ring->dev->dev_private; + struct drm_i915_gem_object *obj = vma->obj; struct drm_i915_gem_exec_object2 *entry = vma->exec_entry; bool has_fenced_gpu_access = INTEL_INFO(ring->dev)->gen < 4; bool need_fence, need_mappable; - struct drm_i915_gem_object *obj = vma->obj; + u32 flags = (entry->flags & EXEC_OBJECT_NEEDS_GTT) && + !vma->obj->has_global_gtt_mapping ? GLOBAL_BIND : 0; int ret; need_fence = @@ -526,14 +537,6 @@ i915_gem_execbuffer_reserve_vma(struct i915_vma *vma, } } - /* Ensure ppgtt mapping exists if needed */ - if (dev_priv->mm.aliasing_ppgtt && !obj->has_aliasing_ppgtt_mapping) { - i915_ppgtt_bind_object(dev_priv->mm.aliasing_ppgtt, - obj, obj->cache_level); - - obj->has_aliasing_ppgtt_mapping = 1; - } - if (entry->offset != vma->node.start) { entry->offset = vma->node.start; *need_reloc = true; @@ -544,9 +547,7 @@ i915_gem_execbuffer_reserve_vma(struct i915_vma *vma, obj->base.pending_write_domain = I915_GEM_DOMAIN_RENDER; } - if (entry->flags & EXEC_OBJECT_NEEDS_GTT && - !obj->has_global_gtt_mapping) - i915_gem_gtt_bind_object(obj, obj->cache_level); + vma->bind_vma(vma, obj->cache_level, flags); return 0; } @@ -1171,8 +1172,14 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data, /* snb/ivb/vlv conflate the "batch in ppgtt" bit with the "non-secure * batch" bit. Hence we need to pin secure batches into the global gtt. * hsw should have this fixed, but bdw mucks it up again. */ - if (flags & I915_DISPATCH_SECURE && !batch_obj->has_global_gtt_mapping) - i915_gem_gtt_bind_object(batch_obj, batch_obj->cache_level); + if (flags & I915_DISPATCH_SECURE && + !batch_obj->has_global_gtt_mapping) { + /* When we have multiple VMs, we'll need to make sure that we + * allocate space first */ + struct i915_vma *vma = i915_gem_obj_to_ggtt(batch_obj); + BUG_ON(!vma); + vma->bind_vma(vma, batch_obj->cache_level, GLOBAL_BIND); + } ret = i915_gem_execbuffer_move_to_gpu(ring, &eb->vmas); if (ret) diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index 056033791d24..73117ec333cc 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -68,6 +68,11 @@ typedef gen8_gtt_pte_t gen8_ppgtt_pde_t; #define PPAT_CACHED_INDEX _PAGE_PAT /* WB LLCeLLC */ #define PPAT_DISPLAY_ELLC_INDEX _PAGE_PCD /* WT eLLC */ +static void ppgtt_bind_vma(struct i915_vma *vma, + enum i915_cache_level cache_level, + u32 flags); +static void ppgtt_unbind_vma(struct i915_vma *vma); + static inline gen8_gtt_pte_t gen8_pte_encode(dma_addr_t addr, enum i915_cache_level level, bool valid) @@ -746,22 +751,26 @@ void i915_gem_cleanup_aliasing_ppgtt(struct drm_device *dev) dev_priv->mm.aliasing_ppgtt = NULL; } -void i915_ppgtt_bind_object(struct i915_hw_ppgtt *ppgtt, - struct drm_i915_gem_object *obj, - enum i915_cache_level cache_level) +static void __always_unused +ppgtt_bind_vma(struct i915_vma *vma, + enum i915_cache_level cache_level, + u32 flags) { - ppgtt->base.insert_entries(&ppgtt->base, obj->pages, - i915_gem_obj_ggtt_offset(obj) >> PAGE_SHIFT, - cache_level); + const unsigned long entry = vma->node.start >> PAGE_SHIFT; + + WARN_ON(flags); + + vma->vm->insert_entries(vma->vm, vma->obj->pages, entry, cache_level); } -void i915_ppgtt_unbind_object(struct i915_hw_ppgtt *ppgtt, - struct drm_i915_gem_object *obj) +static void __always_unused ppgtt_unbind_vma(struct i915_vma *vma) { - ppgtt->base.clear_range(&ppgtt->base, - i915_gem_obj_ggtt_offset(obj) >> PAGE_SHIFT, - obj->base.size >> PAGE_SHIFT, - true); + const unsigned long entry = vma->node.start >> PAGE_SHIFT; + + vma->vm->clear_range(vma->vm, + entry, + vma->obj->base.size >> PAGE_SHIFT, + true); } extern int intel_iommu_gfx_mapped; @@ -863,8 +872,18 @@ void i915_gem_restore_gtt_mappings(struct drm_device *dev) true); list_for_each_entry(obj, &dev_priv->mm.bound_list, global_list) { + struct i915_vma *vma = i915_gem_obj_to_vma(obj, + &dev_priv->gtt.base); + if (!vma) + continue; + i915_gem_clflush_object(obj, obj->pin_display); - i915_gem_gtt_bind_object(obj, obj->cache_level); + /* The bind_vma code tries to be smart about tracking mappings. + * Unfortunately above, we've just wiped out the mappings + * without telling our object about it. So we need to fake it. + */ + obj->has_global_gtt_mapping = 0; + vma->bind_vma(vma, obj->cache_level, GLOBAL_BIND); } i915_gem_chipset_flush(dev); @@ -1023,16 +1042,18 @@ static void gen6_ggtt_clear_range(struct i915_address_space *vm, readl(gtt_base); } -static void i915_ggtt_insert_entries(struct i915_address_space *vm, - struct sg_table *st, - unsigned int pg_start, - enum i915_cache_level cache_level) + +static void i915_ggtt_bind_vma(struct i915_vma *vma, + enum i915_cache_level cache_level, + u32 unused) { + const unsigned long entry = vma->node.start >> PAGE_SHIFT; unsigned int flags = (cache_level == I915_CACHE_NONE) ? AGP_USER_MEMORY : AGP_USER_CACHED_MEMORY; - intel_gtt_insert_sg_entries(st, pg_start, flags); - + BUG_ON(!i915_is_ggtt(vma->vm)); + intel_gtt_insert_sg_entries(vma->obj->pages, entry, flags); + vma->obj->has_global_gtt_mapping = 1; } static void i915_ggtt_clear_range(struct i915_address_space *vm, @@ -1043,33 +1064,77 @@ static void i915_ggtt_clear_range(struct i915_address_space *vm, intel_gtt_clear_range(first_entry, num_entries); } +static void i915_ggtt_unbind_vma(struct i915_vma *vma) +{ + const unsigned int first = vma->node.start >> PAGE_SHIFT; + const unsigned int size = vma->obj->base.size >> PAGE_SHIFT; -void i915_gem_gtt_bind_object(struct drm_i915_gem_object *obj, - enum i915_cache_level cache_level) + BUG_ON(!i915_is_ggtt(vma->vm)); + vma->obj->has_global_gtt_mapping = 0; + intel_gtt_clear_range(first, size); +} + +static void ggtt_bind_vma(struct i915_vma *vma, + enum i915_cache_level cache_level, + u32 flags) { - struct drm_device *dev = obj->base.dev; + struct drm_device *dev = vma->vm->dev; struct drm_i915_private *dev_priv = dev->dev_private; - const unsigned long entry = i915_gem_obj_ggtt_offset(obj) >> PAGE_SHIFT; + struct drm_i915_gem_object *obj = vma->obj; + const unsigned long entry = vma->node.start >> PAGE_SHIFT; - dev_priv->gtt.base.insert_entries(&dev_priv->gtt.base, obj->pages, - entry, - cache_level); + /* If there is no aliasing PPGTT, or the caller needs a global mapping, + * or we have a global mapping already but the cacheability flags have + * changed, set the global PTEs. + * + * If there is an aliasing PPGTT it is anecdotally faster, so use that + * instead if none of the above hold true. + * + * NB: A global mapping should only be needed for special regions like + * "gtt mappable", SNB errata, or if specified via special execbuf + * flags. At all other times, the GPU will use the aliasing PPGTT. + */ + if (!dev_priv->mm.aliasing_ppgtt || flags & GLOBAL_BIND) { + if (!obj->has_global_gtt_mapping || + (cache_level != obj->cache_level)) { + vma->vm->insert_entries(vma->vm, obj->pages, entry, + cache_level); + obj->has_global_gtt_mapping = 1; + } + } - obj->has_global_gtt_mapping = 1; + if (dev_priv->mm.aliasing_ppgtt && + (!obj->has_aliasing_ppgtt_mapping || + (cache_level != obj->cache_level))) { + struct i915_hw_ppgtt *appgtt = dev_priv->mm.aliasing_ppgtt; + appgtt->base.insert_entries(&appgtt->base, + vma->obj->pages, entry, cache_level); + vma->obj->has_aliasing_ppgtt_mapping = 1; + } } -void i915_gem_gtt_unbind_object(struct drm_i915_gem_object *obj) +static void ggtt_unbind_vma(struct i915_vma *vma) { - struct drm_device *dev = obj->base.dev; + struct drm_device *dev = vma->vm->dev; struct drm_i915_private *dev_priv = dev->dev_private; - const unsigned long entry = i915_gem_obj_ggtt_offset(obj) >> PAGE_SHIFT; - - dev_priv->gtt.base.clear_range(&dev_priv->gtt.base, - entry, - obj->base.size >> PAGE_SHIFT, - true); + struct drm_i915_gem_object *obj = vma->obj; + const unsigned long entry = vma->node.start >> PAGE_SHIFT; + + if (obj->has_global_gtt_mapping) { + vma->vm->clear_range(vma->vm, entry, + vma->obj->base.size >> PAGE_SHIFT, + true); + obj->has_global_gtt_mapping = 0; + } - obj->has_global_gtt_mapping = 0; + if (obj->has_aliasing_ppgtt_mapping) { + struct i915_hw_ppgtt *appgtt = dev_priv->mm.aliasing_ppgtt; + appgtt->base.clear_range(&appgtt->base, + entry, + obj->base.size >> PAGE_SHIFT, + true); + obj->has_aliasing_ppgtt_mapping = 0; + } } void i915_gem_gtt_finish_object(struct drm_i915_gem_object *obj) @@ -1444,7 +1509,6 @@ static int i915_gmch_probe(struct drm_device *dev, dev_priv->gtt.do_idle_maps = needs_idle_maps(dev_priv->dev); dev_priv->gtt.base.clear_range = i915_ggtt_clear_range; - dev_priv->gtt.base.insert_entries = i915_ggtt_insert_entries; return 0; } @@ -1496,3 +1560,57 @@ int i915_gem_gtt_init(struct drm_device *dev) return 0; } + +static struct i915_vma *__i915_gem_vma_create(struct drm_i915_gem_object *obj, + struct i915_address_space *vm) +{ + struct i915_vma *vma = kzalloc(sizeof(*vma), GFP_KERNEL); + if (vma == NULL) + return ERR_PTR(-ENOMEM); + + INIT_LIST_HEAD(&vma->vma_link); + INIT_LIST_HEAD(&vma->mm_list); + INIT_LIST_HEAD(&vma->exec_list); + vma->vm = vm; + vma->obj = obj; + + switch (INTEL_INFO(vm->dev)->gen) { + case 8: + case 7: + case 6: + vma->unbind_vma = ggtt_unbind_vma; + vma->bind_vma = ggtt_bind_vma; + break; + case 5: + case 4: + case 3: + case 2: + BUG_ON(!i915_is_ggtt(vm)); + vma->unbind_vma = i915_ggtt_unbind_vma; + vma->bind_vma = i915_ggtt_bind_vma; + break; + default: + BUG(); + } + + /* Keep GGTT vmas first to make debug easier */ + if (i915_is_ggtt(vm)) + list_add(&vma->vma_link, &obj->vma_list); + else + list_add_tail(&vma->vma_link, &obj->vma_list); + + return vma; +} + +struct i915_vma * +i915_gem_obj_lookup_or_create_vma(struct drm_i915_gem_object *obj, + struct i915_address_space *vm) +{ + struct i915_vma *vma; + + vma = i915_gem_obj_to_vma(obj, vm); + if (!vma) + vma = __i915_gem_vma_create(obj, vm); + + return vma; +} -- cgit v1.2.3 From 3e7a032295f178d1db4e4b9ac25b6d6bc6d5826e Mon Sep 17 00:00:00 2001 From: Ben Widawsky Date: Fri, 6 Dec 2013 14:10:57 -0800 Subject: drm/i915: Remove vm arg from relocate entry The only place we were using it was for GEN6, which won't have PPGTT support anyway (ie. the VM is always the same). To clear things up, (it only added confusion for me since it doesn't allow us to assert vma->vm is what we always want, when just looking at the code). Signed-off-by: Ben Widawsky Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_gem_execbuffer.c | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) (limited to 'drivers/gpu/drm/i915/i915_gem_execbuffer.c') diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c index c093a2b6f06d..099998156e6c 100644 --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c @@ -300,8 +300,7 @@ relocate_entry_gtt(struct drm_i915_gem_object *obj, static int i915_gem_execbuffer_relocate_entry(struct drm_i915_gem_object *obj, struct eb_vmas *eb, - struct drm_i915_gem_relocation_entry *reloc, - struct i915_address_space *vm) + struct drm_i915_gem_relocation_entry *reloc) { struct drm_device *dev = obj->base.dev; struct drm_gem_object *target_obj; @@ -325,7 +324,9 @@ i915_gem_execbuffer_relocate_entry(struct drm_i915_gem_object *obj, if (unlikely(IS_GEN6(dev) && reloc->write_domain == I915_GEM_DOMAIN_INSTRUCTION && !target_i915_obj->has_global_gtt_mapping)) { - struct i915_vma *vma = i915_gem_obj_to_vma(target_i915_obj, vm); + struct i915_vma *vma = + list_first_entry(&target_i915_obj->vma_list, + typeof(*vma), vma_link); vma->bind_vma(vma, target_i915_obj->cache_level, GLOBAL_BIND); } @@ -424,8 +425,7 @@ i915_gem_execbuffer_relocate_vma(struct i915_vma *vma, do { u64 offset = r->presumed_offset; - ret = i915_gem_execbuffer_relocate_entry(vma->obj, eb, r, - vma->vm); + ret = i915_gem_execbuffer_relocate_entry(vma->obj, eb, r); if (ret) return ret; @@ -454,8 +454,7 @@ i915_gem_execbuffer_relocate_vma_slow(struct i915_vma *vma, int i, ret; for (i = 0; i < entry->relocation_count; i++) { - ret = i915_gem_execbuffer_relocate_entry(vma->obj, eb, &relocs[i], - vma->vm); + ret = i915_gem_execbuffer_relocate_entry(vma->obj, eb, &relocs[i]); if (ret) return ret; } -- cgit v1.2.3 From ca01b12b401a0e17a265a5ee18bf33e2cfbd32aa Mon Sep 17 00:00:00 2001 From: Ben Widawsky Date: Fri, 6 Dec 2013 14:11:00 -0800 Subject: drm/i915: Simplify ring handling in execbuf Signed-off-by: Ben Widawsky Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_gem_execbuffer.c | 41 ++++++++---------------------- 1 file changed, 10 insertions(+), 31 deletions(-) (limited to 'drivers/gpu/drm/i915/i915_gem_execbuffer.c') diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c index 099998156e6c..dfe7cb912e29 100644 --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c @@ -1006,41 +1006,20 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data, if (args->flags & I915_EXEC_IS_PINNED) flags |= I915_DISPATCH_PINNED; - switch (args->flags & I915_EXEC_RING_MASK) { - case I915_EXEC_DEFAULT: - case I915_EXEC_RENDER: - ring = &dev_priv->ring[RCS]; - break; - case I915_EXEC_BSD: - ring = &dev_priv->ring[VCS]; - if (ctx_id != DEFAULT_CONTEXT_ID) { - DRM_DEBUG("Ring %s doesn't support contexts\n", - ring->name); - return -EPERM; - } - break; - case I915_EXEC_BLT: - ring = &dev_priv->ring[BCS]; - if (ctx_id != DEFAULT_CONTEXT_ID) { - DRM_DEBUG("Ring %s doesn't support contexts\n", - ring->name); - return -EPERM; - } - break; - case I915_EXEC_VEBOX: - ring = &dev_priv->ring[VECS]; - if (ctx_id != DEFAULT_CONTEXT_ID) { - DRM_DEBUG("Ring %s doesn't support contexts\n", - ring->name); - return -EPERM; - } - break; - - default: + if ((args->flags & I915_EXEC_RING_MASK) > I915_NUM_RINGS) { DRM_DEBUG("execbuf with unknown ring: %d\n", (int)(args->flags & I915_EXEC_RING_MASK)); return -EINVAL; } + if (ctx_id != DEFAULT_CONTEXT_ID && + (args->flags & I915_EXEC_RING_MASK) > I915_EXEC_RENDER) + return -EPERM; + + if ((args->flags & I915_EXEC_RING_MASK) == I915_EXEC_DEFAULT) + ring = &dev_priv->ring[RCS]; + else + ring = &dev_priv->ring[(args->flags & I915_EXEC_RING_MASK) - 1]; + if (!intel_ring_initialized(ring)) { DRM_DEBUG("execbuf with invalid ring: %d\n", (int)(args->flags & I915_EXEC_RING_MASK)); -- cgit v1.2.3 From 67e3d2979be1bf42d1818b2961c671eb31e0b4d9 Mon Sep 17 00:00:00 2001 From: Ben Widawsky Date: Fri, 6 Dec 2013 14:11:01 -0800 Subject: drm/i915: Permit contexts on all rings If we want to use contexts in more abstract terms (specifically with PPGTT in mind), we need to allow them to be specified for any ring. Since the upcoming patches will bring about the use of multiple address spaces, and each ring needs to have an address space programmed (which we intend to do at context switch time), we can no longer only use RCS. With multiple rings having a last context, we must now unreference these contexts. NOTE: This commit requires an update to intel-gpu-tools to make it not fail. v2: Rebased with some logical conflicts. Squashed in the context fini refcount patch Signed-off-by: Ben Widawsky Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_gem_context.c | 54 +++++++++++++++++++++++------- drivers/gpu/drm/i915/i915_gem_execbuffer.c | 3 -- 2 files changed, 42 insertions(+), 15 deletions(-) (limited to 'drivers/gpu/drm/i915/i915_gem_execbuffer.c') diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c index 40413704614e..7a5311c3225a 100644 --- a/drivers/gpu/drm/i915/i915_gem_context.c +++ b/drivers/gpu/drm/i915/i915_gem_context.c @@ -98,7 +98,8 @@ static struct i915_hw_context * i915_gem_context_get(struct drm_i915_file_private *file_priv, u32 id); -static int do_switch(struct i915_hw_context *to); +static int do_switch(struct intel_ring_buffer *ring, + struct i915_hw_context *to); static size_t get_context_alignment(struct drm_device *dev) { @@ -240,7 +241,7 @@ static int create_default_context(struct drm_device *dev) goto err_destroy; } - ret = do_switch(ctx); + ret = do_switch(&dev_priv->ring[RCS], ctx); if (ret) { DRM_DEBUG_DRIVER("Switch failed %d\n", ret); goto err_unpin; @@ -261,7 +262,8 @@ err_destroy: int i915_gem_context_init(struct drm_device *dev) { struct drm_i915_private *dev_priv = dev->dev_private; - int ret; + struct intel_ring_buffer *ring; + int i, ret; if (!HAS_HW_CONTEXTS(dev)) return 0; @@ -284,6 +286,16 @@ int i915_gem_context_init(struct drm_device *dev) return ret; } + for (i = RCS + 1; i < I915_NUM_RINGS; i++) { + if (!(INTEL_INFO(dev)->ring_mask & (1<ring[i]; + + /* NB: RCS will hold a ref for all rings */ + ring->default_context = dev_priv->ring[RCS].default_context; + } + DRM_DEBUG_DRIVER("HW context support initialized\n"); return 0; } @@ -292,6 +304,7 @@ void i915_gem_context_fini(struct drm_device *dev) { struct drm_i915_private *dev_priv = dev->dev_private; struct i915_hw_context *dctx = dev_priv->ring[RCS].default_context; + int i; if (!HAS_HW_CONTEXTS(dev)) return; @@ -313,12 +326,22 @@ void i915_gem_context_fini(struct drm_device *dev) WARN_ON(dctx->obj->active); i915_gem_object_ggtt_unpin(dctx->obj); i915_gem_context_unreference(dctx); + dev_priv->ring[RCS].last_context = NULL; + } + + for (i = 0; i < I915_NUM_RINGS; i++) { + struct intel_ring_buffer *ring = &dev_priv->ring[i]; + if (!(INTEL_INFO(dev)->ring_mask & (1<last_context) + i915_gem_context_unreference(ring->last_context); + + ring->default_context = NULL; } i915_gem_object_ggtt_unpin(dctx->obj); i915_gem_context_unreference(dctx); - dev_priv->ring[RCS].default_context = NULL; - dev_priv->ring[RCS].last_context = NULL; } static int context_idr_cleanup(int id, void *p, void *data) @@ -431,19 +454,28 @@ mi_set_context(struct intel_ring_buffer *ring, return ret; } -static int do_switch(struct i915_hw_context *to) +static int do_switch(struct intel_ring_buffer *ring, + struct i915_hw_context *to) { - struct intel_ring_buffer *ring = to->ring; struct drm_i915_private *dev_priv = ring->dev->dev_private; struct i915_hw_context *from = ring->last_context; u32 hw_flags = 0; int ret, i; - BUG_ON(from != NULL && from->obj != NULL && !i915_gem_obj_is_pinned(from->obj)); + if (from != NULL && ring == &dev_priv->ring[RCS]) { + BUG_ON(from->obj == NULL); + BUG_ON(!i915_gem_obj_is_pinned(from->obj)); + } if (from == to && !to->remap_slice) return 0; + if (ring != &dev_priv->ring[RCS]) { + if (from) + i915_gem_context_unreference(from); + goto done; + } + ret = i915_gem_obj_ggtt_pin(to->obj, get_context_alignment(ring->dev), false, false); if (ret) @@ -511,6 +543,7 @@ static int do_switch(struct i915_hw_context *to) i915_gem_context_unreference(from); } +done: i915_gem_context_reference(to); ring->last_context = to; to->is_initialized = true; @@ -541,9 +574,6 @@ int i915_switch_context(struct intel_ring_buffer *ring, WARN_ON(!mutex_is_locked(&dev_priv->dev->struct_mutex)); - if (ring != &dev_priv->ring[RCS]) - return 0; - if (to_id == DEFAULT_CONTEXT_ID) { to = ring->default_context; } else { @@ -555,7 +585,7 @@ int i915_switch_context(struct intel_ring_buffer *ring, return -ENOENT; } - return do_switch(to); + return do_switch(ring, to); } int i915_gem_context_create_ioctl(struct drm_device *dev, void *data, diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c index dfe7cb912e29..d608a07f4398 100644 --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c @@ -1011,9 +1011,6 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data, (int)(args->flags & I915_EXEC_RING_MASK)); return -EINVAL; } - if (ctx_id != DEFAULT_CONTEXT_ID && - (args->flags & I915_EXEC_RING_MASK) > I915_EXEC_RENDER) - return -EPERM; if ((args->flags & I915_EXEC_RING_MASK) == I915_EXEC_DEFAULT) ring = &dev_priv->ring[RCS]; -- cgit v1.2.3 From 41bde5535a7d48876095926bb55b1aed5ccd6b2c Mon Sep 17 00:00:00 2001 From: Ben Widawsky Date: Fri, 6 Dec 2013 14:11:21 -0800 Subject: drm/i915: Get context early in execbuf We need to have the address space when reserving space for the objects. Since the address space and context are tied together, and reserve occurs before context switch (for good reason), we must lookup our context earlier in the process. This leaves some room for optimizations where we no longer need to use ctx_id in certain places. This will be addressed in a subsequent patch. Important tricky bit: Because slow relocations during execbuffer drop struct_mutex Perhaps it would be best to acquire the reference when we get the context, but I'll save that for another day (note I have written the patch before, and I found the changes required to be uglier than this). Note that since we currently access everything via context id, and not the data structure this is fine, though not desirable. The next change attempts to get the context only once via the context ID idr lookup, and as such, the following can happen: CTX-A is created, refcount = 1 CTX-A execbuf, mutex dropped close IOCTL called on CTX-A, refcount = 0 CTX-A resumes in execbuf. v2: Rebased on top of commit b6359918b885da7c7b58c050674278dbd06020ab Author: Mika Kuoppala Date: Wed Oct 30 15:44:16 2013 +0200 drm/i915: add i915_get_reset_stats_ioctl v3: Rebased on top of commit 25b3dfc87bff80317d67ddd2cd4cfb91e6fe7d79 Author: Mika Westerberg Date: Tue Nov 12 11:57:30 2013 +0200 Author: Mika Kuoppala Date: Tue Nov 26 16:14:33 2013 +0200 drm/i915: check context reset stats before relocations Signed-off-by: Ben Widawsky Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_drv.h | 8 ++---- drivers/gpu/drm/i915/i915_gem.c | 2 +- drivers/gpu/drm/i915/i915_gem_context.c | 32 ++++------------------ drivers/gpu/drm/i915/i915_gem_execbuffer.c | 44 ++++++++++++++++++------------ drivers/gpu/drm/i915/intel_uncore.c | 8 ++++-- 5 files changed, 41 insertions(+), 53 deletions(-) (limited to 'drivers/gpu/drm/i915/i915_gem_execbuffer.c') diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index c1adb8268025..4f0b17b617c6 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -2239,7 +2239,9 @@ int i915_gem_context_open(struct drm_device *dev, struct drm_file *file); int i915_gem_context_enable(struct drm_i915_private *dev_priv); void i915_gem_context_close(struct drm_device *dev, struct drm_file *file); int i915_switch_context(struct intel_ring_buffer *ring, - struct drm_file *file, int to_id); + struct drm_file *file, struct i915_hw_context *to); +struct i915_hw_context * +i915_gem_context_get(struct drm_i915_file_private *file_priv, u32 id); void i915_gem_context_free(struct kref *ctx_ref); static inline void i915_gem_context_reference(struct i915_hw_context *ctx) { @@ -2253,10 +2255,6 @@ static inline void i915_gem_context_unreference(struct i915_hw_context *ctx) kref_put(&ctx->ref, i915_gem_context_free); } -struct i915_ctx_hang_stats * __must_check -i915_gem_context_get_hang_stats(struct drm_device *dev, - struct drm_file *file, - u32 id); int i915_gem_context_create_ioctl(struct drm_device *dev, void *data, struct drm_file *file); int i915_gem_context_destroy_ioctl(struct drm_device *dev, void *data, diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 42647c39ddcc..89e2f92e8335 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -2799,7 +2799,7 @@ int i915_gpu_idle(struct drm_device *dev) /* Flush everything onto the inactive list. */ for_each_ring(ring, dev_priv, i) { - ret = i915_switch_context(ring, NULL, DEFAULT_CONTEXT_ID); + ret = i915_switch_context(ring, NULL, ring->default_context); if (ret) return ret; diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c index 192a25978d39..d3a17ef78ba6 100644 --- a/drivers/gpu/drm/i915/i915_gem_context.c +++ b/drivers/gpu/drm/i915/i915_gem_context.c @@ -96,8 +96,6 @@ #define GEN6_CONTEXT_ALIGN (64<<10) #define GEN7_CONTEXT_ALIGN 4096 -static struct i915_hw_context * -i915_gem_context_get(struct drm_i915_file_private *file_priv, u32 id); static int do_switch(struct intel_ring_buffer *ring, struct i915_hw_context *to); @@ -485,20 +483,6 @@ static int context_idr_cleanup(int id, void *p, void *data) return 0; } -struct i915_ctx_hang_stats * -i915_gem_context_get_hang_stats(struct drm_device *dev, - struct drm_file *file, - u32 id) -{ - struct i915_hw_context *ctx; - - ctx = i915_gem_context_get(file->driver_priv, id); - if (ctx == NULL) - return ERR_PTR(-ENOENT); - - return &ctx->hang_stats; -} - int i915_gem_context_open(struct drm_device *dev, struct drm_file *file) { struct drm_i915_file_private *file_priv = file->driver_priv; @@ -543,9 +527,12 @@ void i915_gem_context_close(struct drm_device *dev, struct drm_file *file) mutex_unlock(&dev->struct_mutex); } -static struct i915_hw_context * +struct i915_hw_context * i915_gem_context_get(struct drm_i915_file_private *file_priv, u32 id) { + if (!HAS_HW_CONTEXTS(file_priv->dev_priv->dev)) + return file_priv->private_default_ctx; + return (struct i915_hw_context *)idr_find(&file_priv->context_idr, id); } @@ -708,20 +695,13 @@ done: */ int i915_switch_context(struct intel_ring_buffer *ring, struct drm_file *file, - int to_id) + struct i915_hw_context *to) { struct drm_i915_private *dev_priv = ring->dev->dev_private; - struct i915_hw_context *to; WARN_ON(!mutex_is_locked(&dev_priv->dev->struct_mutex)); - if (file == NULL) - to = ring->default_context; - else - to = i915_gem_context_get(file->driver_priv, to_id); - - if (to == NULL) - return -ENOENT; + BUG_ON(file && to == NULL); /* We have the fake context, but don't supports switching. */ if (!HAS_HW_CONTEXTS(ring->dev)) diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c index d608a07f4398..e78c5c0e33d8 100644 --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c @@ -884,22 +884,24 @@ validate_exec_list(struct drm_i915_gem_exec_object2 *exec, return 0; } -static int +static struct i915_hw_context * i915_gem_validate_context(struct drm_device *dev, struct drm_file *file, const u32 ctx_id) { + struct i915_hw_context *ctx = NULL; struct i915_ctx_hang_stats *hs; - hs = i915_gem_context_get_hang_stats(dev, file, ctx_id); - if (IS_ERR(hs)) - return PTR_ERR(hs); + ctx = i915_gem_context_get(file->driver_priv, ctx_id); + if (IS_ERR_OR_NULL(ctx)) + return ctx; + hs = &ctx->hang_stats; if (hs->banned) { DRM_DEBUG("Context %u tried to submit while banned\n", ctx_id); - return -EIO; + return ERR_PTR(-EIO); } - return 0; + return ctx; } static void @@ -975,14 +977,15 @@ static int i915_gem_do_execbuffer(struct drm_device *dev, void *data, struct drm_file *file, struct drm_i915_gem_execbuffer2 *args, - struct drm_i915_gem_exec_object2 *exec, - struct i915_address_space *vm) + struct drm_i915_gem_exec_object2 *exec) { drm_i915_private_t *dev_priv = dev->dev_private; struct eb_vmas *eb; struct drm_i915_gem_object *batch_obj; struct drm_clip_rect *cliprects = NULL; struct intel_ring_buffer *ring; + struct i915_hw_context *ctx; + struct i915_address_space *vm; const u32 ctx_id = i915_execbuffer2_get_context_id(*args); u32 exec_start, exec_len; u32 mask, flags; @@ -1096,11 +1099,18 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data, goto pre_mutex_err; } - ret = i915_gem_validate_context(dev, file, ctx_id); - if (ret) { + ctx = i915_gem_validate_context(dev, file, ctx_id); + if (IS_ERR_OR_NULL(ctx)) { mutex_unlock(&dev->struct_mutex); + ret = PTR_ERR(ctx); goto pre_mutex_err; - } + } + + i915_gem_context_reference(ctx); + + /* HACK until we have full PPGTT */ + /* vm = ctx->vm; */ + vm = &dev_priv->gtt.base; eb = eb_create(args); if (eb == NULL) { @@ -1160,7 +1170,7 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data, if (ret) goto err; - ret = i915_switch_context(ring, file, ctx_id); + ret = i915_switch_context(ring, file, ctx); if (ret) goto err; @@ -1215,6 +1225,8 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data, i915_gem_execbuffer_retire_commands(dev, file, ring, batch_obj); err: + /* the request owns the ref now */ + i915_gem_context_unreference(ctx); eb_destroy(eb); mutex_unlock(&dev->struct_mutex); @@ -1232,7 +1244,6 @@ int i915_gem_execbuffer(struct drm_device *dev, void *data, struct drm_file *file) { - struct drm_i915_private *dev_priv = dev->dev_private; struct drm_i915_gem_execbuffer *args = data; struct drm_i915_gem_execbuffer2 exec2; struct drm_i915_gem_exec_object *exec_list = NULL; @@ -1288,8 +1299,7 @@ i915_gem_execbuffer(struct drm_device *dev, void *data, exec2.flags = I915_EXEC_RENDER; i915_execbuffer2_set_context_id(exec2, 0); - ret = i915_gem_do_execbuffer(dev, data, file, &exec2, exec2_list, - &dev_priv->gtt.base); + ret = i915_gem_do_execbuffer(dev, data, file, &exec2, exec2_list); if (!ret) { /* Copy the new buffer offsets back to the user's exec list. */ for (i = 0; i < args->buffer_count; i++) @@ -1315,7 +1325,6 @@ int i915_gem_execbuffer2(struct drm_device *dev, void *data, struct drm_file *file) { - struct drm_i915_private *dev_priv = dev->dev_private; struct drm_i915_gem_execbuffer2 *args = data; struct drm_i915_gem_exec_object2 *exec2_list = NULL; int ret; @@ -1346,8 +1355,7 @@ i915_gem_execbuffer2(struct drm_device *dev, void *data, return -EFAULT; } - ret = i915_gem_do_execbuffer(dev, data, file, args, exec2_list, - &dev_priv->gtt.base); + ret = i915_gem_do_execbuffer(dev, data, file, args, exec2_list); if (!ret) { /* Copy the new buffer offsets back to the user's exec list. */ ret = copy_to_user(to_user_ptr(args->buffers_ptr), diff --git a/drivers/gpu/drm/i915/intel_uncore.c b/drivers/gpu/drm/i915/intel_uncore.c index feb2d6692544..e52fccecf28f 100644 --- a/drivers/gpu/drm/i915/intel_uncore.c +++ b/drivers/gpu/drm/i915/intel_uncore.c @@ -836,6 +836,7 @@ int i915_get_reset_stats_ioctl(struct drm_device *dev, struct drm_i915_private *dev_priv = dev->dev_private; struct drm_i915_reset_stats *args = data; struct i915_ctx_hang_stats *hs; + struct i915_hw_context *ctx; int ret; if (args->flags || args->pad) @@ -848,11 +849,12 @@ int i915_get_reset_stats_ioctl(struct drm_device *dev, if (ret) return ret; - hs = i915_gem_context_get_hang_stats(dev, file, args->ctx_id); - if (IS_ERR(hs)) { + ctx = i915_gem_context_get(file->driver_priv, args->ctx_id); + if (IS_ERR(ctx)) { mutex_unlock(&dev->struct_mutex); - return PTR_ERR(hs); + return PTR_ERR(ctx); } + hs = &ctx->hang_stats; if (capable(CAP_SYS_ADMIN)) args->reset_count = i915_reset_count(&dev_priv->gpu_error); -- cgit v1.2.3 From 7e0d96bc03c140cb8183955ad6f0290caa731e64 Mon Sep 17 00:00:00 2001 From: Ben Widawsky Date: Fri, 6 Dec 2013 14:11:26 -0800 Subject: drm/i915: Use multiple VMs -- the point of no return As with processes which run on the CPU, the goal of multiple VMs is to provide process isolation. Specific to GEN, there is also the ability to map more objects per process (2GB each instead of 2Gb-2k total). For the most part, all the pipes have been laid, and all we need to do is remove asserts and actually start changing address spaces with the context switch. Since prior to this we've converted the setting of the page tables to a streamed version, this is quite easy. One important thing to point out (since it'd been hotly contested) is that with this patch, every context created will have it's own address space (provided the HW can do it). v2: Disable BDW on rebase NOTE: I tried to make this commit as small as possible. I needed one place where I could "turn everything on" and that is here. It could be split into finer commits, but I didn't really see much point. Cc: Eric Anholt Cc: Daniel Vetter Signed-off-by: Ben Widawsky Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_dma.c | 3 ++ drivers/gpu/drm/i915/i915_drv.c | 3 +- drivers/gpu/drm/i915/i915_drv.h | 12 +++++- drivers/gpu/drm/i915/i915_gem.c | 22 ++++------- drivers/gpu/drm/i915/i915_gem_context.c | 60 +++++++++++++++++------------- drivers/gpu/drm/i915/i915_gem_execbuffer.c | 16 +++++--- drivers/gpu/drm/i915/i915_gem_gtt.c | 22 ++++++++--- drivers/gpu/drm/i915/i915_gpu_error.c | 5 --- include/uapi/drm/i915_drm.h | 1 + 9 files changed, 86 insertions(+), 58 deletions(-) (limited to 'drivers/gpu/drm/i915/i915_gem_execbuffer.c') diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c index 89af75a6d9a9..9fedfa04357d 100644 --- a/drivers/gpu/drm/i915/i915_dma.c +++ b/drivers/gpu/drm/i915/i915_dma.c @@ -1011,6 +1011,9 @@ static int i915_getparam(struct drm_device *dev, void *data, case I915_PARAM_HAS_EXEC_HANDLE_LUT: value = 1; break; + case I915_PARAM_HAS_FULL_PPGTT: + value = USES_FULL_PPGTT(dev); + break; default: DRM_DEBUG("Unknown parameter %d\n", param->param); return -EINVAL; diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c index 65b5c83df3bb..6cdaa78f3a63 100644 --- a/drivers/gpu/drm/i915/i915_drv.c +++ b/drivers/gpu/drm/i915/i915_drv.c @@ -116,7 +116,8 @@ MODULE_PARM_DESC(enable_hangcheck, int i915_enable_ppgtt __read_mostly = -1; module_param_named(i915_enable_ppgtt, i915_enable_ppgtt, int, 0400); MODULE_PARM_DESC(i915_enable_ppgtt, - "Enable PPGTT (default: true)"); + "Override PPGTT usage. " + "(-1=auto [default], 0=disabled, 1=aliasing, 2=full)"); int i915_enable_psr __read_mostly = 0; module_param_named(enable_psr, i915_enable_psr, int, 0600); diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index a70d9c8b5277..8fd99ac96940 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -1831,7 +1831,9 @@ struct drm_i915_file_private { #define HAS_HW_CONTEXTS(dev) (INTEL_INFO(dev)->gen >= 6) #define HAS_ALIASING_PPGTT(dev) (INTEL_INFO(dev)->gen >= 6 && !IS_VALLEYVIEW(dev)) +#define HAS_PPGTT(dev) (INTEL_INFO(dev)->gen >= 7 && !IS_VALLEYVIEW(dev) && !IS_BROADWELL(dev)) #define USES_ALIASING_PPGTT(dev) intel_enable_ppgtt(dev, false) +#define USES_FULL_PPGTT(dev) intel_enable_ppgtt(dev, true) #define HAS_OVERLAY(dev) (INTEL_INFO(dev)->has_overlay) #define OVERLAY_NEEDS_PHYSICAL(dev) (INTEL_INFO(dev)->overlay_needs_physical) @@ -2012,6 +2014,8 @@ void i915_gem_object_init(struct drm_i915_gem_object *obj, const struct drm_i915_gem_object_ops *ops); struct drm_i915_gem_object *i915_gem_alloc_object(struct drm_device *dev, size_t size); +void i915_init_vm(struct drm_i915_private *dev_priv, + struct i915_address_space *vm); void i915_gem_free_object(struct drm_gem_object *obj); void i915_gem_vma_destroy(struct i915_vma *vma); @@ -2290,7 +2294,8 @@ static inline bool intel_enable_ppgtt(struct drm_device *dev, bool full) if (i915_enable_ppgtt == 0 || !HAS_ALIASING_PPGTT(dev)) return false; - BUG_ON(full); + if (i915_enable_ppgtt == 1 && full) + return false; #ifdef CONFIG_INTEL_IOMMU /* Disable ppgtt on SNB if VT-d is on. */ @@ -2300,7 +2305,10 @@ static inline bool intel_enable_ppgtt(struct drm_device *dev, bool full) } #endif - return HAS_ALIASING_PPGTT(dev); + if (full) + return HAS_PPGTT(dev); + else + return HAS_ALIASING_PPGTT(dev); } static inline void ppgtt_release(struct kref *kref) diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index a9cabff5aa37..f3b0025998db 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -2247,7 +2247,10 @@ request_to_vm(struct drm_i915_gem_request *request) struct drm_i915_private *dev_priv = request->ring->dev->dev_private; struct i915_address_space *vm; - vm = &dev_priv->gtt.base; + if (request->ctx) + vm = request->ctx->vm; + else + vm = &dev_priv->gtt.base; return vm; } @@ -2718,9 +2721,6 @@ int i915_vma_unbind(struct i915_vma *vma) drm_i915_private_t *dev_priv = obj->base.dev->dev_private; int ret; - /* For now we only ever use 1 vma per object */ - WARN_ON(!list_is_singular(&obj->vma_list)); - if (list_empty(&vma->vma_link)) return 0; @@ -3268,17 +3268,12 @@ i915_gem_object_bind_to_vm(struct drm_i915_gem_object *obj, i915_gem_object_pin_pages(obj); - BUG_ON(!i915_is_ggtt(vm)); - vma = i915_gem_obj_lookup_or_create_vma(obj, vm); if (IS_ERR(vma)) { ret = PTR_ERR(vma); goto err_unpin; } - /* For now we only ever use 1 vma per object */ - WARN_ON(!list_is_singular(&obj->vma_list)); - search_free: /* FIXME: Some tests are failing when they receive a reloc of 0. To * prevent this, we simply don't allow the 0th offset. */ @@ -4182,9 +4177,6 @@ void i915_gem_free_object(struct drm_gem_object *gem_obj) if (obj->phys_obj) i915_gem_detach_phys_object(dev, obj); - /* NB: 0 or 1 elements */ - WARN_ON(!list_empty(&obj->vma_list) && - !list_is_singular(&obj->vma_list)); list_for_each_entry_safe(vma, next, &obj->vma_list, vma_link) { int ret; @@ -4580,9 +4572,11 @@ init_ring_lists(struct intel_ring_buffer *ring) INIT_LIST_HEAD(&ring->request_list); } -static void i915_init_vm(struct drm_i915_private *dev_priv, - struct i915_address_space *vm) +void i915_init_vm(struct drm_i915_private *dev_priv, + struct i915_address_space *vm) { + if (!i915_is_ggtt(vm)) + drm_mm_init(&vm->mm, vm->start, vm->total); vm->dev = dev_priv->dev; INIT_LIST_HEAD(&vm->active_list); INIT_LIST_HEAD(&vm->inactive_list); diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c index 165a5c7d9424..ebe0f67eac08 100644 --- a/drivers/gpu/drm/i915/i915_gem_context.c +++ b/drivers/gpu/drm/i915/i915_gem_context.c @@ -288,17 +288,15 @@ i915_gem_create_context(struct drm_device *dev, DRM_DEBUG_DRIVER("Couldn't pin %d\n", ret); goto err_destroy; } + + ctx->vm = &dev_priv->mm.aliasing_ppgtt->base; } } else if (USES_ALIASING_PPGTT(dev)) { /* For platforms which only have aliasing PPGTT, we fake the * address space and refcounting. */ - kref_get(&dev_priv->mm.aliasing_ppgtt->ref); - } - - /* TODO: Until full ppgtt... */ - if (USES_ALIASING_PPGTT(dev)) ctx->vm = &dev_priv->mm.aliasing_ppgtt->base; - else + kref_get(&dev_priv->mm.aliasing_ppgtt->ref); + } else ctx->vm = &dev_priv->gtt.base; return ctx; @@ -500,7 +498,7 @@ int i915_gem_context_open(struct drm_device *dev, struct drm_file *file) mutex_lock(&dev->struct_mutex); file_priv->private_default_ctx = - i915_gem_create_context(dev, file_priv, false); + i915_gem_create_context(dev, file_priv, USES_FULL_PPGTT(dev)); mutex_unlock(&dev->struct_mutex); if (IS_ERR(file_priv->private_default_ctx)) { @@ -587,6 +585,7 @@ static int do_switch(struct intel_ring_buffer *ring, { struct drm_i915_private *dev_priv = ring->dev->dev_private; struct i915_hw_context *from = ring->last_context; + struct i915_hw_ppgtt *ppgtt = ctx_to_ppgtt(to); u32 hw_flags = 0; int ret, i; @@ -598,17 +597,15 @@ static int do_switch(struct intel_ring_buffer *ring, if (from == to && from->last_ring == ring && !to->remap_slice) return 0; - if (ring != &dev_priv->ring[RCS]) { - if (from) - i915_gem_context_unreference(from); - goto done; + /* Trying to pin first makes error handling easier. */ + if (ring == &dev_priv->ring[RCS]) { + ret = i915_gem_obj_ggtt_pin(to->obj, + get_context_alignment(ring->dev), + false, false); + if (ret) + return ret; } - ret = i915_gem_obj_ggtt_pin(to->obj, get_context_alignment(ring->dev), - false, false); - if (ret) - return ret; - /* * Pin can switch back to the default context if we end up calling into * evict_everything - as a last ditch gtt defrag effort that also @@ -616,6 +613,18 @@ static int do_switch(struct intel_ring_buffer *ring, */ from = ring->last_context; + if (USES_FULL_PPGTT(ring->dev)) { + ret = ppgtt->switch_mm(ppgtt, ring, false); + if (ret) + goto unpin_out; + } + + if (ring != &dev_priv->ring[RCS]) { + if (from) + i915_gem_context_unreference(from); + goto done; + } + /* * Clear this page out of any CPU caches for coherent swap-in/out. Note * that thanks to write = false in this call and us not setting any gpu @@ -625,10 +634,8 @@ static int do_switch(struct intel_ring_buffer *ring, * XXX: We need a real interface to do this instead of trickery. */ ret = i915_gem_object_set_to_gtt_domain(to->obj, false); - if (ret) { - i915_gem_object_ggtt_unpin(to->obj); - return ret; - } + if (ret) + goto unpin_out; if (!to->obj->has_global_gtt_mapping) { struct i915_vma *vma = i915_gem_obj_to_vma(to->obj, @@ -640,10 +647,8 @@ static int do_switch(struct intel_ring_buffer *ring, hw_flags |= MI_RESTORE_INHIBIT; ret = mi_set_context(ring, to, hw_flags); - if (ret) { - i915_gem_object_ggtt_unpin(to->obj); - return ret; - } + if (ret) + goto unpin_out; for (i = 0; i < MAX_L3_SLICES; i++) { if (!(to->remap_slice & (1<last_ring = ring; return 0; + +unpin_out: + if (ring->id == RCS) + i915_gem_object_ggtt_unpin(to->obj); + return ret; } /** @@ -736,7 +746,7 @@ int i915_gem_context_create_ioctl(struct drm_device *dev, void *data, if (ret) return ret; - ctx = i915_gem_create_context(dev, file_priv, false); + ctx = i915_gem_create_context(dev, file_priv, USES_FULL_PPGTT(dev)); mutex_unlock(&dev->struct_mutex); if (IS_ERR(ctx)) return PTR_ERR(ctx); diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c index 8779d75bee1f..2e80f8c6d123 100644 --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c @@ -991,7 +991,7 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data, struct i915_hw_context *ctx; struct i915_address_space *vm; const u32 ctx_id = i915_execbuffer2_get_context_id(*args); - u32 exec_start, exec_len; + u32 exec_start = args->batch_start_offset, exec_len; u32 mask, flags; int ret, mode, i; bool need_relocs; @@ -1112,9 +1112,9 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data, i915_gem_context_reference(ctx); - /* HACK until we have full PPGTT */ - /* vm = ctx->vm; */ - vm = &dev_priv->gtt.base; + vm = ctx->vm; + if (!USES_FULL_PPGTT(dev)) + vm = &dev_priv->gtt.base; eb = eb_create(args); if (eb == NULL) { @@ -1170,6 +1170,11 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data, vma->bind_vma(vma, batch_obj->cache_level, GLOBAL_BIND); } + if (flags & I915_DISPATCH_SECURE) + exec_start += i915_gem_obj_ggtt_offset(batch_obj); + else + exec_start += i915_gem_obj_offset(batch_obj, vm); + ret = i915_gem_execbuffer_move_to_gpu(ring, &eb->vmas); if (ret) goto err; @@ -1199,8 +1204,7 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data, goto err; } - exec_start = i915_gem_obj_offset(batch_obj, vm) + - args->batch_start_offset; + exec_len = args->batch_len; if (cliprects) { for (i = 0; i < args->num_cliprects; i++) { diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index 88e49b19baba..4143efd9c02b 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -324,6 +324,7 @@ static void gen8_ppgtt_cleanup(struct i915_address_space *vm) container_of(vm, struct i915_hw_ppgtt, base); int i, j; + list_del(&vm->global_link); drm_mm_takedown(&vm->mm); for (i = 0; i < ppgtt->num_pd_pages ; i++) { @@ -755,6 +756,7 @@ static void gen6_ppgtt_cleanup(struct i915_address_space *vm) container_of(vm, struct i915_hw_ppgtt, base); int i; + list_del(&vm->global_link); drm_mm_takedown(&ppgtt->base.mm); drm_mm_remove_node(&ppgtt->node); @@ -901,17 +903,22 @@ int i915_gem_init_ppgtt(struct drm_device *dev, struct i915_hw_ppgtt *ppgtt) BUG(); if (!ret) { + struct drm_i915_private *dev_priv = dev->dev_private; kref_init(&ppgtt->ref); drm_mm_init(&ppgtt->base.mm, ppgtt->base.start, ppgtt->base.total); - if (INTEL_INFO(dev)->gen < 8) + i915_init_vm(dev_priv, &ppgtt->base); + if (INTEL_INFO(dev)->gen < 8) { gen6_write_pdes(ppgtt); + DRM_DEBUG("Adding PPGTT at offset %x\n", + ppgtt->pd_offset << 10); + } } return ret; } -static void __always_unused +static void ppgtt_bind_vma(struct i915_vma *vma, enum i915_cache_level cache_level, u32 flags) @@ -923,7 +930,7 @@ ppgtt_bind_vma(struct i915_vma *vma, vma->vm->insert_entries(vma->vm, vma->obj->pages, entry, cache_level); } -static void __always_unused ppgtt_unbind_vma(struct i915_vma *vma) +static void ppgtt_unbind_vma(struct i915_vma *vma) { const unsigned long entry = vma->node.start >> PAGE_SHIFT; @@ -1719,8 +1726,13 @@ static struct i915_vma *__i915_gem_vma_create(struct drm_i915_gem_object *obj, case 8: case 7: case 6: - vma->unbind_vma = ggtt_unbind_vma; - vma->bind_vma = ggtt_bind_vma; + if (i915_is_ggtt(vm)) { + vma->unbind_vma = ggtt_unbind_vma; + vma->bind_vma = ggtt_bind_vma; + } else { + vma->unbind_vma = ppgtt_unbind_vma; + vma->bind_vma = ppgtt_bind_vma; + } break; case 5: case 4: diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c index 5dede92396c8..80773eca7311 100644 --- a/drivers/gpu/drm/i915/i915_gpu_error.c +++ b/drivers/gpu/drm/i915/i915_gpu_error.c @@ -909,11 +909,6 @@ static void i915_gem_capture_buffers(struct drm_i915_private *dev_priv, list_for_each_entry(vm, &dev_priv->vm_list, global_link) cnt++; - if (WARN(cnt > 1, "Multiple VMs not yet supported\n")) - cnt = 1; - - vm = &dev_priv->gtt.base; - error->active_bo = kcalloc(cnt, sizeof(*error->active_bo), GFP_ATOMIC); error->pinned_bo = kcalloc(cnt, sizeof(*error->pinned_bo), GFP_ATOMIC); error->active_bo_count = kcalloc(cnt, sizeof(*error->active_bo_count), diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h index 52aed893710a..d5b52846ae8f 100644 --- a/include/uapi/drm/i915_drm.h +++ b/include/uapi/drm/i915_drm.h @@ -337,6 +337,7 @@ typedef struct drm_i915_irq_wait { #define I915_PARAM_HAS_EXEC_NO_RELOC 25 #define I915_PARAM_HAS_EXEC_HANDLE_LUT 26 #define I915_PARAM_HAS_WT 27 +#define I915_PARAM_HAS_FULL_PPGTT 28 typedef struct drm_i915_getparam { int param; -- cgit v1.2.3 From 7c9c4b8f5dfe224ce587a470ce8817214c92271e Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Wed, 18 Dec 2013 16:37:49 +0100 Subject: drm/i915: Reject non-default contexts on non-render again This reverts the abi-change from commit 67e3d2979be1bf42d1818b2961c671eb31e0b4d9 Author: Ben Widawsky Date: Fri Dec 6 14:11:01 2013 -0800 drm/i915: Permit contexts on all rings We don't actually need this, only the internal changes to allow contexts on all rings for the purpose of ppgtt switching are required. And I'm not sure whether this is the right thing to do given some of the hw features in the pipeline. Also, new abi needs userspace patches as a proof-of-need, which is completely lacking here. Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_gem_execbuffer.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'drivers/gpu/drm/i915/i915_gem_execbuffer.c') diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c index 2e80f8c6d123..f5a1e0c34552 100644 --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c @@ -890,11 +890,14 @@ validate_exec_list(struct drm_i915_gem_exec_object2 *exec, static struct i915_hw_context * i915_gem_validate_context(struct drm_device *dev, struct drm_file *file, - const u32 ctx_id) + struct intel_ring_buffer *ring, const u32 ctx_id) { struct i915_hw_context *ctx = NULL; struct i915_ctx_hang_stats *hs; + if (ring->id != RCS && ctx_id != DEFAULT_CONTEXT_ID) + return ERR_PTR(-EINVAL); + ctx = i915_gem_context_get(file->driver_priv, ctx_id); if (IS_ERR_OR_NULL(ctx)) return ctx; @@ -1103,7 +1106,7 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data, goto pre_mutex_err; } - ctx = i915_gem_validate_context(dev, file, ctx_id); + ctx = i915_gem_validate_context(dev, file, ring, ctx_id); if (IS_ERR_OR_NULL(ctx)) { mutex_unlock(&dev->struct_mutex); ret = PTR_ERR(ctx); -- cgit v1.2.3 From 2c9f8d56a1ccc9064180a95cf22531c4b37154be Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Wed, 18 Dec 2013 17:38:53 +0100 Subject: drm/i915: Reject NEEDS_GTT relocations with full ppgtt Doesn't make sense. Spotted while fixing an issue Chris noticed in the same area. Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_gem_execbuffer.c | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'drivers/gpu/drm/i915/i915_gem_execbuffer.c') diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c index f5a1e0c34552..277485505bca 100644 --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c @@ -128,6 +128,12 @@ eb_lookup_vmas(struct eb_vmas *eb, struct i915_vma *vma; struct i915_address_space *bind_vm = vm; + if (exec[i].flags & EXEC_OBJECT_NEEDS_GTT && + USES_FULL_PPGTT(vm->dev)) { + ret = -EINVAL; + goto out; + } + /* If we have secure dispatch, or the userspace assures us that * they know what they're doing, use the GGTT VM. */ -- cgit v1.2.3 From a7c1d426ef335ccfb6bd567a3f616fa232418fa2 Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Wed, 18 Dec 2013 17:46:18 +0100 Subject: drm/i915: Don't check for NEEDS_GTT when deciding the address space This means something different and is only relevant for gen6 and the reason why we cant use anything else than aliasing ppgtt there. Note that the currently implemented logic for secure batches is broken: Userspace wants the buffer both in ppgtt (for self-referencing relocations) and in ggtt (for priveledge operations). This is the same issue the command parser is also facing. Unfortunately our coverage for corner-cases of self-referencing batches is spotty. Note that this will break vsync'ed Xv and DRI2 copies. Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_gem_execbuffer.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'drivers/gpu/drm/i915/i915_gem_execbuffer.c') diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c index 277485505bca..a36511db1416 100644 --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c @@ -137,8 +137,7 @@ eb_lookup_vmas(struct eb_vmas *eb, /* If we have secure dispatch, or the userspace assures us that * they know what they're doing, use the GGTT VM. */ - if (exec[i].flags & EXEC_OBJECT_NEEDS_GTT || - ((args->flags & I915_EXEC_SECURE) && + if (((args->flags & I915_EXEC_SECURE) && (i == (args->buffer_count - 1)))) bind_vm = &dev_priv->gtt.base; -- cgit v1.2.3 From 72ad5c45f0c9036cbc6d23aeff4e8beb6d8b5e33 Mon Sep 17 00:00:00 2001 From: Ben Widawsky Date: Thu, 2 Jan 2014 19:50:27 -1000 Subject: drm/i915/ppgtt: Fix ioctl errno for "no such context" Without this fix the ioctls silently succeeded (but actually did nothing). It makes all the code which calls into this function way too confusing. v2: Fix destroy IOCTL as well v3: Clarify the other two callers of i915_gem_context_get() to never check for NULL. (Mika) Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=72903 Signed-off-by: Ben Widawsky Testcase: igt/gem_ctx_exec/basic [danvet: Fix up the commit message and actually bother to mention the testcase this fixes.] Reviewed-by: Mika Kuoppala Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_gem_context.c | 12 +++++++++--- drivers/gpu/drm/i915/i915_gem_execbuffer.c | 4 ++-- 2 files changed, 11 insertions(+), 5 deletions(-) (limited to 'drivers/gpu/drm/i915/i915_gem_execbuffer.c') diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c index ebe0f67eac08..44dddc00ca72 100644 --- a/drivers/gpu/drm/i915/i915_gem_context.c +++ b/drivers/gpu/drm/i915/i915_gem_context.c @@ -526,10 +526,16 @@ void i915_gem_context_close(struct drm_device *dev, struct drm_file *file) struct i915_hw_context * i915_gem_context_get(struct drm_i915_file_private *file_priv, u32 id) { + struct i915_hw_context *ctx; + if (!HAS_HW_CONTEXTS(file_priv->dev_priv->dev)) return file_priv->private_default_ctx; - return (struct i915_hw_context *)idr_find(&file_priv->context_idr, id); + ctx = (struct i915_hw_context *)idr_find(&file_priv->context_idr, id); + if (!ctx) + return ERR_PTR(-ENOENT); + + return ctx; } static inline int @@ -776,9 +782,9 @@ int i915_gem_context_destroy_ioctl(struct drm_device *dev, void *data, return ret; ctx = i915_gem_context_get(file_priv, args->ctx_id); - if (!ctx) { + if (IS_ERR(ctx)) { mutex_unlock(&dev->struct_mutex); - return -ENOENT; + return PTR_ERR(ctx); } idr_remove(&ctx->file_priv->context_idr, ctx->id); diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c index a36511db1416..0843e0ec5f1b 100644 --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c @@ -904,7 +904,7 @@ i915_gem_validate_context(struct drm_device *dev, struct drm_file *file, return ERR_PTR(-EINVAL); ctx = i915_gem_context_get(file->driver_priv, ctx_id); - if (IS_ERR_OR_NULL(ctx)) + if (IS_ERR(ctx)) return ctx; hs = &ctx->hang_stats; @@ -1112,7 +1112,7 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data, } ctx = i915_gem_validate_context(dev, file, ring, ctx_id); - if (IS_ERR_OR_NULL(ctx)) { + if (IS_ERR(ctx)) { mutex_unlock(&dev->struct_mutex); ret = PTR_ERR(ctx); goto pre_mutex_err; -- cgit v1.2.3