Diffstat (limited to 'drivers/gpu/drm/i915/i915_gem_execbuffer.c')
-rw-r--r-- | drivers/gpu/drm/i915/i915_gem_execbuffer.c | 640 |
1 file changed, 358 insertions(+), 282 deletions(-)
diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index e9503f6d1100..4c2016237d61 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -32,6 +32,7 @@
 #include <linux/uaccess.h>
 
 #include <drm/drmP.h>
+#include <drm/drm_syncobj.h>
 #include <drm/i915_drm.h>
 
 #include "i915_drv.h"
@@ -191,6 +192,8 @@ struct i915_execbuffer {
 	struct drm_file *file; /** per-file lookup tables and limits */
 	struct drm_i915_gem_execbuffer2 *args; /** ioctl parameters */
 	struct drm_i915_gem_exec_object2 *exec; /** ioctl execobj[] */
+	struct i915_vma **vma;
+	unsigned int *flags;
 
 	struct intel_engine_cs *engine; /** engine to queue the request to */
 	struct i915_gem_context *ctx; /** context for building the request */
@@ -244,13 +247,7 @@ struct i915_execbuffer {
 	struct hlist_head *buckets; /** ht for relocation handles */
 };
 
-/*
- * As an alternative to creating a hashtable of handle-to-vma for a batch,
- * we used the last available reserved field in the execobject[] and stash
- * a link from the execobj to its vma.
- */
-#define __exec_to_vma(ee) (ee)->rsvd2
-#define exec_to_vma(ee) u64_to_ptr(struct i915_vma, __exec_to_vma(ee))
+#define exec_entry(EB, VMA) (&(EB)->exec[(VMA)->exec_flags - (EB)->flags])
 
 /*
  * Used to convert any address to canonical form.
@@ -319,85 +316,82 @@ static int eb_create(struct i915_execbuffer *eb)
 
 static bool
 eb_vma_misplaced(const struct drm_i915_gem_exec_object2 *entry,
-		 const struct i915_vma *vma)
+		 const struct i915_vma *vma,
+		 unsigned int flags)
 {
-	if (!(entry->flags & __EXEC_OBJECT_HAS_PIN))
-		return true;
-
 	if (vma->node.size < entry->pad_to_size)
 		return true;
 
 	if (entry->alignment && !IS_ALIGNED(vma->node.start, entry->alignment))
 		return true;
 
-	if (entry->flags & EXEC_OBJECT_PINNED &&
+	if (flags & EXEC_OBJECT_PINNED &&
 	    vma->node.start != entry->offset)
 		return true;
 
-	if (entry->flags & __EXEC_OBJECT_NEEDS_BIAS &&
+	if (flags & __EXEC_OBJECT_NEEDS_BIAS &&
 	    vma->node.start < BATCH_OFFSET_BIAS)
 		return true;
 
-	if (!(entry->flags & EXEC_OBJECT_SUPPORTS_48B_ADDRESS) &&
+	if (!(flags & EXEC_OBJECT_SUPPORTS_48B_ADDRESS) &&
 	    (vma->node.start + vma->node.size - 1) >> 32)
 		return true;
 
 	return false;
}
 
-static inline void
+static inline bool
 eb_pin_vma(struct i915_execbuffer *eb,
-	   struct drm_i915_gem_exec_object2 *entry,
+	   const struct drm_i915_gem_exec_object2 *entry,
 	   struct i915_vma *vma)
 {
-	u64 flags;
+	unsigned int exec_flags = *vma->exec_flags;
+	u64 pin_flags;
 
 	if (vma->node.size)
-		flags = vma->node.start;
+		pin_flags = vma->node.start;
 	else
-		flags = entry->offset & PIN_OFFSET_MASK;
+		pin_flags = entry->offset & PIN_OFFSET_MASK;
 
-	flags |= PIN_USER | PIN_NOEVICT | PIN_OFFSET_FIXED;
-	if (unlikely(entry->flags & EXEC_OBJECT_NEEDS_GTT))
-		flags |= PIN_GLOBAL;
+	pin_flags |= PIN_USER | PIN_NOEVICT | PIN_OFFSET_FIXED;
+	if (unlikely(exec_flags & EXEC_OBJECT_NEEDS_GTT))
+		pin_flags |= PIN_GLOBAL;
 
-	if (unlikely(i915_vma_pin(vma, 0, 0, flags)))
-		return;
+	if (unlikely(i915_vma_pin(vma, 0, 0, pin_flags)))
+		return false;
 
-	if (unlikely(entry->flags & EXEC_OBJECT_NEEDS_FENCE)) {
+	if (unlikely(exec_flags & EXEC_OBJECT_NEEDS_FENCE)) {
 		if (unlikely(i915_vma_get_fence(vma))) {
 			i915_vma_unpin(vma);
-			return;
+			return false;
 		}
 
 		if (i915_vma_pin_fence(vma))
-			entry->flags |= __EXEC_OBJECT_HAS_FENCE;
+			exec_flags |= __EXEC_OBJECT_HAS_FENCE;
 	}
 
-	entry->flags |= __EXEC_OBJECT_HAS_PIN;
+	*vma->exec_flags = exec_flags | __EXEC_OBJECT_HAS_PIN;
+	return !eb_vma_misplaced(entry, vma, exec_flags);
 }
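Note: with the execobject array now shadowed by two parallel arrays (eb->vma[] and eb->flags[]), the old rsvd2 back-pointer trick is gone; exec_entry() instead recovers the array index by pointer subtraction, since each vma stores a pointer into eb->flags[]. A standalone sketch of that back-mapping, using simplified stand-in types rather than the real kernel structs:

#include <assert.h>
#include <stdio.h>

struct exec_object { unsigned long long offset; unsigned int flags; };
struct vma { unsigned int *exec_flags; }; /* points into eb.flags[] */

struct eb {
	struct exec_object exec[4];
	struct vma *vma[4];
	unsigned int flags[4];
};

/* same shape as the macro introduced by this patch */
#define exec_entry(EB, VMA) (&(EB)->exec[(VMA)->exec_flags - (EB)->flags])

int main(void)
{
	struct eb eb = {0};
	struct vma v;

	eb.vma[2] = &v;
	eb.flags[2] = 0x2;		/* some EXEC_OBJECT_* bit */
	v.exec_flags = &eb.flags[2];	/* stashed at eb_add_vma() time */

	/* pointer subtraction recovers index 2, hence the right entry */
	assert(exec_entry(&eb, &v) == &eb.exec[2]);
	printf("entry index = %td\n", v.exec_flags - eb.flags);
	return 0;
}

Keeping the hot per-object state in a packed unsigned int array, rather than strewn across the large execobject structs, is the struct-of-arrays payoff: the reservation loops above touch only eb->flags[].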
-static inline void
-__eb_unreserve_vma(struct i915_vma *vma,
-		   const struct drm_i915_gem_exec_object2 *entry)
+static inline void __eb_unreserve_vma(struct i915_vma *vma, unsigned int flags)
 {
-	GEM_BUG_ON(!(entry->flags & __EXEC_OBJECT_HAS_PIN));
+	GEM_BUG_ON(!(flags & __EXEC_OBJECT_HAS_PIN));
 
-	if (unlikely(entry->flags & __EXEC_OBJECT_HAS_FENCE))
+	if (unlikely(flags & __EXEC_OBJECT_HAS_FENCE))
 		i915_vma_unpin_fence(vma);
 
 	__i915_vma_unpin(vma);
 }
 
 static inline void
-eb_unreserve_vma(struct i915_vma *vma,
-		 struct drm_i915_gem_exec_object2 *entry)
+eb_unreserve_vma(struct i915_vma *vma, unsigned int *flags)
 {
-	if (!(entry->flags & __EXEC_OBJECT_HAS_PIN))
+	if (!(*flags & __EXEC_OBJECT_HAS_PIN))
 		return;
 
-	__eb_unreserve_vma(vma, entry);
-	entry->flags &= ~__EXEC_OBJECT_RESERVED;
+	__eb_unreserve_vma(vma, *flags);
+	*flags &= ~__EXEC_OBJECT_RESERVED;
 }
 
 static int
@@ -427,7 +421,7 @@ eb_validate_vma(struct i915_execbuffer *eb,
 		entry->pad_to_size = 0;
 	}
 
-	if (unlikely(vma->exec_entry)) {
+	if (unlikely(vma->exec_flags)) {
 		DRM_DEBUG("Object [handle %d, index %d] appears more than once in object list\n",
 			  entry->handle, (int)(entry - eb->exec));
 		return -EINVAL;
@@ -440,14 +434,25 @@ eb_validate_vma(struct i915_execbuffer *eb,
 	 */
 	entry->offset = gen8_noncanonical_addr(entry->offset);
 
+	if (!eb->reloc_cache.has_fence) {
+		entry->flags &= ~EXEC_OBJECT_NEEDS_FENCE;
+	} else {
+		if ((entry->flags & EXEC_OBJECT_NEEDS_FENCE ||
+		     eb->reloc_cache.needs_unfenced) &&
+		    i915_gem_object_is_tiled(vma->obj))
+			entry->flags |= EXEC_OBJECT_NEEDS_GTT | __EXEC_OBJECT_NEEDS_MAP;
+	}
+
+	if (!(entry->flags & EXEC_OBJECT_PINNED))
+		entry->flags |= eb->context_flags;
+
 	return 0;
 }
 
 static int
-eb_add_vma(struct i915_execbuffer *eb,
-	   struct drm_i915_gem_exec_object2 *entry,
-	   struct i915_vma *vma)
+eb_add_vma(struct i915_execbuffer *eb, unsigned int i, struct i915_vma *vma)
 {
+	struct drm_i915_gem_exec_object2 *entry = &eb->exec[i];
 	int err;
 
 	GEM_BUG_ON(i915_vma_is_closed(vma));
@@ -468,40 +473,28 @@ eb_add_vma(struct i915_execbuffer *eb,
 	if (entry->relocation_count)
 		list_add_tail(&vma->reloc_link, &eb->relocs);
 
-	if (!eb->reloc_cache.has_fence) {
-		entry->flags &= ~EXEC_OBJECT_NEEDS_FENCE;
-	} else {
-		if ((entry->flags & EXEC_OBJECT_NEEDS_FENCE ||
-		     eb->reloc_cache.needs_unfenced) &&
-		    i915_gem_object_is_tiled(vma->obj))
-			entry->flags |= EXEC_OBJECT_NEEDS_GTT | __EXEC_OBJECT_NEEDS_MAP;
-	}
-
-	if (!(entry->flags & EXEC_OBJECT_PINNED))
-		entry->flags |= eb->context_flags;
-
 	/*
 	 * Stash a pointer from the vma to execobj, so we can query its flags,
 	 * size, alignment etc as provided by the user. Also we stash a pointer
 	 * to the vma inside the execobj so that we can use a direct lookup
 	 * to find the right target VMA when doing relocations.
 	 */
-	vma->exec_entry = entry;
-	__exec_to_vma(entry) = (uintptr_t)vma;
+	eb->vma[i] = vma;
+	eb->flags[i] = entry->flags;
+	vma->exec_flags = &eb->flags[i];
 
 	err = 0;
-	eb_pin_vma(eb, entry, vma);
-	if (eb_vma_misplaced(entry, vma)) {
-		eb_unreserve_vma(vma, entry);
-
-		list_add_tail(&vma->exec_link, &eb->unbound);
-		if (drm_mm_node_allocated(&vma->node))
-			err = i915_vma_unbind(vma);
-	} else {
+	if (eb_pin_vma(eb, entry, vma)) {
 		if (entry->offset != vma->node.start) {
 			entry->offset = vma->node.start | UPDATE;
 			eb->args->flags |= __EXEC_HAS_RELOC;
 		}
+	} else {
+		eb_unreserve_vma(vma, vma->exec_flags);
+
+		list_add_tail(&vma->exec_link, &eb->unbound);
+		if (drm_mm_node_allocated(&vma->node))
+			err = i915_vma_unbind(vma);
 	}
 	return err;
 }
 
@@ -526,32 +519,35 @@ static inline int use_cpu_reloc(const struct reloc_cache *cache,
 
 static int eb_reserve_vma(const struct i915_execbuffer *eb,
 			  struct i915_vma *vma)
 {
-	struct drm_i915_gem_exec_object2 *entry = vma->exec_entry;
-	u64 flags;
+	struct drm_i915_gem_exec_object2 *entry = exec_entry(eb, vma);
+	unsigned int exec_flags = *vma->exec_flags;
+	u64 pin_flags;
 	int err;
 
-	flags = PIN_USER | PIN_NONBLOCK;
-	if (entry->flags & EXEC_OBJECT_NEEDS_GTT)
-		flags |= PIN_GLOBAL;
+	pin_flags = PIN_USER | PIN_NONBLOCK;
+	if (exec_flags & EXEC_OBJECT_NEEDS_GTT)
+		pin_flags |= PIN_GLOBAL;
 
 	/*
 	 * Wa32bitGeneralStateOffset & Wa32bitInstructionBaseOffset,
 	 * limit address to the first 4GBs for unflagged objects.
 	 */
-	if (!(entry->flags & EXEC_OBJECT_SUPPORTS_48B_ADDRESS))
-		flags |= PIN_ZONE_4G;
+	if (!(exec_flags & EXEC_OBJECT_SUPPORTS_48B_ADDRESS))
+		pin_flags |= PIN_ZONE_4G;
 
-	if (entry->flags & __EXEC_OBJECT_NEEDS_MAP)
-		flags |= PIN_MAPPABLE;
+	if (exec_flags & __EXEC_OBJECT_NEEDS_MAP)
+		pin_flags |= PIN_MAPPABLE;
 
-	if (entry->flags & EXEC_OBJECT_PINNED) {
-		flags |= entry->offset | PIN_OFFSET_FIXED;
-		flags &= ~PIN_NONBLOCK; /* force overlapping PINNED checks */
-	} else if (entry->flags & __EXEC_OBJECT_NEEDS_BIAS) {
-		flags |= BATCH_OFFSET_BIAS | PIN_OFFSET_BIAS;
+	if (exec_flags & EXEC_OBJECT_PINNED) {
+		pin_flags |= entry->offset | PIN_OFFSET_FIXED;
+		pin_flags &= ~PIN_NONBLOCK; /* force overlapping checks */
+	} else if (exec_flags & __EXEC_OBJECT_NEEDS_BIAS) {
+		pin_flags |= BATCH_OFFSET_BIAS | PIN_OFFSET_BIAS;
 	}
 
-	err = i915_vma_pin(vma, entry->pad_to_size, entry->alignment, flags);
+	err = i915_vma_pin(vma,
+			   entry->pad_to_size, entry->alignment,
+			   pin_flags);
 	if (err)
 		return err;
 
@@ -560,7 +556,7 @@ static int eb_reserve_vma(const struct i915_execbuffer *eb,
 		eb->args->flags |= __EXEC_HAS_RELOC;
 	}
 
-	if (unlikely(entry->flags & EXEC_OBJECT_NEEDS_FENCE)) {
+	if (unlikely(exec_flags & EXEC_OBJECT_NEEDS_FENCE)) {
 		err = i915_vma_get_fence(vma);
 		if (unlikely(err)) {
 			i915_vma_unpin(vma);
@@ -568,11 +564,11 @@ static int eb_reserve_vma(const struct i915_execbuffer *eb,
 		}
 
 		if (i915_vma_pin_fence(vma))
-			entry->flags |= __EXEC_OBJECT_HAS_FENCE;
+			exec_flags |= __EXEC_OBJECT_HAS_FENCE;
 	}
 
-	entry->flags |= __EXEC_OBJECT_HAS_PIN;
-	GEM_BUG_ON(eb_vma_misplaced(entry, vma));
+	*vma->exec_flags = exec_flags | __EXEC_OBJECT_HAS_PIN;
+	GEM_BUG_ON(eb_vma_misplaced(entry, vma, exec_flags));
 
 	return 0;
 }
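Note: both eb_pin_vma() and eb_reserve_vma() fold a page-aligned target address and the PIN_* control bits into a single u64 for i915_vma_pin(): because the offset is page aligned, its low bits are free to carry flags, and PIN_OFFSET_MASK splits them apart again. A minimal userspace illustration of the idiom (the flag bit positions here are illustrative, not the kernel's actual values):

#include <assert.h>
#include <stdint.h>

#define PAGE_SIZE        4096ull
#define PIN_OFFSET_MASK  (-(uint64_t)PAGE_SIZE)  /* high bits = offset */
#define PIN_OFFSET_FIXED (1ull << 0)             /* illustrative bit */
#define PIN_GLOBAL       (1ull << 1)             /* illustrative bit */

int main(void)
{
	uint64_t offset = 0x12345000ull;	/* page-aligned GTT address */
	uint64_t pin_flags = offset | PIN_OFFSET_FIXED | PIN_GLOBAL;

	/* the callee can split the two halves back apart losslessly */
	assert((pin_flags & PIN_OFFSET_MASK) == offset);
	assert((pin_flags & ~PIN_OFFSET_MASK) ==
	       (PIN_OFFSET_FIXED | PIN_GLOBAL));
	return 0;
}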
@@ -614,18 +610,18 @@ static int eb_reserve(struct i915_execbuffer *eb)
 		INIT_LIST_HEAD(&eb->unbound);
 		INIT_LIST_HEAD(&last);
 		for (i = 0; i < count; i++) {
-			struct drm_i915_gem_exec_object2 *entry = &eb->exec[i];
+			unsigned int flags = eb->flags[i];
+			struct i915_vma *vma = eb->vma[i];
 
-			if (entry->flags & EXEC_OBJECT_PINNED &&
-			    entry->flags & __EXEC_OBJECT_HAS_PIN)
+			if (flags & EXEC_OBJECT_PINNED &&
+			    flags & __EXEC_OBJECT_HAS_PIN)
 				continue;
 
-			vma = exec_to_vma(entry);
-			eb_unreserve_vma(vma, entry);
+			eb_unreserve_vma(vma, &eb->flags[i]);
 
-			if (entry->flags & EXEC_OBJECT_PINNED)
+			if (flags & EXEC_OBJECT_PINNED)
 				list_add(&vma->exec_link, &eb->unbound);
-			else if (entry->flags & __EXEC_OBJECT_NEEDS_MAP)
+			else if (flags & __EXEC_OBJECT_NEEDS_MAP)
 				list_add_tail(&vma->exec_link, &eb->unbound);
 			else
 				list_add_tail(&vma->exec_link, &last);
@@ -649,19 +645,6 @@ static int eb_reserve(struct i915_execbuffer *eb)
 	} while (1);
 }
 
-static inline struct hlist_head *
-ht_head(const struct i915_gem_context_vma_lut *lut, u32 handle)
-{
-	return &lut->ht[hash_32(handle, lut->ht_bits)];
-}
-
-static inline bool
-ht_needs_resize(const struct i915_gem_context_vma_lut *lut)
-{
-	return (4*lut->ht_count > 3*lut->ht_size ||
-		4*lut->ht_count + 1 < lut->ht_size);
-}
-
 static unsigned int eb_batch_index(const struct i915_execbuffer *eb)
 {
 	if (eb->args->flags & I915_EXEC_BATCH_FIRST)
@@ -675,16 +658,10 @@ static int eb_select_context(struct i915_execbuffer *eb)
 	struct i915_gem_context *ctx;
 
 	ctx = i915_gem_context_lookup(eb->file->driver_priv, eb->args->rsvd1);
-	if (unlikely(IS_ERR(ctx)))
-		return PTR_ERR(ctx);
-
-	if (unlikely(i915_gem_context_is_banned(ctx))) {
-		DRM_DEBUG("Context %u tried to submit while banned\n",
-			  ctx->user_handle);
-		return -EIO;
-	}
+	if (unlikely(!ctx))
+		return -ENOENT;
 
-	eb->ctx = i915_gem_context_get(ctx);
+	eb->ctx = ctx;
 	eb->vm = ctx->ppgtt ? &ctx->ppgtt->base : &eb->i915->ggtt.base;
 
 	eb->context_flags = 0;
@@ -696,132 +673,74 @@ static int eb_select_context(struct i915_execbuffer *eb)
 
 static int eb_lookup_vmas(struct i915_execbuffer *eb)
 {
-#define INTERMEDIATE BIT(0)
-	const unsigned int count = eb->buffer_count;
-	struct i915_gem_context_vma_lut *lut = &eb->ctx->vma_lut;
-	struct i915_vma *vma;
-	struct idr *idr;
+	struct radix_tree_root *handles_vma = &eb->ctx->handles_vma;
+	struct drm_i915_gem_object *uninitialized_var(obj);
 	unsigned int i;
-	int slow_pass = -1;
 	int err;
 
-	INIT_LIST_HEAD(&eb->relocs);
-	INIT_LIST_HEAD(&eb->unbound);
-
-	if (unlikely(lut->ht_size & I915_CTX_RESIZE_IN_PROGRESS))
-		flush_work(&lut->resize);
-	GEM_BUG_ON(lut->ht_size & I915_CTX_RESIZE_IN_PROGRESS);
-
-	for (i = 0; i < count; i++) {
-		__exec_to_vma(&eb->exec[i]) = 0;
-
-		hlist_for_each_entry(vma,
-				     ht_head(lut, eb->exec[i].handle),
-				     ctx_node) {
-			if (vma->ctx_handle != eb->exec[i].handle)
-				continue;
-
-			err = eb_add_vma(eb, &eb->exec[i], vma);
-			if (unlikely(err))
-				return err;
-
-			goto next_vma;
-		}
+	if (unlikely(i915_gem_context_is_closed(eb->ctx)))
+		return -ENOENT;
 
-		if (slow_pass < 0)
-			slow_pass = i;
-next_vma: ;
-	}
+	if (unlikely(i915_gem_context_is_banned(eb->ctx)))
+		return -EIO;
 
-	if (slow_pass < 0)
-		goto out;
+	INIT_LIST_HEAD(&eb->relocs);
+	INIT_LIST_HEAD(&eb->unbound);
 
-	spin_lock(&eb->file->table_lock);
-	/*
-	 * Grab a reference to the object and release the lock so we can lookup
-	 * or create the VMA without using GFP_ATOMIC
-	 */
-	idr = &eb->file->object_idr;
-	for (i = slow_pass; i < count; i++) {
-		struct drm_i915_gem_object *obj;
+	for (i = 0; i < eb->buffer_count; i++) {
+		u32 handle = eb->exec[i].handle;
+		struct i915_lut_handle *lut;
+		struct i915_vma *vma;
 
-		if (__exec_to_vma(&eb->exec[i]))
-			continue;
+		vma = radix_tree_lookup(handles_vma, handle);
+		if (likely(vma))
+			goto add_vma;
 
-		obj = to_intel_bo(idr_find(idr, eb->exec[i].handle));
+		obj = i915_gem_object_lookup(eb->file, handle);
 		if (unlikely(!obj)) {
-			spin_unlock(&eb->file->table_lock);
-			DRM_DEBUG("Invalid object handle %d at index %d\n",
-				  eb->exec[i].handle, i);
 			err = -ENOENT;
-			goto err;
+			goto err_vma;
 		}
 
-		__exec_to_vma(&eb->exec[i]) = INTERMEDIATE | (uintptr_t)obj;
-	}
-	spin_unlock(&eb->file->table_lock);
-
-	for (i = slow_pass; i < count; i++) {
-		struct drm_i915_gem_object *obj;
-
-		if (!(__exec_to_vma(&eb->exec[i]) & INTERMEDIATE))
-			continue;
-
-		/*
-		 * NOTE: We can leak any vmas created here when something fails
-		 * later on. But that's no issue since vma_unbind can deal with
-		 * vmas which are not actually bound. And since only
-		 * lookup_or_create exists as an interface to get at the vma
-		 * from the (obj, vm) we don't run the risk of creating
-		 * duplicated vmas for the same vm.
-		 */
-		obj = u64_to_ptr(typeof(*obj),
-				 __exec_to_vma(&eb->exec[i]) & ~INTERMEDIATE);
 		vma = i915_vma_instance(obj, eb->vm, NULL);
 		if (unlikely(IS_ERR(vma))) {
-			DRM_DEBUG("Failed to lookup VMA\n");
 			err = PTR_ERR(vma);
-			goto err;
+			goto err_obj;
 		}
 
-		/* First come, first served */
-		if (!vma->ctx) {
-			vma->ctx = eb->ctx;
-			vma->ctx_handle = eb->exec[i].handle;
-			hlist_add_head(&vma->ctx_node,
-				       ht_head(lut, eb->exec[i].handle));
-			lut->ht_count++;
-			lut->ht_size |= I915_CTX_RESIZE_IN_PROGRESS;
-			if (i915_vma_is_ggtt(vma)) {
-				GEM_BUG_ON(obj->vma_hashed);
-				obj->vma_hashed = vma;
-			}
+		lut = kmem_cache_alloc(eb->i915->luts, GFP_KERNEL);
+		if (unlikely(!lut)) {
+			err = -ENOMEM;
+			goto err_obj;
+		}
 
-			i915_vma_get(vma);
+		err = radix_tree_insert(handles_vma, handle, vma);
+		if (unlikely(err)) {
+			kfree(lut);
+			goto err_obj;
 		}
 
-		err = eb_add_vma(eb, &eb->exec[i], vma);
-		if (unlikely(err))
-			goto err;
+		list_add(&lut->obj_link, &obj->lut_list);
+		list_add(&lut->ctx_link, &eb->ctx->handles_list);
+		lut->ctx = eb->ctx;
+		lut->handle = handle;
 
-		/* Only after we validated the user didn't use our bits */
-		if (vma->ctx != eb->ctx) {
-			i915_vma_get(vma);
-			eb->exec[i].flags |= __EXEC_OBJECT_HAS_REF;
-		}
-	}
+		/* transfer ref to ctx */
+		obj = NULL;
 
-	if (lut->ht_size & I915_CTX_RESIZE_IN_PROGRESS) {
-		if (ht_needs_resize(lut))
-			queue_work(system_highpri_wq, &lut->resize);
-		else
-			lut->ht_size &= ~I915_CTX_RESIZE_IN_PROGRESS;
+add_vma:
+		err = eb_add_vma(eb, i, vma);
+		if (unlikely(err))
+			goto err_obj;
+
+		GEM_BUG_ON(vma != eb->vma[i]);
+		GEM_BUG_ON(vma->exec_flags != &eb->flags[i]);
 	}
 
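Note: the two-pass idr walk is replaced by a per-context radix tree of handle→vma, with an i915_lut_handle recording the binding on both the object's and the context's lists. The slow path uses the "transfer ref to ctx" pattern: once the cache owns the object reference, the local pointer is cleared so the shared error path cannot drop a reference it no longer owns. A toy userspace sketch of that pattern (an array stands in for the radix tree; the helpers, error handling and the post-lookup failure stand-in are local simplifications):

#include <errno.h>
#include <stdio.h>
#include <stdlib.h>

struct obj { int refcount; };

static struct obj *obj_get(void)	/* new object with one reference */
{
	struct obj *o = calloc(1, sizeof(*o));
	if (o)
		o->refcount = 1;
	return o;
}

static void obj_put(struct obj *o)	/* tolerates NULL */
{
	if (o && --o->refcount == 0)
		free(o);
}

#define CACHE_SIZE 64
static struct obj *cache[CACHE_SIZE];	/* toy stand-in for the radix tree */

static struct obj *lookup_or_create(unsigned int handle, int *err)
{
	struct obj *obj = NULL;		/* local reference, if any */
	struct obj *entry;

	entry = cache[handle % CACHE_SIZE];
	if (entry)
		goto add;		/* fast path: hit, no new reference */

	obj = obj_get();		/* slow path: we hold a local ref */
	if (!obj) {
		*err = -ENOMEM;
		goto err_obj;
	}

	cache[handle % CACHE_SIZE] = entry = obj; /* cache owns the ref now... */
	obj = NULL;				  /* ...so forget our copy */

add:
	if (entry->refcount < 1) {	/* stand-in for eb_add_vma() failing */
		*err = -EINVAL;
		goto err_obj;
	}
	return entry;

err_obj:
	obj_put(obj);	/* drops a reference only if ownership never moved */
	return NULL;
}

int main(void)
{
	int err = 0;
	printf("first:  %p\n", (void *)lookup_or_create(3, &err));
	printf("second: %p\n", (void *)lookup_or_create(3, &err));
	return 0;
}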
-out:
 	/* take note of the batch buffer before we might reorder the lists */
 	i = eb_batch_index(eb);
-	eb->batch = exec_to_vma(&eb->exec[i]);
+	eb->batch = eb->vma[i];
+	GEM_BUG_ON(eb->batch->exec_flags != &eb->flags[i]);
 
 	/*
 	 * SNA is doing fancy tricks with compressing batch buffers, which leads
@@ -832,22 +751,20 @@ out:
 	 * Note that actual hangs have only been observed on gen7, but for
 	 * paranoia do it everywhere.
 	 */
-	if (!(eb->exec[i].flags & EXEC_OBJECT_PINNED))
-		eb->exec[i].flags |= __EXEC_OBJECT_NEEDS_BIAS;
+	if (!(eb->flags[i] & EXEC_OBJECT_PINNED))
+		eb->flags[i] |= __EXEC_OBJECT_NEEDS_BIAS;
 	if (eb->reloc_cache.has_fence)
-		eb->exec[i].flags |= EXEC_OBJECT_NEEDS_FENCE;
+		eb->flags[i] |= EXEC_OBJECT_NEEDS_FENCE;
 
 	eb->args->flags |= __EXEC_VALIDATED;
 	return eb_reserve(eb);
 
-err:
-	for (i = slow_pass; i < count; i++) {
-		if (__exec_to_vma(&eb->exec[i]) & INTERMEDIATE)
-			__exec_to_vma(&eb->exec[i]) = 0;
-	}
-	lut->ht_size &= ~I915_CTX_RESIZE_IN_PROGRESS;
+err_obj:
+	if (obj)
+		i915_gem_object_put(obj);
+err_vma:
+	eb->vma[i] = NULL;
 	return err;
-#undef INTERMEDIATE
 }
 
 static struct i915_vma *
@@ -856,7 +773,7 @@ eb_get_vma(const struct i915_execbuffer *eb, unsigned long handle)
 	if (eb->lut_size < 0) {
 		if (handle >= -eb->lut_size)
 			return NULL;
-		return exec_to_vma(&eb->exec[handle]);
+		return eb->vma[handle];
 	} else {
 		struct hlist_head *head;
 		struct i915_vma *vma;
@@ -876,24 +793,21 @@ static void eb_release_vmas(const struct i915_execbuffer *eb)
 	unsigned int i;
 
 	for (i = 0; i < count; i++) {
-		struct drm_i915_gem_exec_object2 *entry = &eb->exec[i];
-		struct i915_vma *vma = exec_to_vma(entry);
+		struct i915_vma *vma = eb->vma[i];
+		unsigned int flags = eb->flags[i];
 
 		if (!vma)
-			continue;
+			break;
 
-		GEM_BUG_ON(vma->exec_entry != entry);
-		vma->exec_entry = NULL;
-		__exec_to_vma(entry) = 0;
+		GEM_BUG_ON(vma->exec_flags != &eb->flags[i]);
+		vma->exec_flags = NULL;
+		eb->vma[i] = NULL;
 
-		if (entry->flags & __EXEC_OBJECT_HAS_PIN)
-			__eb_unreserve_vma(vma, entry);
+		if (flags & __EXEC_OBJECT_HAS_PIN)
+			__eb_unreserve_vma(vma, flags);
 
-		if (entry->flags & __EXEC_OBJECT_HAS_REF)
+		if (flags & __EXEC_OBJECT_HAS_REF)
 			i915_vma_put(vma);
-
-		entry->flags &=
-			~(__EXEC_OBJECT_RESERVED | __EXEC_OBJECT_HAS_REF);
 	}
 }
 
@@ -1266,7 +1180,9 @@ relocate_entry(struct i915_vma *vma,
 
 	if (!eb->reloc_cache.vaddr &&
 	    (DBG_FORCE_RELOC == FORCE_GPU_RELOC ||
-	     !reservation_object_test_signaled_rcu(vma->resv, true))) {
+	     !reservation_object_test_signaled_rcu(vma->resv, true)) &&
+	    __intel_engine_can_store_dword(eb->reloc_cache.gen,
+					   eb->engine->class)) {
 		const unsigned int gen = eb->reloc_cache.gen;
 		unsigned int len;
 		u32 *batch;
@@ -1276,10 +1192,8 @@ relocate_entry(struct i915_vma *vma,
 			len = offset & 7 ? 8 : 5;
 		else if (gen >= 4)
 			len = 4;
-		else if (gen >= 3)
+		else
 			len = 3;
-		else /* On gen2 MI_STORE_DWORD_IMM uses a physical address */
-			goto repeat;
 
 		batch = reloc_gpu(eb, vma, len);
 		if (IS_ERR(batch))
@@ -1382,7 +1296,7 @@ eb_relocate_entry(struct i915_execbuffer *eb,
 	}
 
 	if (reloc->write_domain) {
-		target->exec_entry->flags |= EXEC_OBJECT_WRITE;
+		*target->exec_flags |= EXEC_OBJECT_WRITE;
 
 		/*
 		 * Sandybridge PPGTT errata: We need a global gtt mapping
@@ -1432,9 +1346,9 @@ eb_relocate_entry(struct i915_execbuffer *eb,
 	 * patching using the GPU (though that should be serialised by the
 	 * timeline). To be completely sure, and since we are required to
 	 * do relocations we are already stalling, disable the user's opt
-	 * of our synchronisation.
+	 * out of our synchronisation.
 	 */
-	vma->exec_entry->flags &= ~EXEC_OBJECT_ASYNC;
+	*vma->exec_flags &= ~EXEC_OBJECT_ASYNC;
 
 	/* and update the user's relocation entry */
 	return relocate_entry(vma, reloc, eb, target);
@@ -1445,7 +1359,7 @@ static int eb_relocate_vma(struct i915_execbuffer *eb, struct i915_vma *vma)
 #define N_RELOC(x) ((x) / sizeof(struct drm_i915_gem_relocation_entry))
 	struct drm_i915_gem_relocation_entry stack[N_RELOC(512)];
 	struct drm_i915_gem_relocation_entry __user *urelocs;
-	const struct drm_i915_gem_exec_object2 *entry = vma->exec_entry;
+	const struct drm_i915_gem_exec_object2 *entry = exec_entry(eb, vma);
 	unsigned int remain;
 
 	urelocs = u64_to_user_ptr(entry->relocs_ptr);
@@ -1528,7 +1442,7 @@ out:
 static int
 eb_relocate_vma_slow(struct i915_execbuffer *eb, struct i915_vma *vma)
 {
-	const struct drm_i915_gem_exec_object2 *entry = vma->exec_entry;
+	const struct drm_i915_gem_exec_object2 *entry = exec_entry(eb, vma);
 	struct drm_i915_gem_relocation_entry *relocs =
 		u64_to_ptr(typeof(*relocs), entry->relocs_ptr);
 	unsigned int i;
@@ -1732,6 +1646,8 @@ repeat:
 	if (err)
 		goto err;
 
+	GEM_BUG_ON(!eb->batch);
+
 	list_for_each_entry(vma, &eb->relocs, reloc_link) {
 		if (!have_copy) {
 			pagefault_disable();
@@ -1825,11 +1741,11 @@ static int eb_move_to_gpu(struct i915_execbuffer *eb)
 	int err;
 
 	for (i = 0; i < count; i++) {
-		struct drm_i915_gem_exec_object2 *entry = &eb->exec[i];
-		struct i915_vma *vma = exec_to_vma(entry);
+		unsigned int flags = eb->flags[i];
+		struct i915_vma *vma = eb->vma[i];
 		struct drm_i915_gem_object *obj = vma->obj;
 
-		if (entry->flags & EXEC_OBJECT_CAPTURE) {
+		if (flags & EXEC_OBJECT_CAPTURE) {
 			struct i915_gem_capture_list *capture;
 
 			capture = kmalloc(sizeof(*capture), GFP_KERNEL);
@@ -1837,35 +1753,47 @@ static int eb_move_to_gpu(struct i915_execbuffer *eb)
 				return -ENOMEM;
 
 			capture->next = eb->request->capture_list;
-			capture->vma = vma;
+			capture->vma = eb->vma[i];
 			eb->request->capture_list = capture;
 		}
 
-		if (unlikely(obj->cache_dirty && !obj->cache_coherent)) {
+		/*
+		 * If the GPU is not _reading_ through the CPU cache, we need
+		 * to make sure that any writes (both previous GPU writes from
+		 * before a change in snooping levels and normal CPU writes)
+		 * caught in that cache are flushed to main memory.
+		 *
		 * We want to say
+		 *   obj->cache_dirty &&
+		 *   !(obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_READ)
+		 * but gcc's optimiser doesn't handle that as well and emits
+		 * two jumps instead of one. Maybe one day...
+		 */
+		if (unlikely(obj->cache_dirty & ~obj->cache_coherent)) {
 			if (i915_gem_clflush_object(obj, 0))
-				entry->flags &= ~EXEC_OBJECT_ASYNC;
+				flags &= ~EXEC_OBJECT_ASYNC;
 		}
 
-		if (entry->flags & EXEC_OBJECT_ASYNC)
-			goto skip_flushes;
+		if (flags & EXEC_OBJECT_ASYNC)
+			continue;
 
 		err = i915_gem_request_await_object
-			(eb->request, obj, entry->flags & EXEC_OBJECT_WRITE);
+			(eb->request, obj, flags & EXEC_OBJECT_WRITE);
 		if (err)
 			return err;
-
-skip_flushes:
-		i915_vma_move_to_active(vma, eb->request, entry->flags);
-		__eb_unreserve_vma(vma, entry);
-		vma->exec_entry = NULL;
 	}
 
 	for (i = 0; i < count; i++) {
-		const struct drm_i915_gem_exec_object2 *entry = &eb->exec[i];
-		struct i915_vma *vma = exec_to_vma(entry);
+		unsigned int flags = eb->flags[i];
+		struct i915_vma *vma = eb->vma[i];
 
-		eb_export_fence(vma, eb->request, entry->flags);
-		if (unlikely(entry->flags & __EXEC_OBJECT_HAS_REF))
+		i915_vma_move_to_active(vma, eb->request, flags);
+		eb_export_fence(vma, eb->request, flags);
+
+		__eb_unreserve_vma(vma, flags);
+		vma->exec_flags = NULL;
+
+		if (unlikely(flags & __EXEC_OBJECT_HAS_REF))
 			i915_vma_put(vma);
 	}
 	eb->exec = NULL;
 
@@ -1883,8 +1811,10 @@ static bool i915_gem_check_execbuffer(struct drm_i915_gem_execbuffer2 *exec)
 		return false;
 
 	/* Kernel clipping was a DRI1 misfeature */
-	if (exec->num_cliprects || exec->cliprects_ptr)
-		return false;
+	if (!(exec->flags & I915_EXEC_FENCE_ARRAY)) {
+		if (exec->num_cliprects || exec->cliprects_ptr)
+			return false;
+	}
 
 	if (exec->DR4 == 0xffffffff) {
 		DRM_DEBUG("UXA submitting garbage DR4, fixing up\n");
@@ -1992,11 +1922,11 @@ static struct i915_vma *eb_parse(struct i915_execbuffer *eb, bool is_master)
 	if (IS_ERR(vma))
 		goto out;
 
-	vma->exec_entry =
-		memset(&eb->exec[eb->buffer_count++],
-		       0, sizeof(*vma->exec_entry));
-	vma->exec_entry->flags = __EXEC_OBJECT_HAS_PIN | __EXEC_OBJECT_HAS_REF;
-	__exec_to_vma(vma->exec_entry) = (uintptr_t)i915_vma_get(vma);
+	eb->vma[eb->buffer_count] = i915_vma_get(vma);
+	eb->flags[eb->buffer_count] =
+		__EXEC_OBJECT_HAS_PIN | __EXEC_OBJECT_HAS_REF;
+	vma->exec_flags = &eb->flags[eb->buffer_count];
+	eb->buffer_count++;
 
 out:
 	i915_gem_object_unpin_pages(shadow_batch_obj);
@@ -2115,11 +2045,129 @@ eb_select_engine(struct drm_i915_private *dev_priv,
 	return engine;
 }
 
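Note: the helpers added next implement the new I915_EXEC_FENCE_ARRAY ABI, which reuses the dead DRI1 cliprects fields (cliprects_ptr/num_cliprects) to pass an array of {syncobj handle, flags} pairs. From userspace the call might be prepared as in this sketch; the structs are trimmed, the handles are hypothetical, and the flag bit values mirror the i915 uapi of this era but should be taken from <drm/i915_drm.h> in real code:

#include <stdint.h>
#include <string.h>

struct drm_i915_gem_exec_fence {
	uint32_t handle;	/* drm_syncobj handle */
	uint32_t flags;		/* wait before and/or signal after */
};

#define I915_EXEC_FENCE_WAIT	(1u << 0)
#define I915_EXEC_FENCE_SIGNAL	(1u << 1)
#define I915_EXEC_FENCE_ARRAY	(1ull << 19)	/* execbuffer2 flag */

struct execbuffer2_args {	/* trimmed drm_i915_gem_execbuffer2 */
	uint64_t cliprects_ptr;
	uint32_t num_cliprects;
	uint64_t flags;
};

int main(void)
{
	struct drm_i915_gem_exec_fence fences[2];
	struct execbuffer2_args args;

	fences[0].handle = 1;	/* hypothetical syncobj: wait on it */
	fences[0].flags = I915_EXEC_FENCE_WAIT;
	fences[1].handle = 2;	/* hypothetical syncobj: signal it */
	fences[1].flags = I915_EXEC_FENCE_SIGNAL;

	memset(&args, 0, sizeof(args));
	args.cliprects_ptr = (uintptr_t)fences;	/* reused cliprects slot */
	args.num_cliprects = 2;			/* now the fence count */
	args.flags = I915_EXEC_FENCE_ARRAY;

	/* DRM_IOCTL_I915_GEM_EXECBUFFER2 would be issued here */
	return 0;
}

This is also why i915_gem_check_execbuffer() above only rejects non-zero cliprects when I915_EXEC_FENCE_ARRAY is not set.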
+static void
+__free_fence_array(struct drm_syncobj **fences, unsigned int n)
+{
+	while (n--)
+		drm_syncobj_put(ptr_mask_bits(fences[n], 2));
+	kvfree(fences);
+}
+
+static struct drm_syncobj **
+get_fence_array(struct drm_i915_gem_execbuffer2 *args,
+		struct drm_file *file)
+{
+	const unsigned int nfences = args->num_cliprects;
+	struct drm_i915_gem_exec_fence __user *user;
+	struct drm_syncobj **fences;
+	unsigned int n;
+	int err;
+
+	if (!(args->flags & I915_EXEC_FENCE_ARRAY))
+		return NULL;
+
+	if (nfences > SIZE_MAX / sizeof(*fences))
+		return ERR_PTR(-EINVAL);
+
+	user = u64_to_user_ptr(args->cliprects_ptr);
+	if (!access_ok(VERIFY_READ, user, nfences * 2 * sizeof(u32)))
+		return ERR_PTR(-EFAULT);
+
+	fences = kvmalloc_array(args->num_cliprects, sizeof(*fences),
+				__GFP_NOWARN | GFP_TEMPORARY);
+	if (!fences)
+		return ERR_PTR(-ENOMEM);
+
+	for (n = 0; n < nfences; n++) {
+		struct drm_i915_gem_exec_fence fence;
+		struct drm_syncobj *syncobj;
+
+		if (__copy_from_user(&fence, user++, sizeof(fence))) {
+			err = -EFAULT;
+			goto err;
+		}
+
+		syncobj = drm_syncobj_find(file, fence.handle);
+		if (!syncobj) {
+			DRM_DEBUG("Invalid syncobj handle provided\n");
+			err = -ENOENT;
+			goto err;
+		}
+
+		fences[n] = ptr_pack_bits(syncobj, fence.flags, 2);
+	}
+
+	return fences;
+
+err:
+	__free_fence_array(fences, n);
+	return ERR_PTR(err);
+}
+
+static void
+put_fence_array(struct drm_i915_gem_execbuffer2 *args,
+		struct drm_syncobj **fences)
+{
+	if (fences)
+		__free_fence_array(fences, args->num_cliprects);
+}
+
+static int
+await_fence_array(struct i915_execbuffer *eb,
+		  struct drm_syncobj **fences)
+{
+	const unsigned int nfences = eb->args->num_cliprects;
+	unsigned int n;
+	int err;
+
+	for (n = 0; n < nfences; n++) {
+		struct drm_syncobj *syncobj;
+		struct dma_fence *fence;
+		unsigned int flags;
+
+		syncobj = ptr_unpack_bits(fences[n], &flags, 2);
+		if (!(flags & I915_EXEC_FENCE_WAIT))
+			continue;
+
+		fence = drm_syncobj_fence_get(syncobj);
+		if (!fence)
+			return -EINVAL;
+
+		err = i915_gem_request_await_dma_fence(eb->request, fence);
+		dma_fence_put(fence);
+		if (err < 0)
+			return err;
+	}
+
+	return 0;
+}
+
+static void
+signal_fence_array(struct i915_execbuffer *eb,
+		   struct drm_syncobj **fences)
+{
+	const unsigned int nfences = eb->args->num_cliprects;
+	struct dma_fence * const fence = &eb->request->fence;
+	unsigned int n;
+
+	for (n = 0; n < nfences; n++) {
+		struct drm_syncobj *syncobj;
+		unsigned int flags;
+
+		syncobj = ptr_unpack_bits(fences[n], &flags, 2);
+		if (!(flags & I915_EXEC_FENCE_SIGNAL))
+			continue;
+
+		drm_syncobj_replace_fence(syncobj, fence);
+	}
+}
+
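Note: get_fence_array() packs the two per-fence flag bits (I915_EXEC_FENCE_WAIT/SIGNAL) into the low bits of the syncobj pointer itself, which works because kernel allocations are aligned to well over 4 bytes; ptr_unpack_bits()/ptr_mask_bits() recover them later, so no second array is needed. Simplified local stand-ins for those i915 macros, exercised in userspace:

#include <assert.h>
#include <stdint.h>

static void *ptr_pack_bits(void *ptr, unsigned int bits, unsigned int n)
{
	/* alignment guarantees the low n bits of ptr are zero */
	assert(((uintptr_t)ptr & ((1u << n) - 1)) == 0);
	return (void *)((uintptr_t)ptr | bits);
}

static void *ptr_unpack_bits(void *packed, unsigned int *bits, unsigned int n)
{
	*bits = (uintptr_t)packed & ((1u << n) - 1);
	return (void *)((uintptr_t)packed & ~(uintptr_t)((1u << n) - 1));
}

int main(void)
{
	static int syncobj;	/* stand-in object, suitably aligned */
	unsigned int flags;
	void *p = ptr_pack_bits(&syncobj, 0x2 /* e.g. FENCE_SIGNAL */, 2);

	assert(ptr_unpack_bits(p, &flags, 2) == (void *)&syncobj);
	assert(flags == 0x2);
	return 0;
}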
 static int
 i915_gem_do_execbuffer(struct drm_device *dev,
 		       struct drm_file *file,
 		       struct drm_i915_gem_execbuffer2 *args,
-		       struct drm_i915_gem_exec_object2 *exec)
+		       struct drm_i915_gem_exec_object2 *exec,
+		       struct drm_syncobj **fences)
 {
 	struct i915_execbuffer eb;
 	struct dma_fence *in_fence = NULL;
@@ -2135,8 +2183,12 @@ i915_gem_do_execbuffer(struct drm_device *dev,
 	eb.args = args;
 	if (DBG_FORCE_RELOC || !(args->flags & I915_EXEC_NO_RELOC))
 		args->flags |= __EXEC_HAS_RELOC;
+
 	eb.exec = exec;
-	eb.ctx = NULL;
+	eb.vma = (struct i915_vma **)(exec + args->buffer_count + 1);
+	eb.vma[0] = NULL;
+	eb.flags = (unsigned int *)(eb.vma + args->buffer_count + 1);
+
 	eb.invalid_flags = __EXEC_OBJECT_UNKNOWN_FLAGS;
 	if (USES_FULL_PPGTT(eb.i915))
 		eb.invalid_flags |= EXEC_OBJECT_NEEDS_GTT;
@@ -2194,6 +2246,10 @@ i915_gem_do_execbuffer(struct drm_device *dev,
 
 	GEM_BUG_ON(!eb.lut_size);
 
+	err = eb_select_context(&eb);
+	if (unlikely(err))
+		goto err_destroy;
+
 	/*
 	 * Take a local wakeref for preparing to dispatch the execbuf as
 	 * we expect to access the hardware fairly frequently in the
@@ -2202,14 +2258,11 @@ i915_gem_do_execbuffer(struct drm_device *dev,
 	 * 100ms.
 	 */
 	intel_runtime_pm_get(eb.i915);
+
 	err = i915_mutex_lock_interruptible(dev);
 	if (err)
 		goto err_rpm;
 
-	err = eb_select_context(&eb);
-	if (unlikely(err))
-		goto err_unlock;
-
 	err = eb_relocate(&eb);
 	if (err) {
 		/*
@@ -2223,7 +2276,7 @@ i915_gem_do_execbuffer(struct drm_device *dev,
 		goto err_vma;
 	}
 
-	if (unlikely(eb.batch->exec_entry->flags & EXEC_OBJECT_WRITE)) {
+	if (unlikely(*eb.batch->exec_flags & EXEC_OBJECT_WRITE)) {
 		DRM_DEBUG("Attempting to use self-modifying batch buffer\n");
 		err = -EINVAL;
 		goto err_vma;
@@ -2305,6 +2358,12 @@ i915_gem_do_execbuffer(struct drm_device *dev,
 		goto err_request;
 	}
 
+	if (fences) {
+		err = await_fence_array(&eb, fences);
+		if (err)
+			goto err_request;
+	}
+
 	if (out_fence_fd != -1) {
 		out_fence = sync_file_create(&eb.request->fence);
 		if (!out_fence) {
@@ -2328,6 +2387,9 @@ err_request:
 	__i915_add_request(eb.request, err == 0);
 	add_to_client(eb.request, file);
 
+	if (fences)
+		signal_fence_array(&eb, fences);
+
 	if (out_fence) {
 		if (err == 0) {
 			fd_install(out_fence_fd, out_fence->file);
@@ -2345,11 +2407,11 @@ err_batch_unpin:
 err_vma:
 	if (eb.exec)
 		eb_release_vmas(&eb);
-	i915_gem_context_put(eb.ctx);
-err_unlock:
 	mutex_unlock(&dev->struct_mutex);
 err_rpm:
 	intel_runtime_pm_put(eb.i915);
+	i915_gem_context_put(eb.ctx);
+err_destroy:
 	eb_destroy(&eb);
 err_out_fence:
 	if (out_fence_fd != -1)
@@ -2367,7 +2429,9 @@ int
 i915_gem_execbuffer(struct drm_device *dev, void *data,
 		    struct drm_file *file)
 {
-	const size_t sz = sizeof(struct drm_i915_gem_exec_object2);
+	const size_t sz = (sizeof(struct drm_i915_gem_exec_object2) +
+			   sizeof(struct i915_vma *) +
+			   sizeof(unsigned int));
 	struct drm_i915_gem_execbuffer *args = data;
 	struct drm_i915_gem_execbuffer2 exec2;
 	struct drm_i915_gem_exec_object *exec_list = NULL;
@@ -2429,7 +2493,7 @@ i915_gem_execbuffer(struct drm_device *dev, void *data,
 		exec2_list[i].flags = 0;
 	}
 
-	err = i915_gem_do_execbuffer(dev, file, &exec2, exec2_list);
+	err = i915_gem_do_execbuffer(dev, file, &exec2, exec2_list, NULL);
 	if (exec2.flags & __EXEC_HAS_RELOC) {
 		struct drm_i915_gem_exec_object __user *user_exec_list =
 			u64_to_user_ptr(args->buffers_ptr);
@@ -2458,9 +2522,12 @@ int
 i915_gem_execbuffer2(struct drm_device *dev, void *data,
 		     struct drm_file *file)
 {
-	const size_t sz = sizeof(struct drm_i915_gem_exec_object2);
+	const size_t sz = (sizeof(struct drm_i915_gem_exec_object2) +
+			   sizeof(struct i915_vma *) +
+			   sizeof(unsigned int));
 	struct drm_i915_gem_execbuffer2 *args = data;
 	struct drm_i915_gem_exec_object2 *exec2_list;
+	struct drm_syncobj **fences = NULL;
 	int err;
 
 	if (args->buffer_count < 1 || args->buffer_count > SIZE_MAX / sz - 1) {
@@ -2487,7 +2554,15 @@ i915_gem_execbuffer2(struct drm_device *dev, void *data,
 		return -EFAULT;
 	}
 
-	err = i915_gem_do_execbuffer(dev, file, args, exec2_list);
+	if (args->flags & I915_EXEC_FENCE_ARRAY) {
+		fences = get_fence_array(args, file);
+		if (IS_ERR(fences)) {
+			kvfree(exec2_list);
+			return PTR_ERR(fences);
+		}
+	}
+
+	err = i915_gem_do_execbuffer(dev, file, args, exec2_list, fences);
 
 	/*
 	 * Now that we have begun execution of the batchbuffer, we ignore
@@ -2517,6 +2592,7 @@ end_user:
 	}
 
 	args->flags &= ~__I915_EXEC_UNKNOWN_FLAGS;
+	put_fence_array(args, fences);
 	kvfree(exec2_list);
 	return err;
 }
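Note: the enlarged per-buffer size sz and the eb.vma/eb.flags setup in i915_gem_do_execbuffer() imply that a single allocation now carries three consecutive arrays: exec2[count+1], then vma[count+1], then flags[count+1], with the spare slot available for the shadow batch appended by eb_parse() and vma[0] primed as a NULL sentinel for the release path. A sketch of that carving with simplified types; it assumes, as holds for the real structs here, that the exec-object element size keeps the trailing arrays sufficiently aligned:

#include <stdio.h>
#include <stdlib.h>

struct exec_object2 { unsigned long long offset; unsigned int flags; };
struct vma;	/* opaque; we only store pointers to it */

int main(void)
{
	unsigned int count = 8;	/* args->buffer_count */
	size_t sz = sizeof(struct exec_object2) +
		    sizeof(struct vma *) +
		    sizeof(unsigned int);

	/* one buffer sized for count+1 elements of all three arrays */
	struct exec_object2 *exec = calloc(count + 1, sz);
	if (!exec)
		return 1;

	/* carve the kernel-private arrays out of the same allocation */
	struct vma **vma = (struct vma **)(exec + count + 1);
	unsigned int *flags = (unsigned int *)(vma + count + 1);

	vma[0] = NULL;	/* sentinel: release loop stops at first hole */
	printf("exec=%p vma=%p flags=%p\n",
	       (void *)exec, (void *)vma, (void *)flags);
	free(exec);
	return 0;
}

Packing all three arrays into the one kvmalloc keeps the copy-from-user buffer and the execbuf working state cache-adjacent, and a single kvfree() releases everything.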