Diffstat (limited to 'drivers/gpu/drm/i915/gem')
56 files changed, 5992 insertions, 3261 deletions
diff --git a/drivers/gpu/drm/i915/gem/Makefile b/drivers/gpu/drm/i915/gem/Makefile
deleted file mode 100644
index 07e7b8b840ea..000000000000
--- a/drivers/gpu/drm/i915/gem/Makefile
+++ /dev/null
@@ -1 +0,0 @@
-include $(src)/Makefile.header-test # Extra header tests
diff --git a/drivers/gpu/drm/i915/gem/Makefile.header-test b/drivers/gpu/drm/i915/gem/Makefile.header-test
deleted file mode 100644
index 61e06cbb4b32..000000000000
--- a/drivers/gpu/drm/i915/gem/Makefile.header-test
+++ /dev/null
@@ -1,16 +0,0 @@
-# SPDX-License-Identifier: MIT
-# Copyright © 2019 Intel Corporation
-
-# Test the headers are compilable as standalone units
-header_test := $(notdir $(wildcard $(src)/*.h))
-
-quiet_cmd_header_test = HDRTEST $@
-      cmd_header_test = echo "\#include \"$(<F)\"" > $@
-
-header_test_%.c: %.h
-	$(call cmd,header_test)
-
-extra-$(CONFIG_DRM_I915_WERROR) += \
-	$(foreach h,$(header_test),$(patsubst %.h,header_test_%.o,$(h)))
-
-clean-files += $(foreach h,$(header_test),$(patsubst %.h,header_test_%.c,$(h)))
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_busy.c b/drivers/gpu/drm/i915/gem/i915_gem_busy.c
index 6ad93a09968c..25235ef630c1 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_busy.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_busy.c
@@ -9,16 +9,16 @@
 #include "i915_gem_ioctls.h"
 #include "i915_gem_object.h"

-static __always_inline u32 __busy_read_flag(u8 id)
+static __always_inline u32 __busy_read_flag(u16 id)
 {
-	if (id == (u8)I915_ENGINE_CLASS_INVALID)
+	if (id == (u16)I915_ENGINE_CLASS_INVALID)
 		return 0xffff0000u;

 	GEM_BUG_ON(id >= 16);
 	return 0x10000u << id;
 }

-static __always_inline u32 __busy_write_id(u8 id)
+static __always_inline u32 __busy_write_id(u16 id)
 {
 	/*
 	 * The uABI guarantees an active writer is also amongst the read
@@ -29,14 +29,14 @@ static __always_inline u32 __busy_write_id(u8 id)
 	 * last_read - hence we always set both read and write busy for
 	 * last_write.
 	 */
-	if (id == (u8)I915_ENGINE_CLASS_INVALID)
+	if (id == (u16)I915_ENGINE_CLASS_INVALID)
 		return 0xffffffffu;

 	return (id + 1) | __busy_read_flag(id);
 }

 static __always_inline unsigned int
-__busy_set_if_active(const struct dma_fence *fence, u32 (*flag)(u8 id))
+__busy_set_if_active(const struct dma_fence *fence, u32 (*flag)(u16 id))
 {
 	const struct i915_request *rq;

@@ -57,7 +57,7 @@ __busy_set_if_active(const struct dma_fence *fence, u32 (*flag)(u8 id))
 		return 0;

 	/* Beware type-expansion follies! */
-	BUILD_BUG_ON(!typecheck(u8, rq->engine->uabi_class));
+	BUILD_BUG_ON(!typecheck(u16, rq->engine->uabi_class));

 	return flag(rq->engine->uabi_class);
 }

@@ -82,7 +82,7 @@ i915_gem_busy_ioctl(struct drm_device *dev, void *data,
 {
 	struct drm_i915_gem_busy *args = data;
 	struct drm_i915_gem_object *obj;
-	struct reservation_object_list *list;
+	struct dma_resv_list *list;
 	unsigned int seq;
 	int err;

@@ -105,7 +105,7 @@ i915_gem_busy_ioctl(struct drm_device *dev, void *data,
 	 * Alternatively, we can trade that extra information on read/write
 	 * activity with
 	 *	args->busy =
-	 *		!reservation_object_test_signaled_rcu(obj->resv, true);
+	 *		!dma_resv_test_signaled_rcu(obj->resv, true);
 	 * to report the overall busyness. This is what the wait-ioctl does.
* */ diff --git a/drivers/gpu/drm/i915/gem/i915_gem_clflush.c b/drivers/gpu/drm/i915/gem/i915_gem_clflush.c index 5295285d5843..34be4c0ee7c5 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_clflush.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_clflush.c @@ -8,87 +8,65 @@ #include "i915_drv.h" #include "i915_gem_clflush.h" - -static DEFINE_SPINLOCK(clflush_lock); +#include "i915_sw_fence_work.h" +#include "i915_trace.h" struct clflush { - struct dma_fence dma; /* Must be first for dma_fence_free() */ - struct i915_sw_fence wait; - struct work_struct work; + struct dma_fence_work base; struct drm_i915_gem_object *obj; }; -static const char *i915_clflush_get_driver_name(struct dma_fence *fence) -{ - return DRIVER_NAME; -} - -static const char *i915_clflush_get_timeline_name(struct dma_fence *fence) -{ - return "clflush"; -} - -static void i915_clflush_release(struct dma_fence *fence) -{ - struct clflush *clflush = container_of(fence, typeof(*clflush), dma); - - i915_sw_fence_fini(&clflush->wait); - - BUILD_BUG_ON(offsetof(typeof(*clflush), dma)); - dma_fence_free(&clflush->dma); -} - -static const struct dma_fence_ops i915_clflush_ops = { - .get_driver_name = i915_clflush_get_driver_name, - .get_timeline_name = i915_clflush_get_timeline_name, - .release = i915_clflush_release, -}; - -static void __i915_do_clflush(struct drm_i915_gem_object *obj) +static void __do_clflush(struct drm_i915_gem_object *obj) { GEM_BUG_ON(!i915_gem_object_has_pages(obj)); drm_clflush_sg(obj->mm.pages); - intel_fb_obj_flush(obj, ORIGIN_CPU); + + i915_gem_object_flush_frontbuffer(obj, ORIGIN_CPU); } -static void i915_clflush_work(struct work_struct *work) +static int clflush_work(struct dma_fence_work *base) { - struct clflush *clflush = container_of(work, typeof(*clflush), work); + struct clflush *clflush = container_of(base, typeof(*clflush), base); struct drm_i915_gem_object *obj = clflush->obj; + int err; - if (i915_gem_object_pin_pages(obj)) { - DRM_ERROR("Failed to acquire obj->pages for clflushing\n"); - goto out; - } - - __i915_do_clflush(obj); + err = i915_gem_object_pin_pages(obj); + if (err) + return err; + __do_clflush(obj); i915_gem_object_unpin_pages(obj); -out: - i915_gem_object_put(obj); + return 0; +} + +static void clflush_release(struct dma_fence_work *base) +{ + struct clflush *clflush = container_of(base, typeof(*clflush), base); - dma_fence_signal(&clflush->dma); - dma_fence_put(&clflush->dma); + i915_gem_object_put(clflush->obj); } -static int __i915_sw_fence_call -i915_clflush_notify(struct i915_sw_fence *fence, - enum i915_sw_fence_notify state) +static const struct dma_fence_work_ops clflush_ops = { + .name = "clflush", + .work = clflush_work, + .release = clflush_release, +}; + +static struct clflush *clflush_work_create(struct drm_i915_gem_object *obj) { - struct clflush *clflush = container_of(fence, typeof(*clflush), wait); + struct clflush *clflush; - switch (state) { - case FENCE_COMPLETE: - schedule_work(&clflush->work); - break; + GEM_BUG_ON(!obj->cache_dirty); - case FENCE_FREE: - dma_fence_put(&clflush->dma); - break; - } + clflush = kmalloc(sizeof(*clflush), GFP_KERNEL); + if (!clflush) + return NULL; - return NOTIFY_DONE; + dma_fence_work_init(&clflush->base, &clflush_ops); + clflush->obj = i915_gem_object_get(obj); /* obj <-> clflush cycle */ + + return clflush; } bool i915_gem_clflush_object(struct drm_i915_gem_object *obj, @@ -126,33 +104,16 @@ bool i915_gem_clflush_object(struct drm_i915_gem_object *obj, clflush = NULL; if (!(flags & I915_CLFLUSH_SYNC)) - clflush = 
kmalloc(sizeof(*clflush), GFP_KERNEL); + clflush = clflush_work_create(obj); if (clflush) { - GEM_BUG_ON(!obj->cache_dirty); - - dma_fence_init(&clflush->dma, - &i915_clflush_ops, - &clflush_lock, - to_i915(obj->base.dev)->mm.unordered_timeline, - 0); - i915_sw_fence_init(&clflush->wait, i915_clflush_notify); - - clflush->obj = i915_gem_object_get(obj); - INIT_WORK(&clflush->work, i915_clflush_work); - - dma_fence_get(&clflush->dma); - - i915_sw_fence_await_reservation(&clflush->wait, - obj->base.resv, NULL, - true, I915_FENCE_TIMEOUT, + i915_sw_fence_await_reservation(&clflush->base.chain, + obj->base.resv, NULL, true, + I915_FENCE_TIMEOUT, I915_FENCE_GFP); - - reservation_object_add_excl_fence(obj->base.resv, - &clflush->dma); - - i915_sw_fence_commit(&clflush->wait); + dma_resv_add_excl_fence(obj->base.resv, &clflush->base.dma); + dma_fence_work_commit(&clflush->base); } else if (obj->mm.pages) { - __i915_do_clflush(obj); + __do_clflush(obj); } else { GEM_BUG_ON(obj->write_domain != I915_GEM_DOMAIN_CPU); } diff --git a/drivers/gpu/drm/i915/gem/i915_gem_client_blt.c b/drivers/gpu/drm/i915/gem/i915_gem_client_blt.c index 1fdab0767a47..81366aa4812b 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_client_blt.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_client_blt.c @@ -2,10 +2,13 @@ /* * Copyright © 2019 Intel Corporation */ -#include "i915_gem_client_blt.h" +#include "i915_drv.h" +#include "gt/intel_context.h" +#include "gt/intel_engine_pm.h" +#include "gt/intel_engine_pool.h" +#include "i915_gem_client_blt.h" #include "i915_gem_object_blt.h" -#include "intel_drv.h" struct i915_sleeve { struct i915_vma *vma; @@ -72,7 +75,6 @@ static struct i915_sleeve *create_sleeve(struct i915_address_space *vm, vma->ops = &proxy_vma_ops; sleeve->vma = vma; - sleeve->obj = i915_gem_object_get(obj); sleeve->pages = pages; sleeve->page_sizes = *page_sizes; @@ -85,7 +87,6 @@ err_free: static void destroy_sleeve(struct i915_sleeve *sleeve) { - i915_gem_object_put(sleeve->obj); kfree(sleeve); } @@ -154,32 +155,37 @@ static void clear_pages_dma_fence_cb(struct dma_fence *fence, static void clear_pages_worker(struct work_struct *work) { struct clear_pages_work *w = container_of(work, typeof(*w), work); - struct drm_i915_private *i915 = w->ce->gem_context->i915; - struct drm_i915_gem_object *obj = w->sleeve->obj; + struct drm_i915_gem_object *obj = w->sleeve->vma->obj; struct i915_vma *vma = w->sleeve->vma; struct i915_request *rq; + struct i915_vma *batch; int err = w->dma.error; if (unlikely(err)) goto out_signal; if (obj->cache_dirty) { - obj->write_domain = 0; if (i915_gem_object_has_struct_page(obj)) drm_clflush_sg(w->sleeve->pages); obj->cache_dirty = false; } + obj->read_domains = I915_GEM_GPU_DOMAINS; + obj->write_domain = 0; - /* XXX: we need to kill this */ - mutex_lock(&i915->drm.struct_mutex); err = i915_vma_pin(vma, 0, 0, PIN_USER); if (unlikely(err)) - goto out_unlock; + goto out_signal; + + batch = intel_emit_vma_fill_blt(w->ce, vma, w->value); + if (IS_ERR(batch)) { + err = PTR_ERR(batch); + goto out_unpin; + } - rq = i915_request_create(w->ce); + rq = intel_context_create_request(w->ce); if (IS_ERR(rq)) { err = PTR_ERR(rq); - goto out_unpin; + goto out_batch; } /* There's no way the fence has signalled */ @@ -187,20 +193,28 @@ static void clear_pages_worker(struct work_struct *work) clear_pages_dma_fence_cb)) GEM_BUG_ON(1); + err = intel_emit_vma_mark_active(batch, rq); + if (unlikely(err)) + goto out_request; + if (w->ce->engine->emit_init_breadcrumb) { err = w->ce->engine->emit_init_breadcrumb(rq); 
if (unlikely(err)) goto out_request; } - /* XXX: more feverish nightmares await */ - i915_vma_lock(vma); - err = i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE); - i915_vma_unlock(vma); + /* + * w->dma is already exported via (vma|obj)->resv we need only + * keep track of the GPU activity within this vma/request, and + * propagate the signal from the request to w->dma. + */ + err = __i915_vma_move_to_active(vma, rq); if (err) goto out_request; - err = intel_emit_vma_fill_blt(rq, vma, w->value); + err = w->ce->engine->emit_bb_start(rq, + batch->node.start, batch->node.size, + 0); out_request: if (unlikely(err)) { i915_request_skip(rq, err); @@ -208,10 +222,10 @@ out_request: } i915_request_add(rq); +out_batch: + intel_emit_vma_release(w->ce, batch); out_unpin: i915_vma_unpin(vma); -out_unlock: - mutex_unlock(&i915->drm.struct_mutex); out_signal: if (unlikely(err)) { dma_fence_set_error(&w->dma, err); @@ -248,14 +262,11 @@ int i915_gem_schedule_fill_pages_blt(struct drm_i915_gem_object *obj, struct i915_page_sizes *page_sizes, u32 value) { - struct drm_i915_private *i915 = to_i915(obj->base.dev); - struct i915_gem_context *ctx = ce->gem_context; - struct i915_address_space *vm = ctx->vm ?: &i915->ggtt.vm; struct clear_pages_work *work; struct i915_sleeve *sleeve; int err; - sleeve = create_sleeve(vm, obj, pages, page_sizes); + sleeve = create_sleeve(ce->vm, obj, pages, page_sizes); if (IS_ERR(sleeve)) return PTR_ERR(sleeve); @@ -273,11 +284,7 @@ int i915_gem_schedule_fill_pages_blt(struct drm_i915_gem_object *obj, init_irq_work(&work->irq_work, clear_pages_signal_irq_worker); - dma_fence_init(&work->dma, - &clear_pages_work_ops, - &fence_lock, - i915->mm.unordered_timeline, - 0); + dma_fence_init(&work->dma, &clear_pages_work_ops, &fence_lock, 0, 0); i915_sw_fence_init(&work->wait, clear_pages_work_notify); i915_gem_object_lock(obj); @@ -288,7 +295,7 @@ int i915_gem_schedule_fill_pages_blt(struct drm_i915_gem_object *obj, if (err < 0) { dma_fence_set_error(&work->dma, err); } else { - reservation_object_add_excl_fence(obj->base.resv, &work->dma); + dma_resv_add_excl_fence(obj->base.resv, &work->dma); err = 0; } i915_gem_object_unlock(obj); diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c b/drivers/gpu/drm/i915/gem/i915_gem_context.c index 0f2c22a3bcb6..a2e57e62af30 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_context.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c @@ -69,7 +69,13 @@ #include <drm/i915_drm.h> +#include "gt/gen6_ppgtt.h" +#include "gt/intel_context.h" +#include "gt/intel_engine_heartbeat.h" +#include "gt/intel_engine_pm.h" +#include "gt/intel_engine_user.h" #include "gt/intel_lrc_reg.h" +#include "gt/intel_ring.h" #include "i915_gem_context.h" #include "i915_globals.h" @@ -158,7 +164,7 @@ lookup_user_engine(struct i915_gem_context *ctx, if (!engine) return ERR_PTR(-EINVAL); - idx = engine->id; + idx = engine->legacy_idx; } else { idx = ci->engine_instance; } @@ -166,93 +172,71 @@ lookup_user_engine(struct i915_gem_context *ctx, return i915_gem_context_get_engine(ctx, idx); } -static inline int new_hw_id(struct drm_i915_private *i915, gfp_t gfp) +static struct i915_address_space * +context_get_vm_rcu(struct i915_gem_context *ctx) { - unsigned int max; + GEM_BUG_ON(!rcu_access_pointer(ctx->vm)); - lockdep_assert_held(&i915->contexts.mutex); + do { + struct i915_address_space *vm; - if (INTEL_GEN(i915) >= 11) - max = GEN11_MAX_CONTEXT_HW_ID; - else if (USES_GUC_SUBMISSION(i915)) /* - * When using GuC in proxy submission, GuC consumes the - * highest bit in the 
context id to indicate proxy submission. + * We do not allow downgrading from full-ppgtt [to a shared + * global gtt], so ctx->vm cannot become NULL. */ - max = MAX_GUC_CONTEXT_HW_ID; - else - max = MAX_CONTEXT_HW_ID; - - return ida_simple_get(&i915->contexts.hw_ida, 0, max, gfp); -} - -static int steal_hw_id(struct drm_i915_private *i915) -{ - struct i915_gem_context *ctx, *cn; - LIST_HEAD(pinned); - int id = -ENOSPC; - - lockdep_assert_held(&i915->contexts.mutex); - - list_for_each_entry_safe(ctx, cn, - &i915->contexts.hw_id_list, hw_id_link) { - if (atomic_read(&ctx->hw_id_pin_count)) { - list_move_tail(&ctx->hw_id_link, &pinned); + vm = rcu_dereference(ctx->vm); + if (!kref_get_unless_zero(&vm->ref)) continue; - } - GEM_BUG_ON(!ctx->hw_id); /* perma-pinned kernel context */ - list_del_init(&ctx->hw_id_link); - id = ctx->hw_id; - break; - } + /* + * This ppgtt may have be reallocated between + * the read and the kref, and reassigned to a third + * context. In order to avoid inadvertent sharing + * of this ppgtt with that third context (and not + * src), we have to confirm that we have the same + * ppgtt after passing through the strong memory + * barrier implied by a successful + * kref_get_unless_zero(). + * + * Once we have acquired the current ppgtt of ctx, + * we no longer care if it is released from ctx, as + * it cannot be reallocated elsewhere. + */ - /* - * Remember how far we got up on the last repossesion scan, so the - * list is kept in a "least recently scanned" order. - */ - list_splice_tail(&pinned, &i915->contexts.hw_id_list); - return id; + if (vm == rcu_access_pointer(ctx->vm)) + return rcu_pointer_handoff(vm); + + i915_vm_put(vm); + } while (1); } -static int assign_hw_id(struct drm_i915_private *i915, unsigned int *out) +static void intel_context_set_gem(struct intel_context *ce, + struct i915_gem_context *ctx) { - int ret; + GEM_BUG_ON(rcu_access_pointer(ce->gem_context)); + RCU_INIT_POINTER(ce->gem_context, ctx); - lockdep_assert_held(&i915->contexts.mutex); + if (!test_bit(CONTEXT_ALLOC_BIT, &ce->flags)) + ce->ring = __intel_context_ring_size(SZ_16K); - /* - * We prefer to steal/stall ourselves and our users over that of the - * entire system. That may be a little unfair to our users, and - * even hurt high priority clients. The choice is whether to oomkill - * something else, or steal a context id. 
- */ - ret = new_hw_id(i915, GFP_KERNEL | __GFP_RETRY_MAYFAIL | __GFP_NOWARN); - if (unlikely(ret < 0)) { - ret = steal_hw_id(i915); - if (ret < 0) /* once again for the correct errno code */ - ret = new_hw_id(i915, GFP_KERNEL); - if (ret < 0) - return ret; - } + if (rcu_access_pointer(ctx->vm)) { + struct i915_address_space *vm; - *out = ret; - return 0; -} + rcu_read_lock(); + vm = context_get_vm_rcu(ctx); /* hmm */ + rcu_read_unlock(); -static void release_hw_id(struct i915_gem_context *ctx) -{ - struct drm_i915_private *i915 = ctx->i915; + i915_vm_put(ce->vm); + ce->vm = vm; + } - if (list_empty(&ctx->hw_id_link)) - return; + GEM_BUG_ON(ce->timeline); + if (ctx->timeline) + ce->timeline = intel_timeline_get(ctx->timeline); - mutex_lock(&i915->contexts.mutex); - if (!list_empty(&ctx->hw_id_link)) { - ida_simple_remove(&i915->contexts.hw_ida, ctx->hw_id); - list_del_init(&ctx->hw_id_link); - } - mutex_unlock(&i915->contexts.mutex); + if (ctx->sched.priority >= I915_PRIORITY_NORMAL && + intel_engine_has_semaphores(ce->engine)) + __set_bit(CONTEXT_USE_SEMAPHORES, &ce->flags); } static void __free_engines(struct i915_gem_engines *e, unsigned int count) @@ -261,6 +245,7 @@ static void __free_engines(struct i915_gem_engines *e, unsigned int count) if (!e->engines[count]) continue; + RCU_INIT_POINTER(e->engines[count]->gem_context, NULL); intel_context_put(e->engines[count]); } kfree(e); @@ -278,6 +263,7 @@ static void free_engines_rcu(struct rcu_head *rcu) static struct i915_gem_engines *default_engines(struct i915_gem_context *ctx) { + const struct intel_gt *gt = &ctx->i915->gt; struct intel_engine_cs *engine; struct i915_gem_engines *e; enum intel_engine_id id; @@ -287,104 +273,254 @@ static struct i915_gem_engines *default_engines(struct i915_gem_context *ctx) return ERR_PTR(-ENOMEM); init_rcu_head(&e->rcu); - for_each_engine(engine, ctx->i915, id) { + for_each_engine(engine, gt, id) { struct intel_context *ce; - ce = intel_context_create(ctx, engine); + if (engine->legacy_idx == INVALID_ENGINE) + continue; + + GEM_BUG_ON(engine->legacy_idx >= I915_NUM_ENGINES); + GEM_BUG_ON(e->engines[engine->legacy_idx]); + + ce = intel_context_create(engine); if (IS_ERR(ce)) { - __free_engines(e, id); + __free_engines(e, e->num_engines + 1); return ERR_CAST(ce); } - e->engines[id] = ce; + intel_context_set_gem(ce, ctx); + + e->engines[engine->legacy_idx] = ce; + e->num_engines = max(e->num_engines, engine->legacy_idx); } - e->num_engines = id; + e->num_engines++; return e; } static void i915_gem_context_free(struct i915_gem_context *ctx) { - lockdep_assert_held(&ctx->i915->drm.struct_mutex); GEM_BUG_ON(!i915_gem_context_is_closed(ctx)); - release_hw_id(ctx); - if (ctx->vm) - i915_vm_put(ctx->vm); + spin_lock(&ctx->i915->gem.contexts.lock); + list_del(&ctx->link); + spin_unlock(&ctx->i915->gem.contexts.lock); free_engines(rcu_access_pointer(ctx->engines)); mutex_destroy(&ctx->engines_mutex); if (ctx->timeline) - i915_timeline_put(ctx->timeline); + intel_timeline_put(ctx->timeline); - kfree(ctx->name); put_pid(ctx->pid); - - list_del(&ctx->link); mutex_destroy(&ctx->mutex); kfree_rcu(ctx, rcu); } -static void contexts_free(struct drm_i915_private *i915) +static void contexts_free_all(struct llist_node *list) { - struct llist_node *freed = llist_del_all(&i915->contexts.free_list); struct i915_gem_context *ctx, *cn; - lockdep_assert_held(&i915->drm.struct_mutex); - - llist_for_each_entry_safe(ctx, cn, freed, free_link) + llist_for_each_entry_safe(ctx, cn, list, free_link) i915_gem_context_free(ctx); } 
-static void contexts_free_first(struct drm_i915_private *i915) +static void contexts_flush_free(struct i915_gem_contexts *gc) { - struct i915_gem_context *ctx; - struct llist_node *freed; - - lockdep_assert_held(&i915->drm.struct_mutex); - - freed = llist_del_first(&i915->contexts.free_list); - if (!freed) - return; - - ctx = container_of(freed, typeof(*ctx), free_link); - i915_gem_context_free(ctx); + contexts_free_all(llist_del_all(&gc->free_list)); } static void contexts_free_worker(struct work_struct *work) { - struct drm_i915_private *i915 = - container_of(work, typeof(*i915), contexts.free_work); + struct i915_gem_contexts *gc = + container_of(work, typeof(*gc), free_work); - mutex_lock(&i915->drm.struct_mutex); - contexts_free(i915); - mutex_unlock(&i915->drm.struct_mutex); + contexts_flush_free(gc); } void i915_gem_context_release(struct kref *ref) { struct i915_gem_context *ctx = container_of(ref, typeof(*ctx), ref); - struct drm_i915_private *i915 = ctx->i915; + struct i915_gem_contexts *gc = &ctx->i915->gem.contexts; trace_i915_context_free(ctx); - if (llist_add(&ctx->free_link, &i915->contexts.free_list)) - queue_work(i915->wq, &i915->contexts.free_work); + if (llist_add(&ctx->free_link, &gc->free_list)) + schedule_work(&gc->free_work); } -static void context_close(struct i915_gem_context *ctx) +static inline struct i915_gem_engines * +__context_engines_static(const struct i915_gem_context *ctx) { - mutex_lock(&ctx->mutex); + return rcu_dereference_protected(ctx->engines, true); +} - i915_gem_context_set_closed(ctx); - ctx->file_priv = ERR_PTR(-EBADF); +static bool __reset_engine(struct intel_engine_cs *engine) +{ + struct intel_gt *gt = engine->gt; + bool success = false; + + if (!intel_has_reset_engine(gt)) + return false; + + if (!test_and_set_bit(I915_RESET_ENGINE + engine->id, + >->reset.flags)) { + success = intel_engine_reset(engine, NULL) == 0; + clear_and_wake_up_bit(I915_RESET_ENGINE + engine->id, + >->reset.flags); + } + + return success; +} + +static void __reset_context(struct i915_gem_context *ctx, + struct intel_engine_cs *engine) +{ + intel_gt_handle_error(engine->gt, engine->mask, 0, + "context closure in %s", ctx->name); +} + +static bool __cancel_engine(struct intel_engine_cs *engine) +{ + /* + * Send a "high priority pulse" down the engine to cause the + * current request to be momentarily preempted. (If it fails to + * be preempted, it will be reset). As we have marked our context + * as banned, any incomplete request, including any running, will + * be skipped following the preemption. + * + * If there is no hangchecking (one of the reasons why we try to + * cancel the context) and no forced preemption, there may be no + * means by which we reset the GPU and evict the persistent hog. + * Ergo if we are unable to inject a preemptive pulse that can + * kill the banned context, we fallback to doing a local reset + * instead. + */ + if (IS_ACTIVE(CONFIG_DRM_I915_PREEMPT_TIMEOUT) && + !intel_engine_pulse(engine)) + return true; + + /* If we are unable to send a pulse, try resetting this engine. */ + return __reset_engine(engine); +} + +static struct intel_engine_cs *__active_engine(struct i915_request *rq) +{ + struct intel_engine_cs *engine, *locked; /* - * This context will never again be assinged to HW, so we can - * reuse its ID for the next context. + * Serialise with __i915_request_submit() so that it sees + * is-banned?, or we know the request is already inflight. 
*/ - release_hw_id(ctx); + locked = READ_ONCE(rq->engine); + spin_lock_irq(&locked->active.lock); + while (unlikely(locked != (engine = READ_ONCE(rq->engine)))) { + spin_unlock(&locked->active.lock); + spin_lock(&engine->active.lock); + locked = engine; + } + + engine = NULL; + if (i915_request_is_active(rq) && !rq->fence.error) + engine = rq->engine; + + spin_unlock_irq(&locked->active.lock); + + return engine; +} + +static struct intel_engine_cs *active_engine(struct intel_context *ce) +{ + struct intel_engine_cs *engine = NULL; + struct i915_request *rq; + + if (!ce->timeline) + return NULL; + + mutex_lock(&ce->timeline->mutex); + list_for_each_entry_reverse(rq, &ce->timeline->requests, link) { + if (i915_request_completed(rq)) + break; + + /* Check with the backend if the request is inflight */ + engine = __active_engine(rq); + if (engine) + break; + } + mutex_unlock(&ce->timeline->mutex); + + return engine; +} + +static void kill_context(struct i915_gem_context *ctx) +{ + struct i915_gem_engines_iter it; + struct intel_context *ce; + + /* + * Map the user's engine back to the actual engines; one virtual + * engine will be mapped to multiple engines, and using ctx->engine[] + * the same engine may be have multiple instances in the user's map. + * However, we only care about pending requests, so only include + * engines on which there are incomplete requests. + */ + for_each_gem_engine(ce, __context_engines_static(ctx), it) { + struct intel_engine_cs *engine; + + if (intel_context_set_banned(ce)) + continue; + + /* + * Check the current active state of this context; if we + * are currently executing on the GPU we need to evict + * ourselves. On the other hand, if we haven't yet been + * submitted to the GPU or if everything is complete, + * we have nothing to do. + */ + engine = active_engine(ce); + + /* First attempt to gracefully cancel the context */ + if (engine && !__cancel_engine(engine)) + /* + * If we are unable to send a preemptive pulse to bump + * the context from the GPU, we have to resort to a full + * reset. We hope the collateral damage is worth it. + */ + __reset_context(ctx, engine); + } +} + +static void set_closed_name(struct i915_gem_context *ctx) +{ + char *s; + + /* Replace '[]' with '<>' to indicate closed in debug prints */ + + s = strrchr(ctx->name, '['); + if (!s) + return; + + *s = '<'; + + s = strchr(s + 1, ']'); + if (s) + *s = '>'; +} + +static void context_close(struct i915_gem_context *ctx) +{ + struct i915_address_space *vm; + + i915_gem_context_set_closed(ctx); + set_closed_name(ctx); + + mutex_lock(&ctx->mutex); + + vm = i915_gem_context_vm(ctx); + if (vm) + i915_vm_close(vm); + + ctx->file_priv = ERR_PTR(-EBADF); /* * The LUT uses the VMA as a backpointer to unref the object, @@ -394,31 +530,45 @@ static void context_close(struct i915_gem_context *ctx) lut_close(ctx); mutex_unlock(&ctx->mutex); + + /* + * If the user has disabled hangchecking, we can not be sure that + * the batches will ever complete after the context is closed, + * keeping the context and all resources pinned forever. So in this + * case we opt to forcibly kill off all remaining requests on + * context close. 
+ */ + if (!i915_gem_context_is_persistent(ctx) || + !i915_modparams.enable_hangcheck) + kill_context(ctx); + i915_gem_context_put(ctx); } -static u32 default_desc_template(const struct drm_i915_private *i915, - const struct i915_address_space *vm) +static int __context_set_persistence(struct i915_gem_context *ctx, bool state) { - u32 address_mode; - u32 desc; - - desc = GEN8_CTX_VALID | GEN8_CTX_PRIVILEGE; + if (i915_gem_context_is_persistent(ctx) == state) + return 0; - address_mode = INTEL_LEGACY_32B_CONTEXT; - if (vm && i915_vm_is_4lvl(vm)) - address_mode = INTEL_LEGACY_64B_CONTEXT; - desc |= address_mode << GEN8_CTX_ADDRESSING_MODE_SHIFT; + if (state) { + /* + * Only contexts that are short-lived [that will expire or be + * reset] are allowed to survive past termination. We require + * hangcheck to ensure that the persistent requests are healthy. + */ + if (!i915_modparams.enable_hangcheck) + return -EINVAL; - if (IS_GEN(i915, 8)) - desc |= GEN8_CTX_L3LLC_COHERENT; + i915_gem_context_set_persistence(ctx); + } else { + /* To cancel a context we use "preempt-to-idle" */ + if (!(ctx->i915->caps.scheduler & I915_SCHEDULER_CAP_PREEMPTION)) + return -ENODEV; - /* TODO: WaDisableLiteRestore when we start using semaphore - * signalling between Command Streamers - * ring->ctx_desc_template |= GEN8_CTX_FORCE_RESTORE; - */ + i915_gem_context_clear_persistence(ctx); + } - return desc; + return 0; } static struct i915_gem_context * @@ -434,7 +584,6 @@ __create_context(struct drm_i915_private *i915) return ERR_PTR(-ENOMEM); kref_init(&ctx->ref); - list_add_tail(&ctx->link, &i915->contexts.list); ctx->i915 = i915; ctx->sched.priority = I915_USER_PRIORITY(I915_PRIORITY_NORMAL); mutex_init(&ctx->mutex); @@ -448,7 +597,6 @@ __create_context(struct drm_i915_private *i915) RCU_INIT_POINTER(ctx->engines, e); INIT_RADIX_TREE(&ctx->handles_vma, GFP_KERNEL); - INIT_LIST_HEAD(&ctx->hw_id_link); /* NB: Mark all slices as needing a remap so that when the context first * loads it will restore whatever remap state already exists. If there @@ -457,14 +605,15 @@ __create_context(struct drm_i915_private *i915) i915_gem_context_set_bannable(ctx); i915_gem_context_set_recoverable(ctx); - - ctx->ring_size = 4 * PAGE_SIZE; - ctx->desc_template = - default_desc_template(i915, &i915->mm.aliasing_ppgtt->vm); + __context_set_persistence(ctx, true /* cgroup hook? 
*/); for (i = 0; i < ARRAY_SIZE(ctx->hang_timestamp); i++) ctx->hang_timestamp[i] = jiffies - CONTEXT_FAST_HANG_JIFFIES; + spin_lock(&i915->gem.contexts.lock); + list_add_tail(&ctx->link, &i915->gem.contexts.list); + spin_unlock(&i915->gem.contexts.lock); + return ctx; err_free: @@ -472,13 +621,34 @@ err_free: return ERR_PTR(err); } +static void +context_apply_all(struct i915_gem_context *ctx, + void (*fn)(struct intel_context *ce, void *data), + void *data) +{ + struct i915_gem_engines_iter it; + struct intel_context *ce; + + for_each_gem_engine(ce, i915_gem_context_lock_engines(ctx), it) + fn(ce, data); + i915_gem_context_unlock_engines(ctx); +} + +static void __apply_ppgtt(struct intel_context *ce, void *vm) +{ + i915_vm_put(ce->vm); + ce->vm = i915_vm_get(vm); +} + static struct i915_address_space * __set_ppgtt(struct i915_gem_context *ctx, struct i915_address_space *vm) { - struct i915_address_space *old = ctx->vm; + struct i915_address_space *old = i915_gem_context_vm(ctx); + + GEM_BUG_ON(old && i915_vm_is_4lvl(vm) != i915_vm_is_4lvl(old)); - ctx->vm = i915_vm_get(vm); - ctx->desc_template = default_desc_template(ctx->i915, vm); + rcu_assign_pointer(ctx->vm, i915_vm_open(vm)); + context_apply_all(ctx, __apply_ppgtt, vm); return old; } @@ -486,36 +656,57 @@ __set_ppgtt(struct i915_gem_context *ctx, struct i915_address_space *vm) static void __assign_ppgtt(struct i915_gem_context *ctx, struct i915_address_space *vm) { - if (vm == ctx->vm) + if (vm == rcu_access_pointer(ctx->vm)) return; vm = __set_ppgtt(ctx, vm); if (vm) - i915_vm_put(vm); + i915_vm_close(vm); +} + +static void __set_timeline(struct intel_timeline **dst, + struct intel_timeline *src) +{ + struct intel_timeline *old = *dst; + + *dst = src ? intel_timeline_get(src) : NULL; + + if (old) + intel_timeline_put(old); +} + +static void __apply_timeline(struct intel_context *ce, void *timeline) +{ + __set_timeline(&ce->timeline, timeline); +} + +static void __assign_timeline(struct i915_gem_context *ctx, + struct intel_timeline *timeline) +{ + __set_timeline(&ctx->timeline, timeline); + context_apply_all(ctx, __apply_timeline, timeline); } static struct i915_gem_context * -i915_gem_create_context(struct drm_i915_private *dev_priv, unsigned int flags) +i915_gem_create_context(struct drm_i915_private *i915, unsigned int flags) { struct i915_gem_context *ctx; - lockdep_assert_held(&dev_priv->drm.struct_mutex); - if (flags & I915_CONTEXT_CREATE_FLAGS_SINGLE_TIMELINE && - !HAS_EXECLISTS(dev_priv)) + !HAS_EXECLISTS(i915)) return ERR_PTR(-EINVAL); - /* Reap the most stale context */ - contexts_free_first(dev_priv); + /* Reap the stale contexts */ + contexts_flush_free(&i915->gem.contexts); - ctx = __create_context(dev_priv); + ctx = __create_context(i915); if (IS_ERR(ctx)) return ctx; - if (HAS_FULL_PPGTT(dev_priv)) { + if (HAS_FULL_PPGTT(i915)) { struct i915_ppgtt *ppgtt; - ppgtt = i915_ppgtt_create(dev_priv); + ppgtt = i915_ppgtt_create(&i915->gt); if (IS_ERR(ppgtt)) { DRM_DEBUG_DRIVER("PPGTT setup failed (%ld)\n", PTR_ERR(ppgtt)); @@ -523,20 +714,24 @@ i915_gem_create_context(struct drm_i915_private *dev_priv, unsigned int flags) return ERR_CAST(ppgtt); } + mutex_lock(&ctx->mutex); __assign_ppgtt(ctx, &ppgtt->vm); + mutex_unlock(&ctx->mutex); + i915_vm_put(&ppgtt->vm); } if (flags & I915_CONTEXT_CREATE_FLAGS_SINGLE_TIMELINE) { - struct i915_timeline *timeline; + struct intel_timeline *timeline; - timeline = i915_timeline_create(dev_priv, NULL); + timeline = intel_timeline_create(&i915->gt, NULL); if (IS_ERR(timeline)) { 
context_close(ctx); return ERR_CAST(timeline); } - ctx->timeline = timeline; + __assign_timeline(ctx, timeline); + intel_timeline_put(timeline); } trace_i915_context_create(ctx); @@ -544,171 +739,26 @@ i915_gem_create_context(struct drm_i915_private *dev_priv, unsigned int flags) return ctx; } -/** - * i915_gem_context_create_gvt - create a GVT GEM context - * @dev: drm device * - * - * This function is used to create a GVT specific GEM context. - * - * Returns: - * pointer to i915_gem_context on success, error pointer if failed - * - */ -struct i915_gem_context * -i915_gem_context_create_gvt(struct drm_device *dev) -{ - struct i915_gem_context *ctx; - int ret; - - if (!IS_ENABLED(CONFIG_DRM_I915_GVT)) - return ERR_PTR(-ENODEV); - - ret = i915_mutex_lock_interruptible(dev); - if (ret) - return ERR_PTR(ret); - - ctx = i915_gem_create_context(to_i915(dev), 0); - if (IS_ERR(ctx)) - goto out; - - ret = i915_gem_context_pin_hw_id(ctx); - if (ret) { - context_close(ctx); - ctx = ERR_PTR(ret); - goto out; - } - - ctx->file_priv = ERR_PTR(-EBADF); - i915_gem_context_set_closed(ctx); /* not user accessible */ - i915_gem_context_clear_bannable(ctx); - i915_gem_context_set_force_single_submission(ctx); - if (!USES_GUC_SUBMISSION(to_i915(dev))) - ctx->ring_size = 512 * PAGE_SIZE; /* Max ring buffer size */ - - GEM_BUG_ON(i915_gem_context_is_kernel(ctx)); -out: - mutex_unlock(&dev->struct_mutex); - return ctx; -} - -static void -destroy_kernel_context(struct i915_gem_context **ctxp) -{ - struct i915_gem_context *ctx; - - /* Keep the context ref so that we can free it immediately ourselves */ - ctx = i915_gem_context_get(fetch_and_zero(ctxp)); - GEM_BUG_ON(!i915_gem_context_is_kernel(ctx)); - - context_close(ctx); - i915_gem_context_free(ctx); -} - -struct i915_gem_context * -i915_gem_context_create_kernel(struct drm_i915_private *i915, int prio) -{ - struct i915_gem_context *ctx; - int err; - - ctx = i915_gem_create_context(i915, 0); - if (IS_ERR(ctx)) - return ctx; - - err = i915_gem_context_pin_hw_id(ctx); - if (err) { - destroy_kernel_context(&ctx); - return ERR_PTR(err); - } - - i915_gem_context_clear_bannable(ctx); - ctx->sched.priority = I915_USER_PRIORITY(prio); - ctx->ring_size = PAGE_SIZE; - - GEM_BUG_ON(!i915_gem_context_is_kernel(ctx)); - - return ctx; -} - -static void init_contexts(struct drm_i915_private *i915) +static void init_contexts(struct i915_gem_contexts *gc) { - mutex_init(&i915->contexts.mutex); - INIT_LIST_HEAD(&i915->contexts.list); + spin_lock_init(&gc->lock); + INIT_LIST_HEAD(&gc->list); - /* Using the simple ida interface, the max is limited by sizeof(int) */ - BUILD_BUG_ON(MAX_CONTEXT_HW_ID > INT_MAX); - BUILD_BUG_ON(GEN11_MAX_CONTEXT_HW_ID > INT_MAX); - ida_init(&i915->contexts.hw_ida); - INIT_LIST_HEAD(&i915->contexts.hw_id_list); - - INIT_WORK(&i915->contexts.free_work, contexts_free_worker); - init_llist_head(&i915->contexts.free_list); + INIT_WORK(&gc->free_work, contexts_free_worker); + init_llist_head(&gc->free_list); } -static bool needs_preempt_context(struct drm_i915_private *i915) +void i915_gem_init__contexts(struct drm_i915_private *i915) { - return HAS_EXECLISTS(i915); -} - -int i915_gem_contexts_init(struct drm_i915_private *dev_priv) -{ - struct i915_gem_context *ctx; - - /* Reassure ourselves we are only called once */ - GEM_BUG_ON(dev_priv->kernel_context); - GEM_BUG_ON(dev_priv->preempt_context); - - intel_engine_init_ctx_wa(dev_priv->engine[RCS0]); - init_contexts(dev_priv); - - /* lowest priority; idle task */ - ctx = 
i915_gem_context_create_kernel(dev_priv, I915_PRIORITY_MIN); - if (IS_ERR(ctx)) { - DRM_ERROR("Failed to create default global context\n"); - return PTR_ERR(ctx); - } - /* - * For easy recognisablity, we want the kernel context to be 0 and then - * all user contexts will have non-zero hw_id. Kernel contexts are - * permanently pinned, so that we never suffer a stall and can - * use them from any allocation context (e.g. for evicting other - * contexts and from inside the shrinker). - */ - GEM_BUG_ON(ctx->hw_id); - GEM_BUG_ON(!atomic_read(&ctx->hw_id_pin_count)); - dev_priv->kernel_context = ctx; - - /* highest priority; preempting task */ - if (needs_preempt_context(dev_priv)) { - ctx = i915_gem_context_create_kernel(dev_priv, INT_MAX); - if (!IS_ERR(ctx)) - dev_priv->preempt_context = ctx; - else - DRM_ERROR("Failed to create preempt context; disabling preemption\n"); - } - + init_contexts(&i915->gem.contexts); DRM_DEBUG_DRIVER("%s context support initialized\n", - DRIVER_CAPS(dev_priv)->has_logical_contexts ? + DRIVER_CAPS(i915)->has_logical_contexts ? "logical" : "fake"); - return 0; } -void i915_gem_contexts_fini(struct drm_i915_private *i915) +void i915_gem_driver_release__contexts(struct drm_i915_private *i915) { - lockdep_assert_held(&i915->drm.struct_mutex); - - if (i915->preempt_context) - destroy_kernel_context(&i915->preempt_context); - destroy_kernel_context(&i915->kernel_context); - - /* Must free all deferred contexts (via flush_workqueue) first */ - GEM_BUG_ON(!list_empty(&i915->contexts.hw_id_list)); - ida_destroy(&i915->contexts.hw_ida); -} - -static int context_idr_cleanup(int id, void *p, void *data) -{ - context_close(p); - return 0; + flush_work(&i915->gem.contexts.free_work); } static int vm_idr_cleanup(int id, void *p, void *data) @@ -718,33 +768,29 @@ static int vm_idr_cleanup(int id, void *p, void *data) } static int gem_context_register(struct i915_gem_context *ctx, - struct drm_i915_file_private *fpriv) + struct drm_i915_file_private *fpriv, + u32 *id) { + struct i915_address_space *vm; int ret; ctx->file_priv = fpriv; - if (ctx->vm) - ctx->vm->file = fpriv; + + mutex_lock(&ctx->mutex); + vm = i915_gem_context_vm(ctx); + if (vm) + WRITE_ONCE(vm->file, fpriv); /* XXX */ + mutex_unlock(&ctx->mutex); ctx->pid = get_task_pid(current, PIDTYPE_PID); - ctx->name = kasprintf(GFP_KERNEL, "%s[%d]", - current->comm, pid_nr(ctx->pid)); - if (!ctx->name) { - ret = -ENOMEM; - goto err_pid; - } + snprintf(ctx->name, sizeof(ctx->name), "%s[%d]", + current->comm, pid_nr(ctx->pid)); /* And finally expose ourselves to userspace via the idr */ - mutex_lock(&fpriv->context_idr_lock); - ret = idr_alloc(&fpriv->context_idr, ctx, 0, 0, GFP_KERNEL); - mutex_unlock(&fpriv->context_idr_lock); - if (ret >= 0) - goto out; + ret = xa_alloc(&fpriv->context_xa, id, ctx, xa_limit_32b, GFP_KERNEL); + if (ret) + put_pid(fetch_and_zero(&ctx->pid)); - kfree(fetch_and_zero(&ctx->name)); -err_pid: - put_pid(fetch_and_zero(&ctx->pid)); -out: return ret; } @@ -754,51 +800,51 @@ int i915_gem_context_open(struct drm_i915_private *i915, struct drm_i915_file_private *file_priv = file->driver_priv; struct i915_gem_context *ctx; int err; + u32 id; - mutex_init(&file_priv->context_idr_lock); - mutex_init(&file_priv->vm_idr_lock); + xa_init_flags(&file_priv->context_xa, XA_FLAGS_ALLOC); - idr_init(&file_priv->context_idr); + mutex_init(&file_priv->vm_idr_lock); idr_init_base(&file_priv->vm_idr, 1); - mutex_lock(&i915->drm.struct_mutex); ctx = i915_gem_create_context(i915, 0); - 
mutex_unlock(&i915->drm.struct_mutex); if (IS_ERR(ctx)) { err = PTR_ERR(ctx); goto err; } - err = gem_context_register(ctx, file_priv); + err = gem_context_register(ctx, file_priv, &id); if (err < 0) goto err_ctx; - GEM_BUG_ON(i915_gem_context_is_kernel(ctx)); - GEM_BUG_ON(err > 0); - + GEM_BUG_ON(id); return 0; err_ctx: context_close(ctx); err: idr_destroy(&file_priv->vm_idr); - idr_destroy(&file_priv->context_idr); + xa_destroy(&file_priv->context_xa); mutex_destroy(&file_priv->vm_idr_lock); - mutex_destroy(&file_priv->context_idr_lock); return err; } void i915_gem_context_close(struct drm_file *file) { struct drm_i915_file_private *file_priv = file->driver_priv; + struct drm_i915_private *i915 = file_priv->dev_priv; + struct i915_gem_context *ctx; + unsigned long idx; - idr_for_each(&file_priv->context_idr, context_idr_cleanup, NULL); - idr_destroy(&file_priv->context_idr); - mutex_destroy(&file_priv->context_idr_lock); + xa_for_each(&file_priv->context_xa, idx, ctx) + context_close(ctx); + xa_destroy(&file_priv->context_xa); idr_for_each(&file_priv->vm_idr, vm_idr_cleanup, NULL); idr_destroy(&file_priv->vm_idr); mutex_destroy(&file_priv->vm_idr_lock); + + contexts_flush_free(&i915->gem.contexts); } int i915_gem_vm_create_ioctl(struct drm_device *dev, void *data, @@ -816,7 +862,7 @@ int i915_gem_vm_create_ioctl(struct drm_device *dev, void *data, if (args->flags) return -EINVAL; - ppgtt = i915_ppgtt_create(i915); + ppgtt = i915_ppgtt_create(&i915->gt); if (IS_ERR(ppgtt)) return PTR_ERR(ppgtt); @@ -891,6 +937,7 @@ struct context_barrier_task { void *data; }; +__i915_active_call static void cb_retire(struct i915_active *base) { struct context_barrier_task *cb = container_of(base, typeof(*cb), base); @@ -910,21 +957,23 @@ static int context_barrier_task(struct i915_gem_context *ctx, void (*task)(void *data), void *data) { - struct drm_i915_private *i915 = ctx->i915; struct context_barrier_task *cb; struct i915_gem_engines_iter it; struct intel_context *ce; int err = 0; - lockdep_assert_held(&i915->drm.struct_mutex); GEM_BUG_ON(!task); cb = kmalloc(sizeof(*cb), GFP_KERNEL); if (!cb) return -ENOMEM; - i915_active_init(i915, &cb->base, cb_retire); - i915_active_acquire(&cb->base); + i915_active_init(&cb->base, NULL, cb_retire); + err = i915_active_acquire(&cb->base); + if (err) { + kfree(cb); + return err; + } for_each_gem_engine(ce, i915_gem_context_lock_engines(ctx), it) { struct i915_request *rq; @@ -951,7 +1000,7 @@ static int context_barrier_task(struct i915_gem_context *ctx, if (emit) err = emit(rq, data); if (err == 0) - err = i915_active_ref(&cb->base, rq->fence.context, rq); + err = i915_active_add_request(&cb->base, rq); i915_request_add(rq); if (err) @@ -974,16 +1023,12 @@ static int get_ppgtt(struct drm_i915_file_private *file_priv, struct i915_address_space *vm; int ret; - if (!ctx->vm) + if (!rcu_access_pointer(ctx->vm)) return -ENODEV; - /* XXX rcu acquire? 
*/ - ret = mutex_lock_interruptible(&ctx->i915->drm.struct_mutex); - if (ret) - return ret; - - vm = i915_vm_get(ctx->vm); - mutex_unlock(&ctx->i915->drm.struct_mutex); + rcu_read_lock(); + vm = context_get_vm_rcu(ctx); + rcu_read_unlock(); ret = mutex_lock_interruptible(&file_priv->vm_idr_lock); if (ret) @@ -994,7 +1039,7 @@ static int get_ppgtt(struct drm_i915_file_private *file_priv, if (ret < 0) goto err_unlock; - i915_vm_get(vm); + i915_vm_open(vm); args->size = 0; args->value = ret; @@ -1014,12 +1059,12 @@ static void set_ppgtt_barrier(void *data) if (INTEL_GEN(old->i915) < 8) gen6_ppgtt_unpin_all(i915_vm_to_ppgtt(old)); - i915_vm_put(old); + i915_vm_close(old); } static int emit_ppgtt_update(struct i915_request *rq, void *data) { - struct i915_address_space *vm = rq->gem_context->vm; + struct i915_address_space *vm = rq->context->vm; struct intel_engine_cs *engine = rq->engine; u32 base = engine->mmio_base; u32 *cs; @@ -1044,12 +1089,18 @@ static int emit_ppgtt_update(struct i915_request *rq, void *data) intel_ring_advance(rq, cs); } else if (HAS_LOGICAL_RING_CONTEXTS(engine->i915)) { struct i915_ppgtt *ppgtt = i915_vm_to_ppgtt(vm); + int err; + + /* Magic required to prevent forcewake errors! */ + err = engine->emit_flush(rq, EMIT_INVALIDATE); + if (err) + return err; cs = intel_ring_begin(rq, 4 * GEN8_3LVL_PDPES + 2); if (IS_ERR(cs)) return PTR_ERR(cs); - *cs++ = MI_LOAD_REGISTER_IMM(2 * GEN8_3LVL_PDPES); + *cs++ = MI_LOAD_REGISTER_IMM(2 * GEN8_3LVL_PDPES) | MI_LRI_FORCE_POSTED; for (i = GEN8_3LVL_PDPES; i--; ) { const dma_addr_t pd_daddr = i915_page_dir_dma_addr(ppgtt, i); @@ -1060,9 +1111,6 @@ static int emit_ppgtt_update(struct i915_request *rq, void *data) } *cs++ = MI_NOOP; intel_ring_advance(rq, cs); - } else { - /* ppGTT is not part of the legacy context image */ - gen6_ppgtt_pin(i915_vm_to_ppgtt(vm)); } return 0; @@ -1070,10 +1118,20 @@ static int emit_ppgtt_update(struct i915_request *rq, void *data) static bool skip_ppgtt_update(struct intel_context *ce, void *data) { + if (!test_bit(CONTEXT_ALLOC_BIT, &ce->flags)) + return true; + if (HAS_LOGICAL_RING_CONTEXTS(ce->engine->i915)) - return !ce->state; - else - return !atomic_read(&ce->pin_count); + return false; + + if (!atomic_read(&ce->pin_count)) + return true; + + /* ppGTT is not part of the legacy context image */ + if (gen6_ppgtt_pin(i915_vm_to_ppgtt(ce->vm))) + return true; + + return false; } static int set_ppgtt(struct drm_i915_file_private *file_priv, @@ -1086,34 +1144,34 @@ static int set_ppgtt(struct drm_i915_file_private *file_priv, if (args->size) return -EINVAL; - if (!ctx->vm) + if (!rcu_access_pointer(ctx->vm)) return -ENODEV; if (upper_32_bits(args->value)) return -ENOENT; - err = mutex_lock_interruptible(&file_priv->vm_idr_lock); - if (err) - return err; - + rcu_read_lock(); vm = idr_find(&file_priv->vm_idr, args->value); - if (vm) - i915_vm_get(vm); - mutex_unlock(&file_priv->vm_idr_lock); + if (vm && !kref_get_unless_zero(&vm->ref)) + vm = NULL; + rcu_read_unlock(); if (!vm) return -ENOENT; - err = mutex_lock_interruptible(&ctx->i915->drm.struct_mutex); + err = mutex_lock_interruptible(&ctx->mutex); if (err) goto out; - if (vm == ctx->vm) + if (i915_gem_context_is_closed(ctx)) { + err = -ENOENT; + goto unlock; + } + + if (vm == rcu_access_pointer(ctx->vm)) goto unlock; /* Teardown the existing obj:vma cache, it will have to be rebuilt. 
*/ - mutex_lock(&ctx->mutex); lut_close(ctx); - mutex_unlock(&ctx->mutex); old = __set_ppgtt(ctx, vm); @@ -1128,14 +1186,12 @@ static int set_ppgtt(struct drm_i915_file_private *file_priv, set_ppgtt_barrier, old); if (err) { - ctx->vm = old; - ctx->desc_template = default_desc_template(ctx->i915, old); - i915_vm_put(vm); + i915_vm_close(__set_ppgtt(ctx, old)); + i915_vm_close(old); } unlock: - mutex_unlock(&ctx->i915->drm.struct_mutex); - + mutex_unlock(&ctx->mutex); out: i915_vm_put(vm); return err; @@ -1154,7 +1210,7 @@ static int gen8_emit_rpcs_config(struct i915_request *rq, offset = i915_ggtt_offset(ce->state) + LRC_STATE_PN * PAGE_SIZE + - (CTX_R_PWR_CLK_STATE + 1) * 4; + CTX_R_PWR_CLK_STATE * 4; *cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT; *cs++ = lower_32_bits(offset); @@ -1180,44 +1236,32 @@ gen8_modify_rpcs(struct intel_context *ce, struct intel_sseu sseu) * image, or into the registers directory, does not stick). Pristine * and idle contexts will be configured on pinning. */ - if (!intel_context_is_pinned(ce)) + if (!intel_context_pin_if_active(ce)) return 0; - rq = i915_request_create(ce->engine->kernel_context); - if (IS_ERR(rq)) - return PTR_ERR(rq); - - /* Queue this switch after all other activity by this context. */ - ret = i915_active_request_set(&ce->ring->timeline->last_request, rq); - if (ret) - goto out_add; - - /* - * Guarantee context image and the timeline remains pinned until the - * modifying request is retired by setting the ce activity tracker. - * - * But we only need to take one pin on the account of it. Or in other - * words transfer the pinned ce object to tracked active request. - */ - GEM_BUG_ON(i915_active_is_idle(&ce->active)); - ret = i915_active_ref(&ce->active, rq->fence.context, rq); - if (ret) - goto out_add; + rq = intel_engine_create_kernel_request(ce->engine); + if (IS_ERR(rq)) { + ret = PTR_ERR(rq); + goto out_unpin; + } - ret = gen8_emit_rpcs_config(rq, ce, sseu); + /* Serialise with the remote context */ + ret = intel_context_prepare_remote_request(ce, rq); + if (ret == 0) + ret = gen8_emit_rpcs_config(rq, ce, sseu); -out_add: i915_request_add(rq); +out_unpin: + intel_context_unpin(ce); return ret; } static int -__intel_context_reconfigure_sseu(struct intel_context *ce, - struct intel_sseu sseu) +intel_context_reconfigure_sseu(struct intel_context *ce, struct intel_sseu sseu) { int ret; - GEM_BUG_ON(INTEL_GEN(ce->gem_context->i915) < 8); + GEM_BUG_ON(INTEL_GEN(ce->engine->i915) < 8); ret = intel_context_lock_pinned(ce); if (ret) @@ -1237,23 +1281,6 @@ unlock: } static int -intel_context_reconfigure_sseu(struct intel_context *ce, struct intel_sseu sseu) -{ - struct drm_i915_private *i915 = ce->gem_context->i915; - int ret; - - ret = mutex_lock_interruptible(&i915->drm.struct_mutex); - if (ret) - return ret; - - ret = __intel_context_reconfigure_sseu(ce, sseu); - - mutex_unlock(&i915->drm.struct_mutex); - - return ret; -} - -static int user_to_context_sseu(struct drm_i915_private *i915, const struct drm_i915_gem_context_param_sseu *user, struct intel_sseu *context) @@ -1484,12 +1511,14 @@ set_engines__load_balance(struct i915_user_extension __user *base, void *data) } } - ce = intel_execlists_create_virtual(set->ctx, siblings, n); + ce = intel_execlists_create_virtual(siblings, n); if (IS_ERR(ce)) { err = PTR_ERR(ce); goto out_siblings; } + intel_context_set_gem(ce, set->ctx); + if (cmpxchg(&set->engines->engines[idx], NULL, ce)) { intel_context_put(ce); err = -EEXIST; @@ -1636,6 +1665,7 @@ set_engines(struct i915_gem_context *ctx, for (n = 
0; n < num_engines; n++) { struct i915_engine_class_instance ci; struct intel_engine_cs *engine; + struct intel_context *ce; if (copy_from_user(&ci, &user->engines[n], sizeof(ci))) { __free_engines(set.engines, n); @@ -1658,11 +1688,15 @@ set_engines(struct i915_gem_context *ctx, return -ENOENT; } - set.engines->engines[n] = intel_context_create(ctx, engine); - if (!set.engines->engines[n]) { + ce = intel_context_create(engine); + if (IS_ERR(ce)) { __free_engines(set.engines, n); - return -ENOMEM; + return PTR_ERR(ce); } + + intel_context_set_gem(ce, ctx); + + set.engines->engines[n] = ce; } set.engines->num_engines = num_engines; @@ -1683,7 +1717,7 @@ replace: i915_gem_context_set_user_engines(ctx); else i915_gem_context_clear_user_engines(ctx); - rcu_swap_protected(ctx->engines, set.engines, 1); + set.engines = rcu_replace_pointer(ctx->engines, set.engines, 1); mutex_unlock(&ctx->engines_mutex); call_rcu(&set.engines->rcu, free_engines_rcu); @@ -1776,7 +1810,7 @@ get_engines(struct i915_gem_context *ctx, if (e->engines[n]) { ci.engine_class = e->engines[n]->engine->uabi_class; - ci.engine_instance = e->engines[n]->engine->instance; + ci.engine_instance = e->engines[n]->engine->uabi_instance; } if (copy_to_user(&user->engines[n], &ci, sizeof(ci))) { @@ -1792,6 +1826,54 @@ err_free: return err; } +static int +set_persistence(struct i915_gem_context *ctx, + const struct drm_i915_gem_context_param *args) +{ + if (args->size) + return -EINVAL; + + return __context_set_persistence(ctx, args->value); +} + +static void __apply_priority(struct intel_context *ce, void *arg) +{ + struct i915_gem_context *ctx = arg; + + if (!intel_engine_has_semaphores(ce->engine)) + return; + + if (ctx->sched.priority >= I915_PRIORITY_NORMAL) + intel_context_set_use_semaphores(ce); + else + intel_context_clear_use_semaphores(ce); +} + +static int set_priority(struct i915_gem_context *ctx, + const struct drm_i915_gem_context_param *args) +{ + s64 priority = args->value; + + if (args->size) + return -EINVAL; + + if (!(ctx->i915->caps.scheduler & I915_SCHEDULER_CAP_PRIORITY)) + return -ENODEV; + + if (priority > I915_CONTEXT_MAX_USER_PRIORITY || + priority < I915_CONTEXT_MIN_USER_PRIORITY) + return -EINVAL; + + if (priority > I915_CONTEXT_DEFAULT_PRIORITY && + !capable(CAP_SYS_NICE)) + return -EPERM; + + ctx->sched.priority = I915_USER_PRIORITY(priority); + context_apply_all(ctx, __apply_priority, ctx); + + return 0; +} + static int ctx_setparam(struct drm_i915_file_private *fpriv, struct i915_gem_context *ctx, struct drm_i915_gem_context_param *args) @@ -1838,23 +1920,7 @@ static int ctx_setparam(struct drm_i915_file_private *fpriv, break; case I915_CONTEXT_PARAM_PRIORITY: - { - s64 priority = args->value; - - if (args->size) - ret = -EINVAL; - else if (!(ctx->i915->caps.scheduler & I915_SCHEDULER_CAP_PRIORITY)) - ret = -ENODEV; - else if (priority > I915_CONTEXT_MAX_USER_PRIORITY || - priority < I915_CONTEXT_MIN_USER_PRIORITY) - ret = -EINVAL; - else if (priority > I915_CONTEXT_DEFAULT_PRIORITY && - !capable(CAP_SYS_NICE)) - ret = -EPERM; - else - ctx->sched.priority = - I915_USER_PRIORITY(priority); - } + ret = set_priority(ctx, args); break; case I915_CONTEXT_PARAM_SSEU: @@ -1869,6 +1935,10 @@ static int ctx_setparam(struct drm_i915_file_private *fpriv, ret = set_engines(ctx, args); break; + case I915_CONTEXT_PARAM_PERSISTENCE: + ret = set_persistence(ctx, args); + break; + case I915_CONTEXT_PARAM_BAN_PERIOD: default: ret = -EINVAL; @@ -1930,20 +2000,23 @@ static int clone_engines(struct i915_gem_context *dst, */ 
if (intel_engine_is_virtual(engine)) clone->engines[n] = - intel_execlists_clone_virtual(dst, engine); + intel_execlists_clone_virtual(engine); else - clone->engines[n] = intel_context_create(dst, engine); + clone->engines[n] = intel_context_create(engine); if (IS_ERR_OR_NULL(clone->engines[n])) { __free_engines(clone, n); goto err_unlock; } + + intel_context_set_gem(clone->engines[n], dst); } clone->num_engines = n; user_engines = i915_gem_context_user_engines(src); i915_gem_context_unlock_engines(src); - free_engines(dst->engines); + /* Serialised by constructor */ + free_engines(__context_engines_static(dst)); RCU_INIT_POINTER(dst->engines, clone); if (user_engines) i915_gem_context_set_user_engines(dst); @@ -1978,7 +2051,8 @@ static int clone_sseu(struct i915_gem_context *dst, unsigned long n; int err; - clone = dst->engines; /* no locking required; sole access */ + /* no locking required; sole access under constructor*/ + clone = __context_engines_static(dst); if (e->num_engines != clone->num_engines) { err = -EINVAL; goto unlock; @@ -2011,13 +2085,8 @@ unlock: static int clone_timeline(struct i915_gem_context *dst, struct i915_gem_context *src) { - if (src->timeline) { - GEM_BUG_ON(src->timeline == dst->timeline); - - if (dst->timeline) - i915_timeline_put(dst->timeline); - dst->timeline = i915_timeline_get(src->timeline); - } + if (src->timeline) + __assign_timeline(dst, src->timeline); return 0; } @@ -2026,44 +2095,24 @@ static int clone_vm(struct i915_gem_context *dst, struct i915_gem_context *src) { struct i915_address_space *vm; + int err = 0; - rcu_read_lock(); - do { - vm = READ_ONCE(src->vm); - if (!vm) - break; - - if (!kref_get_unless_zero(&vm->ref)) - continue; - - /* - * This ppgtt may have be reallocated between - * the read and the kref, and reassigned to a third - * context. In order to avoid inadvertent sharing - * of this ppgtt with that third context (and not - * src), we have to confirm that we have the same - * ppgtt after passing through the strong memory - * barrier implied by a successful - * kref_get_unless_zero(). - * - * Once we have acquired the current ppgtt of src, - * we no longer care if it is released from src, as - * it cannot be reallocated elsewhere. 
- */ - - if (vm == READ_ONCE(src->vm)) - break; + if (!rcu_access_pointer(src->vm)) + return 0; - i915_vm_put(vm); - } while (1); + rcu_read_lock(); + vm = context_get_vm_rcu(src); rcu_read_unlock(); - if (vm) { + if (!mutex_lock_interruptible(&dst->mutex)) { __assign_ppgtt(dst, vm); - i915_vm_put(vm); + mutex_unlock(&dst->mutex); + } else { + err = -EINTR; } - return 0; + i915_vm_put(vm); + return err; } static int create_clone(struct i915_user_extension __user *ext, void *data) @@ -2134,6 +2183,7 @@ int i915_gem_context_create_ioctl(struct drm_device *dev, void *data, struct drm_i915_gem_context_create_ext *args = data; struct create_ext ext_data; int ret; + u32 id; if (!DRIVER_CAPS(i915)->has_logical_contexts) return -ENODEV; @@ -2141,24 +2191,18 @@ int i915_gem_context_create_ioctl(struct drm_device *dev, void *data, if (args->flags & I915_CONTEXT_CREATE_FLAGS_UNKNOWN) return -EINVAL; - ret = i915_terminally_wedged(i915); + ret = intel_gt_terminally_wedged(&i915->gt); if (ret) return ret; ext_data.fpriv = file->driver_priv; if (client_is_banned(ext_data.fpriv)) { DRM_DEBUG("client %s[%d] banned from creating ctx\n", - current->comm, - pid_nr(get_task_pid(current, PIDTYPE_PID))); + current->comm, task_pid_nr(current)); return -EIO; } - ret = i915_mutex_lock_interruptible(dev); - if (ret) - return ret; - ext_data.ctx = i915_gem_create_context(i915, args->flags); - mutex_unlock(&dev->struct_mutex); if (IS_ERR(ext_data.ctx)) return PTR_ERR(ext_data.ctx); @@ -2171,11 +2215,11 @@ int i915_gem_context_create_ioctl(struct drm_device *dev, void *data, goto err_ctx; } - ret = gem_context_register(ext_data.ctx, ext_data.fpriv); + ret = gem_context_register(ext_data.ctx, ext_data.fpriv, &id); if (ret < 0) goto err_ctx; - args->ctx_id = ret; + args->ctx_id = id; DRM_DEBUG("HW context %d created\n", args->ctx_id); return 0; @@ -2198,11 +2242,7 @@ int i915_gem_context_destroy_ioctl(struct drm_device *dev, void *data, if (!args->ctx_id) return -ENOENT; - if (mutex_lock_interruptible(&file_priv->context_idr_lock)) - return -EINTR; - - ctx = idr_remove(&file_priv->context_idr, args->ctx_id); - mutex_unlock(&file_priv->context_idr_lock); + ctx = xa_erase(&file_priv->context_xa, args->ctx_id); if (!ctx) return -ENOENT; @@ -2285,12 +2325,12 @@ int i915_gem_context_getparam_ioctl(struct drm_device *dev, void *data, case I915_CONTEXT_PARAM_GTT_SIZE: args->size = 0; - if (ctx->vm) - args->value = ctx->vm->total; - else if (to_i915(dev)->mm.aliasing_ppgtt) - args->value = to_i915(dev)->mm.aliasing_ppgtt->vm.total; + rcu_read_lock(); + if (rcu_access_pointer(ctx->vm)) + args->value = rcu_dereference(ctx->vm)->total; else args->value = to_i915(dev)->ggtt.vm.total; + rcu_read_unlock(); break; case I915_CONTEXT_PARAM_NO_ERROR_CAPTURE: @@ -2325,6 +2365,11 @@ int i915_gem_context_getparam_ioctl(struct drm_device *dev, void *data, ret = get_engines(ctx, args); break; + case I915_CONTEXT_PARAM_PERSISTENCE: + args->size = 0; + args->value = i915_gem_context_is_persistent(ctx); + break; + case I915_CONTEXT_PARAM_BAN_PERIOD: default: ret = -EINVAL; @@ -2356,7 +2401,7 @@ int i915_gem_context_setparam_ioctl(struct drm_device *dev, void *data, int i915_gem_context_reset_stats_ioctl(struct drm_device *dev, void *data, struct drm_file *file) { - struct drm_i915_private *dev_priv = to_i915(dev); + struct drm_i915_private *i915 = to_i915(dev); struct drm_i915_reset_stats *args = data; struct i915_gem_context *ctx; int ret; @@ -2378,7 +2423,7 @@ int i915_gem_context_reset_stats_ioctl(struct drm_device *dev, */ if 
(capable(CAP_SYS_ADMIN)) - args->reset_count = i915_reset_count(&dev_priv->gpu_error); + args->reset_count = i915_reset_count(&i915->gpu_error); else args->reset_count = 0; @@ -2391,33 +2436,6 @@ out: return ret; } -int __i915_gem_context_pin_hw_id(struct i915_gem_context *ctx) -{ - struct drm_i915_private *i915 = ctx->i915; - int err = 0; - - mutex_lock(&i915->contexts.mutex); - - GEM_BUG_ON(i915_gem_context_is_closed(ctx)); - - if (list_empty(&ctx->hw_id_link)) { - GEM_BUG_ON(atomic_read(&ctx->hw_id_pin_count)); - - err = assign_hw_id(i915, &ctx->hw_id); - if (err) - goto out_unlock; - - list_add_tail(&ctx->hw_id_link, &i915->contexts.hw_id_list); - } - - GEM_BUG_ON(atomic_read(&ctx->hw_id_pin_count) == ~0u); - atomic_inc(&ctx->hw_id_pin_count); - -out_unlock: - mutex_unlock(&i915->contexts.mutex); - return err; -} - /* GEM context-engines iterator: for_each_gem_engine() */ struct intel_context * i915_gem_engines_iter_next(struct i915_gem_engines_iter *it) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.h b/drivers/gpu/drm/i915/gem/i915_gem_context.h index 9691dd062f72..3ae61a355d87 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_context.h +++ b/drivers/gpu/drm/i915/gem/i915_gem_context.h @@ -11,6 +11,7 @@ #include "gt/intel_context.h" +#include "i915_drv.h" #include "i915_gem.h" #include "i915_scheduler.h" #include "intel_device_info.h" @@ -74,24 +75,19 @@ static inline void i915_gem_context_clear_recoverable(struct i915_gem_context *c clear_bit(UCONTEXT_RECOVERABLE, &ctx->user_flags); } -static inline bool i915_gem_context_is_banned(const struct i915_gem_context *ctx) +static inline bool i915_gem_context_is_persistent(const struct i915_gem_context *ctx) { - return test_bit(CONTEXT_BANNED, &ctx->flags); + return test_bit(UCONTEXT_PERSISTENCE, &ctx->user_flags); } -static inline void i915_gem_context_set_banned(struct i915_gem_context *ctx) +static inline void i915_gem_context_set_persistence(struct i915_gem_context *ctx) { - set_bit(CONTEXT_BANNED, &ctx->flags); + set_bit(UCONTEXT_PERSISTENCE, &ctx->user_flags); } -static inline bool i915_gem_context_force_single_submission(const struct i915_gem_context *ctx) +static inline void i915_gem_context_clear_persistence(struct i915_gem_context *ctx) { - return test_bit(CONTEXT_FORCE_SINGLE_SUBMISSION, &ctx->flags); -} - -static inline void i915_gem_context_set_force_single_submission(struct i915_gem_context *ctx) -{ - __set_bit(CONTEXT_FORCE_SINGLE_SUBMISSION, &ctx->flags); + clear_bit(UCONTEXT_PERSISTENCE, &ctx->user_flags); } static inline bool @@ -112,37 +108,15 @@ i915_gem_context_clear_user_engines(struct i915_gem_context *ctx) clear_bit(CONTEXT_USER_ENGINES, &ctx->flags); } -int __i915_gem_context_pin_hw_id(struct i915_gem_context *ctx); -static inline int i915_gem_context_pin_hw_id(struct i915_gem_context *ctx) -{ - if (atomic_inc_not_zero(&ctx->hw_id_pin_count)) - return 0; - - return __i915_gem_context_pin_hw_id(ctx); -} - -static inline void i915_gem_context_unpin_hw_id(struct i915_gem_context *ctx) -{ - GEM_BUG_ON(atomic_read(&ctx->hw_id_pin_count) == 0u); - atomic_dec(&ctx->hw_id_pin_count); -} - -static inline bool i915_gem_context_is_kernel(struct i915_gem_context *ctx) -{ - return !ctx->file_priv; -} - /* i915_gem_context.c */ -int __must_check i915_gem_contexts_init(struct drm_i915_private *dev_priv); -void i915_gem_contexts_fini(struct drm_i915_private *dev_priv); +void i915_gem_init__contexts(struct drm_i915_private *i915); +void i915_gem_driver_release__contexts(struct drm_i915_private *i915); int 
i915_gem_context_open(struct drm_i915_private *i915, struct drm_file *file); void i915_gem_context_close(struct drm_file *file); void i915_gem_context_release(struct kref *ctx_ref); -struct i915_gem_context * -i915_gem_context_create_gvt(struct drm_device *dev); int i915_gem_vm_create_ioctl(struct drm_device *dev, void *data, struct drm_file *file); @@ -160,9 +134,6 @@ int i915_gem_context_setparam_ioctl(struct drm_device *dev, void *data, int i915_gem_context_reset_stats_ioctl(struct drm_device *dev, void *data, struct drm_file *file); -struct i915_gem_context * -i915_gem_context_create_kernel(struct drm_i915_private *i915, int prio); - static inline struct i915_gem_context * i915_gem_context_get(struct i915_gem_context *ctx) { @@ -175,6 +146,27 @@ static inline void i915_gem_context_put(struct i915_gem_context *ctx) kref_put(&ctx->ref, i915_gem_context_release); } +static inline struct i915_address_space * +i915_gem_context_vm(struct i915_gem_context *ctx) +{ + return rcu_dereference_protected(ctx->vm, lockdep_is_held(&ctx->mutex)); +} + +static inline struct i915_address_space * +i915_gem_context_get_vm_rcu(struct i915_gem_context *ctx) +{ + struct i915_address_space *vm; + + rcu_read_lock(); + vm = rcu_dereference(ctx->vm); + if (!vm) + vm = &ctx->i915->ggtt.vm; + vm = i915_vm_get(vm); + rcu_read_unlock(); + + return vm; +} + static inline struct i915_gem_engines * i915_gem_context_engines(struct i915_gem_context *ctx) { @@ -198,12 +190,6 @@ i915_gem_context_unlock_engines(struct i915_gem_context *ctx) } static inline struct intel_context * -i915_gem_context_lookup_engine(struct i915_gem_context *ctx, unsigned int idx) -{ - return i915_gem_context_engines(ctx)->engines[idx]; -} - -static inline struct intel_context * i915_gem_context_get_engine(struct i915_gem_context *ctx, unsigned int idx) { struct intel_context *ce = ERR_PTR(-EINVAL); diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context_types.h b/drivers/gpu/drm/i915/gem/i915_gem_context_types.h index cc513410eeef..017ca803ab47 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_context_types.h +++ b/drivers/gpu/drm/i915/gem/i915_gem_context_types.h @@ -26,7 +26,7 @@ struct pid; struct drm_i915_private; struct drm_i915_file_private; struct i915_address_space; -struct i915_timeline; +struct intel_timeline; struct intel_ring; struct i915_gem_engines { @@ -77,7 +77,7 @@ struct i915_gem_context { struct i915_gem_engines __rcu *engines; struct mutex engines_mutex; /* guards writes to engines */ - struct i915_timeline *timeline; + struct intel_timeline *timeline; /** * @vm: unique address space (GTT) @@ -88,7 +88,7 @@ struct i915_gem_context { * In other modes, this is a NULL pointer with the expectation that * the caller uses the shared global GTT. */ - struct i915_address_space *vm; + struct i915_address_space __rcu *vm; /** * @pid: process id of creator @@ -100,15 +100,6 @@ struct i915_gem_context { */ struct pid *pid; - /** - * @name: arbitrary name - * - * A name is constructed for the context from the creator's process - * name, pid and user handle in order to uniquely identify the - * context in messages. 
- */ - const char *name; - /** link: place with &drm_i915_private.context_list */ struct list_head link; struct llist_node free_link; @@ -137,43 +128,19 @@ struct i915_gem_context { #define UCONTEXT_NO_ERROR_CAPTURE 1 #define UCONTEXT_BANNABLE 2 #define UCONTEXT_RECOVERABLE 3 +#define UCONTEXT_PERSISTENCE 4 /** * @flags: small set of booleans */ unsigned long flags; -#define CONTEXT_BANNED 0 -#define CONTEXT_CLOSED 1 -#define CONTEXT_FORCE_SINGLE_SUBMISSION 2 -#define CONTEXT_USER_ENGINES 3 - - /** - * @hw_id: - unique identifier for the context - * - * The hardware needs to uniquely identify the context for a few - * functions like fault reporting, PASID, scheduling. The - * &drm_i915_private.context_hw_ida is used to assign a unqiue - * id for the lifetime of the context. - * - * @hw_id_pin_count: - number of times this context had been pinned - * for use (should be, at most, once per engine). - * - * @hw_id_link: - all contexts with an assigned id are tracked - * for possible repossession. - */ - unsigned int hw_id; - atomic_t hw_id_pin_count; - struct list_head hw_id_link; +#define CONTEXT_CLOSED 0 +#define CONTEXT_USER_ENGINES 1 struct mutex mutex; struct i915_sched_attr sched; - /** ring_size: size for allocating the per-engine ring buffer */ - u32 ring_size; - /** desc_template: invariant fields for the HW context descriptor */ - u32 desc_template; - /** guilty_count: How many times this context has caused a GPU hang. */ atomic_t guilty_count; /** @@ -197,6 +164,15 @@ struct i915_gem_context { * per vm, which may be one per context or shared with the global GTT) */ struct radix_tree_root handles_vma; + + /** + * @name: arbitrary name, used for user debug + * + * A name is constructed for the context from the creator's process + * name, pid and user handle in order to uniquely identify the + * context in messages. 
+ */ + char name[TASK_COMM_LEN + 8]; }; #endif /* __I915_GEM_CONTEXT_TYPES_H__ */ diff --git a/drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c b/drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c index cbf1701d3acc..372b57ca0efc 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c @@ -6,7 +6,7 @@ #include <linux/dma-buf.h> #include <linux/highmem.h> -#include <linux/reservation.h> +#include <linux/dma-resv.h> #include "i915_drv.h" #include "i915_gem_object.h" @@ -93,40 +93,6 @@ static void i915_gem_dmabuf_vunmap(struct dma_buf *dma_buf, void *vaddr) i915_gem_object_unpin_map(obj); } -static void *i915_gem_dmabuf_kmap(struct dma_buf *dma_buf, unsigned long page_num) -{ - struct drm_i915_gem_object *obj = dma_buf_to_obj(dma_buf); - struct page *page; - - if (page_num >= obj->base.size >> PAGE_SHIFT) - return NULL; - - if (!i915_gem_object_has_struct_page(obj)) - return NULL; - - if (i915_gem_object_pin_pages(obj)) - return NULL; - - /* Synchronisation is left to the caller (via .begin_cpu_access()) */ - page = i915_gem_object_get_page(obj, page_num); - if (IS_ERR(page)) - goto err_unpin; - - return kmap(page); - -err_unpin: - i915_gem_object_unpin_pages(obj); - return NULL; -} - -static void i915_gem_dmabuf_kunmap(struct dma_buf *dma_buf, unsigned long page_num, void *addr) -{ - struct drm_i915_gem_object *obj = dma_buf_to_obj(dma_buf); - - kunmap(virt_to_page(addr)); - i915_gem_object_unpin_pages(obj); -} - static int i915_gem_dmabuf_mmap(struct dma_buf *dma_buf, struct vm_area_struct *vma) { struct drm_i915_gem_object *obj = dma_buf_to_obj(dma_buf); @@ -195,8 +161,6 @@ static const struct dma_buf_ops i915_dmabuf_ops = { .map_dma_buf = i915_gem_map_dma_buf, .unmap_dma_buf = i915_gem_unmap_dma_buf, .release = drm_gem_dmabuf_release, - .map = i915_gem_dmabuf_kmap, - .unmap = i915_gem_dmabuf_kunmap, .mmap = i915_gem_dmabuf_mmap, .vmap = i915_gem_dmabuf_vmap, .vunmap = i915_gem_dmabuf_vunmap, @@ -204,8 +168,7 @@ static const struct dma_buf_ops i915_dmabuf_ops = { .end_cpu_access = i915_gem_end_cpu_access, }; -struct dma_buf *i915_gem_prime_export(struct drm_device *dev, - struct drm_gem_object *gem_obj, int flags) +struct dma_buf *i915_gem_prime_export(struct drm_gem_object *gem_obj, int flags) { struct drm_i915_gem_object *obj = to_intel_bo(gem_obj); DEFINE_DMA_BUF_EXPORT_INFO(exp_info); @@ -222,7 +185,7 @@ struct dma_buf *i915_gem_prime_export(struct drm_device *dev, return ERR_PTR(ret); } - return drm_gem_dmabuf_export(dev, &exp_info); + return drm_gem_dmabuf_export(gem_obj->dev, &exp_info); } static int i915_gem_object_get_pages_dmabuf(struct drm_i915_gem_object *obj) @@ -257,6 +220,7 @@ static const struct drm_i915_gem_object_ops i915_gem_object_dmabuf_ops = { struct drm_gem_object *i915_gem_prime_import(struct drm_device *dev, struct dma_buf *dma_buf) { + static struct lock_class_key lock_class; struct dma_buf_attachment *attach; struct drm_i915_gem_object *obj; int ret; @@ -288,7 +252,7 @@ struct drm_gem_object *i915_gem_prime_import(struct drm_device *dev, } drm_gem_private_object_init(dev, &obj->base, dma_buf->size); - i915_gem_object_init(obj, &i915_gem_object_dmabuf_ops); + i915_gem_object_init(obj, &i915_gem_object_dmabuf_ops, &lock_class); obj->base.import_attach = attach; obj->base.resv = dma_buf->resv; diff --git a/drivers/gpu/drm/i915/gem/i915_gem_domain.c b/drivers/gpu/drm/i915/gem/i915_gem_domain.c index 2e3ce2a69653..0cc40e77bbd2 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_domain.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_domain.c @@ 
-12,6 +12,8 @@ #include "i915_gem_ioctls.h" #include "i915_gem_object.h" #include "i915_vma.h" +#include "i915_gem_lmem.h" +#include "i915_gem_mman.h" static void __i915_gem_object_flush_for_display(struct drm_i915_gem_object *obj) { @@ -27,7 +29,7 @@ static void __i915_gem_object_flush_for_display(struct drm_i915_gem_object *obj) void i915_gem_object_flush_if_display(struct drm_i915_gem_object *obj) { - if (!READ_ONCE(obj->pin_global)) + if (!i915_gem_object_is_framebuffer(obj)) return; i915_gem_object_lock(obj); @@ -148,9 +150,17 @@ i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write) GEM_BUG_ON((obj->write_domain & ~I915_GEM_DOMAIN_GTT) != 0); obj->read_domains |= I915_GEM_DOMAIN_GTT; if (write) { + struct i915_vma *vma; + obj->read_domains = I915_GEM_DOMAIN_GTT; obj->write_domain = I915_GEM_DOMAIN_GTT; obj->mm.dirty = true; + + spin_lock(&obj->vma.lock); + for_each_ggtt_vma(vma, obj) + if (i915_vma_is_bound(vma, I915_VMA_GLOBAL_BIND)) + i915_vma_set_ggtt_write(vma); + spin_unlock(&obj->vma.lock); } i915_gem_object_unpin_pages(obj); @@ -175,112 +185,34 @@ i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write) int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj, enum i915_cache_level cache_level) { - struct i915_vma *vma; int ret; - assert_object_held(obj); - if (obj->cache_level == cache_level) return 0; - /* Inspect the list of currently bound VMA and unbind any that would - * be invalid given the new cache-level. This is principally to - * catch the issue of the CS prefetch crossing page boundaries and - * reading an invalid PTE on older architectures. - */ -restart: - list_for_each_entry(vma, &obj->vma.list, obj_link) { - if (!drm_mm_node_allocated(&vma->node)) - continue; - - if (i915_vma_is_pinned(vma)) { - DRM_DEBUG("can not change the cache level of pinned objects\n"); - return -EBUSY; - } - - if (!i915_vma_is_closed(vma) && - i915_gem_valid_gtt_space(vma, cache_level)) - continue; - - ret = i915_vma_unbind(vma); - if (ret) - return ret; + ret = i915_gem_object_wait(obj, + I915_WAIT_INTERRUPTIBLE | + I915_WAIT_ALL, + MAX_SCHEDULE_TIMEOUT); + if (ret) + return ret; - /* As unbinding may affect other elements in the - * obj->vma_list (due to side-effects from retiring - * an active vma), play safe and restart the iterator. - */ - goto restart; - } + ret = i915_gem_object_lock_interruptible(obj); + if (ret) + return ret; - /* We can reuse the existing drm_mm nodes but need to change the - * cache-level on the PTE. We could simply unbind them all and - * rebind with the correct cache-level on next use. However since - * we already have a valid slot, dma mapping, pages etc, we may as - * rewrite the PTE in the belief that doing so tramples upon less - * state and so involves less work. - */ - if (atomic_read(&obj->bind_count)) { - /* Before we change the PTE, the GPU must not be accessing it. - * If we wait upon the object, we know that all the bound - * VMA are no longer active. - */ - ret = i915_gem_object_wait(obj, - I915_WAIT_INTERRUPTIBLE | - I915_WAIT_ALL, - MAX_SCHEDULE_TIMEOUT); - if (ret) - return ret; - - if (!HAS_LLC(to_i915(obj->base.dev)) && - cache_level != I915_CACHE_NONE) { - /* Access to snoopable pages through the GTT is - * incoherent and on some machines causes a hard - * lockup. Relinquish the CPU mmaping to force - * userspace to refault in the pages and we can - * then double check if the GTT mapping is still - * valid for that pointer access. 
- */ - i915_gem_object_release_mmap(obj); - - /* As we no longer need a fence for GTT access, - * we can relinquish it now (and so prevent having - * to steal a fence from someone else on the next - * fence request). Note GPU activity would have - * dropped the fence as all snoopable access is - * supposed to be linear. - */ - for_each_ggtt_vma(vma, obj) { - ret = i915_vma_put_fence(vma); - if (ret) - return ret; - } - } else { - /* We either have incoherent backing store and - * so no GTT access or the architecture is fully - * coherent. In such cases, existing GTT mmaps - * ignore the cache bit in the PTE and we can - * rewrite it without confusing the GPU or having - * to force userspace to fault back in its mmaps. - */ - } - - list_for_each_entry(vma, &obj->vma.list, obj_link) { - if (!drm_mm_node_allocated(&vma->node)) - continue; - - ret = i915_vma_bind(vma, cache_level, PIN_UPDATE); - if (ret) - return ret; - } + /* Always invalidate stale cachelines */ + if (obj->cache_level != cache_level) { + i915_gem_object_set_cache_coherency(obj, cache_level); + obj->cache_dirty = true; } - list_for_each_entry(vma, &obj->vma.list, obj_link) - vma->node.color = cache_level; - i915_gem_object_set_cache_coherency(obj, cache_level); - obj->cache_dirty = true; /* Always invalidate stale cachelines */ + i915_gem_object_unlock(obj); - return 0; + /* The cache-level will be applied when each vma is rebound. */ + return i915_gem_object_unbind(obj, + I915_GEM_OBJECT_UNBIND_ACTIVE | + I915_GEM_OBJECT_UNBIND_BARRIER); } int i915_gem_get_caching_ioctl(struct drm_device *dev, void *data, @@ -361,25 +293,7 @@ int i915_gem_set_caching_ioctl(struct drm_device *dev, void *data, goto out; } - if (obj->cache_level == level) - goto out; - - ret = i915_gem_object_wait(obj, - I915_WAIT_INTERRUPTIBLE, - MAX_SCHEDULE_TIMEOUT); - if (ret) - goto out; - - ret = mutex_lock_interruptible(&i915->drm.struct_mutex); - if (ret) - goto out; - - ret = i915_gem_object_lock_interruptible(obj); - if (ret == 0) { - ret = i915_gem_object_set_cache_level(obj, level); - i915_gem_object_unlock(obj); - } - mutex_unlock(&i915->drm.struct_mutex); + ret = i915_gem_object_set_cache_level(obj, level); out: i915_gem_object_put(obj); @@ -398,17 +312,16 @@ i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj, const struct i915_ggtt_view *view, unsigned int flags) { + struct drm_i915_private *i915 = to_i915(obj->base.dev); struct i915_vma *vma; int ret; - assert_object_held(obj); - - /* Mark the global pin early so that we account for the - * display coherency whilst setting up the cache domains. - */ - obj->pin_global++; + /* Frame buffer must be in LMEM (no migration yet) */ + if (HAS_LMEM(i915) && !i915_gem_object_is_lmem(obj)) + return ERR_PTR(-EINVAL); - /* The display engine is not coherent with the LLC cache on gen6. As + /* + * The display engine is not coherent with the LLC cache on gen6. As * a result, we make sure that the pinning that is about to occur is * done with uncached PTEs. This is lowest common denominator for all * chipsets. @@ -418,14 +331,13 @@ i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj, * with that bit in the PTE to main memory with just one PIPE_CONTROL. */ ret = i915_gem_object_set_cache_level(obj, - HAS_WT(to_i915(obj->base.dev)) ? + HAS_WT(i915) ? 
I915_CACHE_WT : I915_CACHE_NONE); - if (ret) { - vma = ERR_PTR(ret); - goto err_unpin_global; - } + if (ret) + return ERR_PTR(ret); - /* As the user may map the buffer once pinned in the display plane + /* + * As the user may map the buffer once pinned in the display plane * (e.g. libkms for the bootup splash), we have to ensure that we * always use map_and_fenceable for all scanout buffers. However, * it may simply be too big to fit into mappable, in which case @@ -442,21 +354,12 @@ i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj, if (IS_ERR(vma)) vma = i915_gem_object_ggtt_pin(obj, view, 0, alignment, flags); if (IS_ERR(vma)) - goto err_unpin_global; + return vma; vma->display_alignment = max_t(u64, vma->display_alignment, alignment); - __i915_gem_object_flush_for_display(obj); - - /* It should now be out of any other write domains, and we can update - * the domain values for our changes. - */ - obj->read_domains |= I915_GEM_DOMAIN_GTT; - - return vma; + i915_gem_object_flush_if_display(obj); -err_unpin_global: - obj->pin_global--; return vma; } @@ -466,14 +369,19 @@ static void i915_gem_object_bump_inactive_ggtt(struct drm_i915_gem_object *obj) struct i915_vma *vma; GEM_BUG_ON(!i915_gem_object_has_pinned_pages(obj)); + if (!atomic_read(&obj->bind_count)) + return; mutex_lock(&i915->ggtt.vm.mutex); + spin_lock(&obj->vma.lock); for_each_ggtt_vma(vma, obj) { if (!drm_mm_node_allocated(&vma->node)) continue; + GEM_BUG_ON(vma->vm != &i915->ggtt.vm); list_move_tail(&vma->vm_link, &vma->vm->bound_list); } + spin_unlock(&obj->vma.lock); mutex_unlock(&i915->ggtt.vm.mutex); if (i915_gem_object_is_shrinkable(obj)) { @@ -481,7 +389,8 @@ static void i915_gem_object_bump_inactive_ggtt(struct drm_i915_gem_object *obj) spin_lock_irqsave(&i915->mm.obj_lock, flags); - if (obj->mm.madv == I915_MADV_WILLNEED) + if (obj->mm.madv == I915_MADV_WILLNEED && + !atomic_read(&obj->mm.shrink_pin)) list_move_tail(&obj->mm.link, &i915->mm.shrink_list); spin_unlock_irqrestore(&i915->mm.obj_lock, flags); @@ -495,12 +404,6 @@ i915_gem_object_unpin_from_display_plane(struct i915_vma *vma) assert_object_held(obj); - if (WARN_ON(obj->pin_global == 0)) - return; - - if (--obj->pin_global == 0) - vma->display_alignment = I915_GTT_MIN_ALIGNMENT; - /* Bump the LRU to try and avoid premature eviction whilst flipping */ i915_gem_object_bump_inactive_ggtt(obj); @@ -551,13 +454,6 @@ i915_gem_object_set_to_cpu_domain(struct drm_i915_gem_object *obj, bool write) return 0; } -static inline enum fb_op_origin -fb_write_origin(struct drm_i915_gem_object *obj, unsigned int domain) -{ - return (domain == I915_GEM_DOMAIN_GTT ? - obj->frontbuffer_ggtt_origin : ORIGIN_CPU); -} - /** * Called when user space prepares to use an object with the CPU, either * through the mmap ioctl's mapping or a GTT mapping. 
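For reference, a minimal userspace sketch of the set-caching ioctl whose kernel path the hunks above reduce to a single i915_gem_object_set_cache_level() call; the helper name and the fd/handle plumbing are invented for illustration and are not part of the patch.

#include <stdint.h>
#include <xf86drm.h>
#include <drm/i915_drm.h>

static int set_bo_caching(int drm_fd, uint32_t bo_handle, uint32_t mode)
{
	struct drm_i915_gem_caching arg = {
		.handle = bo_handle,
		.caching = mode,	/* e.g. I915_CACHING_NONE or I915_CACHING_CACHED */
	};

	/* With the rework above, the kernel no longer takes struct_mutex here;
	 * the new cache level is applied as each vma is rebound. */
	return drmIoctl(drm_fd, DRM_IOCTL_I915_GEM_SET_CACHING, &arg);
}
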
@@ -661,9 +557,8 @@ i915_gem_set_domain_ioctl(struct drm_device *dev, void *data, i915_gem_object_unlock(obj); - if (write_domain != 0) - intel_fb_obj_invalidate(obj, - fb_write_origin(obj, write_domain)); + if (write_domain) + i915_gem_object_invalidate_frontbuffer(obj, ORIGIN_CPU); out_unpin: i915_gem_object_unpin_pages(obj); @@ -783,7 +678,7 @@ int i915_gem_object_prepare_write(struct drm_i915_gem_object *obj, } out: - intel_fb_obj_invalidate(obj, ORIGIN_CPU); + i915_gem_object_invalidate_frontbuffer(obj, ORIGIN_CPU); obj->mm.dirty = true; /* return with the pages pinned */ return 0; diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c index 41dab9ea33cd..60c984e10c4a 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c @@ -5,7 +5,7 @@ */ #include <linux/intel-iommu.h> -#include <linux/reservation.h> +#include <linux/dma-resv.h> #include <linux/sync_file.h> #include <linux/uaccess.h> @@ -16,13 +16,17 @@ #include "gem/i915_gem_ioctls.h" #include "gt/intel_context.h" +#include "gt/intel_engine_pool.h" +#include "gt/intel_gt.h" #include "gt/intel_gt_pm.h" +#include "gt/intel_ring.h" -#include "i915_gem_ioctls.h" +#include "i915_drv.h" #include "i915_gem_clflush.h" #include "i915_gem_context.h" +#include "i915_gem_ioctls.h" +#include "i915_sw_fence_work.h" #include "i915_trace.h" -#include "intel_drv.h" enum { FORCE_CPU_RELOC = 1, @@ -222,10 +226,10 @@ struct i915_execbuffer { struct intel_engine_cs *engine; /** engine to queue the request to */ struct intel_context *context; /* logical state for the request */ struct i915_gem_context *gem_context; /** caller's context */ - struct i915_address_space *vm; /** GTT and vma for the request */ struct i915_request *request; /** our request to build */ struct i915_vma *batch; /** identity of the batch obj/vma */ + struct i915_vma *trampoline; /** trampoline used for chaining */ /** actual size of execobj[] as we may extend it for the cmdparser */ unsigned int buffer_count; @@ -274,28 +278,11 @@ struct i915_execbuffer { #define exec_entry(EB, VMA) (&(EB)->exec[(VMA)->exec_flags - (EB)->flags]) -/* - * Used to convert any address to canonical form. - * Starting from gen8, some commands (e.g. STATE_BASE_ADDRESS, - * MI_LOAD_REGISTER_MEM and others, see Broadwell PRM Vol2a) require the - * addresses to be in a canonical form: - * "GraphicsAddress[63:48] are ignored by the HW and assumed to be in correct - * canonical form [63:48] == [47]." 
- */ -#define GEN8_HIGH_ADDRESS_BIT 47 -static inline u64 gen8_canonical_addr(u64 address) -{ - return sign_extend64(address, GEN8_HIGH_ADDRESS_BIT); -} - -static inline u64 gen8_noncanonical_addr(u64 address) -{ - return address & GENMASK_ULL(GEN8_HIGH_ADDRESS_BIT, 0); -} - static inline bool eb_use_cmdparser(const struct i915_execbuffer *eb) { - return intel_engine_needs_cmd_parser(eb->engine) && eb->batch_len; + return intel_engine_requires_cmd_parser(eb->engine) || + (intel_engine_using_cmd_parser(eb->engine) && + eb->args->batch_len); } static int eb_create(struct i915_execbuffer *eb) @@ -696,7 +683,9 @@ static int eb_reserve(struct i915_execbuffer *eb) case 1: /* Too fragmented, unbind everything and retry */ - err = i915_gem_evict_vm(eb->vm); + mutex_lock(&eb->context->vm->mutex); + err = i915_gem_evict_vm(eb->context->vm); + mutex_unlock(&eb->context->vm->mutex); if (err) return err; break; @@ -724,12 +713,8 @@ static int eb_select_context(struct i915_execbuffer *eb) return -ENOENT; eb->gem_context = ctx; - if (ctx->vm) { - eb->vm = ctx->vm; + if (rcu_access_pointer(ctx->vm)) eb->invalid_flags |= EXEC_OBJECT_NEEDS_GTT; - } else { - eb->vm = &eb->i915->ggtt.vm; - } eb->context_flags = 0; if (test_bit(UCONTEXT_NO_ZEROMAP, &ctx->user_flags)) @@ -738,63 +723,6 @@ static int eb_select_context(struct i915_execbuffer *eb) return 0; } -static struct i915_request *__eb_wait_for_ring(struct intel_ring *ring) -{ - struct i915_request *rq; - - /* - * Completely unscientific finger-in-the-air estimates for suitable - * maximum user request size (to avoid blocking) and then backoff. - */ - if (intel_ring_update_space(ring) >= PAGE_SIZE) - return NULL; - - /* - * Find a request that after waiting upon, there will be at least half - * the ring available. The hysteresis allows us to compete for the - * shared ring and should mean that we sleep less often prior to - * claiming our resources, but not so long that the ring completely - * drains before we can submit our next request. - */ - list_for_each_entry(rq, &ring->request_list, ring_link) { - if (__intel_ring_space(rq->postfix, - ring->emit, ring->size) > ring->size / 2) - break; - } - if (&rq->ring_link == &ring->request_list) - return NULL; /* weird, we will check again later for real */ - - return i915_request_get(rq); -} - -static int eb_wait_for_ring(const struct i915_execbuffer *eb) -{ - struct i915_request *rq; - int ret = 0; - - /* - * Apply a light amount of backpressure to prevent excessive hogs - * from blocking waiting for space whilst holding struct_mutex and - * keeping all of their resources pinned. 
- */ - - rq = __eb_wait_for_ring(eb->context->ring); - if (rq) { - mutex_unlock(&eb->i915->drm.struct_mutex); - - if (i915_request_wait(rq, - I915_WAIT_INTERRUPTIBLE, - MAX_SCHEDULE_TIMEOUT) < 0) - ret = -EINTR; - - i915_request_put(rq); - - mutex_lock(&eb->i915->drm.struct_mutex); - } - - return ret; -} - static int eb_lookup_vmas(struct i915_execbuffer *eb) { struct radix_tree_root *handles_vma = &eb->gem_context->handles_vma; @@ -802,9 +730,6 @@ static int eb_lookup_vmas(struct i915_execbuffer *eb) unsigned int i, batch; int err; - if (unlikely(i915_gem_context_is_banned(eb->gem_context))) - return -EIO; - INIT_LIST_HEAD(&eb->relocs); INIT_LIST_HEAD(&eb->unbound); @@ -831,7 +756,7 @@ static int eb_lookup_vmas(struct i915_execbuffer *eb) goto err_vma; } - vma = i915_vma_instance(obj, eb->vm, NULL); + vma = i915_vma_instance(obj, eb->context->vm, NULL); if (IS_ERR(vma)) { err = PTR_ERR(vma); goto err_obj; @@ -962,7 +887,7 @@ static void reloc_cache_init(struct reloc_cache *cache, cache->use_64bit_reloc = HAS_64BIT_RELOC(i915); cache->has_fence = cache->gen < 4; cache->needs_unfenced = INTEL_INFO(i915)->unfenced_needs_alignment; - cache->node.allocated = false; + cache->node.flags = 0; cache->rq = NULL; cache->rq_size = 0; } @@ -994,7 +919,7 @@ static void reloc_gpu_flush(struct reloc_cache *cache) __i915_gem_object_flush_map(cache->rq->batch->obj, 0, cache->rq_size); i915_gem_object_unpin_map(cache->rq->batch->obj); - i915_gem_chipset_flush(cache->rq->i915); + intel_gt_chipset_flush(cache->rq->engine->gt); i915_request_add(cache->rq); cache->rq = NULL; @@ -1018,15 +943,18 @@ static void reloc_cache_reset(struct reloc_cache *cache) kunmap_atomic(vaddr); i915_gem_object_finish_access((struct drm_i915_gem_object *)cache->node.mm); } else { - wmb(); + struct i915_ggtt *ggtt = cache_to_ggtt(cache); + + intel_gt_flush_ggtt_writes(ggtt->vm.gt); io_mapping_unmap_atomic((void __iomem *)vaddr); - if (cache->node.allocated) { - struct i915_ggtt *ggtt = cache_to_ggtt(cache); + if (drm_mm_node_allocated(&cache->node)) { ggtt->vm.clear_range(&ggtt->vm, cache->node.start, cache->node.size); + mutex_lock(&ggtt->vm.mutex); drm_mm_remove_node(&cache->node); + mutex_unlock(&ggtt->vm.mutex); } else { i915_vma_unpin((struct i915_vma *)cache->node.mm); } @@ -1077,11 +1005,15 @@ static void *reloc_iomap(struct drm_i915_gem_object *obj, void *vaddr; if (cache->vaddr) { + intel_gt_flush_ggtt_writes(ggtt->vm.gt); io_mapping_unmap_atomic((void __force __iomem *) unmask_page(cache->vaddr)); } else { struct i915_vma *vma; int err; + if (i915_gem_object_is_tiled(obj)) + return ERR_PTR(-EINVAL); + if (use_cpu_reloc(cache, obj)) return NULL; @@ -1093,32 +1025,27 @@ static void *reloc_iomap(struct drm_i915_gem_object *obj, vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0, PIN_MAPPABLE | - PIN_NONBLOCK | - PIN_NONFAULT); + PIN_NONBLOCK /* NOWARN */ | + PIN_NOEVICT); if (IS_ERR(vma)) { memset(&cache->node, 0, sizeof(cache->node)); + mutex_lock(&ggtt->vm.mutex); err = drm_mm_insert_node_in_range (&ggtt->vm.mm, &cache->node, PAGE_SIZE, 0, I915_COLOR_UNEVICTABLE, 0, ggtt->mappable_end, DRM_MM_INSERT_LOW); + mutex_unlock(&ggtt->vm.mutex); if (err) /* no inactive aperture space, use cpu reloc */ return NULL; } else { - err = i915_vma_put_fence(vma); - if (err) { - i915_vma_unpin(vma); - return ERR_PTR(err); - } - cache->node.start = vma->node.start; cache->node.mm = (void *)vma; } } offset = cache->node.start; - if (cache->node.allocated) { - wmb(); + if (drm_mm_node_allocated(&cache->node)) { ggtt->vm.insert_page(&ggtt->vm, 
i915_gem_object_get_dma_address(obj, page), offset, I915_CACHE_NONE, 0); @@ -1201,25 +1128,26 @@ static int __reloc_gpu_alloc(struct i915_execbuffer *eb, unsigned int len) { struct reloc_cache *cache = &eb->reloc_cache; - struct drm_i915_gem_object *obj; + struct intel_engine_pool_node *pool; struct i915_request *rq; struct i915_vma *batch; u32 *cmd; int err; - obj = i915_gem_batch_pool_get(&eb->engine->batch_pool, PAGE_SIZE); - if (IS_ERR(obj)) - return PTR_ERR(obj); + pool = intel_engine_get_pool(eb->engine, PAGE_SIZE); + if (IS_ERR(pool)) + return PTR_ERR(pool); - cmd = i915_gem_object_pin_map(obj, + cmd = i915_gem_object_pin_map(pool->obj, cache->has_llc ? I915_MAP_FORCE_WB : I915_MAP_FORCE_WC); - i915_gem_object_unpin_pages(obj); - if (IS_ERR(cmd)) - return PTR_ERR(cmd); + if (IS_ERR(cmd)) { + err = PTR_ERR(cmd); + goto out_pool; + } - batch = i915_vma_instance(obj, vma->vm, NULL); + batch = i915_vma_instance(pool->obj, vma->vm, NULL); if (IS_ERR(batch)) { err = PTR_ERR(batch); goto err_unmap; @@ -1235,6 +1163,10 @@ static int __reloc_gpu_alloc(struct i915_execbuffer *eb, goto err_unpin; } + err = intel_engine_pool_mark_active(pool, rq); + if (err) + goto err_request; + err = reloc_move_to_gpu(rq, vma); if (err) goto err_request; @@ -1246,8 +1178,9 @@ static int __reloc_gpu_alloc(struct i915_execbuffer *eb, goto skip_request; i915_vma_lock(batch); - GEM_BUG_ON(!reservation_object_test_signaled_rcu(batch->resv, true)); - err = i915_vma_move_to_active(batch, rq, 0); + err = i915_request_await_object(rq, batch->obj, false); + if (err == 0) + err = i915_vma_move_to_active(batch, rq, 0); i915_vma_unlock(batch); if (err) goto skip_request; @@ -1260,7 +1193,7 @@ static int __reloc_gpu_alloc(struct i915_execbuffer *eb, cache->rq_size = 0; /* Return with batch mapping (cmd) still pinned */ - return 0; + goto out_pool; skip_request: i915_request_skip(rq, err); @@ -1269,7 +1202,9 @@ err_request: err_unpin: i915_vma_unpin(batch); err_unmap: - i915_gem_object_unpin_map(obj); + i915_gem_object_unpin_map(pool->obj); +out_pool: + intel_engine_pool_put(pool); return err; } @@ -1286,10 +1221,6 @@ static u32 *reloc_gpu(struct i915_execbuffer *eb, if (unlikely(!cache->rq)) { int err; - /* If we need to copy for the cmdparser, we will stall anyway */ - if (eb_use_cmdparser(eb)) - return ERR_PTR(-EWOULDBLOCK); - if (!intel_engine_can_store_dword(eb->engine)) return ERR_PTR(-ENODEV); @@ -1317,7 +1248,7 @@ relocate_entry(struct i915_vma *vma, if (!eb->reloc_cache.vaddr && (DBG_FORCE_RELOC == FORCE_GPU_RELOC || - !reservation_object_test_signaled_rcu(vma->resv, true))) { + !dma_resv_test_signaled_rcu(vma->resv, true))) { const unsigned int gen = eb->reloc_cache.gen; unsigned int len; u32 *batch; @@ -1442,7 +1373,7 @@ eb_relocate_entry(struct i915_execbuffer *eb, if (reloc->write_domain == I915_GEM_DOMAIN_INSTRUCTION && IS_GEN(eb->i915, 6)) { err = i915_vma_bind(target, target->obj->cache_level, - PIN_GLOBAL); + PIN_GLOBAL, NULL); if (WARN_ONCE(err, "Unexpected failure to bind target VMA!")) return err; @@ -1952,7 +1883,7 @@ static int eb_move_to_gpu(struct i915_execbuffer *eb) eb->exec = NULL; /* Unconditionally flush any chipset caches (for streaming writes). 
*/ - i915_gem_chipset_flush(eb->i915); + intel_gt_chipset_flush(eb->engine->gt); return 0; err_skip: @@ -1960,15 +1891,15 @@ err_skip: return err; } -static bool i915_gem_check_execbuffer(struct drm_i915_gem_execbuffer2 *exec) +static int i915_gem_check_execbuffer(struct drm_i915_gem_execbuffer2 *exec) { if (exec->flags & __I915_EXEC_ILLEGAL_FLAGS) - return false; + return -EINVAL; /* Kernel clipping was a DRI1 misfeature */ if (!(exec->flags & I915_EXEC_FENCE_ARRAY)) { if (exec->num_cliprects || exec->cliprects_ptr) - return false; + return -EINVAL; } if (exec->DR4 == 0xffffffff) { @@ -1976,12 +1907,12 @@ static bool i915_gem_check_execbuffer(struct drm_i915_gem_execbuffer2 *exec) exec->DR4 = 0; } if (exec->DR1 || exec->DR4) - return false; + return -EINVAL; if ((exec->batch_start_offset | exec->batch_len) & 0x7) - return false; + return -EINVAL; - return true; + return 0; } static int i915_reset_gen7_sol_offsets(struct i915_request *rq) @@ -2009,51 +1940,227 @@ static int i915_reset_gen7_sol_offsets(struct i915_request *rq) return 0; } -static struct i915_vma *eb_parse(struct i915_execbuffer *eb, bool is_master) +static struct i915_vma * +shadow_batch_pin(struct drm_i915_gem_object *obj, + struct i915_address_space *vm, + unsigned int flags) { - struct drm_i915_gem_object *shadow_batch_obj; struct i915_vma *vma; int err; - shadow_batch_obj = i915_gem_batch_pool_get(&eb->engine->batch_pool, - PAGE_ALIGN(eb->batch_len)); - if (IS_ERR(shadow_batch_obj)) - return ERR_CAST(shadow_batch_obj); - - err = intel_engine_cmd_parser(eb->engine, - eb->batch->obj, - shadow_batch_obj, - eb->batch_start_offset, - eb->batch_len, - is_master); - if (err) { - if (err == -EACCES) /* unhandled chained batch */ - vma = NULL; - else - vma = ERR_PTR(err); - goto out; + vma = i915_vma_instance(obj, vm, NULL); + if (IS_ERR(vma)) + return vma; + + err = i915_vma_pin(vma, 0, 0, flags); + if (err) + return ERR_PTR(err); + + return vma; +} + +struct eb_parse_work { + struct dma_fence_work base; + struct intel_engine_cs *engine; + struct i915_vma *batch; + struct i915_vma *shadow; + struct i915_vma *trampoline; + unsigned int batch_offset; + unsigned int batch_length; +}; + +static int __eb_parse(struct dma_fence_work *work) +{ + struct eb_parse_work *pw = container_of(work, typeof(*pw), base); + + return intel_engine_cmd_parser(pw->engine, + pw->batch, + pw->batch_offset, + pw->batch_length, + pw->shadow, + pw->trampoline); +} + +static void __eb_parse_release(struct dma_fence_work *work) +{ + struct eb_parse_work *pw = container_of(work, typeof(*pw), base); + + if (pw->trampoline) + i915_active_release(&pw->trampoline->active); + i915_active_release(&pw->shadow->active); + i915_active_release(&pw->batch->active); +} + +static const struct dma_fence_work_ops eb_parse_ops = { + .name = "eb_parse", + .work = __eb_parse, + .release = __eb_parse_release, +}; + +static int eb_parse_pipeline(struct i915_execbuffer *eb, + struct i915_vma *shadow, + struct i915_vma *trampoline) +{ + struct eb_parse_work *pw; + int err; + + pw = kzalloc(sizeof(*pw), GFP_KERNEL); + if (!pw) + return -ENOMEM; + + err = i915_active_acquire(&eb->batch->active); + if (err) + goto err_free; + + err = i915_active_acquire(&shadow->active); + if (err) + goto err_batch; + + if (trampoline) { + err = i915_active_acquire(&trampoline->active); + if (err) + goto err_shadow; } - vma = i915_gem_object_ggtt_pin(shadow_batch_obj, NULL, 0, 0, 0); - if (IS_ERR(vma)) - goto out; + dma_fence_work_init(&pw->base, &eb_parse_ops); + + pw->engine = eb->engine; + 
pw->batch = eb->batch; + pw->batch_offset = eb->batch_start_offset; + pw->batch_length = eb->batch_len; + pw->shadow = shadow; + pw->trampoline = trampoline; + + err = dma_resv_lock_interruptible(pw->batch->resv, NULL); + if (err) + goto err_trampoline; + + err = dma_resv_reserve_shared(pw->batch->resv, 1); + if (err) + goto err_batch_unlock; + + /* Wait for all writes (and relocs) into the batch to complete */ + err = i915_sw_fence_await_reservation(&pw->base.chain, + pw->batch->resv, NULL, false, + 0, I915_FENCE_GFP); + if (err < 0) + goto err_batch_unlock; + + /* Keep the batch alive and unwritten as we parse */ + dma_resv_add_shared_fence(pw->batch->resv, &pw->base.dma); + + dma_resv_unlock(pw->batch->resv); + + /* Force execution to wait for completion of the parser */ + dma_resv_lock(shadow->resv, NULL); + dma_resv_add_excl_fence(shadow->resv, &pw->base.dma); + dma_resv_unlock(shadow->resv); + + dma_fence_work_commit(&pw->base); + return 0; + +err_batch_unlock: + dma_resv_unlock(pw->batch->resv); +err_trampoline: + if (trampoline) + i915_active_release(&trampoline->active); +err_shadow: + i915_active_release(&shadow->active); +err_batch: + i915_active_release(&eb->batch->active); +err_free: + kfree(pw); + return err; +} + +static int eb_parse(struct i915_execbuffer *eb) +{ + struct intel_engine_pool_node *pool; + struct i915_vma *shadow, *trampoline; + unsigned int len; + int err; + + if (!eb_use_cmdparser(eb)) + return 0; + + len = eb->batch_len; + if (!CMDPARSER_USES_GGTT(eb->i915)) { + /* + * ppGTT backed shadow buffers must be mapped RO, to prevent + * post-scan tampering + */ + if (!eb->context->vm->has_read_only) { + DRM_DEBUG("Cannot prevent post-scan tampering without RO capable vm\n"); + return -EINVAL; + } + } else { + len += I915_CMD_PARSER_TRAMPOLINE_SIZE; + } + + pool = intel_engine_get_pool(eb->engine, len); + if (IS_ERR(pool)) + return PTR_ERR(pool); + + shadow = shadow_batch_pin(pool->obj, eb->context->vm, PIN_USER); + if (IS_ERR(shadow)) { + err = PTR_ERR(shadow); + goto err; + } + i915_gem_object_set_readonly(shadow->obj); + + trampoline = NULL; + if (CMDPARSER_USES_GGTT(eb->i915)) { + trampoline = shadow; + + shadow = shadow_batch_pin(pool->obj, + &eb->engine->gt->ggtt->vm, + PIN_GLOBAL); + if (IS_ERR(shadow)) { + err = PTR_ERR(shadow); + shadow = trampoline; + goto err_shadow; + } + + eb->batch_flags |= I915_DISPATCH_SECURE; + } + + err = eb_parse_pipeline(eb, shadow, trampoline); + if (err) + goto err_trampoline; - eb->vma[eb->buffer_count] = i915_vma_get(vma); + eb->vma[eb->buffer_count] = i915_vma_get(shadow); eb->flags[eb->buffer_count] = __EXEC_OBJECT_HAS_PIN | __EXEC_OBJECT_HAS_REF; - vma->exec_flags = &eb->flags[eb->buffer_count]; + shadow->exec_flags = &eb->flags[eb->buffer_count]; eb->buffer_count++; -out: - i915_gem_object_unpin_pages(shadow_batch_obj); - return vma; + eb->trampoline = trampoline; + eb->batch_start_offset = 0; + eb->batch = shadow; + + shadow->private = pool; + return 0; + +err_trampoline: + if (trampoline) + i915_vma_unpin(trampoline); +err_shadow: + i915_vma_unpin(shadow); +err: + intel_engine_pool_put(pool); + return err; } static void add_to_client(struct i915_request *rq, struct drm_file *file) { - rq->file_priv = file->driver_priv; - list_add_tail(&rq->client_link, &rq->file_priv->mm.request_list); + struct drm_i915_file_private *file_priv = file->driver_priv; + + rq->file_priv = file_priv; + + spin_lock(&file_priv->mm.lock); + list_add_tail(&rq->client_link, &file_priv->mm.request_list); + spin_unlock(&file_priv->mm.lock); } 
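As an aside, eb_parse_pipeline() above is an instance of a reusable pattern: queue a dma_fence_work that is ordered after every fence already on an object's reservation, then publish its own fence so later users wait for the work. A condensed sketch under invented names (defer_work, queue_deferred_work) follows; it illustrates the pattern and is not code from the patch.

struct defer_work {
	struct dma_fence_work base;	/* must stay first: freed through its embedded fence */
	struct drm_i915_gem_object *obj;
};

static int defer_work_fn(struct dma_fence_work *base)
{
	/* Runs from a worker only after every fence awaited below has signaled. */
	return 0;
}

static void defer_work_release(struct dma_fence_work *base)
{
	struct defer_work *w = container_of(base, typeof(*w), base);

	i915_gem_object_put(w->obj);
}

static const struct dma_fence_work_ops defer_work_ops = {
	.name = "defer",
	.work = defer_work_fn,
	.release = defer_work_release,
};

static int queue_deferred_work(struct drm_i915_gem_object *obj)
{
	struct defer_work *w;
	int err;

	w = kzalloc(sizeof(*w), GFP_KERNEL);
	if (!w)
		return -ENOMEM;

	dma_fence_work_init(&w->base, &defer_work_ops);
	w->obj = i915_gem_object_get(obj);

	err = dma_resv_lock_interruptible(obj->base.resv, NULL);
	if (err)
		goto err_put;

	/* Reserve a slot, then wait upon all prior fences on the object. */
	err = dma_resv_reserve_shared(obj->base.resv, 1);
	if (err == 0)
		err = i915_sw_fence_await_reservation(&w->base.chain,
						      obj->base.resv, NULL,
						      false, 0, I915_FENCE_GFP);
	if (err < 0) {
		dma_resv_unlock(obj->base.resv);
		goto err_put;
	}

	/* Publish our fence so later users of the object are ordered after us. */
	dma_resv_add_shared_fence(obj->base.resv, &w->base.dma);
	dma_resv_unlock(obj->base.resv);

	/* Arm the fence: the work runs once the awaited fences signal. */
	dma_fence_work_commit(&w->base);
	return 0;

err_put:
	i915_gem_object_put(w->obj);
	kfree(w);
	return err;
}
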
static int eb_submit(struct i915_execbuffer *eb) @@ -2090,9 +2197,28 @@ static int eb_submit(struct i915_execbuffer *eb) if (err) return err; + if (eb->trampoline) { + GEM_BUG_ON(eb->batch_start_offset); + err = eb->engine->emit_bb_start(eb->request, + eb->trampoline->node.start + + eb->batch_len, + 0, 0); + if (err) + return err; + } + + if (intel_context_nopreempt(eb->context)) + __set_bit(I915_FENCE_FLAG_NOPREEMPT, &eb->request->fence.flags); + return 0; } +static int num_vcs_engines(const struct drm_i915_private *i915) +{ + return hweight64(INTEL_INFO(i915)->engine_mask & + GENMASK_ULL(VCS0 + I915_MAX_VCS - 1, VCS0)); +} + /* * Find one BSD ring to dispatch the corresponding BSD command. * The engine index is returned. @@ -2105,8 +2231,8 @@ gen8_dispatch_bsd_engine(struct drm_i915_private *dev_priv, /* Check whether the file_priv has already selected one ring. */ if ((int)file_priv->bsd_engine < 0) - file_priv->bsd_engine = atomic_fetch_xor(1, - &dev_priv->mm.bsd_engine_dispatch_index); + file_priv->bsd_engine = + get_random_int() % num_vcs_engines(dev_priv); return file_priv->bsd_engine; } @@ -2119,18 +2245,57 @@ static const enum intel_engine_id user_ring_map[] = { [I915_EXEC_VEBOX] = VECS0 }; -static int eb_pin_context(struct i915_execbuffer *eb, struct intel_context *ce) +static struct i915_request *eb_throttle(struct intel_context *ce) +{ + struct intel_ring *ring = ce->ring; + struct intel_timeline *tl = ce->timeline; + struct i915_request *rq; + + /* + * Completely unscientific finger-in-the-air estimates for suitable + * maximum user request size (to avoid blocking) and then backoff. + */ + if (intel_ring_update_space(ring) >= PAGE_SIZE) + return NULL; + + /* + * Find a request that after waiting upon, there will be at least half + * the ring available. The hysteresis allows us to compete for the + * shared ring and should mean that we sleep less often prior to + * claiming our resources, but not so long that the ring completely + * drains before we can submit our next request. + */ + list_for_each_entry(rq, &tl->requests, link) { + if (rq->ring != ring) + continue; + + if (__intel_ring_space(rq->postfix, + ring->emit, ring->size) > ring->size / 2) + break; + } + if (&rq->link == &tl->requests) + return NULL; /* weird, we will check again later for real */ + + return i915_request_get(rq); +} + +static int __eb_pin_engine(struct i915_execbuffer *eb, struct intel_context *ce) { + struct intel_timeline *tl; + struct i915_request *rq; int err; /* * ABI: Before userspace accesses the GPU (e.g. execbuffer), report * EIO if the GPU is already wedged. */ - err = i915_terminally_wedged(eb->i915); + err = intel_gt_terminally_wedged(ce->engine->gt); if (err) return err; + if (unlikely(intel_context_is_banned(ce))) + return -EIO; + /* * Pinning the contexts may generate requests in order to acquire * GGTT space, so do this first before we reserve a seqno for @@ -2140,14 +2305,60 @@ static int eb_pin_context(struct i915_execbuffer *eb, struct intel_context *ce) if (err) return err; + /* + * Take a local wakeref for preparing to dispatch the execbuf as + * we expect to access the hardware fairly frequently in the + * process, and require the engine to be kept awake between accesses. + * Upon dispatch, we acquire another prolonged wakeref that we hold + * until the timeline is idle, which in turn releases the wakeref + * taken on the engine, and the parent device. 
+ */ + tl = intel_context_timeline_lock(ce); + if (IS_ERR(tl)) { + err = PTR_ERR(tl); + goto err_unpin; + } + + intel_context_enter(ce); + rq = eb_throttle(ce); + + intel_context_timeline_unlock(tl); + + if (rq) { + if (i915_request_wait(rq, + I915_WAIT_INTERRUPTIBLE, + MAX_SCHEDULE_TIMEOUT) < 0) { + i915_request_put(rq); + err = -EINTR; + goto err_exit; + } + + i915_request_put(rq); + } + eb->engine = ce->engine; eb->context = ce; return 0; + +err_exit: + mutex_lock(&tl->mutex); + intel_context_exit(ce); + intel_context_timeline_unlock(tl); +err_unpin: + intel_context_unpin(ce); + return err; } -static void eb_unpin_context(struct i915_execbuffer *eb) +static void eb_unpin_engine(struct i915_execbuffer *eb) { - intel_context_unpin(eb->context); + struct intel_context *ce = eb->context; + struct intel_timeline *tl = ce->timeline; + + mutex_lock(&tl->mutex); + intel_context_exit(ce); + mutex_unlock(&tl->mutex); + + intel_context_unpin(ce); } static unsigned int @@ -2165,7 +2376,7 @@ eb_select_legacy_ring(struct i915_execbuffer *eb, return -1; } - if (user_ring_id == I915_EXEC_BSD && HAS_ENGINE(i915, VCS1)) { + if (user_ring_id == I915_EXEC_BSD && num_vcs_engines(i915) > 1) { unsigned int bsd_idx = args->flags & I915_EXEC_BSD_MASK; if (bsd_idx == I915_EXEC_BSD_DEFAULT) { @@ -2192,9 +2403,9 @@ eb_select_legacy_ring(struct i915_execbuffer *eb, } static int -eb_select_engine(struct i915_execbuffer *eb, - struct drm_file *file, - struct drm_i915_gem_execbuffer2 *args) +eb_pin_engine(struct i915_execbuffer *eb, + struct drm_file *file, + struct drm_i915_gem_execbuffer2 *args) { struct intel_context *ce; unsigned int idx; @@ -2209,7 +2420,7 @@ eb_select_engine(struct i915_execbuffer *eb, if (IS_ERR(ce)) return PTR_ERR(ce); - err = eb_pin_context(eb, ce); + err = __eb_pin_engine(eb, ce); intel_context_put(ce); return err; @@ -2351,6 +2562,7 @@ i915_gem_do_execbuffer(struct drm_device *dev, struct drm_i915_gem_exec_object2 *exec, struct drm_syncobj **fences) { + struct drm_i915_private *i915 = to_i915(dev); struct i915_execbuffer eb; struct dma_fence *in_fence = NULL; struct dma_fence *exec_fence = NULL; @@ -2362,7 +2574,7 @@ i915_gem_do_execbuffer(struct drm_device *dev, BUILD_BUG_ON(__EXEC_OBJECT_INTERNAL_FLAGS & ~__EXEC_OBJECT_UNKNOWN_FLAGS); - eb.i915 = to_i915(dev); + eb.i915 = i915; eb.file = file; eb.args = args; if (DBG_FORCE_RELOC || !(args->flags & I915_EXEC_NO_RELOC)) @@ -2379,11 +2591,19 @@ i915_gem_do_execbuffer(struct drm_device *dev, eb.buffer_count = args->buffer_count; eb.batch_start_offset = args->batch_start_offset; eb.batch_len = args->batch_len; + eb.trampoline = NULL; eb.batch_flags = 0; if (args->flags & I915_EXEC_SECURE) { + if (INTEL_GEN(i915) >= 11) + return -ENODEV; + + /* Return -EPERM to trigger fallback code on old binaries. */ + if (!HAS_SECURE_BATCHES(i915)) + return -EPERM; + if (!drm_is_current_master(file) || !capable(CAP_SYS_ADMIN)) - return -EPERM; + return -EPERM; eb.batch_flags |= I915_DISPATCH_SECURE; } @@ -2427,25 +2647,12 @@ i915_gem_do_execbuffer(struct drm_device *dev, if (unlikely(err)) goto err_destroy; - /* - * Take a local wakeref for preparing to dispatch the execbuf as - * we expect to access the hardware fairly frequently in the - * process. Upon first dispatch, we acquire another prolonged - * wakeref that we hold until the GPU has been idle for at least - * 100ms. 
- */ - intel_gt_pm_get(eb.i915); + err = eb_pin_engine(&eb, file, args); + if (unlikely(err)) + goto err_context; err = i915_mutex_lock_interruptible(dev); if (err) - goto err_rpm; - - err = eb_select_engine(&eb, file, args); - if (unlikely(err)) - goto err_unlock; - - err = eb_wait_for_ring(&eb); /* may temporarily drop struct_mutex */ - if (unlikely(err)) goto err_engine; err = eb_relocate(&eb); @@ -2473,34 +2680,13 @@ i915_gem_do_execbuffer(struct drm_device *dev, goto err_vma; } - if (eb_use_cmdparser(&eb)) { - struct i915_vma *vma; - - vma = eb_parse(&eb, drm_is_current_master(file)); - if (IS_ERR(vma)) { - err = PTR_ERR(vma); - goto err_vma; - } - - if (vma) { - /* - * Batch parsed and accepted: - * - * Set the DISPATCH_SECURE bit to remove the NON_SECURE - * bit from MI_BATCH_BUFFER_START commands issued in - * the dispatch_execbuffer implementations. We - * specifically don't want that set on batches the - * command parser has accepted. - */ - eb.batch_flags |= I915_DISPATCH_SECURE; - eb.batch_start_offset = 0; - eb.batch = vma; - } - } - if (eb.batch_len == 0) eb.batch_len = eb.batch->size - eb.batch_start_offset; + err = eb_parse(&eb); + if (err) + goto err_vma; + /* * snb/ivb/vlv conflate the "batch in ppgtt" bit with the "non-secure * batch" bit. Hence we need to pin secure batches into the global gtt. @@ -2572,11 +2758,14 @@ i915_gem_do_execbuffer(struct drm_device *dev, * to explicitly hold another reference here. */ eb.request->batch = eb.batch; + if (eb.batch->private) + intel_engine_pool_mark_active(eb.batch->private, eb.request); trace_i915_request_queue(eb.request, eb.batch_flags); err = eb_submit(&eb); err_request: add_to_client(eb.request, file); + i915_request_get(eb.request); i915_request_add(eb.request); if (fences) @@ -2592,19 +2781,22 @@ err_request: fput(out_fence->file); } } + i915_request_put(eb.request); err_batch_unpin: if (eb.batch_flags & I915_DISPATCH_SECURE) i915_vma_unpin(eb.batch); + if (eb.batch->private) + intel_engine_pool_put(eb.batch->private); err_vma: if (eb.exec) eb_release_vmas(&eb); -err_engine: - eb_unpin_context(&eb); -err_unlock: + if (eb.trampoline) + i915_vma_unpin(eb.trampoline); mutex_unlock(&dev->struct_mutex); -err_rpm: - intel_gt_pm_put(eb.i915); +err_engine: + eb_unpin_engine(&eb); +err_context: i915_gem_context_put(eb.gem_context); err_destroy: eb_destroy(&eb); @@ -2670,8 +2862,9 @@ i915_gem_execbuffer_ioctl(struct drm_device *dev, void *data, exec2.flags = I915_EXEC_RENDER; i915_execbuffer2_set_context_id(exec2, 0); - if (!i915_gem_check_execbuffer(&exec2)) - return -EINVAL; + err = i915_gem_check_execbuffer(&exec2); + if (err) + return err; /* Copy in the exec list from userland */ exec_list = kvmalloc_array(count, sizeof(*exec_list), @@ -2748,8 +2941,9 @@ i915_gem_execbuffer2_ioctl(struct drm_device *dev, void *data, return -EINVAL; } - if (!i915_gem_check_execbuffer(args)) - return -EINVAL; + err = i915_gem_check_execbuffer(args); + if (err) + return err; /* Allocate an extra slot for use by the command parser */ exec2_list = kvmalloc_array(count + 1, eb_element_size(), diff --git a/drivers/gpu/drm/i915/gem/i915_gem_fence.c b/drivers/gpu/drm/i915/gem/i915_gem_fence.c index cf0439e6be83..2f6100ec2608 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_fence.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_fence.c @@ -69,8 +69,7 @@ i915_gem_object_lock_fence(struct drm_i915_gem_object *obj) i915_sw_fence_init(&stub->chain, stub_notify); dma_fence_init(&stub->dma, &stub_fence_ops, &stub->chain.wait.lock, - 
to_i915(obj->base.dev)->mm.unordered_timeline, - 0); + 0, 0); if (i915_sw_fence_await_reservation(&stub->chain, obj->base.resv, NULL, @@ -78,7 +77,7 @@ i915_gem_object_lock_fence(struct drm_i915_gem_object *obj) I915_FENCE_GFP) < 0) goto err; - reservation_object_add_excl_fence(obj->base.resv, &stub->dma); + dma_resv_add_excl_fence(obj->base.resv, &stub->dma); return &stub->dma; diff --git a/drivers/gpu/drm/i915/gem/i915_gem_internal.c b/drivers/gpu/drm/i915/gem/i915_gem_internal.c index 0c41e04ab8fa..9cfb0e41ff06 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_internal.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_internal.c @@ -117,13 +117,6 @@ create_st: goto err; } - /* Mark the pages as dontneed whilst they are still pinned. As soon - * as they are unpinned they are allowed to be reaped by the shrinker, - * and the caller is expected to repopulate - the contents of this - * object are only valid whilst active and pinned. - */ - obj->mm.madv = I915_MADV_DONTNEED; - __i915_gem_object_set_pages(obj, st, sg_page_sizes); return 0; @@ -143,7 +136,6 @@ static void i915_gem_object_put_pages_internal(struct drm_i915_gem_object *obj, internal_free_pages(pages); obj->mm.dirty = false; - obj->mm.madv = I915_MADV_WILLNEED; } static const struct drm_i915_gem_object_ops i915_gem_object_internal_ops = { @@ -172,6 +164,7 @@ struct drm_i915_gem_object * i915_gem_object_create_internal(struct drm_i915_private *i915, phys_addr_t size) { + static struct lock_class_key lock_class; struct drm_i915_gem_object *obj; unsigned int cache_level; @@ -186,7 +179,16 @@ i915_gem_object_create_internal(struct drm_i915_private *i915, return ERR_PTR(-ENOMEM); drm_gem_private_object_init(&i915->drm, &obj->base, size); - i915_gem_object_init(obj, &i915_gem_object_internal_ops); + i915_gem_object_init(obj, &i915_gem_object_internal_ops, &lock_class); + + /* + * Mark the object as volatile, such that the pages are marked as + * dontneed whilst they are still pinned. As soon as they are unpinned + * they are allowed to be reaped by the shrinker, and the caller is + * expected to repopulate - the contents of this object are only valid + * whilst active and pinned. 
+ */ + i915_gem_object_set_volatile(obj); obj->read_domains = I915_GEM_DOMAIN_CPU; obj->write_domain = I915_GEM_DOMAIN_CPU; diff --git a/drivers/gpu/drm/i915/gem/i915_gem_ioctls.h b/drivers/gpu/drm/i915/gem/i915_gem_ioctls.h index ddc7f2a52b3e..87d8b27f426d 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_ioctls.h +++ b/drivers/gpu/drm/i915/gem/i915_gem_ioctls.h @@ -28,8 +28,8 @@ int i915_gem_madvise_ioctl(struct drm_device *dev, void *data, struct drm_file *file); int i915_gem_mmap_ioctl(struct drm_device *dev, void *data, struct drm_file *file); -int i915_gem_mmap_gtt_ioctl(struct drm_device *dev, void *data, - struct drm_file *file); +int i915_gem_mmap_offset_ioctl(struct drm_device *dev, void *data, + struct drm_file *file); int i915_gem_pread_ioctl(struct drm_device *dev, void *data, struct drm_file *file); int i915_gem_pwrite_ioctl(struct drm_device *dev, void *data, diff --git a/drivers/gpu/drm/i915/gem/i915_gem_lmem.c b/drivers/gpu/drm/i915/gem/i915_gem_lmem.c new file mode 100644 index 000000000000..70543c83df06 --- /dev/null +++ b/drivers/gpu/drm/i915/gem/i915_gem_lmem.c @@ -0,0 +1,56 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2019 Intel Corporation + */ + +#include "intel_memory_region.h" +#include "gem/i915_gem_region.h" +#include "gem/i915_gem_lmem.h" +#include "i915_drv.h" + +const struct drm_i915_gem_object_ops i915_gem_lmem_obj_ops = { + .flags = I915_GEM_OBJECT_HAS_IOMEM, + + .get_pages = i915_gem_object_get_pages_buddy, + .put_pages = i915_gem_object_put_pages_buddy, + .release = i915_gem_object_release_memory_region, +}; + +bool i915_gem_object_is_lmem(struct drm_i915_gem_object *obj) +{ + return obj->ops == &i915_gem_lmem_obj_ops; +} + +struct drm_i915_gem_object * +i915_gem_object_create_lmem(struct drm_i915_private *i915, + resource_size_t size, + unsigned int flags) +{ + return i915_gem_object_create_region(i915->mm.regions[INTEL_REGION_LMEM], + size, flags); +} + +struct drm_i915_gem_object * +__i915_gem_lmem_object_create(struct intel_memory_region *mem, + resource_size_t size, + unsigned int flags) +{ + static struct lock_class_key lock_class; + struct drm_i915_private *i915 = mem->i915; + struct drm_i915_gem_object *obj; + + obj = i915_gem_object_alloc(); + if (!obj) + return ERR_PTR(-ENOMEM); + + drm_gem_private_object_init(&i915->drm, &obj->base, size); + i915_gem_object_init(obj, &i915_gem_lmem_obj_ops, &lock_class); + + obj->read_domains = I915_GEM_DOMAIN_WC | I915_GEM_DOMAIN_GTT; + + i915_gem_object_set_cache_coherency(obj, I915_CACHE_NONE); + + i915_gem_object_init_memory_region(obj, mem, flags); + + return obj; +} diff --git a/drivers/gpu/drm/i915/gem/i915_gem_lmem.h b/drivers/gpu/drm/i915/gem/i915_gem_lmem.h new file mode 100644 index 000000000000..fc3f15580fe3 --- /dev/null +++ b/drivers/gpu/drm/i915/gem/i915_gem_lmem.h @@ -0,0 +1,29 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2019 Intel Corporation + */ + +#ifndef __I915_GEM_LMEM_H +#define __I915_GEM_LMEM_H + +#include <linux/types.h> + +struct drm_i915_private; +struct drm_i915_gem_object; +struct intel_memory_region; + +extern const struct drm_i915_gem_object_ops i915_gem_lmem_obj_ops; + +bool i915_gem_object_is_lmem(struct drm_i915_gem_object *obj); + +struct drm_i915_gem_object * +i915_gem_object_create_lmem(struct drm_i915_private *i915, + resource_size_t size, + unsigned int flags); + +struct drm_i915_gem_object * +__i915_gem_lmem_object_create(struct intel_memory_region *mem, + resource_size_t size, + unsigned int flags); + +#endif /* !__I915_GEM_LMEM_H */ diff --git 
a/drivers/gpu/drm/i915/gem/i915_gem_mman.c b/drivers/gpu/drm/i915/gem/i915_gem_mman.c index 39a661927d8e..0b6a442108de 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_mman.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_mman.c @@ -4,15 +4,22 @@ * Copyright © 2014-2016 Intel Corporation */ +#include <linux/anon_inodes.h> #include <linux/mman.h> +#include <linux/pfn_t.h> #include <linux/sizes.h> +#include "gt/intel_gt.h" +#include "gt/intel_gt_requests.h" + #include "i915_drv.h" #include "i915_gem_gtt.h" #include "i915_gem_ioctls.h" #include "i915_gem_object.h" +#include "i915_gem_mman.h" +#include "i915_trace.h" +#include "i915_user_extensions.h" #include "i915_vma.h" -#include "intel_drv.h" static inline bool __vma_matches(struct vm_area_struct *vma, struct file *filp, @@ -99,9 +106,6 @@ i915_gem_mmap_ioctl(struct drm_device *dev, void *data, up_write(&mm->mmap_sem); if (IS_ERR_VALUE(addr)) goto err; - - /* This may race, but that's ok, it only gets set */ - WRITE_ONCE(obj->frontbuffer_ggtt_origin, ORIGIN_CPU); } i915_gem_object_put(obj); @@ -144,6 +148,9 @@ static unsigned int tile_row_pages(const struct drm_i915_gem_object *obj) * 3 - Remove implicit set-domain(GTT) and synchronisation on initial * pagefault; swapin remains transparent. * + * 4 - Support multiple fault handlers per object depending on object's + * backing storage (a.k.a. MMAP_OFFSET). + * * Restrictions: * * * snoopable objects cannot be accessed via the GTT. It can cause machine @@ -171,7 +178,7 @@ static unsigned int tile_row_pages(const struct drm_i915_gem_object *obj) */ int i915_gem_mmap_gtt_version(void) { - return 3; + return 4; } static inline struct i915_ggtt_view @@ -197,29 +204,80 @@ compute_partial_view(const struct drm_i915_gem_object *obj, return view; } -/** - * i915_gem_fault - fault a page into the GTT - * @vmf: fault info - * - * The fault handler is set up by drm_gem_mmap() when a object is GTT mapped - * from userspace. The fault handler takes care of binding the object to - * the GTT (if needed), allocating and programming a fence register (again, - * only if needed based on whether the old reg is still valid or the object - * is tiled) and inserting a new PTE into the faulting process. - * - * Note that the faulting process may involve evicting existing objects - * from the GTT and/or fence registers to make room. So performance may - * suffer if the GTT working set is large or there are few fence registers - * left. - * - * The current feature set supported by i915_gem_fault() and thus GTT mmaps - * is exposed via I915_PARAM_MMAP_GTT_VERSION (see i915_gem_mmap_gtt_version). - */ -vm_fault_t i915_gem_fault(struct vm_fault *vmf) +static vm_fault_t i915_error_to_vmf_fault(int err) +{ + switch (err) { + default: + WARN_ONCE(err, "unhandled error in %s: %i\n", __func__, err); + /* fallthrough */ + case -EIO: /* shmemfs failure from swap device */ + case -EFAULT: /* purged object */ + case -ENODEV: /* bad object, how did you get here! */ + case -ENXIO: /* unable to access backing store (on device) */ + return VM_FAULT_SIGBUS; + + case -ENOSPC: /* shmemfs allocation failure */ + case -ENOMEM: /* our allocation failure */ + return VM_FAULT_OOM; + + case 0: + case -EAGAIN: + case -ERESTARTSYS: + case -EINTR: + case -EBUSY: + /* + * EBUSY is ok: this just means that another thread + * already did the job. 
+ */ + return VM_FAULT_NOPAGE; + } +} + +static vm_fault_t vm_fault_cpu(struct vm_fault *vmf) +{ + struct vm_area_struct *area = vmf->vma; + struct i915_mmap_offset *mmo = area->vm_private_data; + struct drm_i915_gem_object *obj = mmo->obj; + resource_size_t iomap; + int err; + + /* Sanity check that we allow writing into this object */ + if (unlikely(i915_gem_object_is_readonly(obj) && + area->vm_flags & VM_WRITE)) + return VM_FAULT_SIGBUS; + + err = i915_gem_object_pin_pages(obj); + if (err) + goto out; + + iomap = -1; + if (!i915_gem_object_type_has(obj, I915_GEM_OBJECT_HAS_STRUCT_PAGE)) { + iomap = obj->mm.region->iomap.base; + iomap -= obj->mm.region->region.start; + } + + /* PTEs are revoked in obj->ops->put_pages() */ + err = remap_io_sg(area, + area->vm_start, area->vm_end - area->vm_start, + obj->mm.pages->sgl, iomap); + + if (area->vm_flags & VM_WRITE) { + GEM_BUG_ON(!i915_gem_object_has_pinned_pages(obj)); + obj->mm.dirty = true; + } + + i915_gem_object_unpin_pages(obj); + +out: + return i915_error_to_vmf_fault(err); +} + +static vm_fault_t vm_fault_gtt(struct vm_fault *vmf) { #define MIN_CHUNK_PAGES (SZ_1M >> PAGE_SHIFT) struct vm_area_struct *area = vmf->vma; - struct drm_i915_gem_object *obj = to_intel_bo(area->vm_private_data); + struct i915_mmap_offset *mmo = area->vm_private_data; + struct drm_i915_gem_object *obj = mmo->obj; struct drm_device *dev = obj->base.dev; struct drm_i915_private *i915 = to_i915(dev); struct intel_runtime_pm *rpm = &i915->runtime_pm; @@ -246,34 +304,22 @@ vm_fault_t i915_gem_fault(struct vm_fault *vmf) wakeref = intel_runtime_pm_get(rpm); - srcu = i915_reset_trylock(i915); - if (srcu < 0) { - ret = srcu; - goto err_rpm; - } - - ret = i915_mutex_lock_interruptible(dev); + ret = intel_gt_reset_trylock(ggtt->vm.gt, &srcu); if (ret) - goto err_reset; - - /* Access to snoopable pages through the GTT is incoherent. */ - if (obj->cache_level != I915_CACHE_NONE && !HAS_LLC(i915)) { - ret = -EFAULT; - goto err_unlock; - } + goto err_rpm; /* Now pin it into the GTT as needed */ vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0, PIN_MAPPABLE | - PIN_NONBLOCK | - PIN_NONFAULT); + PIN_NONBLOCK /* NOWARN */ | + PIN_NOEVICT); if (IS_ERR(vma)) { /* Use a partial view if it is bigger than available space */ struct i915_ggtt_view view = compute_partial_view(obj, page_offset, MIN_CHUNK_PAGES); unsigned int flags; - flags = PIN_MAPPABLE; + flags = PIN_MAPPABLE | PIN_NOSEARCH; if (view.type == I915_GGTT_VIEW_NORMAL) flags |= PIN_NONBLOCK; /* avoid warnings for pinned */ @@ -281,18 +327,26 @@ vm_fault_t i915_gem_fault(struct vm_fault *vmf) * Userspace is now writing through an untracked VMA, abandon * all hope that the hardware is able to track future writes. */ - obj->frontbuffer_ggtt_origin = ORIGIN_CPU; vma = i915_gem_object_ggtt_pin(obj, &view, 0, 0, flags); - if (IS_ERR(vma) && !view.type) { + if (IS_ERR(vma)) { flags = PIN_MAPPABLE; view.type = I915_GGTT_VIEW_PARTIAL; vma = i915_gem_object_ggtt_pin(obj, &view, 0, 0, flags); } + + /* The entire mappable GGTT is pinned? Unexpected! */ + GEM_BUG_ON(vma == ERR_PTR(-ENOSPC)); } if (IS_ERR(vma)) { ret = PTR_ERR(vma); - goto err_unlock; + goto err_reset; + } + + /* Access to snoopable pages through the GTT is incoherent. 
*/ + if (obj->cache_level != I915_CACHE_NONE && !HAS_LLC(i915)) { + ret = -EFAULT; + goto err_unpin; } ret = i915_vma_pin_fence(vma); @@ -308,101 +362,67 @@ vm_fault_t i915_gem_fault(struct vm_fault *vmf) if (ret) goto err_fence; - /* Mark as being mmapped into userspace for later revocation */ assert_rpm_wakelock_held(rpm); + + /* Mark as being mmapped into userspace for later revocation */ + mutex_lock(&i915->ggtt.vm.mutex); if (!i915_vma_set_userfault(vma) && !obj->userfault_count++) list_add(&obj->userfault_link, &i915->ggtt.userfault_list); - if (CONFIG_DRM_I915_USERFAULT_AUTOSUSPEND) + mutex_unlock(&i915->ggtt.vm.mutex); + + /* Track the mmo associated with the fenced vma */ + vma->mmo = mmo; + + if (IS_ACTIVE(CONFIG_DRM_I915_USERFAULT_AUTOSUSPEND)) intel_wakeref_auto(&i915->ggtt.userfault_wakeref, msecs_to_jiffies_timeout(CONFIG_DRM_I915_USERFAULT_AUTOSUSPEND)); - GEM_BUG_ON(!obj->userfault_count); - i915_vma_set_ggtt_write(vma); + if (write) { + GEM_BUG_ON(!i915_gem_object_has_pinned_pages(obj)); + i915_vma_set_ggtt_write(vma); + obj->mm.dirty = true; + } err_fence: i915_vma_unpin_fence(vma); err_unpin: __i915_vma_unpin(vma); -err_unlock: - mutex_unlock(&dev->struct_mutex); err_reset: - i915_reset_unlock(i915, srcu); + intel_gt_reset_unlock(ggtt->vm.gt, srcu); err_rpm: intel_runtime_pm_put(rpm, wakeref); i915_gem_object_unpin_pages(obj); err: - switch (ret) { - case -EIO: - /* - * We eat errors when the gpu is terminally wedged to avoid - * userspace unduly crashing (gl has no provisions for mmaps to - * fail). But any other -EIO isn't ours (e.g. swap in failure) - * and so needs to be reported. - */ - if (!i915_terminally_wedged(i915)) - return VM_FAULT_SIGBUS; - /* else, fall through */ - case -EAGAIN: - /* - * EAGAIN means the gpu is hung and we'll wait for the error - * handler to reset everything when re-faulting in - * i915_mutex_lock_interruptible. - */ - case 0: - case -ERESTARTSYS: - case -EINTR: - case -EBUSY: - /* - * EBUSY is ok: this just means that another thread - * already did the job. - */ - return VM_FAULT_NOPAGE; - case -ENOMEM: - return VM_FAULT_OOM; - case -ENOSPC: - case -EFAULT: - return VM_FAULT_SIGBUS; - default: - WARN_ONCE(ret, "unhandled error in %s: %i\n", __func__, ret); - return VM_FAULT_SIGBUS; - } + return i915_error_to_vmf_fault(ret); } -void __i915_gem_object_release_mmap(struct drm_i915_gem_object *obj) +void __i915_gem_object_release_mmap_gtt(struct drm_i915_gem_object *obj) { struct i915_vma *vma; GEM_BUG_ON(!obj->userfault_count); - obj->userfault_count = 0; - list_del(&obj->userfault_link); - drm_vma_node_unmap(&obj->base.vma_node, - obj->base.dev->anon_inode->i_mapping); - for_each_ggtt_vma(vma, obj) - i915_vma_unset_userfault(vma); + i915_vma_revoke_mmap(vma); + + GEM_BUG_ON(obj->userfault_count); } -/** - * i915_gem_object_release_mmap - remove physical page mappings - * @obj: obj in question - * - * Preserve the reservation of the mmapping with the DRM core code, but - * relinquish ownership of the pages back to the system. - * +/* * It is vital that we remove the page mapping if we have mapped a tiled * object through the GTT and then lose the fence register due to * resource pressure. Similarly if the object has been moved out of the * aperture, then pages mapped into userspace must be revoked. Removing the * mapping will then trigger a page fault on the next user access, allowing - * fixup by i915_gem_fault(). + * fixup by vm_fault_gtt(). 
*/ -void i915_gem_object_release_mmap(struct drm_i915_gem_object *obj) +static void i915_gem_object_release_mmap_gtt(struct drm_i915_gem_object *obj) { struct drm_i915_private *i915 = to_i915(obj->base.dev); intel_wakeref_t wakeref; - /* Serialisation between user GTT access and our code depends upon + /* + * Serialisation between user GTT access and our code depends upon * revoking the CPU's PTE whilst the mutex is held. The next user * pagefault then has to wait until we release the mutex. * @@ -410,15 +430,16 @@ void i915_gem_object_release_mmap(struct drm_i915_gem_object *obj) * requirement that operations to the GGTT be made holding the RPM * wakeref. */ - lockdep_assert_held(&i915->drm.struct_mutex); wakeref = intel_runtime_pm_get(&i915->runtime_pm); + mutex_lock(&i915->ggtt.vm.mutex); if (!obj->userfault_count) goto out; - __i915_gem_object_release_mmap(obj); + __i915_gem_object_release_mmap_gtt(obj); - /* Ensure that the CPU's PTE are revoked and there are not outstanding + /* + * Ensure that the CPU's PTE are revoked and there are not outstanding * memory transactions from userspace before we return. The TLB * flushing implied above by changing the PTE above *should* be * sufficient, an extra barrier here just provides us with a bit @@ -428,59 +449,217 @@ void i915_gem_object_release_mmap(struct drm_i915_gem_object *obj) wmb(); out: + mutex_unlock(&i915->ggtt.vm.mutex); intel_runtime_pm_put(&i915->runtime_pm, wakeref); } -static int create_mmap_offset(struct drm_i915_gem_object *obj) +void i915_gem_object_release_mmap_offset(struct drm_i915_gem_object *obj) +{ + struct i915_mmap_offset *mmo, *mn; + + spin_lock(&obj->mmo.lock); + rbtree_postorder_for_each_entry_safe(mmo, mn, + &obj->mmo.offsets, offset) { + /* + * vma_node_unmap for GTT mmaps handled already in + * __i915_gem_object_release_mmap_gtt + */ + if (mmo->mmap_type == I915_MMAP_TYPE_GTT) + continue; + + spin_unlock(&obj->mmo.lock); + drm_vma_node_unmap(&mmo->vma_node, + obj->base.dev->anon_inode->i_mapping); + spin_lock(&obj->mmo.lock); + } + spin_unlock(&obj->mmo.lock); +} + +/** + * i915_gem_object_release_mmap - remove physical page mappings + * @obj: obj in question + * + * Preserve the reservation of the mmapping with the DRM core code, but + * relinquish ownership of the pages back to the system. 
+ */ +void i915_gem_object_release_mmap(struct drm_i915_gem_object *obj) +{ + i915_gem_object_release_mmap_gtt(obj); + i915_gem_object_release_mmap_offset(obj); +} + +static struct i915_mmap_offset * +lookup_mmo(struct drm_i915_gem_object *obj, + enum i915_mmap_type mmap_type) +{ + struct rb_node *rb; + + spin_lock(&obj->mmo.lock); + rb = obj->mmo.offsets.rb_node; + while (rb) { + struct i915_mmap_offset *mmo = + rb_entry(rb, typeof(*mmo), offset); + + if (mmo->mmap_type == mmap_type) { + spin_unlock(&obj->mmo.lock); + return mmo; + } + + if (mmo->mmap_type < mmap_type) + rb = rb->rb_right; + else + rb = rb->rb_left; + } + spin_unlock(&obj->mmo.lock); + + return NULL; +} + +static struct i915_mmap_offset * +insert_mmo(struct drm_i915_gem_object *obj, struct i915_mmap_offset *mmo) +{ + struct rb_node *rb, **p; + + spin_lock(&obj->mmo.lock); + rb = NULL; + p = &obj->mmo.offsets.rb_node; + while (*p) { + struct i915_mmap_offset *pos; + + rb = *p; + pos = rb_entry(rb, typeof(*pos), offset); + + if (pos->mmap_type == mmo->mmap_type) { + spin_unlock(&obj->mmo.lock); + drm_vma_offset_remove(obj->base.dev->vma_offset_manager, + &mmo->vma_node); + kfree(mmo); + return pos; + } + + if (pos->mmap_type < mmo->mmap_type) + p = &rb->rb_right; + else + p = &rb->rb_left; + } + rb_link_node(&mmo->offset, rb, p); + rb_insert_color(&mmo->offset, &obj->mmo.offsets); + spin_unlock(&obj->mmo.lock); + + return mmo; +} + +static struct i915_mmap_offset * +mmap_offset_attach(struct drm_i915_gem_object *obj, + enum i915_mmap_type mmap_type, + struct drm_file *file) { struct drm_i915_private *i915 = to_i915(obj->base.dev); + struct i915_mmap_offset *mmo; int err; - err = drm_gem_create_mmap_offset(&obj->base); + mmo = lookup_mmo(obj, mmap_type); + if (mmo) + goto out; + + mmo = kmalloc(sizeof(*mmo), GFP_KERNEL); + if (!mmo) + return ERR_PTR(-ENOMEM); + + mmo->obj = obj; + mmo->mmap_type = mmap_type; + drm_vma_node_reset(&mmo->vma_node); + + err = drm_vma_offset_add(obj->base.dev->vma_offset_manager, + &mmo->vma_node, obj->base.size / PAGE_SIZE); if (likely(!err)) - return 0; + goto insert; /* Attempt to reap some mmap space from dead objects */ - do { - err = i915_gem_wait_for_idle(i915, - I915_WAIT_INTERRUPTIBLE, - MAX_SCHEDULE_TIMEOUT); - if (err) - break; + err = intel_gt_retire_requests_timeout(&i915->gt, MAX_SCHEDULE_TIMEOUT); + if (err) + goto err; - i915_gem_drain_freed_objects(i915); - err = drm_gem_create_mmap_offset(&obj->base); - if (!err) - break; + i915_gem_drain_freed_objects(i915); + err = drm_vma_offset_add(obj->base.dev->vma_offset_manager, + &mmo->vma_node, obj->base.size / PAGE_SIZE); + if (err) + goto err; - } while (flush_delayed_work(&i915->gem.retire_work)); +insert: + mmo = insert_mmo(obj, mmo); + GEM_BUG_ON(lookup_mmo(obj, mmap_type) != mmo); +out: + if (file) + drm_vma_node_allow(&mmo->vma_node, file); + return mmo; - return err; +err: + kfree(mmo); + return ERR_PTR(err); } -int -i915_gem_mmap_gtt(struct drm_file *file, - struct drm_device *dev, - u32 handle, - u64 *offset) +static int +__assign_mmap_offset(struct drm_file *file, + u32 handle, + enum i915_mmap_type mmap_type, + u64 *offset) { struct drm_i915_gem_object *obj; - int ret; + struct i915_mmap_offset *mmo; + int err; obj = i915_gem_object_lookup(file, handle); if (!obj) return -ENOENT; - ret = create_mmap_offset(obj); - if (ret == 0) - *offset = drm_vma_node_offset_addr(&obj->base.vma_node); + if (mmap_type == I915_MMAP_TYPE_GTT && + i915_gem_object_never_bind_ggtt(obj)) { + err = -ENODEV; + goto out; + } + + if (mmap_type != 
I915_MMAP_TYPE_GTT && + !i915_gem_object_type_has(obj, + I915_GEM_OBJECT_HAS_STRUCT_PAGE | + I915_GEM_OBJECT_HAS_IOMEM)) { + err = -ENODEV; + goto out; + } + + mmo = mmap_offset_attach(obj, mmap_type, file); + if (IS_ERR(mmo)) { + err = PTR_ERR(mmo); + goto out; + } + *offset = drm_vma_node_offset_addr(&mmo->vma_node); + err = 0; +out: i915_gem_object_put(obj); - return ret; + return err; +} + +int +i915_gem_dumb_mmap_offset(struct drm_file *file, + struct drm_device *dev, + u32 handle, + u64 *offset) +{ + enum i915_mmap_type mmap_type; + + if (boot_cpu_has(X86_FEATURE_PAT)) + mmap_type = I915_MMAP_TYPE_WC; + else if (!i915_ggtt_has_aperture(&to_i915(dev)->ggtt)) + return -ENODEV; + else + mmap_type = I915_MMAP_TYPE_GTT; + + return __assign_mmap_offset(file, handle, mmap_type, offset); } /** - * i915_gem_mmap_gtt_ioctl - prepare an object for GTT mmap'ing + * i915_gem_mmap_offset_ioctl - prepare an object for GTT mmap'ing * @dev: DRM device * @data: GTT mapping ioctl data * @file: GEM object info @@ -495,12 +674,220 @@ i915_gem_mmap_gtt(struct drm_file *file, * userspace. */ int -i915_gem_mmap_gtt_ioctl(struct drm_device *dev, void *data, - struct drm_file *file) +i915_gem_mmap_offset_ioctl(struct drm_device *dev, void *data, + struct drm_file *file) +{ + struct drm_i915_private *i915 = to_i915(dev); + struct drm_i915_gem_mmap_offset *args = data; + enum i915_mmap_type type; + int err; + + /* + * Historically we failed to check args.pad and args.offset + * and so we cannot use those fields for user input and we cannot + * add -EINVAL for them as the ABI is fixed, i.e. old userspace + * may be feeding in garbage in those fields. + * + * if (args->pad) return -EINVAL; is verbotten! + */ + + err = i915_user_extensions(u64_to_user_ptr(args->extensions), + NULL, 0, NULL); + if (err) + return err; + + switch (args->flags) { + case I915_MMAP_OFFSET_GTT: + if (!i915_ggtt_has_aperture(&i915->ggtt)) + return -ENODEV; + type = I915_MMAP_TYPE_GTT; + break; + + case I915_MMAP_OFFSET_WC: + if (!boot_cpu_has(X86_FEATURE_PAT)) + return -ENODEV; + type = I915_MMAP_TYPE_WC; + break; + + case I915_MMAP_OFFSET_WB: + type = I915_MMAP_TYPE_WB; + break; + + case I915_MMAP_OFFSET_UC: + if (!boot_cpu_has(X86_FEATURE_PAT)) + return -ENODEV; + type = I915_MMAP_TYPE_UC; + break; + + default: + return -EINVAL; + } + + return __assign_mmap_offset(file, args->handle, type, &args->offset); +} + +static void vm_open(struct vm_area_struct *vma) +{ + struct i915_mmap_offset *mmo = vma->vm_private_data; + struct drm_i915_gem_object *obj = mmo->obj; + + GEM_BUG_ON(!obj); + i915_gem_object_get(obj); +} + +static void vm_close(struct vm_area_struct *vma) +{ + struct i915_mmap_offset *mmo = vma->vm_private_data; + struct drm_i915_gem_object *obj = mmo->obj; + + GEM_BUG_ON(!obj); + i915_gem_object_put(obj); +} + +static const struct vm_operations_struct vm_ops_gtt = { + .fault = vm_fault_gtt, + .open = vm_open, + .close = vm_close, +}; + +static const struct vm_operations_struct vm_ops_cpu = { + .fault = vm_fault_cpu, + .open = vm_open, + .close = vm_close, +}; + +static int singleton_release(struct inode *inode, struct file *file) +{ + struct drm_i915_private *i915 = file->private_data; + + cmpxchg(&i915->gem.mmap_singleton, file, NULL); + drm_dev_put(&i915->drm); + + return 0; +} + +static const struct file_operations singleton_fops = { + .owner = THIS_MODULE, + .release = singleton_release, +}; + +static struct file *mmap_singleton(struct drm_i915_private *i915) +{ + struct file *file; + + rcu_read_lock(); + file = 
i915->gem.mmap_singleton; + if (file && !get_file_rcu(file)) + file = NULL; + rcu_read_unlock(); + if (file) + return file; + + file = anon_inode_getfile("i915.gem", &singleton_fops, i915, O_RDWR); + if (IS_ERR(file)) + return file; + + /* Everyone shares a single global address space */ + file->f_mapping = i915->drm.anon_inode->i_mapping; + + smp_store_mb(i915->gem.mmap_singleton, file); + drm_dev_get(&i915->drm); + + return file; +} + +/* + * This overcomes the limitation in drm_gem_mmap's assignment of a + * drm_gem_object as the vma->vm_private_data. Since we need to + * be able to resolve multiple mmap offsets which could be tied + * to a single gem object. + */ +int i915_gem_mmap(struct file *filp, struct vm_area_struct *vma) { - struct drm_i915_gem_mmap_gtt *args = data; + struct drm_vma_offset_node *node; + struct drm_file *priv = filp->private_data; + struct drm_device *dev = priv->minor->dev; + struct drm_i915_gem_object *obj = NULL; + struct i915_mmap_offset *mmo = NULL; + struct file *anon; + + if (drm_dev_is_unplugged(dev)) + return -ENODEV; + + rcu_read_lock(); + drm_vma_offset_lock_lookup(dev->vma_offset_manager); + node = drm_vma_offset_exact_lookup_locked(dev->vma_offset_manager, + vma->vm_pgoff, + vma_pages(vma)); + if (node && drm_vma_node_is_allowed(node, priv)) { + /* + * Skip 0-refcnted objects as it is in the process of being + * destroyed and will be invalid when the vma manager lock + * is released. + */ + mmo = container_of(node, struct i915_mmap_offset, vma_node); + obj = i915_gem_object_get_rcu(mmo->obj); + } + drm_vma_offset_unlock_lookup(dev->vma_offset_manager); + rcu_read_unlock(); + if (!obj) + return node ? -EACCES : -EINVAL; + + if (i915_gem_object_is_readonly(obj)) { + if (vma->vm_flags & VM_WRITE) { + i915_gem_object_put(obj); + return -EINVAL; + } + vma->vm_flags &= ~VM_MAYWRITE; + } - return i915_gem_mmap_gtt(file, dev, args->handle, &args->offset); + anon = mmap_singleton(to_i915(dev)); + if (IS_ERR(anon)) { + i915_gem_object_put(obj); + return PTR_ERR(anon); + } + + vma->vm_flags |= VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP; + vma->vm_private_data = mmo; + + /* + * We keep the ref on mmo->obj, not vm_file, but we require + * vma->vm_file->f_mapping, see vma_link(), for later revocation. + * Our userspace is accustomed to having per-file resource cleanup + * (i.e. contexts, objects and requests) on their close(fd), which + * requires avoiding extraneous references to their filp, hence why + * we prefer to use an anonymous file for their mmaps. 
+ */ + fput(vma->vm_file); + vma->vm_file = anon; + + switch (mmo->mmap_type) { + case I915_MMAP_TYPE_WC: + vma->vm_page_prot = + pgprot_writecombine(vm_get_page_prot(vma->vm_flags)); + vma->vm_ops = &vm_ops_cpu; + break; + + case I915_MMAP_TYPE_WB: + vma->vm_page_prot = vm_get_page_prot(vma->vm_flags); + vma->vm_ops = &vm_ops_cpu; + break; + + case I915_MMAP_TYPE_UC: + vma->vm_page_prot = + pgprot_noncached(vm_get_page_prot(vma->vm_flags)); + vma->vm_ops = &vm_ops_cpu; + break; + + case I915_MMAP_TYPE_GTT: + vma->vm_page_prot = + pgprot_writecombine(vm_get_page_prot(vma->vm_flags)); + vma->vm_ops = &vm_ops_gtt; + break; + } + vma->vm_page_prot = pgprot_decrypted(vma->vm_page_prot); + + return 0; } #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_mman.h b/drivers/gpu/drm/i915/gem/i915_gem_mman.h new file mode 100644 index 000000000000..862e01b7cb69 --- /dev/null +++ b/drivers/gpu/drm/i915/gem/i915_gem_mman.h @@ -0,0 +1,31 @@ +/* + * SPDX-License-Identifier: MIT + * + * Copyright © 2019 Intel Corporation + */ + +#ifndef __I915_GEM_MMAN_H__ +#define __I915_GEM_MMAN_H__ + +#include <linux/mm_types.h> +#include <linux/types.h> + +struct drm_device; +struct drm_file; +struct drm_i915_gem_object; +struct file; +struct i915_mmap_offset; +struct mutex; + +int i915_gem_mmap_gtt_version(void); +int i915_gem_mmap(struct file *filp, struct vm_area_struct *vma); + +int i915_gem_dumb_mmap_offset(struct drm_file *file_priv, + struct drm_device *dev, + u32 handle, u64 *offset); + +void __i915_gem_object_release_mmap_gtt(struct drm_i915_gem_object *obj); +void i915_gem_object_release_mmap(struct drm_i915_gem_object *obj); +void i915_gem_object_release_mmap_offset(struct drm_i915_gem_object *obj); + +#endif diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.c b/drivers/gpu/drm/i915/gem/i915_gem_object.c index be6caccce0c5..35985218bd85 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_object.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_object.c @@ -22,13 +22,17 @@ * */ -#include "display/intel_frontbuffer.h" +#include <linux/sched/mm.h> +#include "display/intel_frontbuffer.h" +#include "gt/intel_gt.h" #include "i915_drv.h" #include "i915_gem_clflush.h" #include "i915_gem_context.h" +#include "i915_gem_mman.h" #include "i915_gem_object.h" #include "i915_globals.h" +#include "i915_trace.h" static struct i915_global_object { struct i915_global base; @@ -45,35 +49,26 @@ void i915_gem_object_free(struct drm_i915_gem_object *obj) return kmem_cache_free(global.slab_objects, obj); } -static void -frontbuffer_retire(struct i915_active_request *active, - struct i915_request *request) -{ - struct drm_i915_gem_object *obj = - container_of(active, typeof(*obj), frontbuffer_write); - - intel_fb_obj_flush(obj, ORIGIN_CS); -} - void i915_gem_object_init(struct drm_i915_gem_object *obj, - const struct drm_i915_gem_object_ops *ops) + const struct drm_i915_gem_object_ops *ops, + struct lock_class_key *key) { - mutex_init(&obj->mm.lock); + __mutex_init(&obj->mm.lock, "obj->mm.lock", key); spin_lock_init(&obj->vma.lock); INIT_LIST_HEAD(&obj->vma.list); + INIT_LIST_HEAD(&obj->mm.link); + INIT_LIST_HEAD(&obj->lut_list); - INIT_LIST_HEAD(&obj->batch_pool_link); + + spin_lock_init(&obj->mmo.lock); + obj->mmo.offsets = RB_ROOT; init_rcu_head(&obj->rcu); obj->ops = ops; - obj->frontbuffer_ggtt_origin = ORIGIN_GTT; - i915_active_request_init(&obj->frontbuffer_write, - NULL, frontbuffer_retire); - obj->mm.madv = I915_MADV_WILLNEED; INIT_RADIX_TREE(&obj->mm.get_page.radix, GFP_KERNEL | __GFP_NOWARN); 
mutex_init(&obj->mm.get_page.lock); @@ -105,6 +100,7 @@ void i915_gem_close_object(struct drm_gem_object *gem, struct drm_file *file) { struct drm_i915_gem_object *obj = to_intel_bo(gem); struct drm_i915_file_private *fpriv = file->driver_priv; + struct i915_mmap_offset *mmo, *mn; struct i915_lut_handle *lut, *ln; LIST_HEAD(close); @@ -120,6 +116,11 @@ void i915_gem_close_object(struct drm_gem_object *gem, struct drm_file *file) } i915_gem_object_unlock(obj); + spin_lock(&obj->mmo.lock); + rbtree_postorder_for_each_entry_safe(mmo, mn, &obj->mmo.offsets, offset) + drm_vma_node_revoke(&mmo->vma_node, file); + spin_unlock(&obj->mmo.lock); + list_for_each_entry_safe(lut, ln, &close, obj_link) { struct i915_gem_context *ctx = lut->ctx; struct i915_vma *vma; @@ -146,6 +147,19 @@ void i915_gem_close_object(struct drm_gem_object *gem, struct drm_file *file) } } +static void __i915_gem_free_object_rcu(struct rcu_head *head) +{ + struct drm_i915_gem_object *obj = + container_of(head, typeof(*obj), rcu); + struct drm_i915_private *i915 = to_i915(obj->base.dev); + + dma_resv_fini(&obj->base._resv); + i915_gem_object_free(obj); + + GEM_BUG_ON(!atomic_read(&i915->mm.free_count)); + atomic_dec(&i915->mm.free_count); +} + static void __i915_gem_free_objects(struct drm_i915_private *i915, struct llist_node *freed) { @@ -154,125 +168,106 @@ static void __i915_gem_free_objects(struct drm_i915_private *i915, wakeref = intel_runtime_pm_get(&i915->runtime_pm); llist_for_each_entry_safe(obj, on, freed, freed) { - struct i915_vma *vma, *vn; + struct i915_mmap_offset *mmo, *mn; trace_i915_gem_object_destroy(obj); - mutex_lock(&i915->drm.struct_mutex); - - GEM_BUG_ON(i915_gem_object_is_active(obj)); - list_for_each_entry_safe(vma, vn, &obj->vma.list, obj_link) { - GEM_BUG_ON(i915_vma_is_active(vma)); - vma->flags &= ~I915_VMA_PIN_MASK; - i915_vma_destroy(vma); + if (!list_empty(&obj->vma.list)) { + struct i915_vma *vma; + + /* + * Note that the vma keeps an object reference while + * it is active, so it *should* not sleep while we + * destroy it. Our debug code errs and insists it *might*. + * For the moment, play along. + */ + spin_lock(&obj->vma.lock); + while ((vma = list_first_entry_or_null(&obj->vma.list, + struct i915_vma, + obj_link))) { + GEM_BUG_ON(vma->obj != obj); + spin_unlock(&obj->vma.lock); + + __i915_vma_put(vma); + + spin_lock(&obj->vma.lock); + } + spin_unlock(&obj->vma.lock); } - GEM_BUG_ON(!list_empty(&obj->vma.list)); - GEM_BUG_ON(!RB_EMPTY_ROOT(&obj->vma.tree)); - /* - * This serializes freeing with the shrinker. Since the free - * is delayed, first by RCU then by the workqueue, we want the - * shrinker to be able to free pages of unreferenced objects, - * or else we may oom whilst there are plenty of deferred - * freed objects. 
- */ - if (i915_gem_object_has_pages(obj) && - i915_gem_object_is_shrinkable(obj)) { - unsigned long flags; + i915_gem_object_release_mmap(obj); - spin_lock_irqsave(&i915->mm.obj_lock, flags); - list_del_init(&obj->mm.link); - spin_unlock_irqrestore(&i915->mm.obj_lock, flags); + rbtree_postorder_for_each_entry_safe(mmo, mn, + &obj->mmo.offsets, + offset) { + drm_vma_offset_remove(obj->base.dev->vma_offset_manager, + &mmo->vma_node); + kfree(mmo); } - - mutex_unlock(&i915->drm.struct_mutex); + obj->mmo.offsets = RB_ROOT; GEM_BUG_ON(atomic_read(&obj->bind_count)); GEM_BUG_ON(obj->userfault_count); - GEM_BUG_ON(atomic_read(&obj->frontbuffer_bits)); GEM_BUG_ON(!list_empty(&obj->lut_list)); - if (obj->ops->release) - obj->ops->release(obj); - atomic_set(&obj->mm.pages_pin_count, 0); - __i915_gem_object_put_pages(obj, I915_MM_NORMAL); + __i915_gem_object_put_pages(obj); GEM_BUG_ON(i915_gem_object_has_pages(obj)); + bitmap_free(obj->bit_17); if (obj->base.import_attach) drm_prime_gem_destroy(&obj->base, NULL); - drm_gem_object_release(&obj->base); - - bitmap_free(obj->bit_17); - i915_gem_object_free(obj); + drm_gem_free_mmap_offset(&obj->base); - GEM_BUG_ON(!atomic_read(&i915->mm.free_count)); - atomic_dec(&i915->mm.free_count); + if (obj->ops->release) + obj->ops->release(obj); - cond_resched(); + /* But keep the pointer alive for RCU-protected lookups */ + call_rcu(&obj->rcu, __i915_gem_free_object_rcu); } intel_runtime_pm_put(&i915->runtime_pm, wakeref); } void i915_gem_flush_free_objects(struct drm_i915_private *i915) { - struct llist_node *freed; - - /* Free the oldest, most stale object to keep the free_list short */ - freed = NULL; - if (!llist_empty(&i915->mm.free_list)) { /* quick test for hotpath */ - /* Only one consumer of llist_del_first() allowed */ - spin_lock(&i915->mm.free_lock); - freed = llist_del_first(&i915->mm.free_list); - spin_unlock(&i915->mm.free_lock); - } - if (unlikely(freed)) { - freed->next = NULL; + struct llist_node *freed = llist_del_all(&i915->mm.free_list); + + if (unlikely(freed)) __i915_gem_free_objects(i915, freed); - } } static void __i915_gem_free_work(struct work_struct *work) { struct drm_i915_private *i915 = container_of(work, struct drm_i915_private, mm.free_work); - struct llist_node *freed; - - /* - * All file-owned VMA should have been released by this point through - * i915_gem_close_object(), or earlier by i915_gem_context_close(). - * However, the object may also be bound into the global GTT (e.g. - * older GPUs without per-process support, or for direct access through - * the GTT either for the user or for scanout). Those VMA still need to - * unbound now. - */ - - spin_lock(&i915->mm.free_lock); - while ((freed = llist_del_all(&i915->mm.free_list))) { - spin_unlock(&i915->mm.free_lock); - __i915_gem_free_objects(i915, freed); - if (need_resched()) - return; - - spin_lock(&i915->mm.free_lock); - } - spin_unlock(&i915->mm.free_lock); + i915_gem_flush_free_objects(i915); } -static void __i915_gem_free_object_rcu(struct rcu_head *head) +void i915_gem_free_object(struct drm_gem_object *gem_obj) { - struct drm_i915_gem_object *obj = - container_of(head, typeof(*obj), rcu); + struct drm_i915_gem_object *obj = to_intel_bo(gem_obj); struct drm_i915_private *i915 = to_i915(obj->base.dev); + GEM_BUG_ON(i915_gem_object_is_framebuffer(obj)); + /* - * We reuse obj->rcu for the freed list, so we had better not treat - * it like a rcu_head from this point forwards. And we expect all - * objects to be freed via this path. 
+ * Before we free the object, make sure any pure RCU-only + * read-side critical sections are complete, e.g. + * i915_gem_busy_ioctl(). For the corresponding synchronized + * lookup see i915_gem_object_lookup_rcu(). */ - destroy_rcu_head(&obj->rcu); + atomic_inc(&i915->mm.free_count); + + /* + * This serializes freeing with the shrinker. Since the free + * is delayed, first by RCU then by the workqueue, we want the + * shrinker to be able to free pages of unreferenced objects, + * or else we may oom whilst there are plenty of deferred + * freed objects. + */ + i915_gem_object_make_unshrinkable(obj); /* * Since we require blocking on struct_mutex to unbind the freed @@ -288,27 +283,6 @@ static void __i915_gem_free_object_rcu(struct rcu_head *head) queue_work(i915->wq, &i915->mm.free_work); } -void i915_gem_free_object(struct drm_gem_object *gem_obj) -{ - struct drm_i915_gem_object *obj = to_intel_bo(gem_obj); - - /* - * Before we free the object, make sure any pure RCU-only - * read-side critical sections are complete, e.g. - * i915_gem_busy_ioctl(). For the corresponding synchronized - * lookup see i915_gem_object_lookup_rcu(). - */ - atomic_inc(&to_i915(obj->base.dev)->mm.free_count); - call_rcu(&obj->rcu, __i915_gem_free_object_rcu); -} - -static inline enum fb_op_origin -fb_write_origin(struct drm_i915_gem_object *obj, unsigned int domain) -{ - return (domain == I915_GEM_DOMAIN_GTT ? - obj->frontbuffer_ggtt_origin : ORIGIN_CPU); -} - static bool gpu_write_needs_clflush(struct drm_i915_gem_object *obj) { return !(obj->cache_level == I915_CACHE_NONE || @@ -319,7 +293,6 @@ void i915_gem_object_flush_write_domain(struct drm_i915_gem_object *obj, unsigned int flush_domains) { - struct drm_i915_private *dev_priv = to_i915(obj->base.dev); struct i915_vma *vma; assert_object_held(obj); @@ -329,17 +302,14 @@ i915_gem_object_flush_write_domain(struct drm_i915_gem_object *obj, switch (obj->write_domain) { case I915_GEM_DOMAIN_GTT: - i915_gem_flush_ggtt_writes(dev_priv); - - intel_fb_obj_flush(obj, - fb_write_origin(obj, I915_GEM_DOMAIN_GTT)); - + spin_lock(&obj->vma.lock); for_each_ggtt_vma(vma, obj) { - if (vma->iomap) - continue; - - i915_vma_unset_ggtt_write(vma); + if (i915_vma_unset_ggtt_write(vma)) + intel_gt_flush_ggtt_writes(vma->vm->gt); } + spin_unlock(&obj->vma.lock); + + i915_gem_object_flush_frontbuffer(obj, ORIGIN_CPU); break; case I915_GEM_DOMAIN_WC: @@ -359,6 +329,30 @@ i915_gem_object_flush_write_domain(struct drm_i915_gem_object *obj, obj->write_domain = 0; } +void __i915_gem_object_flush_frontbuffer(struct drm_i915_gem_object *obj, + enum fb_op_origin origin) +{ + struct intel_frontbuffer *front; + + front = __intel_frontbuffer_get(obj); + if (front) { + intel_frontbuffer_flush(front, origin); + intel_frontbuffer_put(front); + } +} + +void __i915_gem_object_invalidate_frontbuffer(struct drm_i915_gem_object *obj, + enum fb_op_origin origin) +{ + struct intel_frontbuffer *front; + + front = __intel_frontbuffer_get(obj); + if (front) { + intel_frontbuffer_invalidate(front, origin); + intel_frontbuffer_put(front); + } +} + void i915_gem_init__objects(struct drm_i915_private *i915) { INIT_WORK(&i915->mm.free_work, __i915_gem_free_work); diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.h b/drivers/gpu/drm/i915/gem/i915_gem_object.h index dfebd5706f16..9c86f2dea947 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_object.h +++ b/drivers/gpu/drm/i915/gem/i915_gem_object.h @@ -13,9 +13,10 @@ #include <drm/i915_drm.h> +#include "display/intel_frontbuffer.h" #include 
"i915_gem_object_types.h" - #include "i915_gem_gtt.h" +#include "i915_vma_types.h" void i915_gem_init__objects(struct drm_i915_private *i915); @@ -23,12 +24,14 @@ struct drm_i915_gem_object *i915_gem_object_alloc(void); void i915_gem_object_free(struct drm_i915_gem_object *obj); void i915_gem_object_init(struct drm_i915_gem_object *obj, - const struct drm_i915_gem_object_ops *ops); + const struct drm_i915_gem_object_ops *ops, + struct lock_class_key *key); struct drm_i915_gem_object * -i915_gem_object_create_shmem(struct drm_i915_private *i915, u64 size); +i915_gem_object_create_shmem(struct drm_i915_private *i915, + resource_size_t size); struct drm_i915_gem_object * i915_gem_object_create_shmem_from_data(struct drm_i915_private *i915, - const void *data, size_t size); + const void *data, resource_size_t size); extern const struct drm_i915_gem_object_ops i915_gem_shmem_ops; void __i915_gem_object_release_shmem(struct drm_i915_gem_object *obj, @@ -67,21 +70,29 @@ i915_gem_object_lookup_rcu(struct drm_file *file, u32 handle) } static inline struct drm_i915_gem_object * +i915_gem_object_get_rcu(struct drm_i915_gem_object *obj) +{ + if (obj && !kref_get_unless_zero(&obj->base.refcount)) + obj = NULL; + + return obj; +} + +static inline struct drm_i915_gem_object * i915_gem_object_lookup(struct drm_file *file, u32 handle) { struct drm_i915_gem_object *obj; rcu_read_lock(); obj = i915_gem_object_lookup_rcu(file, handle); - if (obj && !kref_get_unless_zero(&obj->base.refcount)) - obj = NULL; + obj = i915_gem_object_get_rcu(obj); rcu_read_unlock(); return obj; } __deprecated -extern struct drm_gem_object * +struct drm_gem_object * drm_gem_object_lookup(struct drm_file *file, u32 handle); __attribute__((nonnull)) @@ -99,22 +110,27 @@ i915_gem_object_put(struct drm_i915_gem_object *obj) __drm_gem_object_put(&obj->base); } -#define assert_object_held(obj) reservation_object_assert_held((obj)->base.resv) +#define assert_object_held(obj) dma_resv_assert_held((obj)->base.resv) static inline void i915_gem_object_lock(struct drm_i915_gem_object *obj) { - reservation_object_lock(obj->base.resv, NULL); + dma_resv_lock(obj->base.resv, NULL); +} + +static inline bool i915_gem_object_trylock(struct drm_i915_gem_object *obj) +{ + return dma_resv_trylock(obj->base.resv); } static inline int i915_gem_object_lock_interruptible(struct drm_i915_gem_object *obj) { - return reservation_object_lock_interruptible(obj->base.resv, NULL); + return dma_resv_lock_interruptible(obj->base.resv, NULL); } static inline void i915_gem_object_unlock(struct drm_i915_gem_object *obj) { - reservation_object_unlock(obj->base.resv); + dma_resv_unlock(obj->base.resv); } struct dma_fence * @@ -125,49 +141,74 @@ void i915_gem_object_unlock_fence(struct drm_i915_gem_object *obj, static inline void i915_gem_object_set_readonly(struct drm_i915_gem_object *obj) { - obj->base.vma_node.readonly = true; + obj->flags |= I915_BO_READONLY; } static inline bool i915_gem_object_is_readonly(const struct drm_i915_gem_object *obj) { - return obj->base.vma_node.readonly; + return obj->flags & I915_BO_READONLY; +} + +static inline bool +i915_gem_object_is_contiguous(const struct drm_i915_gem_object *obj) +{ + return obj->flags & I915_BO_ALLOC_CONTIGUOUS; +} + +static inline bool +i915_gem_object_is_volatile(const struct drm_i915_gem_object *obj) +{ + return obj->flags & I915_BO_ALLOC_VOLATILE; +} + +static inline void +i915_gem_object_set_volatile(struct drm_i915_gem_object *obj) +{ + obj->flags |= I915_BO_ALLOC_VOLATILE; +} + +static inline bool 
+i915_gem_object_type_has(const struct drm_i915_gem_object *obj, + unsigned long flags) +{ + return obj->ops->flags & flags; } static inline bool i915_gem_object_has_struct_page(const struct drm_i915_gem_object *obj) { - return obj->ops->flags & I915_GEM_OBJECT_HAS_STRUCT_PAGE; + return i915_gem_object_type_has(obj, I915_GEM_OBJECT_HAS_STRUCT_PAGE); } static inline bool i915_gem_object_is_shrinkable(const struct drm_i915_gem_object *obj) { - return obj->ops->flags & I915_GEM_OBJECT_IS_SHRINKABLE; + return i915_gem_object_type_has(obj, I915_GEM_OBJECT_IS_SHRINKABLE); } static inline bool i915_gem_object_is_proxy(const struct drm_i915_gem_object *obj) { - return obj->ops->flags & I915_GEM_OBJECT_IS_PROXY; + return i915_gem_object_type_has(obj, I915_GEM_OBJECT_IS_PROXY); } static inline bool -i915_gem_object_needs_async_cancel(const struct drm_i915_gem_object *obj) +i915_gem_object_never_bind_ggtt(const struct drm_i915_gem_object *obj) { - return obj->ops->flags & I915_GEM_OBJECT_ASYNC_CANCEL; + return i915_gem_object_type_has(obj, I915_GEM_OBJECT_NO_GGTT); } static inline bool -i915_gem_object_is_active(const struct drm_i915_gem_object *obj) +i915_gem_object_needs_async_cancel(const struct drm_i915_gem_object *obj) { - return READ_ONCE(obj->active_count); + return i915_gem_object_type_has(obj, I915_GEM_OBJECT_ASYNC_CANCEL); } static inline bool i915_gem_object_is_framebuffer(const struct drm_i915_gem_object *obj) { - return READ_ONCE(obj->framebuffer_references); + return READ_ONCE(obj->frontbuffer); } static inline unsigned int @@ -239,10 +280,27 @@ void __i915_gem_object_set_pages(struct drm_i915_gem_object *obj, int ____i915_gem_object_get_pages(struct drm_i915_gem_object *obj); int __i915_gem_object_get_pages(struct drm_i915_gem_object *obj); +enum i915_mm_subclass { /* lockdep subclass for obj->mm.lock/struct_mutex */ + I915_MM_NORMAL = 0, + /* + * Only used by struct_mutex, when called "recursively" from + * direct-reclaim-esque. Safe because there is only ever one + * struct_mutex in the entire system. + */ + I915_MM_SHRINKER = 1, + /* + * Used for obj->mm.lock when allocating pages. Safe because the object + * isn't yet on any LRU, and therefore the shrinker can't deadlock on + * it. As soon as the object has pages, obj->mm.lock nests within + * fs_reclaim. 
+ */ + I915_MM_GET_PAGES = 1, +}; + static inline int __must_check i915_gem_object_pin_pages(struct drm_i915_gem_object *obj) { - might_lock(&obj->mm.lock); + might_lock_nested(&obj->mm.lock, I915_MM_GET_PAGES); if (atomic_inc_not_zero(&obj->mm.pages_pin_count)) return 0; @@ -285,13 +343,7 @@ i915_gem_object_unpin_pages(struct drm_i915_gem_object *obj) __i915_gem_object_unpin_pages(obj); } -enum i915_mm_subclass { /* lockdep subclass for obj->mm.lock/struct_mutex */ - I915_MM_NORMAL = 0, - I915_MM_SHRINKER /* called "recursively" from direct-reclaim-esque */ -}; - -int __i915_gem_object_put_pages(struct drm_i915_gem_object *obj, - enum i915_mm_subclass subclass); +int __i915_gem_object_put_pages(struct drm_i915_gem_object *obj); void i915_gem_object_truncate(struct drm_i915_gem_object *obj); void i915_gem_object_writeback(struct drm_i915_gem_object *obj); @@ -344,9 +396,6 @@ static inline void i915_gem_object_unpin_map(struct drm_i915_gem_object *obj) i915_gem_object_unpin_pages(obj); } -void __i915_gem_object_release_mmap(struct drm_i915_gem_object *obj); -void i915_gem_object_release_mmap(struct drm_i915_gem_object *obj); - void i915_gem_object_flush_write_domain(struct drm_i915_gem_object *obj, unsigned int flush_domains); @@ -373,7 +422,7 @@ i915_gem_object_last_write_engine(struct drm_i915_gem_object *obj) struct dma_fence *fence; rcu_read_lock(); - fence = reservation_object_get_excl_rcu(obj->base.resv); + fence = dma_resv_get_excl_rcu(obj->base.resv); rcu_read_unlock(); if (fence && dma_fence_is_i915(fence) && !dma_fence_is_signaled(fence)) @@ -400,6 +449,10 @@ i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj, unsigned int flags); void i915_gem_object_unpin_from_display_plane(struct i915_vma *vma); +void i915_gem_object_make_unshrinkable(struct drm_i915_gem_object *obj); +void i915_gem_object_make_shrinkable(struct drm_i915_gem_object *obj); +void i915_gem_object_make_purgeable(struct drm_i915_gem_object *obj); + static inline bool cpu_write_needs_clflush(struct drm_i915_gem_object *obj) { if (obj->cache_dirty) @@ -408,7 +461,8 @@ static inline bool cpu_write_needs_clflush(struct drm_i915_gem_object *obj) if (!(obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_WRITE)) return true; - return obj->pin_global; /* currently in use by HW, keep flushed */ + /* Currently in use by HW (display engine)? Keep flushed. 
*/ + return i915_gem_object_is_framebuffer(obj); } static inline void __start_cpu_write(struct drm_i915_gem_object *obj) @@ -425,6 +479,26 @@ int i915_gem_object_wait(struct drm_i915_gem_object *obj, int i915_gem_object_wait_priority(struct drm_i915_gem_object *obj, unsigned int flags, const struct i915_sched_attr *attr); -#define I915_PRIORITY_DISPLAY I915_USER_PRIORITY(I915_PRIORITY_MAX) + +void __i915_gem_object_flush_frontbuffer(struct drm_i915_gem_object *obj, + enum fb_op_origin origin); +void __i915_gem_object_invalidate_frontbuffer(struct drm_i915_gem_object *obj, + enum fb_op_origin origin); + +static inline void +i915_gem_object_flush_frontbuffer(struct drm_i915_gem_object *obj, + enum fb_op_origin origin) +{ + if (unlikely(rcu_access_pointer(obj->frontbuffer))) + __i915_gem_object_flush_frontbuffer(obj, origin); +} + +static inline void +i915_gem_object_invalidate_frontbuffer(struct drm_i915_gem_object *obj, + enum fb_op_origin origin) +{ + if (unlikely(rcu_access_pointer(obj->frontbuffer))) + __i915_gem_object_invalidate_frontbuffer(obj, origin); +} #endif diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object_blt.c b/drivers/gpu/drm/i915/gem/i915_gem_object_blt.c index cb42e3a312e2..70809d8897cd 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_object_blt.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_object_blt.c @@ -3,59 +3,137 @@ * Copyright © 2019 Intel Corporation */ -#include "i915_gem_object_blt.h" - +#include "i915_drv.h" +#include "gt/intel_context.h" +#include "gt/intel_engine_pm.h" +#include "gt/intel_engine_pool.h" +#include "gt/intel_gt.h" +#include "gt/intel_ring.h" #include "i915_gem_clflush.h" -#include "intel_drv.h" +#include "i915_gem_object_blt.h" -int intel_emit_vma_fill_blt(struct i915_request *rq, - struct i915_vma *vma, - u32 value) +struct i915_vma *intel_emit_vma_fill_blt(struct intel_context *ce, + struct i915_vma *vma, + u32 value) { - u32 *cs; - - cs = intel_ring_begin(rq, 8); - if (IS_ERR(cs)) - return PTR_ERR(cs); - - if (INTEL_GEN(rq->i915) >= 8) { - *cs++ = XY_COLOR_BLT_CMD | BLT_WRITE_RGBA | (7 - 2); - *cs++ = BLT_DEPTH_32 | BLT_ROP_COLOR_COPY | PAGE_SIZE; - *cs++ = 0; - *cs++ = vma->size >> PAGE_SHIFT << 16 | PAGE_SIZE / 4; - *cs++ = lower_32_bits(vma->node.start); - *cs++ = upper_32_bits(vma->node.start); - *cs++ = value; - *cs++ = MI_NOOP; - } else { - *cs++ = XY_COLOR_BLT_CMD | BLT_WRITE_RGBA | (6 - 2); - *cs++ = BLT_DEPTH_32 | BLT_ROP_COLOR_COPY | PAGE_SIZE; - *cs++ = 0; - *cs++ = vma->size >> PAGE_SHIFT << 16 | PAGE_SIZE / 4; - *cs++ = vma->node.start; - *cs++ = value; - *cs++ = MI_NOOP; - *cs++ = MI_NOOP; + struct drm_i915_private *i915 = ce->vm->i915; + const u32 block_size = SZ_8M; /* ~1ms at 8GiB/s preemption delay */ + struct intel_engine_pool_node *pool; + struct i915_vma *batch; + u64 offset; + u64 count; + u64 rem; + u32 size; + u32 *cmd; + int err; + + GEM_BUG_ON(intel_engine_is_virtual(ce->engine)); + intel_engine_pm_get(ce->engine); + + count = div_u64(round_up(vma->size, block_size), block_size); + size = (1 + 8 * count) * sizeof(u32); + size = round_up(size, PAGE_SIZE); + pool = intel_engine_get_pool(ce->engine, size); + if (IS_ERR(pool)) { + err = PTR_ERR(pool); + goto out_pm; + } + + cmd = i915_gem_object_pin_map(pool->obj, I915_MAP_WC); + if (IS_ERR(cmd)) { + err = PTR_ERR(cmd); + goto out_put; + } + + rem = vma->size; + offset = vma->node.start; + + do { + u32 size = min_t(u64, rem, block_size); + + GEM_BUG_ON(size >> PAGE_SHIFT > S16_MAX); + + if (INTEL_GEN(i915) >= 8) { + *cmd++ = XY_COLOR_BLT_CMD | BLT_WRITE_RGBA | (7 - 2); + 
*cmd++ = BLT_DEPTH_32 | BLT_ROP_COLOR_COPY | PAGE_SIZE; + *cmd++ = 0; + *cmd++ = size >> PAGE_SHIFT << 16 | PAGE_SIZE / 4; + *cmd++ = lower_32_bits(offset); + *cmd++ = upper_32_bits(offset); + *cmd++ = value; + } else { + *cmd++ = XY_COLOR_BLT_CMD | BLT_WRITE_RGBA | (6 - 2); + *cmd++ = BLT_DEPTH_32 | BLT_ROP_COLOR_COPY | PAGE_SIZE; + *cmd++ = 0; + *cmd++ = size >> PAGE_SHIFT << 16 | PAGE_SIZE / 4; + *cmd++ = offset; + *cmd++ = value; + } + + /* Allow ourselves to be preempted in between blocks. */ + *cmd++ = MI_ARB_CHECK; + + offset += size; + rem -= size; + } while (rem); + + *cmd = MI_BATCH_BUFFER_END; + intel_gt_chipset_flush(ce->vm->gt); + + i915_gem_object_unpin_map(pool->obj); + + batch = i915_vma_instance(pool->obj, ce->vm, NULL); + if (IS_ERR(batch)) { + err = PTR_ERR(batch); + goto out_put; } - intel_ring_advance(rq, cs); + err = i915_vma_pin(batch, 0, 0, PIN_USER); + if (unlikely(err)) + goto out_put; + + batch->private = pool; + return batch; - return 0; +out_put: + intel_engine_pool_put(pool); +out_pm: + intel_engine_pm_put(ce->engine); + return ERR_PTR(err); +} + +int intel_emit_vma_mark_active(struct i915_vma *vma, struct i915_request *rq) +{ + int err; + + i915_vma_lock(vma); + err = i915_request_await_object(rq, vma->obj, false); + if (err == 0) + err = i915_vma_move_to_active(vma, rq, 0); + i915_vma_unlock(vma); + if (unlikely(err)) + return err; + + return intel_engine_pool_mark_active(vma->private, rq); +} + +void intel_emit_vma_release(struct intel_context *ce, struct i915_vma *vma) +{ + i915_vma_unpin(vma); + intel_engine_pool_put(vma->private); + intel_engine_pm_put(ce->engine); } int i915_gem_object_fill_blt(struct drm_i915_gem_object *obj, struct intel_context *ce, u32 value) { - struct drm_i915_private *i915 = to_i915(obj->base.dev); - struct i915_gem_context *ctx = ce->gem_context; - struct i915_address_space *vm = ctx->vm ?: &i915->ggtt.vm; struct i915_request *rq; + struct i915_vma *batch; struct i915_vma *vma; int err; - /* XXX: ce->vm please */ - vma = i915_vma_instance(obj, vm, NULL); + vma = i915_vma_instance(obj, ce->vm, NULL); if (IS_ERR(vma)) return PTR_ERR(vma); @@ -69,12 +147,22 @@ int i915_gem_object_fill_blt(struct drm_i915_gem_object *obj, i915_gem_object_unlock(obj); } - rq = i915_request_create(ce); + batch = intel_emit_vma_fill_blt(ce, vma, value); + if (IS_ERR(batch)) { + err = PTR_ERR(batch); + goto out_unpin; + } + + rq = intel_context_create_request(ce); if (IS_ERR(rq)) { err = PTR_ERR(rq); - goto out_unpin; + goto out_batch; } + err = intel_emit_vma_mark_active(batch, rq); + if (unlikely(err)) + goto out_request; + err = i915_request_await_object(rq, obj, true); if (unlikely(err)) goto out_request; @@ -86,22 +174,229 @@ int i915_gem_object_fill_blt(struct drm_i915_gem_object *obj, } i915_vma_lock(vma); - err = i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE); + err = i915_request_await_object(rq, vma->obj, true); + if (err == 0) + err = i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE); i915_vma_unlock(vma); if (unlikely(err)) goto out_request; - err = intel_emit_vma_fill_blt(rq, vma, value); + err = ce->engine->emit_bb_start(rq, + batch->node.start, batch->node.size, + 0); out_request: if (unlikely(err)) i915_request_skip(rq, err); i915_request_add(rq); +out_batch: + intel_emit_vma_release(ce, batch); out_unpin: i915_vma_unpin(vma); return err; } +struct i915_vma *intel_emit_vma_copy_blt(struct intel_context *ce, + struct i915_vma *src, + struct i915_vma *dst) +{ + struct drm_i915_private *i915 = ce->vm->i915; + const u32 block_size = 
SZ_8M; /* ~1ms at 8GiB/s preemption delay */ + struct intel_engine_pool_node *pool; + struct i915_vma *batch; + u64 src_offset, dst_offset; + u64 count, rem; + u32 size, *cmd; + int err; + + GEM_BUG_ON(src->size != dst->size); + + GEM_BUG_ON(intel_engine_is_virtual(ce->engine)); + intel_engine_pm_get(ce->engine); + + count = div_u64(round_up(dst->size, block_size), block_size); + size = (1 + 11 * count) * sizeof(u32); + size = round_up(size, PAGE_SIZE); + pool = intel_engine_get_pool(ce->engine, size); + if (IS_ERR(pool)) { + err = PTR_ERR(pool); + goto out_pm; + } + + cmd = i915_gem_object_pin_map(pool->obj, I915_MAP_WC); + if (IS_ERR(cmd)) { + err = PTR_ERR(cmd); + goto out_put; + } + + rem = src->size; + src_offset = src->node.start; + dst_offset = dst->node.start; + + do { + size = min_t(u64, rem, block_size); + GEM_BUG_ON(size >> PAGE_SHIFT > S16_MAX); + + if (INTEL_GEN(i915) >= 9) { + *cmd++ = GEN9_XY_FAST_COPY_BLT_CMD | (10 - 2); + *cmd++ = BLT_DEPTH_32 | PAGE_SIZE; + *cmd++ = 0; + *cmd++ = size >> PAGE_SHIFT << 16 | PAGE_SIZE / 4; + *cmd++ = lower_32_bits(dst_offset); + *cmd++ = upper_32_bits(dst_offset); + *cmd++ = 0; + *cmd++ = PAGE_SIZE; + *cmd++ = lower_32_bits(src_offset); + *cmd++ = upper_32_bits(src_offset); + } else if (INTEL_GEN(i915) >= 8) { + *cmd++ = XY_SRC_COPY_BLT_CMD | BLT_WRITE_RGBA | (10 - 2); + *cmd++ = BLT_DEPTH_32 | BLT_ROP_SRC_COPY | PAGE_SIZE; + *cmd++ = 0; + *cmd++ = size >> PAGE_SHIFT << 16 | PAGE_SIZE / 4; + *cmd++ = lower_32_bits(dst_offset); + *cmd++ = upper_32_bits(dst_offset); + *cmd++ = 0; + *cmd++ = PAGE_SIZE; + *cmd++ = lower_32_bits(src_offset); + *cmd++ = upper_32_bits(src_offset); + } else { + *cmd++ = SRC_COPY_BLT_CMD | BLT_WRITE_RGBA | (6 - 2); + *cmd++ = BLT_DEPTH_32 | BLT_ROP_SRC_COPY | PAGE_SIZE; + *cmd++ = size >> PAGE_SHIFT << 16 | PAGE_SIZE; + *cmd++ = dst_offset; + *cmd++ = PAGE_SIZE; + *cmd++ = src_offset; + } + + /* Allow ourselves to be preempted in between blocks. 
*/ + *cmd++ = MI_ARB_CHECK; + + src_offset += size; + dst_offset += size; + rem -= size; + } while (rem); + + *cmd = MI_BATCH_BUFFER_END; + intel_gt_chipset_flush(ce->vm->gt); + + i915_gem_object_unpin_map(pool->obj); + + batch = i915_vma_instance(pool->obj, ce->vm, NULL); + if (IS_ERR(batch)) { + err = PTR_ERR(batch); + goto out_put; + } + + err = i915_vma_pin(batch, 0, 0, PIN_USER); + if (unlikely(err)) + goto out_put; + + batch->private = pool; + return batch; + +out_put: + intel_engine_pool_put(pool); +out_pm: + intel_engine_pm_put(ce->engine); + return ERR_PTR(err); +} + +static int move_to_gpu(struct i915_vma *vma, struct i915_request *rq, bool write) +{ + struct drm_i915_gem_object *obj = vma->obj; + + if (obj->cache_dirty & ~obj->cache_coherent) + i915_gem_clflush_object(obj, 0); + + return i915_request_await_object(rq, obj, write); +} + +int i915_gem_object_copy_blt(struct drm_i915_gem_object *src, + struct drm_i915_gem_object *dst, + struct intel_context *ce) +{ + struct drm_gem_object *objs[] = { &src->base, &dst->base }; + struct i915_address_space *vm = ce->vm; + struct i915_vma *vma[2], *batch; + struct ww_acquire_ctx acquire; + struct i915_request *rq; + int err, i; + + vma[0] = i915_vma_instance(src, vm, NULL); + if (IS_ERR(vma[0])) + return PTR_ERR(vma[0]); + + err = i915_vma_pin(vma[0], 0, 0, PIN_USER); + if (unlikely(err)) + return err; + + vma[1] = i915_vma_instance(dst, vm, NULL); + if (IS_ERR(vma[1])) + goto out_unpin_src; + + err = i915_vma_pin(vma[1], 0, 0, PIN_USER); + if (unlikely(err)) + goto out_unpin_src; + + batch = intel_emit_vma_copy_blt(ce, vma[0], vma[1]); + if (IS_ERR(batch)) { + err = PTR_ERR(batch); + goto out_unpin_dst; + } + + rq = intel_context_create_request(ce); + if (IS_ERR(rq)) { + err = PTR_ERR(rq); + goto out_batch; + } + + err = intel_emit_vma_mark_active(batch, rq); + if (unlikely(err)) + goto out_request; + + err = drm_gem_lock_reservations(objs, ARRAY_SIZE(objs), &acquire); + if (unlikely(err)) + goto out_request; + + for (i = 0; i < ARRAY_SIZE(vma); i++) { + err = move_to_gpu(vma[i], rq, i); + if (unlikely(err)) + goto out_unlock; + } + + for (i = 0; i < ARRAY_SIZE(vma); i++) { + unsigned int flags = i ? 
EXEC_OBJECT_WRITE : 0; + + err = i915_vma_move_to_active(vma[i], rq, flags); + if (unlikely(err)) + goto out_unlock; + } + + if (rq->engine->emit_init_breadcrumb) { + err = rq->engine->emit_init_breadcrumb(rq); + if (unlikely(err)) + goto out_unlock; + } + + err = rq->engine->emit_bb_start(rq, + batch->node.start, batch->node.size, + 0); +out_unlock: + drm_gem_unlock_reservations(objs, ARRAY_SIZE(objs), &acquire); +out_request: + if (unlikely(err)) + i915_request_skip(rq, err); + + i915_request_add(rq); +out_batch: + intel_emit_vma_release(ce, batch); +out_unpin_dst: + i915_vma_unpin(vma[1]); +out_unpin_src: + i915_vma_unpin(vma[0]); + return err; +} + #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) #include "selftests/i915_gem_object_blt.c" #endif diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object_blt.h b/drivers/gpu/drm/i915/gem/i915_gem_object_blt.h index 7ec7de6ac0c0..243a43a87824 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_object_blt.h +++ b/drivers/gpu/drm/i915/gem/i915_gem_object_blt.h @@ -8,17 +8,30 @@ #include <linux/types.h> +#include "gt/intel_context.h" +#include "gt/intel_engine_pm.h" +#include "gt/intel_engine_pool.h" +#include "i915_vma.h" + struct drm_i915_gem_object; -struct intel_context; -struct i915_request; -struct i915_vma; -int intel_emit_vma_fill_blt(struct i915_request *rq, - struct i915_vma *vma, - u32 value); +struct i915_vma *intel_emit_vma_fill_blt(struct intel_context *ce, + struct i915_vma *vma, + u32 value); + +struct i915_vma *intel_emit_vma_copy_blt(struct intel_context *ce, + struct i915_vma *src, + struct i915_vma *dst); + +int intel_emit_vma_mark_active(struct i915_vma *vma, struct i915_request *rq); +void intel_emit_vma_release(struct intel_context *ce, struct i915_vma *vma); int i915_gem_object_fill_blt(struct drm_i915_gem_object *obj, struct intel_context *ce, u32 value); +int i915_gem_object_copy_blt(struct drm_i915_gem_object *src, + struct drm_i915_gem_object *dst, + struct intel_context *ce); + #endif diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h index 18bf4f8d6d80..f64ad77e6b1e 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h +++ b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h @@ -8,11 +8,13 @@ #define __I915_GEM_OBJECT_TYPES_H__ #include <drm/drm_gem.h> +#include <uapi/drm/i915_drm.h> #include "i915_active.h" #include "i915_selftest.h" struct drm_i915_gem_object; +struct intel_fronbuffer; /* * struct i915_lut_handle tracks the fast lookups from handle to vma used @@ -29,9 +31,11 @@ struct i915_lut_handle { struct drm_i915_gem_object_ops { unsigned int flags; #define I915_GEM_OBJECT_HAS_STRUCT_PAGE BIT(0) -#define I915_GEM_OBJECT_IS_SHRINKABLE BIT(1) -#define I915_GEM_OBJECT_IS_PROXY BIT(2) -#define I915_GEM_OBJECT_ASYNC_CANCEL BIT(3) +#define I915_GEM_OBJECT_HAS_IOMEM BIT(1) +#define I915_GEM_OBJECT_IS_SHRINKABLE BIT(2) +#define I915_GEM_OBJECT_IS_PROXY BIT(3) +#define I915_GEM_OBJECT_NO_GGTT BIT(4) +#define I915_GEM_OBJECT_ASYNC_CANCEL BIT(5) /* Interface between the GEM object and its backing storage. 
* get_pages() is called once prior to the use of the associated set @@ -59,6 +63,21 @@ struct drm_i915_gem_object_ops { void (*release)(struct drm_i915_gem_object *obj); }; +enum i915_mmap_type { + I915_MMAP_TYPE_GTT = 0, + I915_MMAP_TYPE_WC, + I915_MMAP_TYPE_WB, + I915_MMAP_TYPE_UC, +}; + +struct i915_mmap_offset { + struct drm_vma_offset_node vma_node; + struct drm_i915_gem_object *obj; + enum i915_mmap_type mmap_type; + + struct rb_node offset; +}; + struct drm_i915_gem_object { struct drm_gem_object base; @@ -114,9 +133,19 @@ struct drm_i915_gem_object { unsigned int userfault_count; struct list_head userfault_link; - struct list_head batch_pool_link; + struct { + spinlock_t lock; /* Protects access to mmo offsets */ + struct rb_root offsets; + } mmo; + I915_SELFTEST_DECLARE(struct list_head st_link); + unsigned long flags; +#define I915_BO_ALLOC_CONTIGUOUS BIT(0) +#define I915_BO_ALLOC_VOLATILE BIT(1) +#define I915_BO_ALLOC_FLAGS (I915_BO_ALLOC_CONTIGUOUS | I915_BO_ALLOC_VOLATILE) +#define I915_BO_READONLY BIT(2) + /* * Is the object to be mapped as read-only to the GPU * Only honoured if hardware has relevant pte bit @@ -142,9 +171,7 @@ struct drm_i915_gem_object { */ u16 write_domain; - atomic_t frontbuffer_bits; - unsigned int frontbuffer_ggtt_origin; /* write once */ - struct i915_active_request frontbuffer_write; + struct intel_frontbuffer __rcu *frontbuffer; /** Current tiling stride for the object, if it's tiled. */ unsigned int tiling_and_stride; @@ -154,18 +181,34 @@ struct drm_i915_gem_object { /** Count of VMA actually bound by this object */ atomic_t bind_count; - unsigned int active_count; - /** Count of how many global VMA are currently pinned for use by HW */ - unsigned int pin_global; struct { - struct mutex lock; /* protects the pages and their use */ + /* + * Protects the pages and their use. Do not use directly, but + * instead go through the pin/unpin interfaces. + */ + struct mutex lock; atomic_t pages_pin_count; + atomic_t shrink_pin; + + /** + * Memory region for this object. + */ + struct intel_memory_region *region; + /** + * List of memory region blocks allocated for this object. + */ + struct list_head blocks; + /** + * Element within memory_region->objects or region->purgeable + * if the object is marked as DONTNEED. Access is protected by + * region->obj_lock. + */ + struct list_head region_link; struct sg_table *pages; void *mapping; - /* TODO: whack some of this into the error state */ struct i915_page_sizes { /** * The sg mask of the pages sg_table. i.e the mask of @@ -226,9 +269,6 @@ struct drm_i915_gem_object { bool quirked:1; } mm; - /** References from framebuffers, locks out tiling changes. */ - unsigned int framebuffer_references; - /** Record of address bit 17 of each page at last unbind. */ unsigned long *bit_17; diff --git a/drivers/gpu/drm/i915/gem/i915_gem_pages.c b/drivers/gpu/drm/i915/gem/i915_gem_pages.c index 65eb430cedba..54aca5c9101e 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_pages.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_pages.c @@ -7,6 +7,8 @@ #include "i915_drv.h" #include "i915_gem_object.h" #include "i915_scatterlist.h" +#include "i915_gem_lmem.h" +#include "i915_gem_mman.h" void __i915_gem_object_set_pages(struct drm_i915_gem_object *obj, struct sg_table *pages, @@ -18,6 +20,9 @@ void __i915_gem_object_set_pages(struct drm_i915_gem_object *obj, lockdep_assert_held(&obj->mm.lock); + if (i915_gem_object_is_volatile(obj)) + obj->mm.madv = I915_MADV_DONTNEED; + /* Make the pages coherent with the GPU (flushing any swapin). 
*/ if (obj->cache_dirty) { obj->write_domain = 0; @@ -71,6 +76,7 @@ void __i915_gem_object_set_pages(struct drm_i915_gem_object *obj, list = &i915->mm.shrink_list; list_add_tail(&obj->mm.link, list); + atomic_set(&obj->mm.shrink_pin, 0); spin_unlock_irqrestore(&i915->mm.obj_lock, flags); } } @@ -101,7 +107,7 @@ int __i915_gem_object_get_pages(struct drm_i915_gem_object *obj) { int err; - err = mutex_lock_interruptible(&obj->mm.lock); + err = mutex_lock_interruptible_nested(&obj->mm.lock, I915_MM_GET_PAGES); if (err) return err; @@ -150,37 +156,30 @@ static void __i915_gem_object_reset_page_iter(struct drm_i915_gem_object *obj) rcu_read_unlock(); } +static void unmap_object(struct drm_i915_gem_object *obj, void *ptr) +{ + if (is_vmalloc_addr(ptr)) + vunmap(ptr); + else + kunmap(kmap_to_page(ptr)); +} + struct sg_table * __i915_gem_object_unset_pages(struct drm_i915_gem_object *obj) { - struct drm_i915_private *i915 = to_i915(obj->base.dev); struct sg_table *pages; pages = fetch_and_zero(&obj->mm.pages); if (IS_ERR_OR_NULL(pages)) return pages; - if (i915_gem_object_is_shrinkable(obj)) { - unsigned long flags; + if (i915_gem_object_is_volatile(obj)) + obj->mm.madv = I915_MADV_WILLNEED; - spin_lock_irqsave(&i915->mm.obj_lock, flags); - - list_del(&obj->mm.link); - i915->mm.shrink_count--; - i915->mm.shrink_memory -= obj->base.size; - - spin_unlock_irqrestore(&i915->mm.obj_lock, flags); - } + i915_gem_object_make_unshrinkable(obj); if (obj->mm.mapping) { - void *ptr; - - ptr = page_mask_bits(obj->mm.mapping); - if (is_vmalloc_addr(ptr)) - vunmap(ptr); - else - kunmap(kmap_to_page(ptr)); - + unmap_object(obj, page_mask_bits(obj->mm.mapping)); obj->mm.mapping = NULL; } @@ -190,8 +189,7 @@ __i915_gem_object_unset_pages(struct drm_i915_gem_object *obj) return pages; } -int __i915_gem_object_put_pages(struct drm_i915_gem_object *obj, - enum i915_mm_subclass subclass) +int __i915_gem_object_put_pages(struct drm_i915_gem_object *obj) { struct sg_table *pages; int err; @@ -202,12 +200,14 @@ int __i915_gem_object_put_pages(struct drm_i915_gem_object *obj, GEM_BUG_ON(atomic_read(&obj->bind_count)); /* May be called by shrinker from within get_pages() (on another bo) */ - mutex_lock_nested(&obj->mm.lock, subclass); + mutex_lock(&obj->mm.lock); if (unlikely(atomic_read(&obj->mm.pages_pin_count))) { err = -EBUSY; goto unlock; } + i915_gem_object_release_mmap_offset(obj); + /* * ->put_pages might need to allocate memory for the bit17 swizzle * array, hence protect them from being reaped by removing them from gtt @@ -234,36 +234,44 @@ unlock: return err; } +static inline pte_t iomap_pte(resource_size_t base, + dma_addr_t offset, + pgprot_t prot) +{ + return pte_mkspecial(pfn_pte((base + offset) >> PAGE_SHIFT, prot)); +} + /* The 'mapping' part of i915_gem_object_pin_map() below */ -static void *i915_gem_object_map(const struct drm_i915_gem_object *obj, +static void *i915_gem_object_map(struct drm_i915_gem_object *obj, enum i915_map_type type) { - unsigned long n_pages = obj->base.size >> PAGE_SHIFT; + unsigned long n_pte = obj->base.size >> PAGE_SHIFT; struct sg_table *sgt = obj->mm.pages; - struct sgt_iter sgt_iter; - struct page *page; - struct page *stack_pages[32]; - struct page **pages = stack_pages; - unsigned long i = 0; + pte_t *stack[32], **mem; + struct vm_struct *area; pgprot_t pgprot; - void *addr; + + if (!i915_gem_object_has_struct_page(obj) && type != I915_MAP_WC) + return NULL; /* A single page can always be kmapped */ - if (n_pages == 1 && type == I915_MAP_WB) + if (n_pte == 1 && type == 
I915_MAP_WB) return kmap(sg_page(sgt->sgl)); - if (n_pages > ARRAY_SIZE(stack_pages)) { + mem = stack; + if (n_pte > ARRAY_SIZE(stack)) { /* Too big for stack -- allocate temporary array instead */ - pages = kvmalloc_array(n_pages, sizeof(*pages), GFP_KERNEL); - if (!pages) + mem = kvmalloc_array(n_pte, sizeof(*mem), GFP_KERNEL); + if (!mem) return NULL; } - for_each_sgt_page(page, sgt_iter, sgt) - pages[i++] = page; - - /* Check that we have the expected number of pages */ - GEM_BUG_ON(i != n_pages); + area = alloc_vm_area(obj->base.size, mem); + if (!area) { + if (mem != stack) + kvfree(mem); + return NULL; + } switch (type) { default: @@ -276,12 +284,31 @@ static void *i915_gem_object_map(const struct drm_i915_gem_object *obj, pgprot = pgprot_writecombine(PAGE_KERNEL_IO); break; } - addr = vmap(pages, n_pages, 0, pgprot); - if (pages != stack_pages) - kvfree(pages); + if (i915_gem_object_has_struct_page(obj)) { + struct sgt_iter iter; + struct page *page; + pte_t **ptes = mem; + + for_each_sgt_page(page, iter, sgt) + **ptes++ = mk_pte(page, pgprot); + } else { + resource_size_t iomap; + struct sgt_iter iter; + pte_t **ptes = mem; + dma_addr_t addr; - return addr; + iomap = obj->mm.region->iomap.base; + iomap -= obj->mm.region->region.start; + + for_each_sgt_daddr(addr, iter, sgt) + **ptes++ = iomap_pte(iomap, addr, pgprot); + } + + if (mem != stack) + kvfree(mem); + + return area->addr; } /* get, pin, and map the pages of the object into kernel space */ @@ -289,14 +316,16 @@ void *i915_gem_object_pin_map(struct drm_i915_gem_object *obj, enum i915_map_type type) { enum i915_map_type has_type; + unsigned int flags; bool pinned; void *ptr; int err; - if (unlikely(!i915_gem_object_has_struct_page(obj))) + flags = I915_GEM_OBJECT_HAS_STRUCT_PAGE | I915_GEM_OBJECT_HAS_IOMEM; + if (!i915_gem_object_type_has(obj, flags)) return ERR_PTR(-ENXIO); - err = mutex_lock_interruptible(&obj->mm.lock); + err = mutex_lock_interruptible_nested(&obj->mm.lock, I915_MM_GET_PAGES); if (err) return ERR_PTR(err); @@ -325,10 +354,7 @@ void *i915_gem_object_pin_map(struct drm_i915_gem_object *obj, goto err_unpin; } - if (is_vmalloc_addr(ptr)) - vunmap(ptr); - else - kunmap(kmap_to_page(ptr)); + unmap_object(obj, ptr); ptr = obj->mm.mapping = NULL; } diff --git a/drivers/gpu/drm/i915/gem/i915_gem_phys.c b/drivers/gpu/drm/i915/gem/i915_gem_phys.c index 2deac933cf59..b1b7c1b3038a 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_phys.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_phys.c @@ -13,8 +13,10 @@ #include <drm/drm_legacy.h> /* for drm_pci.h! 
*/ #include <drm/drm_pci.h> +#include "gt/intel_gt.h" #include "i915_drv.h" #include "i915_gem_object.h" +#include "i915_gem_region.h" #include "i915_scatterlist.h" static int i915_gem_object_get_pages_phys(struct drm_i915_gem_object *obj) @@ -60,7 +62,7 @@ static int i915_gem_object_get_pages_phys(struct drm_i915_gem_object *obj) vaddr += PAGE_SIZE; } - i915_gem_chipset_flush(to_i915(obj->base.dev)); + intel_gt_chipset_flush(&to_i915(obj->base.dev)->gt); st = kmalloc(sizeof(*st), GFP_KERNEL); if (!st) { @@ -132,16 +134,16 @@ i915_gem_object_put_pages_phys(struct drm_i915_gem_object *obj, drm_pci_free(obj->base.dev, obj->phys_handle); } -static void -i915_gem_object_release_phys(struct drm_i915_gem_object *obj) +static void phys_release(struct drm_i915_gem_object *obj) { - i915_gem_object_unpin_pages(obj); + fput(obj->base.filp); } static const struct drm_i915_gem_object_ops i915_gem_phys_ops = { .get_pages = i915_gem_object_get_pages_phys, .put_pages = i915_gem_object_put_pages_phys, - .release = i915_gem_object_release_phys, + + .release = phys_release, }; int i915_gem_object_attach_phys(struct drm_i915_gem_object *obj, int align) @@ -158,11 +160,11 @@ int i915_gem_object_attach_phys(struct drm_i915_gem_object *obj, int align) if (obj->ops != &i915_gem_shmem_ops) return -EINVAL; - err = i915_gem_object_unbind(obj); + err = i915_gem_object_unbind(obj, I915_GEM_OBJECT_UNBIND_ACTIVE); if (err) return err; - mutex_lock(&obj->mm.lock); + mutex_lock_nested(&obj->mm.lock, I915_MM_GET_PAGES); if (obj->mm.madv != I915_MADV_WILLNEED) { err = -EFAULT; @@ -190,8 +192,10 @@ int i915_gem_object_attach_phys(struct drm_i915_gem_object *obj, int align) /* Perma-pin (until release) the physical set of pages */ __i915_gem_object_pin_pages(obj); - if (!IS_ERR_OR_NULL(pages)) + if (!IS_ERR_OR_NULL(pages)) { i915_gem_shmem_ops.put_pages(obj, pages); + i915_gem_object_release_memory_region(obj); + } mutex_unlock(&obj->mm.lock); return 0; diff --git a/drivers/gpu/drm/i915/gem/i915_gem_pm.c b/drivers/gpu/drm/i915/gem/i915_gem_pm.c index 914b5d4112bb..c8264eb036bf 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_pm.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_pm.c @@ -5,149 +5,19 @@ */ #include "gem/i915_gem_pm.h" +#include "gt/intel_gt.h" #include "gt/intel_gt_pm.h" +#include "gt/intel_gt_requests.h" #include "i915_drv.h" -#include "i915_globals.h" - -static void call_idle_barriers(struct intel_engine_cs *engine) -{ - struct llist_node *node, *next; - - llist_for_each_safe(node, next, llist_del_all(&engine->barrier_tasks)) { - struct i915_active_request *active = - container_of((struct list_head *)node, - typeof(*active), link); - - INIT_LIST_HEAD(&active->link); - RCU_INIT_POINTER(active->request, NULL); - - active->retire(active, NULL); - } -} - -static void i915_gem_park(struct drm_i915_private *i915) -{ - struct intel_engine_cs *engine; - enum intel_engine_id id; - - lockdep_assert_held(&i915->drm.struct_mutex); - - for_each_engine(engine, i915, id) { - call_idle_barriers(engine); /* cleanup after wedging */ - i915_gem_batch_pool_fini(&engine->batch_pool); - } - - i915_timelines_park(i915); - i915_vma_parked(i915); - - i915_globals_park(); -} - -static void idle_work_handler(struct work_struct *work) -{ - struct drm_i915_private *i915 = - container_of(work, typeof(*i915), gem.idle_work); - bool park; - - cancel_delayed_work_sync(&i915->gem.retire_work); - mutex_lock(&i915->drm.struct_mutex); - - intel_wakeref_lock(&i915->gt.wakeref); - park = !intel_wakeref_active(&i915->gt.wakeref) && !work_pending(work); - 
intel_wakeref_unlock(&i915->gt.wakeref); - if (park) - i915_gem_park(i915); - else - queue_delayed_work(i915->wq, - &i915->gem.retire_work, - round_jiffies_up_relative(HZ)); - - mutex_unlock(&i915->drm.struct_mutex); -} - -static void retire_work_handler(struct work_struct *work) -{ - struct drm_i915_private *i915 = - container_of(work, typeof(*i915), gem.retire_work.work); - - /* Come back later if the device is busy... */ - if (mutex_trylock(&i915->drm.struct_mutex)) { - i915_retire_requests(i915); - mutex_unlock(&i915->drm.struct_mutex); - } - - queue_delayed_work(i915->wq, - &i915->gem.retire_work, - round_jiffies_up_relative(HZ)); -} - -static int pm_notifier(struct notifier_block *nb, - unsigned long action, - void *data) -{ - struct drm_i915_private *i915 = - container_of(nb, typeof(*i915), gem.pm_notifier); - - switch (action) { - case INTEL_GT_UNPARK: - i915_globals_unpark(); - queue_delayed_work(i915->wq, - &i915->gem.retire_work, - round_jiffies_up_relative(HZ)); - break; - - case INTEL_GT_PARK: - queue_work(i915->wq, &i915->gem.idle_work); - break; - } - - return NOTIFY_OK; -} - -static bool switch_to_kernel_context_sync(struct drm_i915_private *i915) -{ - bool result = !i915_terminally_wedged(i915); - - do { - if (i915_gem_wait_for_idle(i915, - I915_WAIT_LOCKED | - I915_WAIT_FOR_IDLE_BOOST, - I915_GEM_IDLE_TIMEOUT) == -ETIME) { - /* XXX hide warning from gem_eio */ - if (i915_modparams.reset) { - dev_err(i915->drm.dev, - "Failed to idle engines, declaring wedged!\n"); - GEM_TRACE_DUMP(); - } - - /* - * Forcibly cancel outstanding work and leave - * the gpu quiet. - */ - i915_gem_set_wedged(i915); - result = false; - } - } while (i915_retire_requests(i915) && result); - - GEM_BUG_ON(i915->gt.awake); - return result; -} - -bool i915_gem_load_power_context(struct drm_i915_private *i915) -{ - return switch_to_kernel_context_sync(i915); -} void i915_gem_suspend(struct drm_i915_private *i915) { - GEM_TRACE("\n"); + GEM_TRACE("%s\n", dev_name(i915->drm.dev)); intel_wakeref_auto(&i915->ggtt.userfault_wakeref, 0); flush_workqueue(i915->wq); - mutex_lock(&i915->drm.struct_mutex); - /* * We have to flush all the executing contexts to main memory so * that they can saved in the hibernation image. To ensure the last @@ -157,22 +27,9 @@ void i915_gem_suspend(struct drm_i915_private *i915) * state. Fortunately, the kernel_context is disposable and we do * not rely on its state. */ - switch_to_kernel_context_sync(i915); - - mutex_unlock(&i915->drm.struct_mutex); - - /* - * Assert that we successfully flushed all the work and - * reset the GPU back to its idle, low power state. - */ - GEM_BUG_ON(i915->gt.awake); - flush_work(&i915->gem.idle_work); - - cancel_delayed_work_sync(&i915->gpu_error.hangcheck_work); + intel_gt_suspend_prepare(&i915->gt); i915_gem_drain_freed_objects(i915); - - intel_uc_suspend(i915); } static struct drm_i915_gem_object *first_mm_object(struct list_head *list) @@ -212,6 +69,8 @@ void i915_gem_suspend_late(struct drm_i915_private *i915) * machine in an unusable condition. 
*/ + intel_gt_suspend_late(&i915->gt); + spin_lock_irqsave(&i915->mm.obj_lock, flags); for (phase = phases; *phase; phase++) { LIST_HEAD(keep); @@ -236,60 +95,16 @@ void i915_gem_suspend_late(struct drm_i915_private *i915) list_splice_tail(&keep, *phase); } spin_unlock_irqrestore(&i915->mm.obj_lock, flags); - - intel_uc_sanitize(i915); - i915_gem_sanitize(i915); } void i915_gem_resume(struct drm_i915_private *i915) { - GEM_TRACE("\n"); - - WARN_ON(i915->gt.awake); - - mutex_lock(&i915->drm.struct_mutex); - intel_uncore_forcewake_get(&i915->uncore, FORCEWAKE_ALL); - - i915_gem_restore_gtt_mappings(i915); - i915_gem_restore_fences(i915); - - if (i915_gem_init_hw(i915)) - goto err_wedged; + GEM_TRACE("%s\n", dev_name(i915->drm.dev)); /* * As we didn't flush the kernel context before suspend, we cannot * guarantee that the context image is complete. So let's just reset * it and start again. */ - if (intel_gt_resume(i915)) - goto err_wedged; - - intel_uc_resume(i915); - - /* Always reload a context for powersaving. */ - if (!i915_gem_load_power_context(i915)) - goto err_wedged; - -out_unlock: - intel_uncore_forcewake_put(&i915->uncore, FORCEWAKE_ALL); - mutex_unlock(&i915->drm.struct_mutex); - return; - -err_wedged: - if (!i915_reset_failed(i915)) { - dev_err(i915->drm.dev, - "Failed to re-initialize GPU, declaring it wedged!\n"); - i915_gem_set_wedged(i915); - } - goto out_unlock; -} - -void i915_gem_init__pm(struct drm_i915_private *i915) -{ - INIT_WORK(&i915->gem.idle_work, idle_work_handler); - INIT_DELAYED_WORK(&i915->gem.retire_work, retire_work_handler); - - i915->gem.pm_notifier.notifier_call = pm_notifier; - blocking_notifier_chain_register(&i915->gt.pm_notifications, - &i915->gem.pm_notifier); + intel_gt_resume(&i915->gt); } diff --git a/drivers/gpu/drm/i915/gem/i915_gem_pm.h b/drivers/gpu/drm/i915/gem/i915_gem_pm.h index 6f7d5d11ac3b..26b78dbdc225 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_pm.h +++ b/drivers/gpu/drm/i915/gem/i915_gem_pm.h @@ -12,9 +12,6 @@ struct drm_i915_private; struct work_struct; -void i915_gem_init__pm(struct drm_i915_private *i915); - -bool i915_gem_load_power_context(struct drm_i915_private *i915); void i915_gem_resume(struct drm_i915_private *i915); void i915_gem_idle_work_handler(struct work_struct *work); diff --git a/drivers/gpu/drm/i915/gem/i915_gem_region.c b/drivers/gpu/drm/i915/gem/i915_gem_region.c new file mode 100644 index 000000000000..1515384d7e0e --- /dev/null +++ b/drivers/gpu/drm/i915/gem/i915_gem_region.c @@ -0,0 +1,177 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2019 Intel Corporation + */ + +#include "intel_memory_region.h" +#include "i915_gem_region.h" +#include "i915_drv.h" +#include "i915_trace.h" + +void +i915_gem_object_put_pages_buddy(struct drm_i915_gem_object *obj, + struct sg_table *pages) +{ + __intel_memory_region_put_pages_buddy(obj->mm.region, &obj->mm.blocks); + + obj->mm.dirty = false; + sg_free_table(pages); + kfree(pages); +} + +int +i915_gem_object_get_pages_buddy(struct drm_i915_gem_object *obj) +{ + struct intel_memory_region *mem = obj->mm.region; + struct list_head *blocks = &obj->mm.blocks; + resource_size_t size = obj->base.size; + resource_size_t prev_end; + struct i915_buddy_block *block; + unsigned int flags; + struct sg_table *st; + struct scatterlist *sg; + unsigned int sg_page_sizes; + int ret; + + st = kmalloc(sizeof(*st), GFP_KERNEL); + if (!st) + return -ENOMEM; + + if (sg_alloc_table(st, size >> ilog2(mem->mm.chunk_size), GFP_KERNEL)) { + kfree(st); + return -ENOMEM; + } + + flags = 
I915_ALLOC_MIN_PAGE_SIZE; + if (obj->flags & I915_BO_ALLOC_CONTIGUOUS) + flags |= I915_ALLOC_CONTIGUOUS; + + ret = __intel_memory_region_get_pages_buddy(mem, size, flags, blocks); + if (ret) + goto err_free_sg; + + GEM_BUG_ON(list_empty(blocks)); + + sg = st->sgl; + st->nents = 0; + sg_page_sizes = 0; + prev_end = (resource_size_t)-1; + + list_for_each_entry(block, blocks, link) { + u64 block_size, offset; + + block_size = min_t(u64, size, + i915_buddy_block_size(&mem->mm, block)); + offset = i915_buddy_block_offset(block); + + GEM_BUG_ON(overflows_type(block_size, sg->length)); + + if (offset != prev_end || + add_overflows_t(typeof(sg->length), sg->length, block_size)) { + if (st->nents) { + sg_page_sizes |= sg->length; + sg = __sg_next(sg); + } + + sg_dma_address(sg) = mem->region.start + offset; + sg_dma_len(sg) = block_size; + + sg->length = block_size; + + st->nents++; + } else { + sg->length += block_size; + sg_dma_len(sg) += block_size; + } + + prev_end = offset + block_size; + } + + sg_page_sizes |= sg->length; + sg_mark_end(sg); + i915_sg_trim(st); + + __i915_gem_object_set_pages(obj, st, sg_page_sizes); + + return 0; + +err_free_sg: + sg_free_table(st); + kfree(st); + return ret; +} + +void i915_gem_object_init_memory_region(struct drm_i915_gem_object *obj, + struct intel_memory_region *mem, + unsigned long flags) +{ + INIT_LIST_HEAD(&obj->mm.blocks); + obj->mm.region = intel_memory_region_get(mem); + + obj->flags |= flags; + if (obj->base.size <= mem->min_page_size) + obj->flags |= I915_BO_ALLOC_CONTIGUOUS; + + mutex_lock(&mem->objects.lock); + + if (obj->flags & I915_BO_ALLOC_VOLATILE) + list_add(&obj->mm.region_link, &mem->objects.purgeable); + else + list_add(&obj->mm.region_link, &mem->objects.list); + + mutex_unlock(&mem->objects.lock); +} + +void i915_gem_object_release_memory_region(struct drm_i915_gem_object *obj) +{ + struct intel_memory_region *mem = obj->mm.region; + + mutex_lock(&mem->objects.lock); + list_del(&obj->mm.region_link); + mutex_unlock(&mem->objects.lock); + + intel_memory_region_put(mem); +} + +struct drm_i915_gem_object * +i915_gem_object_create_region(struct intel_memory_region *mem, + resource_size_t size, + unsigned int flags) +{ + struct drm_i915_gem_object *obj; + + /* + * NB: Our use of resource_size_t for the size stems from using struct + * resource for the mem->region. We might need to revisit this in the + * future. + */ + + GEM_BUG_ON(flags & ~I915_BO_ALLOC_FLAGS); + + if (!mem) + return ERR_PTR(-ENODEV); + + size = round_up(size, mem->min_page_size); + + GEM_BUG_ON(!size); + GEM_BUG_ON(!IS_ALIGNED(size, I915_GTT_MIN_ALIGNMENT)); + + /* + * XXX: There is a prevalence of the assumption that we fit the + * object's page count inside a 32bit _signed_ variable. Let's document + * this and catch if we ever need to fix it. In the meantime, if you do + * spot such a local variable, please consider fixing! 
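The list_for_each_entry() walk above packs buddy blocks into scatterlist entries, opening a new entry only when a block is not adjacent to the previous one (offset != prev_end) or when extending the current entry would overflow sg->length. A self-contained model of that coalescing rule (plain structs stand in for the buddy blocks and the scatterlist; the overflow guard is left out for brevity):

#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

struct block { uint64_t offset, size; };        /* stand-in for i915_buddy_block */
struct run   { uint64_t start, len; };          /* stand-in for one sg entry */

/* Merge physically adjacent blocks into runs, as the buddy->sg loop does. */
static size_t coalesce(const struct block *b, size_t nb, struct run *out)
{
        size_t nr = 0;
        uint64_t prev_end = UINT64_MAX;

        for (size_t i = 0; i < nb; i++) {
                if (b[i].offset != prev_end) {          /* discontiguous: new entry */
                        out[nr].start = b[i].offset;
                        out[nr].len = b[i].size;
                        nr++;
                } else {                                /* adjacent: extend the last entry */
                        out[nr - 1].len += b[i].size;
                }
                prev_end = b[i].offset + b[i].size;
        }
        return nr;
}

int main(void)
{
        const struct block b[] = { { 0, 4096 }, { 4096, 4096 }, { 16384, 8192 } };
        struct run r[3];
        size_t n = coalesce(b, 3, r);

        /* Expect two runs: 8192 bytes at 0 and 8192 bytes at 16384. */
        for (size_t i = 0; i < n; i++)
                printf("run %zu: start=%" PRIu64 " len=%" PRIu64 "\n",
                       i, r[i].start, r[i].len);
        return 0;
}

The real loop additionally accumulates sg_page_sizes and trims the table afterwards with i915_sg_trim().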
+ */ + + if (size >> PAGE_SHIFT > INT_MAX) + return ERR_PTR(-E2BIG); + + if (overflows_type(size, obj->base.size)) + return ERR_PTR(-E2BIG); + + obj = mem->ops->create_object(mem, size, flags); + if (!IS_ERR(obj)) + trace_i915_gem_object_create(obj); + + return obj; +} diff --git a/drivers/gpu/drm/i915/gem/i915_gem_region.h b/drivers/gpu/drm/i915/gem/i915_gem_region.h new file mode 100644 index 000000000000..f2ff6f8bff74 --- /dev/null +++ b/drivers/gpu/drm/i915/gem/i915_gem_region.h @@ -0,0 +1,29 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2019 Intel Corporation + */ + +#ifndef __I915_GEM_REGION_H__ +#define __I915_GEM_REGION_H__ + +#include <linux/types.h> + +struct intel_memory_region; +struct drm_i915_gem_object; +struct sg_table; + +int i915_gem_object_get_pages_buddy(struct drm_i915_gem_object *obj); +void i915_gem_object_put_pages_buddy(struct drm_i915_gem_object *obj, + struct sg_table *pages); + +void i915_gem_object_init_memory_region(struct drm_i915_gem_object *obj, + struct intel_memory_region *mem, + unsigned long flags); +void i915_gem_object_release_memory_region(struct drm_i915_gem_object *obj); + +struct drm_i915_gem_object * +i915_gem_object_create_region(struct intel_memory_region *mem, + resource_size_t size, + unsigned int flags); + +#endif diff --git a/drivers/gpu/drm/i915/gem/i915_gem_shmem.c b/drivers/gpu/drm/i915/gem/i915_gem_shmem.c index 19d9ecdb2894..a2a980d9d241 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_shmem.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_shmem.c @@ -7,9 +7,12 @@ #include <linux/pagevec.h> #include <linux/swap.h> +#include "gem/i915_gem_region.h" #include "i915_drv.h" +#include "i915_gemfs.h" #include "i915_gem_object.h" #include "i915_scatterlist.h" +#include "i915_trace.h" /* * Move pages to appropriate lru and release the pagevec, decrementing the @@ -25,6 +28,7 @@ static void check_release_pagevec(struct pagevec *pvec) static int shmem_get_pages(struct drm_i915_gem_object *obj) { struct drm_i915_private *i915 = to_i915(obj->base.dev); + struct intel_memory_region *mem = obj->mm.region; const unsigned long page_count = obj->base.size / PAGE_SIZE; unsigned long i; struct address_space *mapping; @@ -51,7 +55,7 @@ static int shmem_get_pages(struct drm_i915_gem_object *obj) * If there's no chance of allocating enough pages for the whole * object, bail early. 
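Before any of that, i915_gem_object_create_region() rounds the request up to the region's minimum page size and rejects sizes whose page count would not fit in a signed 32-bit value, as the comment above spells out. A tiny standalone model of those checks (the constants are arbitrary examples, and a power-of-two minimum page size is assumed):

#include <errno.h>
#include <stdint.h>

#define MODEL_PAGE_SHIFT        12
#define MODEL_MIN_PAGE_SIZE     (64u * 1024)    /* hypothetical region minimum */

static int validate_region_size(uint64_t *size)
{
        const uint64_t min = MODEL_MIN_PAGE_SIZE;

        if (!*size)
                return -EINVAL;

        *size = (*size + min - 1) & ~(min - 1); /* round_up(size, min) */

        if (*size >> MODEL_PAGE_SHIFT > INT32_MAX)
                return -E2BIG;                  /* more than INT_MAX pages */

        return 0;
}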
*/ - if (page_count > totalram_pages()) + if (obj->base.size > resource_size(&mem->region)) return -ENOMEM; st = kmalloc(sizeof(*st), GFP_KERNEL); @@ -414,6 +418,13 @@ shmem_pwrite(struct drm_i915_gem_object *obj, return 0; } +static void shmem_release(struct drm_i915_gem_object *obj) +{ + i915_gem_object_release_memory_region(obj); + + fput(obj->base.filp); +} + const struct drm_i915_gem_object_ops i915_gem_shmem_ops = { .flags = I915_GEM_OBJECT_HAS_STRUCT_PAGE | I915_GEM_OBJECT_IS_SHRINKABLE, @@ -424,11 +435,13 @@ const struct drm_i915_gem_object_ops i915_gem_shmem_ops = { .writeback = shmem_writeback, .pwrite = shmem_pwrite, + + .release = shmem_release, }; -static int create_shmem(struct drm_i915_private *i915, - struct drm_gem_object *obj, - size_t size) +static int __create_shmem(struct drm_i915_private *i915, + struct drm_gem_object *obj, + resource_size_t size) { unsigned long flags = VM_NORESERVE; struct file *filp; @@ -447,31 +460,24 @@ static int create_shmem(struct drm_i915_private *i915, return 0; } -struct drm_i915_gem_object * -i915_gem_object_create_shmem(struct drm_i915_private *i915, u64 size) +static struct drm_i915_gem_object * +create_shmem(struct intel_memory_region *mem, + resource_size_t size, + unsigned int flags) { + static struct lock_class_key lock_class; + struct drm_i915_private *i915 = mem->i915; struct drm_i915_gem_object *obj; struct address_space *mapping; unsigned int cache_level; gfp_t mask; int ret; - /* There is a prevalence of the assumption that we fit the object's - * page count inside a 32bit _signed_ variable. Let's document this and - * catch if we ever need to fix it. In the meantime, if you do spot - * such a local variable, please consider fixing! - */ - if (size >> PAGE_SHIFT > INT_MAX) - return ERR_PTR(-E2BIG); - - if (overflows_type(size, obj->base.size)) - return ERR_PTR(-E2BIG); - obj = i915_gem_object_alloc(); if (!obj) return ERR_PTR(-ENOMEM); - ret = create_shmem(i915, &obj->base, size); + ret = __create_shmem(i915, &obj->base, size); if (ret) goto fail; @@ -486,7 +492,7 @@ i915_gem_object_create_shmem(struct drm_i915_private *i915, u64 size) mapping_set_gfp_mask(mapping, mask); GEM_BUG_ON(!(mapping_gfp_mask(mapping) & __GFP_RECLAIM)); - i915_gem_object_init(obj, &i915_gem_shmem_ops); + i915_gem_object_init(obj, &i915_gem_shmem_ops, &lock_class); obj->write_domain = I915_GEM_DOMAIN_CPU; obj->read_domains = I915_GEM_DOMAIN_CPU; @@ -510,7 +516,7 @@ i915_gem_object_create_shmem(struct drm_i915_private *i915, u64 size) i915_gem_object_set_cache_coherency(obj, cache_level); - trace_i915_gem_object_create(obj); + i915_gem_object_init_memory_region(obj, mem, 0); return obj; @@ -519,14 +525,22 @@ fail: return ERR_PTR(ret); } +struct drm_i915_gem_object * +i915_gem_object_create_shmem(struct drm_i915_private *i915, + resource_size_t size) +{ + return i915_gem_object_create_region(i915->mm.regions[INTEL_REGION_SMEM], + size, 0); +} + /* Allocate a new GEM object and fill it with the supplied data */ struct drm_i915_gem_object * i915_gem_object_create_shmem_from_data(struct drm_i915_private *dev_priv, - const void *data, size_t size) + const void *data, resource_size_t size) { struct drm_i915_gem_object *obj; struct file *file; - size_t offset; + resource_size_t offset; int err; obj = i915_gem_object_create_shmem(dev_priv, round_up(size, PAGE_SIZE)); @@ -569,3 +583,37 @@ fail: i915_gem_object_put(obj); return ERR_PTR(err); } + +static int init_shmem(struct intel_memory_region *mem) +{ + int err; + + err = i915_gemfs_init(mem->i915); + if (err) { 
+ DRM_NOTE("Unable to create a private tmpfs mount, hugepage support will be disabled(%d).\n", + err); + } + + intel_memory_region_set_name(mem, "system"); + + return 0; /* Don't error, we can simply fallback to the kernel mnt */ +} + +static void release_shmem(struct intel_memory_region *mem) +{ + i915_gemfs_fini(mem->i915); +} + +static const struct intel_memory_region_ops shmem_region_ops = { + .init = init_shmem, + .release = release_shmem, + .create_object = create_shmem, +}; + +struct intel_memory_region *i915_gem_shmem_setup(struct drm_i915_private *i915) +{ + return intel_memory_region_create(i915, 0, + totalram_pages() << PAGE_SHIFT, + PAGE_SIZE, 0, + &shmem_region_ops); +} diff --git a/drivers/gpu/drm/i915/gem/i915_gem_shrinker.c b/drivers/gpu/drm/i915/gem/i915_gem_shrinker.c index 3a926a8755c6..f7e4b39c734f 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_shrinker.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_shrinker.c @@ -16,40 +16,6 @@ #include "i915_trace.h" -static bool shrinker_lock(struct drm_i915_private *i915, - unsigned int flags, - bool *unlock) -{ - struct mutex *m = &i915->drm.struct_mutex; - - switch (mutex_trylock_recursive(m)) { - case MUTEX_TRYLOCK_RECURSIVE: - *unlock = false; - return true; - - case MUTEX_TRYLOCK_FAILED: - *unlock = false; - if (flags & I915_SHRINK_ACTIVE && - mutex_lock_killable_nested(m, I915_MM_SHRINKER) == 0) - *unlock = true; - return *unlock; - - case MUTEX_TRYLOCK_SUCCESS: - *unlock = true; - return true; - } - - BUG(); -} - -static void shrinker_unlock(struct drm_i915_private *i915, bool unlock) -{ - if (!unlock) - return; - - mutex_unlock(&i915->drm.struct_mutex); -} - static bool swap_available(void) { return get_nr_swap_pages() > 0; @@ -61,7 +27,8 @@ static bool can_release_pages(struct drm_i915_gem_object *obj) if (!i915_gem_object_is_shrinkable(obj)) return false; - /* Only report true if by unbinding the object and putting its pages + /* + * Only report true if by unbinding the object and putting its pages * we can actually make forward progress towards freeing physical * pages. * @@ -72,26 +39,26 @@ static bool can_release_pages(struct drm_i915_gem_object *obj) if (atomic_read(&obj->mm.pages_pin_count) > atomic_read(&obj->bind_count)) return false; - /* If any vma are "permanently" pinned, it will prevent us from - * reclaiming the obj->mm.pages. We only allow scanout objects to claim - * a permanent pin, along with a few others like the context objects. - * To simplify the scan, and to avoid walking the list of vma under the - * object, we just check the count of its permanently pinned. - */ - if (READ_ONCE(obj->pin_global)) - return false; - - /* We can only return physical pages to the system if we can either + /* + * We can only return physical pages to the system if we can either * discard the contents (because the user has marked them as being * purgeable) or if we can move their contents out to swap. 
*/ return swap_available() || obj->mm.madv == I915_MADV_DONTNEED; } -static bool unsafe_drop_pages(struct drm_i915_gem_object *obj) +static bool unsafe_drop_pages(struct drm_i915_gem_object *obj, + unsigned long shrink) { - if (i915_gem_object_unbind(obj) == 0) - __i915_gem_object_put_pages(obj, I915_MM_SHRINKER); + unsigned long flags; + + flags = 0; + if (shrink & I915_SHRINK_ACTIVE) + flags = I915_GEM_OBJECT_UNBIND_ACTIVE; + + if (i915_gem_object_unbind(obj, flags) == 0) + __i915_gem_object_put_pages(obj); + return !i915_gem_object_has_pages(obj); } @@ -154,10 +121,6 @@ i915_gem_shrink(struct drm_i915_private *i915, intel_wakeref_t wakeref = 0; unsigned long count = 0; unsigned long scanned = 0; - bool unlock; - - if (!shrinker_lock(i915, shrink, &unlock)) - return 0; /* * When shrinking the active list, we should also consider active @@ -169,7 +132,6 @@ i915_gem_shrink(struct drm_i915_private *i915, */ trace_i915_gem_shrink(i915, target, shrink); - i915_retire_requests(i915); /* * Unbinding of objects will require HW access; Let us not wake the @@ -230,8 +192,7 @@ i915_gem_shrink(struct drm_i915_private *i915, continue; if (!(shrink & I915_SHRINK_ACTIVE) && - (i915_gem_object_is_active(obj) || - i915_gem_object_is_framebuffer(obj))) + i915_gem_object_is_framebuffer(obj)) continue; if (!(shrink & I915_SHRINK_BOUND) && @@ -246,10 +207,9 @@ i915_gem_shrink(struct drm_i915_private *i915, spin_unlock_irqrestore(&i915->mm.obj_lock, flags); - if (unsafe_drop_pages(obj)) { + if (unsafe_drop_pages(obj, shrink)) { /* May arrive from get_pages on another bo */ - mutex_lock_nested(&obj->mm.lock, - I915_MM_SHRINKER); + mutex_lock(&obj->mm.lock); if (!i915_gem_object_has_pages(obj)) { try_to_writeback(obj, shrink); count += obj->base.size >> PAGE_SHIFT; @@ -269,10 +229,6 @@ i915_gem_shrink(struct drm_i915_private *i915, if (shrink & I915_SHRINK_BOUND) intel_runtime_pm_put(&i915->runtime_pm, wakeref); - i915_retire_requests(i915); - - shrinker_unlock(i915, unlock); - if (nr_scanned) *nr_scanned += scanned; return count; @@ -342,19 +298,14 @@ i915_gem_shrinker_scan(struct shrinker *shrinker, struct shrink_control *sc) struct drm_i915_private *i915 = container_of(shrinker, struct drm_i915_private, mm.shrinker); unsigned long freed; - bool unlock; sc->nr_scanned = 0; - if (!shrinker_lock(i915, 0, &unlock)) - return SHRINK_STOP; - freed = i915_gem_shrink(i915, sc->nr_to_scan, &sc->nr_scanned, I915_SHRINK_BOUND | - I915_SHRINK_UNBOUND | - I915_SHRINK_WRITEBACK); + I915_SHRINK_UNBOUND); if (sc->nr_scanned < sc->nr_to_scan && current_is_kswapd()) { intel_wakeref_t wakeref; @@ -369,8 +320,6 @@ i915_gem_shrinker_scan(struct shrinker *shrinker, struct shrink_control *sc) } } - shrinker_unlock(i915, unlock); - return sc->nr_scanned ? 
freed : SHRINK_STOP; } @@ -387,6 +336,7 @@ i915_gem_shrinker_oom(struct notifier_block *nb, unsigned long event, void *ptr) freed_pages = 0; with_intel_runtime_pm(&i915->runtime_pm, wakeref) freed_pages += i915_gem_shrink(i915, -1UL, NULL, + I915_SHRINK_ACTIVE | I915_SHRINK_BOUND | I915_SHRINK_UNBOUND | I915_SHRINK_WRITEBACK); @@ -422,16 +372,6 @@ i915_gem_shrinker_vmap(struct notifier_block *nb, unsigned long event, void *ptr struct i915_vma *vma, *next; unsigned long freed_pages = 0; intel_wakeref_t wakeref; - bool unlock; - - if (!shrinker_lock(i915, 0, &unlock)) - return NOTIFY_DONE; - - /* Force everything onto the inactive lists */ - if (i915_gem_wait_for_idle(i915, - I915_WAIT_LOCKED, - MAX_SCHEDULE_TIMEOUT)) - goto out; with_intel_runtime_pm(&i915->runtime_pm, wakeref) freed_pages += i915_gem_shrink(i915, -1UL, NULL, @@ -448,27 +388,16 @@ i915_gem_shrinker_vmap(struct notifier_block *nb, unsigned long event, void *ptr if (!vma->iomap || i915_vma_is_active(vma)) continue; - mutex_unlock(&i915->ggtt.vm.mutex); - if (i915_vma_unbind(vma) == 0) + if (__i915_vma_unbind(vma) == 0) freed_pages += count; - mutex_lock(&i915->ggtt.vm.mutex); } mutex_unlock(&i915->ggtt.vm.mutex); -out: - shrinker_unlock(i915, unlock); - *(unsigned long *)ptr += freed_pages; return NOTIFY_DONE; } -/** - * i915_gem_shrinker_register - Register the i915 shrinker - * @i915: i915 device - * - * This function registers and sets up the i915 shrinker and OOM handler. - */ -void i915_gem_shrinker_register(struct drm_i915_private *i915) +void i915_gem_driver_register__shrinker(struct drm_i915_private *i915) { i915->mm.shrinker.scan_objects = i915_gem_shrinker_scan; i915->mm.shrinker.count_objects = i915_gem_shrinker_count; @@ -483,13 +412,7 @@ void i915_gem_shrinker_register(struct drm_i915_private *i915) WARN_ON(register_vmap_purge_notifier(&i915->mm.vmap_notifier)); } -/** - * i915_gem_shrinker_unregister - Unregisters the i915 shrinker - * @i915: i915 device - * - * This function unregisters the i915 shrinker and OOM handler. - */ -void i915_gem_shrinker_unregister(struct drm_i915_private *i915) +void i915_gem_driver_unregister__shrinker(struct drm_i915_private *i915) { WARN_ON(unregister_vmap_purge_notifier(&i915->mm.vmap_notifier)); WARN_ON(unregister_oom_notifier(&i915->mm.oom_notifier)); @@ -512,24 +435,75 @@ void i915_gem_shrinker_taints_mutex(struct drm_i915_private *i915, fs_reclaim_acquire(GFP_KERNEL); + mutex_acquire(&mutex->dep_map, 0, 0, _RET_IP_); + mutex_release(&mutex->dep_map, _RET_IP_); + + fs_reclaim_release(GFP_KERNEL); + + if (unlock) + mutex_release(&i915->drm.struct_mutex.dep_map, _RET_IP_); +} + +#define obj_to_i915(obj__) to_i915((obj__)->base.dev) + +void i915_gem_object_make_unshrinkable(struct drm_i915_gem_object *obj) +{ + struct drm_i915_private *i915 = obj_to_i915(obj); + unsigned long flags; + /* - * As we invariably rely on the struct_mutex within the shrinker, - * but have a complicated recursion dance, taint all the mutexes used - * within the shrinker with the struct_mutex. For completeness, we - * taint with all subclass of struct_mutex, even though we should - * only need tainting by I915_MM_NORMAL to catch possible ABBA - * deadlocks from using struct_mutex inside @mutex. + * We can only be called while the pages are pinned or when + * the pages are released. If pinned, we should only be called + * from a single caller under controlled conditions; and on release + * only one caller may release us. Neither the two may cross. 
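The rule in the comment above is enforced by the shrink_pin counter introduced in the hunk that follows: only the 0 -> 1 transition takes the object off the shrinker lists and only the 1 -> 0 transition puts it back, with mm.obj_lock serialising the actual list moves. A single-threaded, standalone model of that counting (the spinlock and the real lists are reduced to a bool):

#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

static atomic_int shrink_pin;           /* reasons to stay off the shrink lists */
static bool on_shrink_list = true;      /* stand-in for obj->mm.link membership */

static void make_unshrinkable(void)
{
        /* Only the first pin hides the object; later pins just count. */
        if (atomic_fetch_add(&shrink_pin, 1) == 0)
                on_shrink_list = false;
}

static void make_shrinkable(void)
{
        /* Only the last unpin re-exposes the object to the shrinker. */
        if (atomic_fetch_sub(&shrink_pin, 1) == 1)
                on_shrink_list = true;
}

int main(void)
{
        make_unshrinkable();
        make_unshrinkable();
        make_shrinkable();
        printf("on list: %d\n", on_shrink_list);        /* still pinned: 0 */
        make_shrinkable();
        printf("on list: %d\n", on_shrink_list);        /* last unpin: 1 */
        return 0;
}

The real code also has a lock-free fast path (atomic_add_unless) for objects that are already pinned, which the model omits.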
*/ - mutex_acquire(&i915->drm.struct_mutex.dep_map, - I915_MM_SHRINKER, 0, _RET_IP_); + if (atomic_add_unless(&obj->mm.shrink_pin, 1, 0)) + return; - mutex_acquire(&mutex->dep_map, 0, 0, _RET_IP_); - mutex_release(&mutex->dep_map, 0, _RET_IP_); + spin_lock_irqsave(&i915->mm.obj_lock, flags); + if (!atomic_fetch_inc(&obj->mm.shrink_pin) && + !list_empty(&obj->mm.link)) { + list_del_init(&obj->mm.link); + i915->mm.shrink_count--; + i915->mm.shrink_memory -= obj->base.size; + } + spin_unlock_irqrestore(&i915->mm.obj_lock, flags); +} - mutex_release(&i915->drm.struct_mutex.dep_map, 0, _RET_IP_); +static void __i915_gem_object_make_shrinkable(struct drm_i915_gem_object *obj, + struct list_head *head) +{ + struct drm_i915_private *i915 = obj_to_i915(obj); + unsigned long flags; - fs_reclaim_release(GFP_KERNEL); + GEM_BUG_ON(!i915_gem_object_has_pages(obj)); + if (!i915_gem_object_is_shrinkable(obj)) + return; - if (unlock) - mutex_release(&i915->drm.struct_mutex.dep_map, 0, _RET_IP_); + if (atomic_add_unless(&obj->mm.shrink_pin, -1, 1)) + return; + + spin_lock_irqsave(&i915->mm.obj_lock, flags); + GEM_BUG_ON(!kref_read(&obj->base.refcount)); + if (atomic_dec_and_test(&obj->mm.shrink_pin)) { + GEM_BUG_ON(!list_empty(&obj->mm.link)); + + list_add_tail(&obj->mm.link, head); + i915->mm.shrink_count++; + i915->mm.shrink_memory += obj->base.size; + + } + spin_unlock_irqrestore(&i915->mm.obj_lock, flags); +} + +void i915_gem_object_make_shrinkable(struct drm_i915_gem_object *obj) +{ + __i915_gem_object_make_shrinkable(obj, + &obj_to_i915(obj)->mm.shrink_list); +} + +void i915_gem_object_make_purgeable(struct drm_i915_gem_object *obj) +{ + __i915_gem_object_make_shrinkable(obj, + &obj_to_i915(obj)->mm.purge_list); } diff --git a/drivers/gpu/drm/i915/gem/i915_gem_shrinker.h b/drivers/gpu/drm/i915/gem/i915_gem_shrinker.h new file mode 100644 index 000000000000..b397d7785789 --- /dev/null +++ b/drivers/gpu/drm/i915/gem/i915_gem_shrinker.h @@ -0,0 +1,31 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2019 Intel Corporation + */ + +#ifndef __I915_GEM_SHRINKER_H__ +#define __I915_GEM_SHRINKER_H__ + +#include <linux/bits.h> + +struct drm_i915_private; +struct mutex; + +/* i915_gem_shrinker.c */ +unsigned long i915_gem_shrink(struct drm_i915_private *i915, + unsigned long target, + unsigned long *nr_scanned, + unsigned flags); +#define I915_SHRINK_UNBOUND BIT(0) +#define I915_SHRINK_BOUND BIT(1) +#define I915_SHRINK_ACTIVE BIT(2) +#define I915_SHRINK_VMAPS BIT(3) +#define I915_SHRINK_WRITEBACK BIT(4) + +unsigned long i915_gem_shrink_all(struct drm_i915_private *i915); +void i915_gem_driver_register__shrinker(struct drm_i915_private *i915); +void i915_gem_driver_unregister__shrinker(struct drm_i915_private *i915); +void i915_gem_shrinker_taints_mutex(struct drm_i915_private *i915, + struct mutex *mutex); + +#endif /* __I915_GEM_SHRINKER_H__ */ diff --git a/drivers/gpu/drm/i915/gem/i915_gem_stolen.c b/drivers/gpu/drm/i915/gem/i915_gem_stolen.c index de1fab2058ec..451f3078d60d 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_stolen.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_stolen.c @@ -10,7 +10,9 @@ #include <drm/drm_mm.h> #include <drm/i915_drm.h> +#include "gem/i915_gem_region.h" #include "i915_drv.h" +#include "i915_gem_stolen.h" /* * The BIOS typically reserves some of the system's memory for the exclusive @@ -24,48 +26,49 @@ * for is a boon. 
*/ -int i915_gem_stolen_insert_node_in_range(struct drm_i915_private *dev_priv, +int i915_gem_stolen_insert_node_in_range(struct drm_i915_private *i915, struct drm_mm_node *node, u64 size, unsigned alignment, u64 start, u64 end) { int ret; - if (!drm_mm_initialized(&dev_priv->mm.stolen)) + if (!drm_mm_initialized(&i915->mm.stolen)) return -ENODEV; /* WaSkipStolenMemoryFirstPage:bdw+ */ - if (INTEL_GEN(dev_priv) >= 8 && start < 4096) + if (INTEL_GEN(i915) >= 8 && start < 4096) start = 4096; - mutex_lock(&dev_priv->mm.stolen_lock); - ret = drm_mm_insert_node_in_range(&dev_priv->mm.stolen, node, + mutex_lock(&i915->mm.stolen_lock); + ret = drm_mm_insert_node_in_range(&i915->mm.stolen, node, size, alignment, 0, start, end, DRM_MM_INSERT_BEST); - mutex_unlock(&dev_priv->mm.stolen_lock); + mutex_unlock(&i915->mm.stolen_lock); return ret; } -int i915_gem_stolen_insert_node(struct drm_i915_private *dev_priv, +int i915_gem_stolen_insert_node(struct drm_i915_private *i915, struct drm_mm_node *node, u64 size, unsigned alignment) { - return i915_gem_stolen_insert_node_in_range(dev_priv, node, size, + return i915_gem_stolen_insert_node_in_range(i915, node, size, alignment, 0, U64_MAX); } -void i915_gem_stolen_remove_node(struct drm_i915_private *dev_priv, +void i915_gem_stolen_remove_node(struct drm_i915_private *i915, struct drm_mm_node *node) { - mutex_lock(&dev_priv->mm.stolen_lock); + mutex_lock(&i915->mm.stolen_lock); drm_mm_remove_node(node); - mutex_unlock(&dev_priv->mm.stolen_lock); + mutex_unlock(&i915->mm.stolen_lock); } -static int i915_adjust_stolen(struct drm_i915_private *dev_priv, +static int i915_adjust_stolen(struct drm_i915_private *i915, struct resource *dsm) { - struct i915_ggtt *ggtt = &dev_priv->ggtt; + struct i915_ggtt *ggtt = &i915->ggtt; + struct intel_uncore *uncore = ggtt->vm.gt->uncore; struct resource *r; if (dsm->start == 0 || dsm->end <= dsm->start) @@ -77,14 +80,14 @@ static int i915_adjust_stolen(struct drm_i915_private *dev_priv, */ /* Make sure we don't clobber the GTT if it's within stolen memory */ - if (INTEL_GEN(dev_priv) <= 4 && - !IS_G33(dev_priv) && !IS_PINEVIEW(dev_priv) && !IS_G4X(dev_priv)) { + if (INTEL_GEN(i915) <= 4 && + !IS_G33(i915) && !IS_PINEVIEW(i915) && !IS_G4X(i915)) { struct resource stolen[2] = {*dsm, *dsm}; struct resource ggtt_res; resource_size_t ggtt_start; - ggtt_start = I915_READ(PGTBL_CTL); - if (IS_GEN(dev_priv, 4)) + ggtt_start = intel_uncore_read(uncore, PGTBL_CTL); + if (IS_GEN(i915, 4)) ggtt_start = (ggtt_start & PGTBL_ADDRESS_LO_MASK) | (ggtt_start & PGTBL_ADDRESS_HI_MASK) << 28; else @@ -118,7 +121,7 @@ static int i915_adjust_stolen(struct drm_i915_private *dev_priv, * kernel. So if the region is already marked as busy, something * is seriously wrong. */ - r = devm_request_mem_region(dev_priv->drm.dev, dsm->start, + r = devm_request_mem_region(i915->drm.dev, dsm->start, resource_size(dsm), "Graphics Stolen Memory"); if (r == NULL) { @@ -131,14 +134,14 @@ static int i915_adjust_stolen(struct drm_i915_private *dev_priv, * reservation starting from 1 instead of 0. * There's also BIOS with off-by-one on the other end. */ - r = devm_request_mem_region(dev_priv->drm.dev, dsm->start + 1, + r = devm_request_mem_region(i915->drm.dev, dsm->start + 1, resource_size(dsm) - 2, "Graphics Stolen Memory"); /* * GEN3 firmware likes to smash pci bridges into the stolen * range. Apparently this works. 
*/ - if (r == NULL && !IS_GEN(dev_priv, 3)) { + if (!r && !IS_GEN(i915, 3)) { DRM_ERROR("conflict detected with stolen region: %pR\n", dsm); @@ -149,25 +152,27 @@ static int i915_adjust_stolen(struct drm_i915_private *dev_priv, return 0; } -void i915_gem_cleanup_stolen(struct drm_i915_private *dev_priv) +static void i915_gem_cleanup_stolen(struct drm_i915_private *i915) { - if (!drm_mm_initialized(&dev_priv->mm.stolen)) + if (!drm_mm_initialized(&i915->mm.stolen)) return; - drm_mm_takedown(&dev_priv->mm.stolen); + drm_mm_takedown(&i915->mm.stolen); } -static void g4x_get_stolen_reserved(struct drm_i915_private *dev_priv, +static void g4x_get_stolen_reserved(struct drm_i915_private *i915, + struct intel_uncore *uncore, resource_size_t *base, resource_size_t *size) { - u32 reg_val = I915_READ(IS_GM45(dev_priv) ? - CTG_STOLEN_RESERVED : - ELK_STOLEN_RESERVED); - resource_size_t stolen_top = dev_priv->dsm.end + 1; + u32 reg_val = intel_uncore_read(uncore, + IS_GM45(i915) ? + CTG_STOLEN_RESERVED : + ELK_STOLEN_RESERVED); + resource_size_t stolen_top = i915->dsm.end + 1; DRM_DEBUG_DRIVER("%s_STOLEN_RESERVED = %08x\n", - IS_GM45(dev_priv) ? "CTG" : "ELK", reg_val); + IS_GM45(i915) ? "CTG" : "ELK", reg_val); if ((reg_val & G4X_STOLEN_RESERVED_ENABLE) == 0) return; @@ -176,7 +181,7 @@ static void g4x_get_stolen_reserved(struct drm_i915_private *dev_priv, * Whether ILK really reuses the ELK register for this is unclear. * Let's see if we catch anyone with this supposedly enabled on ILK. */ - WARN(IS_GEN(dev_priv, 5), "ILK stolen reserved found? 0x%08x\n", + WARN(IS_GEN(i915, 5), "ILK stolen reserved found? 0x%08x\n", reg_val); if (!(reg_val & G4X_STOLEN_RESERVED_ADDR2_MASK)) @@ -188,11 +193,12 @@ static void g4x_get_stolen_reserved(struct drm_i915_private *dev_priv, *size = stolen_top - *base; } -static void gen6_get_stolen_reserved(struct drm_i915_private *dev_priv, +static void gen6_get_stolen_reserved(struct drm_i915_private *i915, + struct intel_uncore *uncore, resource_size_t *base, resource_size_t *size) { - u32 reg_val = I915_READ(GEN6_STOLEN_RESERVED); + u32 reg_val = intel_uncore_read(uncore, GEN6_STOLEN_RESERVED); DRM_DEBUG_DRIVER("GEN6_STOLEN_RESERVED = %08x\n", reg_val); @@ -220,12 +226,13 @@ static void gen6_get_stolen_reserved(struct drm_i915_private *dev_priv, } } -static void vlv_get_stolen_reserved(struct drm_i915_private *dev_priv, +static void vlv_get_stolen_reserved(struct drm_i915_private *i915, + struct intel_uncore *uncore, resource_size_t *base, resource_size_t *size) { - u32 reg_val = I915_READ(GEN6_STOLEN_RESERVED); - resource_size_t stolen_top = dev_priv->dsm.end + 1; + u32 reg_val = intel_uncore_read(uncore, GEN6_STOLEN_RESERVED); + resource_size_t stolen_top = i915->dsm.end + 1; DRM_DEBUG_DRIVER("GEN6_STOLEN_RESERVED = %08x\n", reg_val); @@ -248,11 +255,12 @@ static void vlv_get_stolen_reserved(struct drm_i915_private *dev_priv, *base = stolen_top - *size; } -static void gen7_get_stolen_reserved(struct drm_i915_private *dev_priv, +static void gen7_get_stolen_reserved(struct drm_i915_private *i915, + struct intel_uncore *uncore, resource_size_t *base, resource_size_t *size) { - u32 reg_val = I915_READ(GEN6_STOLEN_RESERVED); + u32 reg_val = intel_uncore_read(uncore, GEN6_STOLEN_RESERVED); DRM_DEBUG_DRIVER("GEN6_STOLEN_RESERVED = %08x\n", reg_val); @@ -274,11 +282,12 @@ static void gen7_get_stolen_reserved(struct drm_i915_private *dev_priv, } } -static void chv_get_stolen_reserved(struct drm_i915_private *dev_priv, +static void chv_get_stolen_reserved(struct 
drm_i915_private *i915, + struct intel_uncore *uncore, resource_size_t *base, resource_size_t *size) { - u32 reg_val = I915_READ(GEN6_STOLEN_RESERVED); + u32 reg_val = intel_uncore_read(uncore, GEN6_STOLEN_RESERVED); DRM_DEBUG_DRIVER("GEN6_STOLEN_RESERVED = %08x\n", reg_val); @@ -306,12 +315,13 @@ static void chv_get_stolen_reserved(struct drm_i915_private *dev_priv, } } -static void bdw_get_stolen_reserved(struct drm_i915_private *dev_priv, +static void bdw_get_stolen_reserved(struct drm_i915_private *i915, + struct intel_uncore *uncore, resource_size_t *base, resource_size_t *size) { - u32 reg_val = I915_READ(GEN6_STOLEN_RESERVED); - resource_size_t stolen_top = dev_priv->dsm.end + 1; + u32 reg_val = intel_uncore_read(uncore, GEN6_STOLEN_RESERVED); + resource_size_t stolen_top = i915->dsm.end + 1; DRM_DEBUG_DRIVER("GEN6_STOLEN_RESERVED = %08x\n", reg_val); @@ -326,10 +336,11 @@ static void bdw_get_stolen_reserved(struct drm_i915_private *dev_priv, } static void icl_get_stolen_reserved(struct drm_i915_private *i915, + struct intel_uncore *uncore, resource_size_t *base, resource_size_t *size) { - u64 reg_val = intel_uncore_read64(&i915->uncore, GEN6_STOLEN_RESERVED); + u64 reg_val = intel_uncore_read64(uncore, GEN6_STOLEN_RESERVED); DRM_DEBUG_DRIVER("GEN6_STOLEN_RESERVED = 0x%016llx\n", reg_val); @@ -354,75 +365,84 @@ static void icl_get_stolen_reserved(struct drm_i915_private *i915, } } -int i915_gem_init_stolen(struct drm_i915_private *dev_priv) +static int i915_gem_init_stolen(struct drm_i915_private *i915) { + struct intel_uncore *uncore = &i915->uncore; resource_size_t reserved_base, stolen_top; resource_size_t reserved_total, reserved_size; - mutex_init(&dev_priv->mm.stolen_lock); + mutex_init(&i915->mm.stolen_lock); - if (intel_vgpu_active(dev_priv)) { - DRM_INFO("iGVT-g active, disabling use of stolen memory\n"); + if (intel_vgpu_active(i915)) { + dev_notice(i915->drm.dev, + "%s, disabling use of stolen memory\n", + "iGVT-g active"); return 0; } - if (intel_vtd_active() && INTEL_GEN(dev_priv) < 8) { - DRM_INFO("DMAR active, disabling use of stolen memory\n"); + if (intel_vtd_active() && INTEL_GEN(i915) < 8) { + dev_notice(i915->drm.dev, + "%s, disabling use of stolen memory\n", + "DMAR active"); return 0; } if (resource_size(&intel_graphics_stolen_res) == 0) return 0; - dev_priv->dsm = intel_graphics_stolen_res; + i915->dsm = intel_graphics_stolen_res; - if (i915_adjust_stolen(dev_priv, &dev_priv->dsm)) + if (i915_adjust_stolen(i915, &i915->dsm)) return 0; - GEM_BUG_ON(dev_priv->dsm.start == 0); - GEM_BUG_ON(dev_priv->dsm.end <= dev_priv->dsm.start); + GEM_BUG_ON(i915->dsm.start == 0); + GEM_BUG_ON(i915->dsm.end <= i915->dsm.start); - stolen_top = dev_priv->dsm.end + 1; + stolen_top = i915->dsm.end + 1; reserved_base = stolen_top; reserved_size = 0; - switch (INTEL_GEN(dev_priv)) { + switch (INTEL_GEN(i915)) { case 2: case 3: break; case 4: - if (!IS_G4X(dev_priv)) + if (!IS_G4X(i915)) break; /* fall through */ case 5: - g4x_get_stolen_reserved(dev_priv, + g4x_get_stolen_reserved(i915, uncore, &reserved_base, &reserved_size); break; case 6: - gen6_get_stolen_reserved(dev_priv, + gen6_get_stolen_reserved(i915, uncore, &reserved_base, &reserved_size); break; case 7: - if (IS_VALLEYVIEW(dev_priv)) - vlv_get_stolen_reserved(dev_priv, + if (IS_VALLEYVIEW(i915)) + vlv_get_stolen_reserved(i915, uncore, &reserved_base, &reserved_size); else - gen7_get_stolen_reserved(dev_priv, + gen7_get_stolen_reserved(i915, uncore, &reserved_base, &reserved_size); break; case 8: case 9: case 10: - if 
(IS_LP(dev_priv)) - chv_get_stolen_reserved(dev_priv, + if (IS_LP(i915)) + chv_get_stolen_reserved(i915, uncore, &reserved_base, &reserved_size); else - bdw_get_stolen_reserved(dev_priv, + bdw_get_stolen_reserved(i915, uncore, &reserved_base, &reserved_size); break; - case 11: default: - icl_get_stolen_reserved(dev_priv, &reserved_base, + MISSING_CASE(INTEL_GEN(i915)); + /* fall-through */ + case 11: + case 12: + icl_get_stolen_reserved(i915, uncore, + &reserved_base, &reserved_size); break; } @@ -439,12 +459,12 @@ int i915_gem_init_stolen(struct drm_i915_private *dev_priv) reserved_size = 0; } - dev_priv->dsm_reserved = - (struct resource) DEFINE_RES_MEM(reserved_base, reserved_size); + i915->dsm_reserved = + (struct resource)DEFINE_RES_MEM(reserved_base, reserved_size); - if (!resource_contains(&dev_priv->dsm, &dev_priv->dsm_reserved)) { + if (!resource_contains(&i915->dsm, &i915->dsm_reserved)) { DRM_ERROR("Stolen reserved area %pR outside stolen memory %pR\n", - &dev_priv->dsm_reserved, &dev_priv->dsm); + &i915->dsm_reserved, &i915->dsm); return 0; } @@ -453,14 +473,14 @@ int i915_gem_init_stolen(struct drm_i915_private *dev_priv) reserved_total = stolen_top - reserved_base; DRM_DEBUG_DRIVER("Memory reserved for graphics device: %lluK, usable: %lluK\n", - (u64)resource_size(&dev_priv->dsm) >> 10, - ((u64)resource_size(&dev_priv->dsm) - reserved_total) >> 10); + (u64)resource_size(&i915->dsm) >> 10, + ((u64)resource_size(&i915->dsm) - reserved_total) >> 10); - dev_priv->stolen_usable_size = - resource_size(&dev_priv->dsm) - reserved_total; + i915->stolen_usable_size = + resource_size(&i915->dsm) - reserved_total; /* Basic memrange allocator for stolen space. */ - drm_mm_init(&dev_priv->mm.stolen, 0, dev_priv->stolen_usable_size); + drm_mm_init(&i915->mm.stolen, 0, i915->stolen_usable_size); return 0; } @@ -469,11 +489,11 @@ static struct sg_table * i915_pages_create_for_stolen(struct drm_device *dev, resource_size_t offset, resource_size_t size) { - struct drm_i915_private *dev_priv = to_i915(dev); + struct drm_i915_private *i915 = to_i915(dev); struct sg_table *st; struct scatterlist *sg; - GEM_BUG_ON(range_overflows(offset, size, resource_size(&dev_priv->dsm))); + GEM_BUG_ON(range_overflows(offset, size, resource_size(&i915->dsm))); /* We hide that we have no struct page backing our stolen object * by wrapping the contiguous physical allocation with a fake @@ -493,7 +513,7 @@ i915_pages_create_for_stolen(struct drm_device *dev, sg->offset = 0; sg->length = size; - sg_dma_address(sg) = (dma_addr_t)dev_priv->dsm.start + offset; + sg_dma_address(sg) = (dma_addr_t)i915->dsm.start + offset; sg_dma_len(sg) = size; return st; @@ -524,14 +544,14 @@ static void i915_gem_object_put_pages_stolen(struct drm_i915_gem_object *obj, static void i915_gem_object_release_stolen(struct drm_i915_gem_object *obj) { - struct drm_i915_private *dev_priv = to_i915(obj->base.dev); + struct drm_i915_private *i915 = to_i915(obj->base.dev); struct drm_mm_node *stolen = fetch_and_zero(&obj->stolen); GEM_BUG_ON(!stolen); - __i915_gem_object_unpin_pages(obj); + i915_gem_object_release_memory_region(obj); - i915_gem_stolen_remove_node(dev_priv, stolen); + i915_gem_stolen_remove_node(i915, stolen); kfree(stolen); } @@ -542,83 +562,133 @@ static const struct drm_i915_gem_object_ops i915_gem_object_stolen_ops = { }; static struct drm_i915_gem_object * -_i915_gem_object_create_stolen(struct drm_i915_private *dev_priv, - struct drm_mm_node *stolen) +__i915_gem_object_create_stolen(struct intel_memory_region *mem, + struct 
drm_mm_node *stolen) { + static struct lock_class_key lock_class; struct drm_i915_gem_object *obj; unsigned int cache_level; + int err = -ENOMEM; obj = i915_gem_object_alloc(); - if (obj == NULL) - return NULL; + if (!obj) + goto err; - drm_gem_private_object_init(&dev_priv->drm, &obj->base, stolen->size); - i915_gem_object_init(obj, &i915_gem_object_stolen_ops); + drm_gem_private_object_init(&mem->i915->drm, &obj->base, stolen->size); + i915_gem_object_init(obj, &i915_gem_object_stolen_ops, &lock_class); obj->stolen = stolen; obj->read_domains = I915_GEM_DOMAIN_CPU | I915_GEM_DOMAIN_GTT; - cache_level = HAS_LLC(dev_priv) ? I915_CACHE_LLC : I915_CACHE_NONE; + cache_level = HAS_LLC(mem->i915) ? I915_CACHE_LLC : I915_CACHE_NONE; i915_gem_object_set_cache_coherency(obj, cache_level); - if (i915_gem_object_pin_pages(obj)) + err = i915_gem_object_pin_pages(obj); + if (err) goto cleanup; + i915_gem_object_init_memory_region(obj, mem, 0); + return obj; cleanup: i915_gem_object_free(obj); - return NULL; +err: + return ERR_PTR(err); } -struct drm_i915_gem_object * -i915_gem_object_create_stolen(struct drm_i915_private *dev_priv, - resource_size_t size) +static struct drm_i915_gem_object * +_i915_gem_object_create_stolen(struct intel_memory_region *mem, + resource_size_t size, + unsigned int flags) { + struct drm_i915_private *i915 = mem->i915; struct drm_i915_gem_object *obj; struct drm_mm_node *stolen; int ret; - if (!drm_mm_initialized(&dev_priv->mm.stolen)) - return NULL; + if (!drm_mm_initialized(&i915->mm.stolen)) + return ERR_PTR(-ENODEV); if (size == 0) - return NULL; + return ERR_PTR(-EINVAL); stolen = kzalloc(sizeof(*stolen), GFP_KERNEL); if (!stolen) - return NULL; + return ERR_PTR(-ENOMEM); - ret = i915_gem_stolen_insert_node(dev_priv, stolen, size, 4096); + ret = i915_gem_stolen_insert_node(i915, stolen, size, 4096); if (ret) { - kfree(stolen); - return NULL; + obj = ERR_PTR(ret); + goto err_free; } - obj = _i915_gem_object_create_stolen(dev_priv, stolen); - if (obj) - return obj; + obj = __i915_gem_object_create_stolen(mem, stolen); + if (IS_ERR(obj)) + goto err_remove; + + return obj; - i915_gem_stolen_remove_node(dev_priv, stolen); +err_remove: + i915_gem_stolen_remove_node(i915, stolen); +err_free: kfree(stolen); - return NULL; + return obj; } struct drm_i915_gem_object * -i915_gem_object_create_stolen_for_preallocated(struct drm_i915_private *dev_priv, +i915_gem_object_create_stolen(struct drm_i915_private *i915, + resource_size_t size) +{ + return i915_gem_object_create_region(i915->mm.regions[INTEL_REGION_STOLEN], + size, I915_BO_ALLOC_CONTIGUOUS); +} + +static int init_stolen(struct intel_memory_region *mem) +{ + intel_memory_region_set_name(mem, "stolen"); + + /* + * Initialise stolen early so that we may reserve preallocated + * objects for the BIOS to KMS transition. 
+ */ + return i915_gem_init_stolen(mem->i915); +} + +static void release_stolen(struct intel_memory_region *mem) +{ + i915_gem_cleanup_stolen(mem->i915); +} + +static const struct intel_memory_region_ops i915_region_stolen_ops = { + .init = init_stolen, + .release = release_stolen, + .create_object = _i915_gem_object_create_stolen, +}; + +struct intel_memory_region *i915_gem_stolen_setup(struct drm_i915_private *i915) +{ + return intel_memory_region_create(i915, + intel_graphics_stolen_res.start, + resource_size(&intel_graphics_stolen_res), + PAGE_SIZE, 0, + &i915_region_stolen_ops); +} + +struct drm_i915_gem_object * +i915_gem_object_create_stolen_for_preallocated(struct drm_i915_private *i915, resource_size_t stolen_offset, resource_size_t gtt_offset, resource_size_t size) { - struct i915_ggtt *ggtt = &dev_priv->ggtt; + struct intel_memory_region *mem = i915->mm.regions[INTEL_REGION_STOLEN]; + struct i915_ggtt *ggtt = &i915->ggtt; struct drm_i915_gem_object *obj; struct drm_mm_node *stolen; struct i915_vma *vma; int ret; - if (!drm_mm_initialized(&dev_priv->mm.stolen)) - return NULL; - - lockdep_assert_held(&dev_priv->drm.struct_mutex); + if (!drm_mm_initialized(&i915->mm.stolen)) + return ERR_PTR(-ENODEV); DRM_DEBUG_DRIVER("creating preallocated stolen object: stolen_offset=%pa, gtt_offset=%pa, size=%pa\n", &stolen_offset, >t_offset, &size); @@ -627,29 +697,29 @@ i915_gem_object_create_stolen_for_preallocated(struct drm_i915_private *dev_priv if (WARN_ON(size == 0) || WARN_ON(!IS_ALIGNED(size, I915_GTT_PAGE_SIZE)) || WARN_ON(!IS_ALIGNED(stolen_offset, I915_GTT_MIN_ALIGNMENT))) - return NULL; + return ERR_PTR(-EINVAL); stolen = kzalloc(sizeof(*stolen), GFP_KERNEL); if (!stolen) - return NULL; + return ERR_PTR(-ENOMEM); stolen->start = stolen_offset; stolen->size = size; - mutex_lock(&dev_priv->mm.stolen_lock); - ret = drm_mm_reserve_node(&dev_priv->mm.stolen, stolen); - mutex_unlock(&dev_priv->mm.stolen_lock); + mutex_lock(&i915->mm.stolen_lock); + ret = drm_mm_reserve_node(&i915->mm.stolen, stolen); + mutex_unlock(&i915->mm.stolen_lock); if (ret) { DRM_DEBUG_DRIVER("failed to allocate stolen space\n"); kfree(stolen); - return NULL; + return ERR_PTR(ret); } - obj = _i915_gem_object_create_stolen(dev_priv, stolen); - if (obj == NULL) { + obj = __i915_gem_object_create_stolen(mem, stolen); + if (IS_ERR(obj)) { DRM_DEBUG_DRIVER("failed to allocate stolen object\n"); - i915_gem_stolen_remove_node(dev_priv, stolen); + i915_gem_stolen_remove_node(i915, stolen); kfree(stolen); - return NULL; + return obj; } /* Some objects just need physical mem from stolen space */ @@ -671,22 +741,26 @@ i915_gem_object_create_stolen_for_preallocated(struct drm_i915_private *dev_priv * setting up the GTT space. The actual reservation will occur * later. 
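The stolen constructors in this file now report failure through the kernel's ERR_PTR encoding rather than returning NULL, so a single pointer return carries either the object or a negative errno. A standalone model of that convention (mirroring, not reusing, <linux/err.h>; the 4095 cut-off matches MAX_ERRNO there, and all names are illustrative):

#include <errno.h>
#include <stdint.h>
#include <stdio.h>

#define MODEL_MAX_ERRNO 4095

static inline void *err_ptr(long error)        { return (void *)error; }
static inline long ptr_err(const void *ptr)    { return (long)ptr; }
static inline int is_err(const void *ptr)
{
        /* Errnos occupy the top MODEL_MAX_ERRNO values of the address space. */
        return (uintptr_t)ptr >= (uintptr_t)-MODEL_MAX_ERRNO;
}

static void *create_object(int fail)
{
        static int object;      /* stand-in for a real allocation */

        return fail ? err_ptr(-ENODEV) : (void *)&object;
}

int main(void)
{
        void *obj = create_object(1);

        if (is_err(obj))
                printf("create failed: %ld\n", ptr_err(obj));   /* -ENODEV */
        else
                printf("got object %p\n", obj);
        return 0;
}

Callers therefore switch from NULL checks to IS_ERR()/PTR_ERR(), which is exactly what the preallocated-stolen path below does.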
*/ + mutex_lock(&ggtt->vm.mutex); ret = i915_gem_gtt_reserve(&ggtt->vm, &vma->node, size, gtt_offset, obj->cache_level, 0); if (ret) { DRM_DEBUG_DRIVER("failed to allocate stolen GTT space\n"); + mutex_unlock(&ggtt->vm.mutex); goto err_pages; } GEM_BUG_ON(!drm_mm_node_allocated(&vma->node)); + GEM_BUG_ON(vma->pages); vma->pages = obj->mm.pages; - vma->flags |= I915_VMA_GLOBAL_BIND; + atomic_set(&vma->pages_count, I915_VMA_PAGES_ACTIVE); + + set_bit(I915_VMA_GLOBAL_BIND_BIT, __i915_vma_flags(vma)); __i915_vma_set_map_and_fenceable(vma); - mutex_lock(&ggtt->vm.mutex); - list_move_tail(&vma->vm_link, &ggtt->vm.bound_list); + list_add_tail(&vma->vm_link, &ggtt->vm.bound_list); mutex_unlock(&ggtt->vm.mutex); GEM_BUG_ON(i915_gem_object_is_shrinkable(obj)); @@ -698,5 +772,5 @@ err_pages: i915_gem_object_unpin_pages(obj); err: i915_gem_object_put(obj); - return NULL; + return ERR_PTR(ret); } diff --git a/drivers/gpu/drm/i915/gem/i915_gem_stolen.h b/drivers/gpu/drm/i915/gem/i915_gem_stolen.h new file mode 100644 index 000000000000..c1040627fbf3 --- /dev/null +++ b/drivers/gpu/drm/i915/gem/i915_gem_stolen.h @@ -0,0 +1,34 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2019 Intel Corporation + */ + +#ifndef __I915_GEM_STOLEN_H__ +#define __I915_GEM_STOLEN_H__ + +#include <linux/types.h> + +struct drm_i915_private; +struct drm_mm_node; +struct drm_i915_gem_object; + +int i915_gem_stolen_insert_node(struct drm_i915_private *dev_priv, + struct drm_mm_node *node, u64 size, + unsigned alignment); +int i915_gem_stolen_insert_node_in_range(struct drm_i915_private *dev_priv, + struct drm_mm_node *node, u64 size, + unsigned alignment, u64 start, + u64 end); +void i915_gem_stolen_remove_node(struct drm_i915_private *dev_priv, + struct drm_mm_node *node); +struct intel_memory_region *i915_gem_stolen_setup(struct drm_i915_private *i915); +struct drm_i915_gem_object * +i915_gem_object_create_stolen(struct drm_i915_private *dev_priv, + resource_size_t size); +struct drm_i915_gem_object * +i915_gem_object_create_stolen_for_preallocated(struct drm_i915_private *dev_priv, + resource_size_t stolen_offset, + resource_size_t gtt_offset, + resource_size_t size); + +#endif /* __I915_GEM_STOLEN_H__ */ diff --git a/drivers/gpu/drm/i915/gem/i915_gem_throttle.c b/drivers/gpu/drm/i915/gem/i915_gem_throttle.c index adb3074d9ce2..540ef0551789 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_throttle.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_throttle.c @@ -41,7 +41,7 @@ i915_gem_throttle_ioctl(struct drm_device *dev, void *data, long ret; /* ABI: return -EIO if already wedged */ - ret = i915_terminally_wedged(to_i915(dev)); + ret = intel_gt_terminally_wedged(&to_i915(dev)->gt); if (ret) return ret; @@ -50,10 +50,8 @@ i915_gem_throttle_ioctl(struct drm_device *dev, void *data, if (time_after_eq(request->emitted_jiffies, recent_enough)) break; - if (target) { + if (target && xchg(&target->file_priv, NULL)) list_del(&target->client_link); - target->file_priv = NULL; - } target = request; } diff --git a/drivers/gpu/drm/i915/gem/i915_gem_tiling.c b/drivers/gpu/drm/i915/gem/i915_gem_tiling.c index ca0c2f451742..6c7825a2dc2a 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_tiling.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_tiling.c @@ -11,6 +11,7 @@ #include "i915_drv.h" #include "i915_gem.h" #include "i915_gem_ioctls.h" +#include "i915_gem_mman.h" #include "i915_gem_object.h" /** @@ -181,22 +182,25 @@ static int i915_gem_object_fence_prepare(struct drm_i915_gem_object *obj, int tiling_mode, unsigned int stride) { + struct 
i915_ggtt *ggtt = &to_i915(obj->base.dev)->ggtt; struct i915_vma *vma; - int ret; + int ret = 0; if (tiling_mode == I915_TILING_NONE) return 0; + mutex_lock(&ggtt->vm.mutex); for_each_ggtt_vma(vma, obj) { if (i915_vma_fence_prepare(vma, tiling_mode, stride)) continue; - ret = i915_vma_unbind(vma); + ret = __i915_vma_unbind(vma); if (ret) - return ret; + break; } + mutex_unlock(&ggtt->vm.mutex); - return 0; + return ret; } int @@ -212,7 +216,6 @@ i915_gem_object_set_tiling(struct drm_i915_gem_object *obj, GEM_BUG_ON(!i915_tiling_ok(obj, tiling, stride)); GEM_BUG_ON(!stride ^ (tiling == I915_TILING_NONE)); - lockdep_assert_held(&i915->drm.struct_mutex); if ((tiling | stride) == obj->tiling_and_stride) return 0; @@ -233,16 +236,18 @@ i915_gem_object_set_tiling(struct drm_i915_gem_object *obj, * whilst executing a fenced command for an untiled object. */ - err = i915_gem_object_fence_prepare(obj, tiling, stride); - if (err) - return err; - i915_gem_object_lock(obj); if (i915_gem_object_is_framebuffer(obj)) { i915_gem_object_unlock(obj); return -EBUSY; } + err = i915_gem_object_fence_prepare(obj, tiling, stride); + if (err) { + i915_gem_object_unlock(obj); + return err; + } + /* If the memory has unknown (i.e. varying) swizzling, we pin the * pages to prevent them being swapped out and causing corruption * due to the change in swizzling. @@ -313,10 +318,14 @@ int i915_gem_set_tiling_ioctl(struct drm_device *dev, void *data, struct drm_file *file) { + struct drm_i915_private *dev_priv = to_i915(dev); struct drm_i915_gem_set_tiling *args = data; struct drm_i915_gem_object *obj; int err; + if (!dev_priv->ggtt.num_fences) + return -EOPNOTSUPP; + obj = i915_gem_object_lookup(file, args->handle); if (!obj) return -ENOENT; @@ -340,9 +349,9 @@ i915_gem_set_tiling_ioctl(struct drm_device *dev, void *data, args->stride = 0; } else { if (args->tiling_mode == I915_TILING_X) - args->swizzle_mode = to_i915(dev)->mm.bit_6_swizzle_x; + args->swizzle_mode = to_i915(dev)->ggtt.bit_6_swizzle_x; else - args->swizzle_mode = to_i915(dev)->mm.bit_6_swizzle_y; + args->swizzle_mode = to_i915(dev)->ggtt.bit_6_swizzle_y; /* Hide bit 17 swizzling from the user. This prevents old Mesa * from aborting the application on sw fallbacks to bit 17, @@ -364,12 +373,7 @@ i915_gem_set_tiling_ioctl(struct drm_device *dev, void *data, } } - err = mutex_lock_interruptible(&dev->struct_mutex); - if (err) - goto err; - err = i915_gem_object_set_tiling(obj, args->tiling_mode, args->stride); - mutex_unlock(&dev->struct_mutex); /* We have to maintain this existing ABI... 
*/ args->stride = i915_gem_object_get_stride(obj); @@ -402,6 +406,9 @@ i915_gem_get_tiling_ioctl(struct drm_device *dev, void *data, struct drm_i915_gem_object *obj; int err = -ENOENT; + if (!dev_priv->ggtt.num_fences) + return -EOPNOTSUPP; + rcu_read_lock(); obj = i915_gem_object_lookup_rcu(file, args->handle); if (obj) { @@ -415,10 +422,10 @@ i915_gem_get_tiling_ioctl(struct drm_device *dev, void *data, switch (args->tiling_mode) { case I915_TILING_X: - args->swizzle_mode = dev_priv->mm.bit_6_swizzle_x; + args->swizzle_mode = dev_priv->ggtt.bit_6_swizzle_x; break; case I915_TILING_Y: - args->swizzle_mode = dev_priv->mm.bit_6_swizzle_y; + args->swizzle_mode = dev_priv->ggtt.bit_6_swizzle_y; break; default: case I915_TILING_NONE: diff --git a/drivers/gpu/drm/i915/gem/i915_gem_userptr.c b/drivers/gpu/drm/i915/gem/i915_gem_userptr.c index 2caa594322bc..580319b7bf1a 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_userptr.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_userptr.c @@ -12,11 +12,10 @@ #include <drm/i915_drm.h> +#include "i915_drv.h" #include "i915_gem_ioctls.h" #include "i915_gem_object.h" #include "i915_scatterlist.h" -#include "i915_trace.h" -#include "intel_drv.h" struct i915_mm_struct { struct mm_struct *mm; @@ -93,7 +92,6 @@ userptr_mn_invalidate_range_start(struct mmu_notifier *_mn, struct i915_mmu_notifier *mn = container_of(_mn, struct i915_mmu_notifier, mn); struct interval_tree_node *it; - struct mutex *unlock = NULL; unsigned long end; int ret = 0; @@ -130,32 +128,14 @@ userptr_mn_invalidate_range_start(struct mmu_notifier *_mn, } spin_unlock(&mn->lock); - if (!unlock) { - unlock = &mn->mm->i915->drm.struct_mutex; - - switch (mutex_trylock_recursive(unlock)) { - default: - case MUTEX_TRYLOCK_FAILED: - if (mutex_lock_killable_nested(unlock, I915_MM_SHRINKER)) { - i915_gem_object_put(obj); - return -EINTR; - } - /* fall through */ - case MUTEX_TRYLOCK_SUCCESS: - break; - - case MUTEX_TRYLOCK_RECURSIVE: - unlock = ERR_PTR(-EEXIST); - break; - } - } - - ret = i915_gem_object_unbind(obj); + ret = i915_gem_object_unbind(obj, + I915_GEM_OBJECT_UNBIND_ACTIVE | + I915_GEM_OBJECT_UNBIND_BARRIER); if (ret == 0) - ret = __i915_gem_object_put_pages(obj, I915_MM_SHRINKER); + ret = __i915_gem_object_put_pages(obj); i915_gem_object_put(obj); if (ret) - goto unlock; + return ret; spin_lock(&mn->lock); @@ -168,10 +148,6 @@ userptr_mn_invalidate_range_start(struct mmu_notifier *_mn, } spin_unlock(&mn->lock); -unlock: - if (!IS_ERR_OR_NULL(unlock)) - mutex_unlock(unlock); - return ret; } @@ -427,7 +403,7 @@ struct get_pages_work { static struct sg_table * __i915_gem_userptr_alloc_pages(struct drm_i915_gem_object *obj, - struct page **pvec, int num_pages) + struct page **pvec, unsigned long num_pages) { unsigned int max_segment = i915_sg_segment_size(); struct sg_table *st; @@ -473,9 +449,10 @@ __i915_gem_userptr_get_pages_worker(struct work_struct *_work) { struct get_pages_work *work = container_of(_work, typeof(*work), work); struct drm_i915_gem_object *obj = work->obj; - const int npages = obj->base.size >> PAGE_SHIFT; + const unsigned long npages = obj->base.size >> PAGE_SHIFT; + unsigned long pinned; struct page **pvec; - int pinned, ret; + int ret; ret = -ENOMEM; pinned = 0; @@ -484,31 +461,36 @@ __i915_gem_userptr_get_pages_worker(struct work_struct *_work) if (pvec != NULL) { struct mm_struct *mm = obj->userptr.mm->mm; unsigned int flags = 0; + int locked = 0; if (!i915_gem_object_is_readonly(obj)) flags |= FOLL_WRITE; ret = -EFAULT; if (mmget_not_zero(mm)) { - down_read(&mm->mmap_sem); 
while (pinned < npages) { + if (!locked) { + down_read(&mm->mmap_sem); + locked = 1; + } ret = get_user_pages_remote (work->task, mm, obj->userptr.ptr + pinned * PAGE_SIZE, npages - pinned, flags, - pvec + pinned, NULL, NULL); + pvec + pinned, NULL, &locked); if (ret < 0) break; pinned += ret; } - up_read(&mm->mmap_sem); + if (locked) + up_read(&mm->mmap_sem); mmput(mm); } } - mutex_lock(&obj->mm.lock); + mutex_lock_nested(&obj->mm.lock, I915_MM_GET_PAGES); if (obj->userptr.work == &work->work) { struct sg_table *pages = ERR_PTR(ret); @@ -578,7 +560,7 @@ __i915_gem_userptr_get_pages_schedule(struct drm_i915_gem_object *obj) static int i915_gem_userptr_get_pages(struct drm_i915_gem_object *obj) { - const int num_pages = obj->base.size >> PAGE_SHIFT; + const unsigned long num_pages = obj->base.size >> PAGE_SHIFT; struct mm_struct *mm = obj->userptr.mm->mm; struct page **pvec; struct sg_table *pages; @@ -662,8 +644,16 @@ i915_gem_userptr_put_pages(struct drm_i915_gem_object *obj, __i915_gem_object_release_shmem(obj, pages, true); i915_gem_gtt_finish_pages(obj, pages); + /* + * We always mark objects as dirty when they are used by the GPU, + * just in case. However, if we set the vma as being read-only we know + * that the object will never have been written to. + */ + if (i915_gem_object_is_readonly(obj)) + obj->mm.dirty = false; + for_each_sgt_page(page, sgt_iter, pages) { - if (obj->mm.dirty) + if (obj->mm.dirty && trylock_page(page)) { /* * As this may not be anonymous memory (e.g. shmem) * but exist on a real mapping, we have to lock @@ -671,8 +661,20 @@ i915_gem_userptr_put_pages(struct drm_i915_gem_object *obj, * the page reference is not sufficient to * prevent the inode from being truncated. * Play safe and take the lock. + * + * However...! + * + * The mmu-notifier can be invalidated for a + * migrate_page, that is alreadying holding the lock + * on the page. Such a try_to_unmap() will result + * in us calling put_pages() and so recursively try + * to lock the page. We avoid that deadlock with + * a trylock_page() and in exchange we risk missing + * some page dirtying. */ - set_page_dirty_lock(page); + set_page_dirty(page); + unlock_page(page); + } mark_page_accessed(page); put_page(page); @@ -702,6 +704,7 @@ i915_gem_userptr_dmabuf_export(struct drm_i915_gem_object *obj) static const struct drm_i915_gem_object_ops i915_gem_userptr_ops = { .flags = I915_GEM_OBJECT_HAS_STRUCT_PAGE | I915_GEM_OBJECT_IS_SHRINKABLE | + I915_GEM_OBJECT_NO_GGTT | I915_GEM_OBJECT_ASYNC_CANCEL, .get_pages = i915_gem_userptr_get_pages, .put_pages = i915_gem_userptr_put_pages, @@ -749,6 +752,7 @@ i915_gem_userptr_ioctl(struct drm_device *dev, void *data, struct drm_file *file) { + static struct lock_class_key lock_class; struct drm_i915_private *dev_priv = to_i915(dev); struct drm_i915_gem_userptr *args = data; struct drm_i915_gem_object *obj; @@ -776,14 +780,11 @@ i915_gem_userptr_ioctl(struct drm_device *dev, return -EFAULT; if (args->flags & I915_USERPTR_READ_ONLY) { - struct i915_address_space *vm; - /* * On almost all of the older hw, we cannot tell the GPU that * a page is readonly. 
*/ - vm = dev_priv->kernel_context->vm; - if (!vm || !vm->has_read_only) + if (!dev_priv->gt.vm->has_read_only) return -ENODEV; } @@ -792,7 +793,7 @@ i915_gem_userptr_ioctl(struct drm_device *dev, return -ENOMEM; drm_gem_private_object_init(dev, &obj->base, args->user_size); - i915_gem_object_init(obj, &i915_gem_userptr_ops); + i915_gem_object_init(obj, &i915_gem_userptr_ops, &lock_class); obj->read_domains = I915_GEM_DOMAIN_CPU; obj->write_domain = I915_GEM_DOMAIN_CPU; i915_gem_object_set_cache_coherency(obj, I915_CACHE_LLC); diff --git a/drivers/gpu/drm/i915/gem/i915_gem_wait.c b/drivers/gpu/drm/i915/gem/i915_gem_wait.c index 26ec6579b7cd..8af55cd3e690 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_wait.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_wait.c @@ -31,11 +31,10 @@ i915_gem_object_wait_fence(struct dma_fence *fence, } static long -i915_gem_object_wait_reservation(struct reservation_object *resv, +i915_gem_object_wait_reservation(struct dma_resv *resv, unsigned int flags, long timeout) { - unsigned int seq = __read_seqcount_begin(&resv->seq); struct dma_fence *excl; bool prune_fences = false; @@ -44,7 +43,7 @@ i915_gem_object_wait_reservation(struct reservation_object *resv, unsigned int count, i; int ret; - ret = reservation_object_get_fences_rcu(resv, + ret = dma_resv_get_fences_rcu(resv, &excl, &count, &shared); if (ret) return ret; @@ -73,7 +72,7 @@ i915_gem_object_wait_reservation(struct reservation_object *resv, */ prune_fences = count && timeout >= 0; } else { - excl = reservation_object_get_excl_rcu(resv); + excl = dma_resv_get_excl_rcu(resv); } if (excl && timeout >= 0) @@ -83,15 +82,12 @@ i915_gem_object_wait_reservation(struct reservation_object *resv, /* * Opportunistically prune the fences iff we know they have *all* been - * signaled and that the reservation object has not been changed (i.e. - * no new fences have been added). + * signaled. */ - if (prune_fences && !__read_seqcount_retry(&resv->seq, seq)) { - if (reservation_object_trylock(resv)) { - if (!__read_seqcount_retry(&resv->seq, seq)) - reservation_object_add_excl_fence(resv, NULL); - reservation_object_unlock(resv); - } + if (prune_fences && dma_resv_trylock(resv)) { + if (dma_resv_test_signaled_rcu(resv, true)) + dma_resv_add_excl_fence(resv, NULL); + dma_resv_unlock(resv); } return timeout; @@ -144,7 +140,7 @@ i915_gem_object_wait_priority(struct drm_i915_gem_object *obj, unsigned int count, i; int ret; - ret = reservation_object_get_fences_rcu(obj->base.resv, + ret = dma_resv_get_fences_rcu(obj->base.resv, &excl, &count, &shared); if (ret) return ret; @@ -156,7 +152,7 @@ i915_gem_object_wait_priority(struct drm_i915_gem_object *obj, kfree(shared); } else { - excl = reservation_object_get_excl_rcu(obj->base.resv); + excl = dma_resv_get_excl_rcu(obj->base.resv); } if (excl) { diff --git a/drivers/gpu/drm/i915/gem/i915_gemfs.c b/drivers/gpu/drm/i915/gem/i915_gemfs.c index 099f3397aada..5e6e8c91ab38 100644 --- a/drivers/gpu/drm/i915/gem/i915_gemfs.c +++ b/drivers/gpu/drm/i915/gem/i915_gemfs.c @@ -20,31 +20,18 @@ int i915_gemfs_init(struct drm_i915_private *i915) if (!type) return -ENODEV; - gemfs = kern_mount(type); - if (IS_ERR(gemfs)) - return PTR_ERR(gemfs); - /* - * Enable huge-pages for objects that are at least HPAGE_PMD_SIZE, most - * likely 2M. Note that within_size may overallocate huge-pages, if say - * we allocate an object of size 2M + 4K, we may get 2M + 2M, but under - * memory pressure shmem should split any huge-pages which can be - * shrunk. 
+ * By creating our own shmemfs mountpoint, we can pass in + * mount flags that better match our usecase. + * + * One example, although it is probably better with a per-file + * control, is selecting huge page allocations ("huge=within_size"). + * Currently unused due to bandwidth issues (slow reads) on Broadwell+. */ - if (has_transparent_hugepage()) { - struct super_block *sb = gemfs->mnt_sb; - /* FIXME: Disabled until we get W/A for read BW issue. */ - char options[] = "huge=never"; - int flags = 0; - int err; - - err = sb->s_op->remount_fs(sb, &flags, options); - if (err) { - kern_unmount(gemfs); - return err; - } - } + gemfs = kern_mount(type); + if (IS_ERR(gemfs)) + return PTR_ERR(gemfs); i915->mm.gemfs = gemfs; diff --git a/drivers/gpu/drm/i915/gem/selftests/huge_gem_object.c b/drivers/gpu/drm/i915/gem/selftests/huge_gem_object.c index 3c5d17b2b670..fa16f2c3f3ac 100644 --- a/drivers/gpu/drm/i915/gem/selftests/huge_gem_object.c +++ b/drivers/gpu/drm/i915/gem/selftests/huge_gem_object.c @@ -12,10 +12,14 @@ static void huge_free_pages(struct drm_i915_gem_object *obj, struct sg_table *pages) { unsigned long nreal = obj->scratch / PAGE_SIZE; - struct scatterlist *sg; + struct sgt_iter sgt_iter; + struct page *page; - for (sg = pages->sgl; sg && nreal--; sg = __sg_next(sg)) - __free_page(sg_page(sg)); + for_each_sgt_page(page, sgt_iter, pages) { + __free_page(page); + if (!--nreal) + break; + } sg_free_table(pages); kfree(pages); @@ -70,7 +74,6 @@ static int huge_get_pages(struct drm_i915_gem_object *obj) err: huge_free_pages(obj, pages); - return -ENOMEM; #undef GFP } @@ -96,6 +99,7 @@ huge_gem_object(struct drm_i915_private *i915, phys_addr_t phys_size, dma_addr_t dma_size) { + static struct lock_class_key lock_class; struct drm_i915_gem_object *obj; unsigned int cache_level; @@ -111,7 +115,7 @@ huge_gem_object(struct drm_i915_private *i915, return ERR_PTR(-ENOMEM); drm_gem_private_object_init(&i915->drm, &obj->base, dma_size); - i915_gem_object_init(obj, &huge_ops); + i915_gem_object_init(obj, &huge_ops, &lock_class); obj->read_domains = I915_GEM_DOMAIN_CPU; obj->write_domain = I915_GEM_DOMAIN_CPU; diff --git a/drivers/gpu/drm/i915/gem/selftests/huge_gem_object.h b/drivers/gpu/drm/i915/gem/selftests/huge_gem_object.h index 549c1394bcdc..b8cf31b7bf14 100644 --- a/drivers/gpu/drm/i915/gem/selftests/huge_gem_object.h +++ b/drivers/gpu/drm/i915/gem/selftests/huge_gem_object.h @@ -7,6 +7,12 @@ #ifndef __HUGE_GEM_OBJECT_H #define __HUGE_GEM_OBJECT_H +#include <linux/types.h> + +#include "gem/i915_gem_object_types.h" + +struct drm_i915_private; + struct drm_i915_gem_object * huge_gem_object(struct drm_i915_private *i915, phys_addr_t phys_size, diff --git a/drivers/gpu/drm/i915/gem/selftests/huge_pages.c b/drivers/gpu/drm/i915/gem/selftests/huge_pages.c index b74729b6f353..9311250d7d6f 100644 --- a/drivers/gpu/drm/i915/gem/selftests/huge_pages.c +++ b/drivers/gpu/drm/i915/gem/selftests/huge_pages.c @@ -8,13 +8,18 @@ #include "i915_selftest.h" +#include "gem/i915_gem_region.h" +#include "gem/i915_gem_lmem.h" #include "gem/i915_gem_pm.h" +#include "gt/intel_gt.h" + #include "igt_gem_utils.h" #include "mock_context.h" #include "selftests/mock_drm.h" #include "selftests/mock_gem_device.h" +#include "selftests/mock_region.h" #include "selftests/i915_random.h" static const unsigned int page_sizes[] = { @@ -111,8 +116,6 @@ static int get_huge_pages(struct drm_i915_gem_object *obj) if (i915_gem_gtt_prepare_pages(obj, st)) goto err; - obj->mm.madv = I915_MADV_DONTNEED; - GEM_BUG_ON(sg_page_sizes != 
obj->mm.page_mask); __i915_gem_object_set_pages(obj, st, sg_page_sizes); @@ -133,7 +136,6 @@ static void put_huge_pages(struct drm_i915_gem_object *obj, huge_pages_free_pages(pages); obj->mm.dirty = false; - obj->mm.madv = I915_MADV_WILLNEED; } static const struct drm_i915_gem_object_ops huge_page_ops = { @@ -148,6 +150,7 @@ huge_pages_object(struct drm_i915_private *i915, u64 size, unsigned int page_mask) { + static struct lock_class_key lock_class; struct drm_i915_gem_object *obj; GEM_BUG_ON(!size); @@ -164,7 +167,9 @@ huge_pages_object(struct drm_i915_private *i915, return ERR_PTR(-ENOMEM); drm_gem_private_object_init(&i915->drm, &obj->base, size); - i915_gem_object_init(obj, &huge_page_ops); + i915_gem_object_init(obj, &huge_page_ops, &lock_class); + + i915_gem_object_set_volatile(obj); obj->write_domain = I915_GEM_DOMAIN_CPU; obj->read_domains = I915_GEM_DOMAIN_CPU; @@ -225,8 +230,6 @@ static int fake_get_huge_pages(struct drm_i915_gem_object *obj) i915_sg_trim(st); - obj->mm.madv = I915_MADV_DONTNEED; - __i915_gem_object_set_pages(obj, st, sg_page_sizes); return 0; @@ -259,8 +262,6 @@ static int fake_get_huge_pages_single(struct drm_i915_gem_object *obj) sg_dma_len(sg) = obj->base.size; sg_dma_address(sg) = page_size; - obj->mm.madv = I915_MADV_DONTNEED; - __i915_gem_object_set_pages(obj, st, sg->length); return 0; @@ -279,7 +280,6 @@ static void fake_put_huge_pages(struct drm_i915_gem_object *obj, { fake_free_huge_pages(obj, pages); obj->mm.dirty = false; - obj->mm.madv = I915_MADV_WILLNEED; } static const struct drm_i915_gem_object_ops fake_ops = { @@ -297,6 +297,7 @@ static const struct drm_i915_gem_object_ops fake_ops_single = { static struct drm_i915_gem_object * fake_huge_pages_object(struct drm_i915_private *i915, u64 size, bool single) { + static struct lock_class_key lock_class; struct drm_i915_gem_object *obj; GEM_BUG_ON(!size); @@ -315,9 +316,11 @@ fake_huge_pages_object(struct drm_i915_private *i915, u64 size, bool single) drm_gem_private_object_init(&i915->drm, &obj->base, size); if (single) - i915_gem_object_init(obj, &fake_ops_single); + i915_gem_object_init(obj, &fake_ops_single, &lock_class); else - i915_gem_object_init(obj, &fake_ops); + i915_gem_object_init(obj, &fake_ops, &lock_class); + + i915_gem_object_set_volatile(obj); obj->write_domain = I915_GEM_DOMAIN_CPU; obj->read_domains = I915_GEM_DOMAIN_CPU; @@ -331,7 +334,12 @@ static int igt_check_page_sizes(struct i915_vma *vma) struct drm_i915_private *i915 = vma->vm->i915; unsigned int supported = INTEL_INFO(i915)->page_sizes; struct drm_i915_gem_object *obj = vma->obj; - int err = 0; + int err; + + /* We have to wait for the async bind to complete before our asserts */ + err = i915_vma_sync(vma); + if (err) + return err; if (!HAS_PAGE_SIZES(i915, vma->page_sizes.sg)) { pr_err("unsupported page_sizes.sg=%u, supported=%u\n", @@ -445,6 +453,88 @@ out_device: return err; } +static int igt_mock_memory_region_huge_pages(void *arg) +{ + const unsigned int flags[] = { 0, I915_BO_ALLOC_CONTIGUOUS }; + struct i915_ppgtt *ppgtt = arg; + struct drm_i915_private *i915 = ppgtt->vm.i915; + unsigned long supported = INTEL_INFO(i915)->page_sizes; + struct intel_memory_region *mem; + struct drm_i915_gem_object *obj; + struct i915_vma *vma; + int bit; + int err = 0; + + mem = mock_region_create(i915, 0, SZ_2G, I915_GTT_PAGE_SIZE_4K, 0); + if (IS_ERR(mem)) { + pr_err("%s failed to create memory region\n", __func__); + return PTR_ERR(mem); + } + + for_each_set_bit(bit, &supported, ilog2(I915_GTT_MAX_PAGE_SIZE) + 1) { + unsigned int 
page_size = BIT(bit); + resource_size_t phys; + int i; + + for (i = 0; i < ARRAY_SIZE(flags); ++i) { + obj = i915_gem_object_create_region(mem, page_size, + flags[i]); + if (IS_ERR(obj)) { + err = PTR_ERR(obj); + goto out_region; + } + + vma = i915_vma_instance(obj, &ppgtt->vm, NULL); + if (IS_ERR(vma)) { + err = PTR_ERR(vma); + goto out_put; + } + + err = i915_vma_pin(vma, 0, 0, PIN_USER); + if (err) + goto out_close; + + err = igt_check_page_sizes(vma); + if (err) + goto out_unpin; + + phys = i915_gem_object_get_dma_address(obj, 0); + if (!IS_ALIGNED(phys, page_size)) { + pr_err("%s addr misaligned(%pa) page_size=%u\n", + __func__, &phys, page_size); + err = -EINVAL; + goto out_unpin; + } + + if (vma->page_sizes.gtt != page_size) { + pr_err("%s page_sizes.gtt=%u, expected=%u\n", + __func__, vma->page_sizes.gtt, + page_size); + err = -EINVAL; + goto out_unpin; + } + + i915_vma_unpin(vma); + i915_vma_close(vma); + + __i915_gem_object_put_pages(obj); + i915_gem_object_put(obj); + } + } + + goto out_region; + +out_unpin: + i915_vma_unpin(vma); +out_close: + i915_vma_close(vma); +out_put: + i915_gem_object_put(obj); +out_region: + intel_memory_region_put(mem); + return err; +} + static int igt_mock_ppgtt_misaligned_dma(void *arg) { struct i915_ppgtt *ppgtt = arg; @@ -560,7 +650,7 @@ static int igt_mock_ppgtt_misaligned_dma(void *arg) i915_vma_close(vma); i915_gem_object_unpin_pages(obj); - __i915_gem_object_put_pages(obj, I915_MM_NORMAL); + __i915_gem_object_put_pages(obj); i915_gem_object_put(obj); } @@ -588,7 +678,7 @@ static void close_object_list(struct list_head *objects, list_del(&obj->st_link); i915_gem_object_unpin_pages(obj); - __i915_gem_object_put_pages(obj, I915_MM_NORMAL); + __i915_gem_object_put_pages(obj); i915_gem_object_put(obj); } } @@ -858,7 +948,7 @@ static int igt_mock_ppgtt_64K(void *arg) i915_vma_close(vma); i915_gem_object_unpin_pages(obj); - __i915_gem_object_put_pages(obj, I915_MM_NORMAL); + __i915_gem_object_put_pages(obj); i915_gem_object_put(obj); } } @@ -877,129 +967,25 @@ out_object_put: return err; } -static struct i915_vma * -gpu_write_dw(struct i915_vma *vma, u64 offset, u32 val) -{ - struct drm_i915_private *i915 = vma->vm->i915; - const int gen = INTEL_GEN(i915); - unsigned int count = vma->size >> PAGE_SHIFT; - struct drm_i915_gem_object *obj; - struct i915_vma *batch; - unsigned int size; - u32 *cmd; - int n; - int err; - - size = (1 + 4 * count) * sizeof(u32); - size = round_up(size, PAGE_SIZE); - obj = i915_gem_object_create_internal(i915, size); - if (IS_ERR(obj)) - return ERR_CAST(obj); - - cmd = i915_gem_object_pin_map(obj, I915_MAP_WC); - if (IS_ERR(cmd)) { - err = PTR_ERR(cmd); - goto err; - } - - offset += vma->node.start; - - for (n = 0; n < count; n++) { - if (gen >= 8) { - *cmd++ = MI_STORE_DWORD_IMM_GEN4; - *cmd++ = lower_32_bits(offset); - *cmd++ = upper_32_bits(offset); - *cmd++ = val; - } else if (gen >= 4) { - *cmd++ = MI_STORE_DWORD_IMM_GEN4 | - (gen < 6 ? 
MI_USE_GGTT : 0); - *cmd++ = 0; - *cmd++ = offset; - *cmd++ = val; - } else { - *cmd++ = MI_STORE_DWORD_IMM | MI_MEM_VIRTUAL; - *cmd++ = offset; - *cmd++ = val; - } - - offset += PAGE_SIZE; - } - - *cmd = MI_BATCH_BUFFER_END; - i915_gem_chipset_flush(i915); - - i915_gem_object_unpin_map(obj); - - batch = i915_vma_instance(obj, vma->vm, NULL); - if (IS_ERR(batch)) { - err = PTR_ERR(batch); - goto err; - } - - err = i915_vma_pin(batch, 0, 0, PIN_USER); - if (err) - goto err; - - return batch; - -err: - i915_gem_object_put(obj); - - return ERR_PTR(err); -} - -static int gpu_write(struct i915_vma *vma, - struct i915_gem_context *ctx, - struct intel_engine_cs *engine, - u32 dword, - u32 value) +static int gpu_write(struct intel_context *ce, + struct i915_vma *vma, + u32 dw, + u32 val) { - struct i915_request *rq; - struct i915_vma *batch; int err; - GEM_BUG_ON(!intel_engine_can_store_dword(engine)); - - batch = gpu_write_dw(vma, dword * sizeof(u32), value); - if (IS_ERR(batch)) - return PTR_ERR(batch); - - rq = igt_request_alloc(ctx, engine); - if (IS_ERR(rq)) { - err = PTR_ERR(rq); - goto err_batch; - } - - i915_vma_lock(batch); - err = i915_vma_move_to_active(batch, rq, 0); - i915_vma_unlock(batch); - if (err) - goto err_request; - - i915_vma_lock(vma); - err = i915_gem_object_set_to_gtt_domain(vma->obj, false); - if (err == 0) - err = i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE); - i915_vma_unlock(vma); - if (err) - goto err_request; - - err = engine->emit_bb_start(rq, - batch->node.start, batch->node.size, - 0); -err_request: + i915_gem_object_lock(vma->obj); + err = i915_gem_object_set_to_gtt_domain(vma->obj, true); + i915_gem_object_unlock(vma->obj); if (err) - i915_request_skip(rq, err); - i915_request_add(rq); -err_batch: - i915_vma_unpin(batch); - i915_vma_close(batch); - i915_vma_put(batch); + return err; - return err; + return igt_gpu_fill_dw(ce, vma, dw * sizeof(u32), + vma->size >> PAGE_SHIFT, val); } -static int cpu_check(struct drm_i915_gem_object *obj, u32 dword, u32 val) +static int +__cpu_check_shmem(struct drm_i915_gem_object *obj, u32 dword, u32 val) { unsigned int needs_flush; unsigned long n; @@ -1031,19 +1017,54 @@ static int cpu_check(struct drm_i915_gem_object *obj, u32 dword, u32 val) return err; } -static int __igt_write_huge(struct i915_gem_context *ctx, - struct intel_engine_cs *engine, +static int __cpu_check_vmap(struct drm_i915_gem_object *obj, u32 dword, u32 val) +{ + unsigned long n = obj->base.size >> PAGE_SHIFT; + u32 *ptr; + int err; + + err = i915_gem_object_wait(obj, 0, MAX_SCHEDULE_TIMEOUT); + if (err) + return err; + + ptr = i915_gem_object_pin_map(obj, I915_MAP_WC); + if (IS_ERR(ptr)) + return PTR_ERR(ptr); + + ptr += dword; + while (n--) { + if (*ptr != val) { + pr_err("base[%u]=%08x, val=%08x\n", + dword, *ptr, val); + err = -EINVAL; + break; + } + + ptr += PAGE_SIZE / sizeof(*ptr); + } + + i915_gem_object_unpin_map(obj); + return err; +} + +static int cpu_check(struct drm_i915_gem_object *obj, u32 dword, u32 val) +{ + if (i915_gem_object_has_struct_page(obj)) + return __cpu_check_shmem(obj, dword, val); + else + return __cpu_check_vmap(obj, dword, val); +} + +static int __igt_write_huge(struct intel_context *ce, struct drm_i915_gem_object *obj, u64 size, u64 offset, u32 dword, u32 val) { - struct drm_i915_private *i915 = to_i915(obj->base.dev); - struct i915_address_space *vm = ctx->vm ?: &i915->ggtt.vm; unsigned int flags = PIN_USER | PIN_OFFSET_FIXED; struct i915_vma *vma; int err; - vma = i915_vma_instance(obj, vm, NULL); + vma = 
i915_vma_instance(obj, ce->vm, NULL); if (IS_ERR(vma)) return PTR_ERR(vma); @@ -1057,7 +1078,7 @@ static int __igt_write_huge(struct i915_gem_context *ctx, * The ggtt may have some pages reserved so * refrain from erroring out. */ - if (err == -ENOSPC && i915_is_ggtt(vm)) + if (err == -ENOSPC && i915_is_ggtt(ce->vm)) err = 0; goto out_vma_close; @@ -1067,7 +1088,7 @@ static int __igt_write_huge(struct i915_gem_context *ctx, if (err) goto out_vma_unpin; - err = gpu_write(vma, ctx, engine, dword, val); + err = gpu_write(ce, vma, dword, val); if (err) { pr_err("gpu-write failed at offset=%llx\n", offset); goto out_vma_unpin; @@ -1082,22 +1103,20 @@ static int __igt_write_huge(struct i915_gem_context *ctx, out_vma_unpin: i915_vma_unpin(vma); out_vma_close: - i915_vma_destroy(vma); - + __i915_vma_put(vma); return err; } static int igt_write_huge(struct i915_gem_context *ctx, struct drm_i915_gem_object *obj) { - struct drm_i915_private *i915 = to_i915(obj->base.dev); - struct i915_address_space *vm = ctx->vm ?: &i915->ggtt.vm; - static struct intel_engine_cs *engines[I915_NUM_ENGINES]; - struct intel_engine_cs *engine; + struct i915_gem_engines *engines; + struct i915_gem_engines_iter it; + struct intel_context *ce; I915_RND_STATE(prng); IGT_TIMEOUT(end_time); unsigned int max_page_size; - unsigned int id; + unsigned int count; u64 max; u64 num; u64 size; @@ -1111,19 +1130,18 @@ static int igt_write_huge(struct i915_gem_context *ctx, if (obj->mm.page_sizes.sg & I915_GTT_PAGE_SIZE_64K) size = round_up(size, I915_GTT_PAGE_SIZE_2M); - max_page_size = rounddown_pow_of_two(obj->mm.page_sizes.sg); - max = div_u64((vm->total - size), max_page_size); - n = 0; - for_each_engine(engine, i915, id) { - if (!intel_engine_can_store_dword(engine)) { - pr_info("store-dword-imm not supported on engine=%u\n", - id); + count = 0; + max = U64_MAX; + for_each_gem_engine(ce, i915_gem_context_lock_engines(ctx), it) { + count++; + if (!intel_engine_can_store_dword(ce->engine)) continue; - } - engines[n++] = engine; - } + max = min(max, ce->vm->total); + n++; + } + i915_gem_context_unlock_engines(ctx); if (!n) return 0; @@ -1132,23 +1150,30 @@ static int igt_write_huge(struct i915_gem_context *ctx, * randomized order, lets also make feeding to the same engine a few * times in succession a possibility by enlarging the permutation array. */ - order = i915_random_order(n * I915_NUM_ENGINES, &prng); + order = i915_random_order(count * count, &prng); if (!order) return -ENOMEM; + max_page_size = rounddown_pow_of_two(obj->mm.page_sizes.sg); + max = div_u64(max - size, max_page_size); + /* * Try various offsets in an ascending/descending fashion until we * timeout -- we want to avoid issues hidden by effectively always using * offset = 0. 
*/ i = 0; + engines = i915_gem_context_lock_engines(ctx); for_each_prime_number_from(num, 0, max) { u64 offset_low = num * max_page_size; u64 offset_high = (max - num) * max_page_size; u32 dword = offset_in_page(num) / 4; + struct intel_context *ce; - engine = engines[order[i] % n]; - i = (i + 1) % (n * I915_NUM_ENGINES); + ce = engines->engines[order[i] % engines->num_engines]; + i = (i + 1) % (count * count); + if (!ce || !intel_engine_can_store_dword(ce->engine)) + continue; /* * In order to utilize 64K pages we need to both pad the vma @@ -1160,22 +1185,23 @@ static int igt_write_huge(struct i915_gem_context *ctx, offset_low = round_down(offset_low, I915_GTT_PAGE_SIZE_2M); - err = __igt_write_huge(ctx, engine, obj, size, offset_low, + err = __igt_write_huge(ce, obj, size, offset_low, dword, num + 1); if (err) break; - err = __igt_write_huge(ctx, engine, obj, size, offset_high, + err = __igt_write_huge(ce, obj, size, offset_high, dword, num + 1); if (err) break; if (igt_timeout(end_time, - "%s timed out on engine=%u, offset_low=%llx offset_high=%llx, max_page_size=%x\n", - __func__, engine->id, offset_low, offset_high, + "%s timed out on %s, offset_low=%llx offset_high=%llx, max_page_size=%x\n", + __func__, ce->engine->name, offset_low, offset_high, max_page_size)) break; } + i915_gem_context_unlock_engines(ctx); kfree(order); @@ -1267,7 +1293,7 @@ static int igt_ppgtt_exhaust_huge(void *arg) } i915_gem_object_unpin_pages(obj); - __i915_gem_object_put_pages(obj, I915_MM_NORMAL); + __i915_gem_object_put_pages(obj); i915_gem_object_put(obj); } } @@ -1283,131 +1309,235 @@ out_device: return err; } -static int igt_ppgtt_internal_huge(void *arg) +typedef struct drm_i915_gem_object * +(*igt_create_fn)(struct drm_i915_private *i915, u32 size, u32 flags); + +static inline bool igt_can_allocate_thp(struct drm_i915_private *i915) +{ + return i915->mm.gemfs && has_transparent_hugepage(); +} + +static struct drm_i915_gem_object * +igt_create_shmem(struct drm_i915_private *i915, u32 size, u32 flags) +{ + if (!igt_can_allocate_thp(i915)) { + pr_info("%s missing THP support, skipping\n", __func__); + return ERR_PTR(-ENODEV); + } + + return i915_gem_object_create_shmem(i915, size); +} + +static struct drm_i915_gem_object * +igt_create_internal(struct drm_i915_private *i915, u32 size, u32 flags) +{ + return i915_gem_object_create_internal(i915, size); +} + +static struct drm_i915_gem_object * +igt_create_system(struct drm_i915_private *i915, u32 size, u32 flags) +{ + return huge_pages_object(i915, size, size); +} + +static struct drm_i915_gem_object * +igt_create_local(struct drm_i915_private *i915, u32 size, u32 flags) +{ + return i915_gem_object_create_lmem(i915, size, flags); +} + +static u32 igt_random_size(struct rnd_state *prng, + u32 min_page_size, + u32 max_page_size) +{ + u64 mask; + u32 size; + + GEM_BUG_ON(!is_power_of_2(min_page_size)); + GEM_BUG_ON(!is_power_of_2(max_page_size)); + GEM_BUG_ON(min_page_size < PAGE_SIZE); + GEM_BUG_ON(min_page_size > max_page_size); + + mask = ((max_page_size << 1ULL) - 1) & PAGE_MASK; + size = prandom_u32_state(prng) & mask; + if (size < min_page_size) + size |= min_page_size; + + return size; +} + +static int igt_ppgtt_smoke_huge(void *arg) { struct i915_gem_context *ctx = arg; struct drm_i915_private *i915 = ctx->i915; struct drm_i915_gem_object *obj; - static const unsigned int sizes[] = { - SZ_64K, - SZ_128K, - SZ_256K, - SZ_512K, - SZ_1M, - SZ_2M, + I915_RND_STATE(prng); + struct { + igt_create_fn fn; + u32 min; + u32 max; + } backends[] = { + { 
igt_create_internal, SZ_64K, SZ_2M, }, + { igt_create_shmem, SZ_64K, SZ_32M, }, + { igt_create_local, SZ_64K, SZ_1G, }, }; - int i; int err; + int i; /* - * Sanity check that the HW uses huge pages correctly through internal - * -- ensure that our writes land in the right place. + * Sanity check that the HW uses huge pages correctly through our + * various backends -- ensure that our writes land in the right place. */ - for (i = 0; i < ARRAY_SIZE(sizes); ++i) { - unsigned int size = sizes[i]; + for (i = 0; i < ARRAY_SIZE(backends); ++i) { + u32 min = backends[i].min; + u32 max = backends[i].max; + u32 size = max; +try_again: + size = igt_random_size(&prng, min, rounddown_pow_of_two(size)); - obj = i915_gem_object_create_internal(i915, size); - if (IS_ERR(obj)) - return PTR_ERR(obj); + obj = backends[i].fn(i915, size, 0); + if (IS_ERR(obj)) { + err = PTR_ERR(obj); + if (err == -E2BIG) { + size >>= 1; + goto try_again; + } else if (err == -ENODEV) { + err = 0; + continue; + } + + return err; + } err = i915_gem_object_pin_pages(obj); - if (err) + if (err) { + if (err == -ENXIO || err == -E2BIG) { + i915_gem_object_put(obj); + size >>= 1; + goto try_again; + } goto out_put; + } - if (obj->mm.page_sizes.phys < I915_GTT_PAGE_SIZE_64K) { - pr_info("internal unable to allocate huge-page(s) with size=%u\n", - size); + if (obj->mm.page_sizes.phys < min) { + pr_info("%s unable to allocate huge-page(s) with size=%u, i=%d\n", + __func__, size, i); + err = -ENOMEM; goto out_unpin; } err = igt_write_huge(ctx, obj); if (err) { - pr_err("internal write-huge failed with size=%u\n", - size); - goto out_unpin; + pr_err("%s write-huge failed with size=%u, i=%d\n", + __func__, size, i); } - +out_unpin: i915_gem_object_unpin_pages(obj); - __i915_gem_object_put_pages(obj, I915_MM_NORMAL); + __i915_gem_object_put_pages(obj); +out_put: i915_gem_object_put(obj); - } - return 0; + if (err == -ENOMEM || err == -ENXIO) + err = 0; -out_unpin: - i915_gem_object_unpin_pages(obj); -out_put: - i915_gem_object_put(obj); + if (err) + break; - return err; -} + cond_resched(); + } -static inline bool igt_can_allocate_thp(struct drm_i915_private *i915) -{ - return i915->mm.gemfs && has_transparent_hugepage(); + return err; } -static int igt_ppgtt_gemfs_huge(void *arg) +static int igt_ppgtt_sanity_check(void *arg) { struct i915_gem_context *ctx = arg; struct drm_i915_private *i915 = ctx->i915; - struct drm_i915_gem_object *obj; - static const unsigned int sizes[] = { - SZ_2M, - SZ_4M, - SZ_8M, - SZ_16M, - SZ_32M, + unsigned int supported = INTEL_INFO(i915)->page_sizes; + struct { + igt_create_fn fn; + unsigned int flags; + } backends[] = { + { igt_create_system, 0, }, + { igt_create_local, I915_BO_ALLOC_CONTIGUOUS, }, }; - int i; + struct { + u32 size; + u32 pages; + } combos[] = { + { SZ_64K, SZ_64K }, + { SZ_2M, SZ_2M }, + { SZ_2M, SZ_64K }, + { SZ_2M - SZ_64K, SZ_64K }, + { SZ_2M - SZ_4K, SZ_64K | SZ_4K }, + { SZ_2M + SZ_4K, SZ_64K | SZ_4K }, + { SZ_2M + SZ_4K, SZ_2M | SZ_4K }, + { SZ_2M + SZ_64K, SZ_2M | SZ_64K }, + }; + int i, j; int err; + if (supported == I915_GTT_PAGE_SIZE_4K) + return 0; + /* - * Sanity check that the HW uses huge pages correctly through gemfs -- - * ensure that our writes land in the right place. + * Sanity check that the HW behaves with a limited set of combinations. 
+ * We already have a bunch of randomised testing, which should give us + * a decent amount of variation between runs, however we should keep + * this to limit the chances of introducing a temporary regression, by + * testing the most obvious cases that might make something blow up. */ - if (!igt_can_allocate_thp(i915)) { - pr_info("missing THP support, skipping\n"); - return 0; - } + for (i = 0; i < ARRAY_SIZE(backends); ++i) { + for (j = 0; j < ARRAY_SIZE(combos); ++j) { + struct drm_i915_gem_object *obj; + u32 size = combos[j].size; + u32 pages = combos[j].pages; + + obj = backends[i].fn(i915, size, backends[i].flags); + if (IS_ERR(obj)) { + err = PTR_ERR(obj); + if (err == -ENODEV) { + pr_info("Device lacks local memory, skipping\n"); + err = 0; + break; + } - for (i = 0; i < ARRAY_SIZE(sizes); ++i) { - unsigned int size = sizes[i]; + return err; + } - obj = i915_gem_object_create_shmem(i915, size); - if (IS_ERR(obj)) - return PTR_ERR(obj); + err = i915_gem_object_pin_pages(obj); + if (err) { + i915_gem_object_put(obj); + goto out; + } - err = i915_gem_object_pin_pages(obj); - if (err) - goto out_put; + GEM_BUG_ON(pages > obj->base.size); + pages = pages & supported; - if (obj->mm.page_sizes.phys < I915_GTT_PAGE_SIZE_2M) { - pr_info("finishing test early, gemfs unable to allocate huge-page(s) with size=%u\n", - size); - goto out_unpin; - } + if (pages) + obj->mm.page_sizes.sg = pages; - err = igt_write_huge(ctx, obj); - if (err) { - pr_err("gemfs write-huge failed with size=%u\n", - size); - goto out_unpin; + err = igt_write_huge(ctx, obj); + + i915_gem_object_unpin_pages(obj); + __i915_gem_object_put_pages(obj); + i915_gem_object_put(obj); + + if (err) { + pr_err("%s write-huge failed with size=%u pages=%u i=%d, j=%d\n", + __func__, size, pages, i, j); + goto out; + } } - i915_gem_object_unpin_pages(obj); - __i915_gem_object_put_pages(obj, I915_MM_NORMAL); - i915_gem_object_put(obj); + cond_resched(); } - return 0; - -out_unpin: - i915_gem_object_unpin_pages(obj); -out_put: - i915_gem_object_put(obj); +out: + if (err == -ENOMEM) + err = 0; return err; } @@ -1417,12 +1547,15 @@ static int igt_ppgtt_pin_update(void *arg) struct i915_gem_context *ctx = arg; struct drm_i915_private *dev_priv = ctx->i915; unsigned long supported = INTEL_INFO(dev_priv)->page_sizes; - struct i915_address_space *vm = ctx->vm; struct drm_i915_gem_object *obj; + struct i915_gem_engines_iter it; + struct i915_address_space *vm; + struct intel_context *ce; struct i915_vma *vma; unsigned int flags = PIN_USER | PIN_OFFSET_FIXED; + unsigned int n; int first, last; - int err; + int err = 0; /* * Make sure there's no funny business when doing a PIN_UPDATE -- in the @@ -1432,9 +1565,10 @@ static int igt_ppgtt_pin_update(void *arg) * huge-gtt-pages. */ - if (!vm || !i915_vm_is_4lvl(vm)) { + vm = i915_gem_context_get_vm_rcu(ctx); + if (!i915_vm_is_4lvl(vm)) { pr_info("48b PPGTT not supported, skipping\n"); - return 0; + goto out_vm; } first = ilog2(I915_GTT_PAGE_SIZE_64K); @@ -1487,7 +1621,7 @@ static int igt_ppgtt_pin_update(void *arg) goto out_unpin; } - err = i915_vma_bind(vma, I915_CACHE_NONE, PIN_UPDATE); + err = i915_vma_bind(vma, I915_CACHE_NONE, PIN_UPDATE, NULL); if (err) goto out_unpin; @@ -1518,11 +1652,24 @@ static int igt_ppgtt_pin_update(void *arg) * land in the now stale 2M page. 
*/ - err = gpu_write(vma, ctx, dev_priv->engine[RCS0], 0, 0xdeadbeaf); + n = 0; + for_each_gem_engine(ce, i915_gem_context_lock_engines(ctx), it) { + if (!intel_engine_can_store_dword(ce->engine)) + continue; + + err = gpu_write(ce, vma, n++, 0xdeadbeaf); + if (err) + break; + } + i915_gem_context_unlock_engines(ctx); if (err) goto out_unpin; - err = cpu_check(obj, 0, 0xdeadbeaf); + while (n--) { + err = cpu_check(obj, n, 0xdeadbeaf); + if (err) + goto out_unpin; + } out_unpin: i915_vma_unpin(vma); @@ -1530,6 +1677,8 @@ out_close: i915_vma_close(vma); out_put: i915_gem_object_put(obj); +out_vm: + i915_vm_put(vm); return err; } @@ -1539,7 +1688,7 @@ static int igt_tmpfs_fallback(void *arg) struct i915_gem_context *ctx = arg; struct drm_i915_private *i915 = ctx->i915; struct vfsmount *gemfs = i915->mm.gemfs; - struct i915_address_space *vm = ctx->vm ?: &i915->ggtt.vm; + struct i915_address_space *vm = i915_gem_context_get_vm_rcu(ctx); struct drm_i915_gem_object *obj; struct i915_vma *vma; u32 *vaddr; @@ -1589,6 +1738,7 @@ out_put: out_restore: i915->mm.gemfs = gemfs; + i915_vm_put(vm); return err; } @@ -1596,11 +1746,14 @@ static int igt_shrink_thp(void *arg) { struct i915_gem_context *ctx = arg; struct drm_i915_private *i915 = ctx->i915; - struct i915_address_space *vm = ctx->vm ?: &i915->ggtt.vm; + struct i915_address_space *vm = i915_gem_context_get_vm_rcu(ctx); struct drm_i915_gem_object *obj; + struct i915_gem_engines_iter it; + struct intel_context *ce; struct i915_vma *vma; unsigned int flags = PIN_USER; - int err; + unsigned int n; + int err = 0; /* * Sanity check shrinking huge-paged object -- make sure nothing blows @@ -1609,12 +1762,14 @@ static int igt_shrink_thp(void *arg) if (!igt_can_allocate_thp(i915)) { pr_info("missing THP support, skipping\n"); - return 0; + goto out_vm; } obj = i915_gem_object_create_shmem(i915, SZ_2M); - if (IS_ERR(obj)) - return PTR_ERR(obj); + if (IS_ERR(obj)) { + err = PTR_ERR(obj); + goto out_vm; + } vma = i915_vma_instance(obj, vm, NULL); if (IS_ERR(vma)) { @@ -1635,11 +1790,20 @@ static int igt_shrink_thp(void *arg) if (err) goto out_unpin; - err = gpu_write(vma, ctx, i915->engine[RCS0], 0, 0xdeadbeaf); - if (err) - goto out_unpin; + n = 0; + for_each_gem_engine(ce, i915_gem_context_lock_engines(ctx), it) { + if (!intel_engine_can_store_dword(ce->engine)) + continue; + + err = gpu_write(ce, vma, n++, 0xdeadbeaf); + if (err) + break; + } + i915_gem_context_unlock_engines(ctx); i915_vma_unpin(vma); + if (err) + goto out_close; /* * Now that the pages are *unpinned* shrink-all should invoke @@ -1662,7 +1826,11 @@ static int igt_shrink_thp(void *arg) if (err) goto out_close; - err = cpu_check(obj, 0, 0xdeadbeaf); + while (n--) { + err = cpu_check(obj, n, 0xdeadbeaf); + if (err) + break; + } out_unpin: i915_vma_unpin(vma); @@ -1670,6 +1838,8 @@ out_close: i915_vma_close(vma); out_put: i915_gem_object_put(obj); +out_vm: + i915_vm_put(vm); return err; } @@ -1678,6 +1848,7 @@ int i915_gem_huge_page_mock_selftests(void) { static const struct i915_subtest tests[] = { SUBTEST(igt_mock_exhaust_device_supported_pages), + SUBTEST(igt_mock_memory_region_huge_pages), SUBTEST(igt_mock_ppgtt_misaligned_dma), SUBTEST(igt_mock_ppgtt_huge_fill), SUBTEST(igt_mock_ppgtt_64K), @@ -1694,8 +1865,7 @@ int i915_gem_huge_page_mock_selftests(void) mkwrite_device_info(dev_priv)->ppgtt_type = INTEL_PPGTT_FULL; mkwrite_device_info(dev_priv)->ppgtt_size = 48; - mutex_lock(&dev_priv->drm.struct_mutex); - ppgtt = i915_ppgtt_create(dev_priv); + ppgtt = 
i915_ppgtt_create(&dev_priv->gt); if (IS_ERR(ppgtt)) { err = PTR_ERR(ppgtt); goto out_unlock; @@ -1720,58 +1890,52 @@ out_close: i915_vm_put(&ppgtt->vm); out_unlock: - mutex_unlock(&dev_priv->drm.struct_mutex); drm_dev_put(&dev_priv->drm); - return err; } -int i915_gem_huge_page_live_selftests(struct drm_i915_private *dev_priv) +int i915_gem_huge_page_live_selftests(struct drm_i915_private *i915) { static const struct i915_subtest tests[] = { SUBTEST(igt_shrink_thp), SUBTEST(igt_ppgtt_pin_update), SUBTEST(igt_tmpfs_fallback), SUBTEST(igt_ppgtt_exhaust_huge), - SUBTEST(igt_ppgtt_gemfs_huge), - SUBTEST(igt_ppgtt_internal_huge), + SUBTEST(igt_ppgtt_smoke_huge), + SUBTEST(igt_ppgtt_sanity_check), }; - struct drm_file *file; struct i915_gem_context *ctx; - intel_wakeref_t wakeref; + struct i915_address_space *vm; + struct file *file; int err; - if (!HAS_PPGTT(dev_priv)) { + if (!HAS_PPGTT(i915)) { pr_info("PPGTT not supported, skipping live-selftests\n"); return 0; } - if (i915_terminally_wedged(dev_priv)) + if (intel_gt_is_wedged(&i915->gt)) return 0; - file = mock_file(dev_priv); + file = mock_file(i915); if (IS_ERR(file)) return PTR_ERR(file); - mutex_lock(&dev_priv->drm.struct_mutex); - wakeref = intel_runtime_pm_get(&dev_priv->runtime_pm); - - ctx = live_context(dev_priv, file); + ctx = live_context(i915, file); if (IS_ERR(ctx)) { err = PTR_ERR(ctx); - goto out_unlock; + goto out_file; } - if (ctx->vm) - ctx->vm->scrub_64K = true; + mutex_lock(&ctx->mutex); + vm = i915_gem_context_vm(ctx); + if (vm) + WRITE_ONCE(vm->scrub_64K, true); + mutex_unlock(&ctx->mutex); err = i915_subtests(tests, ctx); -out_unlock: - intel_runtime_pm_put(&dev_priv->runtime_pm, wakeref); - mutex_unlock(&dev_priv->drm.struct_mutex); - - mock_file_free(dev_priv, file); - +out_file: + fput(file); return err; } diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_client_blt.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_client_blt.c index f3a5eb807c1c..b972be165e85 100644 --- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_client_blt.c +++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_client_blt.c @@ -5,14 +5,17 @@ #include "i915_selftest.h" +#include "gt/intel_engine_user.h" +#include "gt/intel_gt.h" + #include "selftests/igt_flush_test.h" #include "selftests/mock_drm.h" +#include "huge_gem_object.h" #include "mock_context.h" -static int igt_client_fill(void *arg) +static int __igt_client_fill(struct intel_engine_cs *engine) { - struct intel_context *ce = arg; - struct drm_i915_private *i915 = ce->gem_context->i915; + struct intel_context *ce = engine->kernel_context; struct drm_i915_gem_object *obj; struct rnd_state prng; IGT_TIMEOUT(end); @@ -21,16 +24,21 @@ static int igt_client_fill(void *arg) prandom_seed_state(&prng, i915_selftest.random_seed); + intel_engine_pm_get(engine); do { - u32 sz = prandom_u32_state(&prng) % SZ_32M; + const u32 max_block_size = S16_MAX * PAGE_SIZE; + u32 sz = min_t(u64, ce->vm->total >> 4, prandom_u32_state(&prng)); + u32 phys_sz = sz % (max_block_size + 1); u32 val = prandom_u32_state(&prng); u32 i; sz = round_up(sz, PAGE_SIZE); + phys_sz = round_up(phys_sz, PAGE_SIZE); - pr_debug("%s with sz=%x, val=%x\n", __func__, sz, val); + pr_debug("%s with phys_sz= %x, sz=%x, val=%x\n", __func__, + phys_sz, sz, val); - obj = i915_gem_object_create_internal(i915, sz); + obj = huge_gem_object(engine->i915, phys_sz, sz); if (IS_ERR(obj)) { err = PTR_ERR(obj); goto err_flush; @@ -52,7 +60,8 @@ static int igt_client_fill(void *arg) * values after we do the set_to_cpu_domain and pick it up as a * 
test failure. */ - memset32(vaddr, val ^ 0xdeadbeaf, obj->base.size / sizeof(u32)); + memset32(vaddr, val ^ 0xdeadbeaf, + huge_gem_object_phys_size(obj) / sizeof(u32)); if (!(obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_WRITE)) obj->cache_dirty = true; @@ -63,24 +72,13 @@ static int igt_client_fill(void *arg) if (err) goto err_unpin; - /* - * XXX: For now do the wait without the object resv lock to - * ensure we don't deadlock. - */ - err = i915_gem_object_wait(obj, - I915_WAIT_INTERRUPTIBLE | - I915_WAIT_ALL, - MAX_SCHEDULE_TIMEOUT); - if (err) - goto err_unpin; - i915_gem_object_lock(obj); err = i915_gem_object_set_to_cpu_domain(obj, false); i915_gem_object_unlock(obj); if (err) goto err_unpin; - for (i = 0; i < obj->base.size / sizeof(u32); ++i) { + for (i = 0; i < huge_gem_object_phys_size(obj) / sizeof(u32); ++i) { if (vaddr[i] != val) { pr_err("vaddr[%u]=%x, expected=%x\n", i, vaddr[i], val); @@ -100,28 +98,46 @@ err_unpin: err_put: i915_gem_object_put(obj); err_flush: - mutex_lock(&i915->drm.struct_mutex); - if (igt_flush_test(i915, I915_WAIT_LOCKED)) - err = -EIO; - mutex_unlock(&i915->drm.struct_mutex); - if (err == -ENOMEM) err = 0; + intel_engine_pm_put(engine); return err; } +static int igt_client_fill(void *arg) +{ + int inst = 0; + + do { + struct intel_engine_cs *engine; + int err; + + engine = intel_engine_lookup_user(arg, + I915_ENGINE_CLASS_COPY, + inst++); + if (!engine) + return 0; + + err = __igt_client_fill(engine); + if (err == -ENOMEM) + err = 0; + if (err) + return err; + } while (1); +} + int i915_gem_client_blt_live_selftests(struct drm_i915_private *i915) { static const struct i915_subtest tests[] = { SUBTEST(igt_client_fill), }; - if (i915_terminally_wedged(i915)) + if (intel_gt_is_wedged(&i915->gt)) return 0; if (!HAS_ENGINE(i915, BCS0)) return 0; - return i915_subtests(tests, i915->engine[BCS0]->kernel_context); + return i915_live_subtests(tests, i915); } diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_coherency.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_coherency.c index 8f22d3f18422..3f6079e1dfb6 100644 --- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_coherency.c +++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_coherency.c @@ -6,12 +6,20 @@ #include <linux/prime_numbers.h> +#include "gt/intel_engine_pm.h" +#include "gt/intel_gt.h" +#include "gt/intel_gt_pm.h" +#include "gt/intel_ring.h" + #include "i915_selftest.h" #include "selftests/i915_random.h" -static int cpu_set(struct drm_i915_gem_object *obj, - unsigned long offset, - u32 v) +struct context { + struct drm_i915_gem_object *obj; + struct intel_engine_cs *engine; +}; + +static int cpu_set(struct context *ctx, unsigned long offset, u32 v) { unsigned int needs_clflush; struct page *page; @@ -19,11 +27,11 @@ static int cpu_set(struct drm_i915_gem_object *obj, u32 *cpu; int err; - err = i915_gem_object_prepare_write(obj, &needs_clflush); + err = i915_gem_object_prepare_write(ctx->obj, &needs_clflush); if (err) return err; - page = i915_gem_object_get_page(obj, offset >> PAGE_SHIFT); + page = i915_gem_object_get_page(ctx->obj, offset >> PAGE_SHIFT); map = kmap_atomic(page); cpu = map + offset_in_page(offset); @@ -36,14 +44,12 @@ static int cpu_set(struct drm_i915_gem_object *obj, drm_clflush_virt_range(cpu, sizeof(*cpu)); kunmap_atomic(map); - i915_gem_object_finish_access(obj); + i915_gem_object_finish_access(ctx->obj); return 0; } -static int cpu_get(struct drm_i915_gem_object *obj, - unsigned long offset, - u32 *v) +static int cpu_get(struct context *ctx, unsigned long offset, u32 
*v) { unsigned int needs_clflush; struct page *page; @@ -51,11 +57,11 @@ static int cpu_get(struct drm_i915_gem_object *obj, u32 *cpu; int err; - err = i915_gem_object_prepare_read(obj, &needs_clflush); + err = i915_gem_object_prepare_read(ctx->obj, &needs_clflush); if (err) return err; - page = i915_gem_object_get_page(obj, offset >> PAGE_SHIFT); + page = i915_gem_object_get_page(ctx->obj, offset >> PAGE_SHIFT); map = kmap_atomic(page); cpu = map + offset_in_page(offset); @@ -65,136 +71,137 @@ static int cpu_get(struct drm_i915_gem_object *obj, *v = *cpu; kunmap_atomic(map); - i915_gem_object_finish_access(obj); + i915_gem_object_finish_access(ctx->obj); return 0; } -static int gtt_set(struct drm_i915_gem_object *obj, - unsigned long offset, - u32 v) +static int gtt_set(struct context *ctx, unsigned long offset, u32 v) { struct i915_vma *vma; u32 __iomem *map; - int err; + int err = 0; - i915_gem_object_lock(obj); - err = i915_gem_object_set_to_gtt_domain(obj, true); - i915_gem_object_unlock(obj); + i915_gem_object_lock(ctx->obj); + err = i915_gem_object_set_to_gtt_domain(ctx->obj, true); + i915_gem_object_unlock(ctx->obj); if (err) return err; - vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0, PIN_MAPPABLE); + vma = i915_gem_object_ggtt_pin(ctx->obj, NULL, 0, 0, PIN_MAPPABLE); if (IS_ERR(vma)) return PTR_ERR(vma); + intel_gt_pm_get(vma->vm->gt); + map = i915_vma_pin_iomap(vma); i915_vma_unpin(vma); - if (IS_ERR(map)) - return PTR_ERR(map); + if (IS_ERR(map)) { + err = PTR_ERR(map); + goto out_rpm; + } iowrite32(v, &map[offset / sizeof(*map)]); i915_vma_unpin_iomap(vma); - return 0; +out_rpm: + intel_gt_pm_put(vma->vm->gt); + return err; } -static int gtt_get(struct drm_i915_gem_object *obj, - unsigned long offset, - u32 *v) +static int gtt_get(struct context *ctx, unsigned long offset, u32 *v) { struct i915_vma *vma; u32 __iomem *map; - int err; + int err = 0; - i915_gem_object_lock(obj); - err = i915_gem_object_set_to_gtt_domain(obj, false); - i915_gem_object_unlock(obj); + i915_gem_object_lock(ctx->obj); + err = i915_gem_object_set_to_gtt_domain(ctx->obj, false); + i915_gem_object_unlock(ctx->obj); if (err) return err; - vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0, PIN_MAPPABLE); + vma = i915_gem_object_ggtt_pin(ctx->obj, NULL, 0, 0, PIN_MAPPABLE); if (IS_ERR(vma)) return PTR_ERR(vma); + intel_gt_pm_get(vma->vm->gt); + map = i915_vma_pin_iomap(vma); i915_vma_unpin(vma); - if (IS_ERR(map)) - return PTR_ERR(map); + if (IS_ERR(map)) { + err = PTR_ERR(map); + goto out_rpm; + } *v = ioread32(&map[offset / sizeof(*map)]); i915_vma_unpin_iomap(vma); - return 0; +out_rpm: + intel_gt_pm_put(vma->vm->gt); + return err; } -static int wc_set(struct drm_i915_gem_object *obj, - unsigned long offset, - u32 v) +static int wc_set(struct context *ctx, unsigned long offset, u32 v) { u32 *map; int err; - i915_gem_object_lock(obj); - err = i915_gem_object_set_to_wc_domain(obj, true); - i915_gem_object_unlock(obj); + i915_gem_object_lock(ctx->obj); + err = i915_gem_object_set_to_wc_domain(ctx->obj, true); + i915_gem_object_unlock(ctx->obj); if (err) return err; - map = i915_gem_object_pin_map(obj, I915_MAP_WC); + map = i915_gem_object_pin_map(ctx->obj, I915_MAP_WC); if (IS_ERR(map)) return PTR_ERR(map); map[offset / sizeof(*map)] = v; - i915_gem_object_unpin_map(obj); + i915_gem_object_unpin_map(ctx->obj); return 0; } -static int wc_get(struct drm_i915_gem_object *obj, - unsigned long offset, - u32 *v) +static int wc_get(struct context *ctx, unsigned long offset, u32 *v) { u32 *map; int err; - 
i915_gem_object_lock(obj); - err = i915_gem_object_set_to_wc_domain(obj, false); - i915_gem_object_unlock(obj); + i915_gem_object_lock(ctx->obj); + err = i915_gem_object_set_to_wc_domain(ctx->obj, false); + i915_gem_object_unlock(ctx->obj); if (err) return err; - map = i915_gem_object_pin_map(obj, I915_MAP_WC); + map = i915_gem_object_pin_map(ctx->obj, I915_MAP_WC); if (IS_ERR(map)) return PTR_ERR(map); *v = map[offset / sizeof(*map)]; - i915_gem_object_unpin_map(obj); + i915_gem_object_unpin_map(ctx->obj); return 0; } -static int gpu_set(struct drm_i915_gem_object *obj, - unsigned long offset, - u32 v) +static int gpu_set(struct context *ctx, unsigned long offset, u32 v) { - struct drm_i915_private *i915 = to_i915(obj->base.dev); struct i915_request *rq; struct i915_vma *vma; u32 *cs; int err; - i915_gem_object_lock(obj); - err = i915_gem_object_set_to_gtt_domain(obj, true); - i915_gem_object_unlock(obj); + i915_gem_object_lock(ctx->obj); + err = i915_gem_object_set_to_gtt_domain(ctx->obj, true); + i915_gem_object_unlock(ctx->obj); if (err) return err; - vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0, 0); + vma = i915_gem_object_ggtt_pin(ctx->obj, NULL, 0, 0, 0); if (IS_ERR(vma)) return PTR_ERR(vma); - rq = i915_request_create(i915->engine[RCS0]->kernel_context); + rq = intel_engine_create_kernel_request(ctx->engine); if (IS_ERR(rq)) { i915_vma_unpin(vma); return PTR_ERR(rq); @@ -207,12 +214,12 @@ static int gpu_set(struct drm_i915_gem_object *obj, return PTR_ERR(cs); } - if (INTEL_GEN(i915) >= 8) { + if (INTEL_GEN(ctx->engine->i915) >= 8) { *cs++ = MI_STORE_DWORD_IMM_GEN4 | 1 << 22; *cs++ = lower_32_bits(i915_ggtt_offset(vma) + offset); *cs++ = upper_32_bits(i915_ggtt_offset(vma) + offset); *cs++ = v; - } else if (INTEL_GEN(i915) >= 4) { + } else if (INTEL_GEN(ctx->engine->i915) >= 4) { *cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT; *cs++ = 0; *cs++ = i915_ggtt_offset(vma) + offset; @@ -226,7 +233,9 @@ static int gpu_set(struct drm_i915_gem_object *obj, intel_ring_advance(rq, cs); i915_vma_lock(vma); - err = i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE); + err = i915_request_await_object(rq, vma->obj, true); + if (err == 0) + err = i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE); i915_vma_unlock(vma); i915_vma_unpin(vma); @@ -235,29 +244,34 @@ static int gpu_set(struct drm_i915_gem_object *obj, return err; } -static bool always_valid(struct drm_i915_private *i915) +static bool always_valid(struct context *ctx) { return true; } -static bool needs_fence_registers(struct drm_i915_private *i915) +static bool needs_fence_registers(struct context *ctx) { - return !i915_terminally_wedged(i915); + struct intel_gt *gt = ctx->engine->gt; + + if (intel_gt_is_wedged(gt)) + return false; + + return gt->ggtt->num_fences; } -static bool needs_mi_store_dword(struct drm_i915_private *i915) +static bool needs_mi_store_dword(struct context *ctx) { - if (i915_terminally_wedged(i915)) + if (intel_gt_is_wedged(ctx->engine->gt)) return false; - return intel_engine_can_store_dword(i915->engine[RCS0]); + return intel_engine_can_store_dword(ctx->engine); } static const struct igt_coherency_mode { const char *name; - int (*set)(struct drm_i915_gem_object *, unsigned long offset, u32 v); - int (*get)(struct drm_i915_gem_object *, unsigned long offset, u32 *v); - bool (*valid)(struct drm_i915_private *i915); + int (*set)(struct context *ctx, unsigned long offset, u32 v); + int (*get)(struct context *ctx, unsigned long offset, u32 *v); + bool (*valid)(struct context *ctx); } igt_coherency_mode[] = { { 
"cpu", cpu_set, cpu_get, always_valid }, { "gtt", gtt_set, gtt_get, needs_fence_registers }, @@ -266,19 +280,37 @@ static const struct igt_coherency_mode { { }, }; +static struct intel_engine_cs * +random_engine(struct drm_i915_private *i915, struct rnd_state *prng) +{ + struct intel_engine_cs *engine; + unsigned int count; + + count = 0; + for_each_uabi_engine(engine, i915) + count++; + + count = i915_prandom_u32_max_state(count, prng); + for_each_uabi_engine(engine, i915) + if (count-- == 0) + return engine; + + return NULL; +} + static int igt_gem_coherency(void *arg) { const unsigned int ncachelines = PAGE_SIZE/64; - I915_RND_STATE(prng); struct drm_i915_private *i915 = arg; const struct igt_coherency_mode *read, *write, *over; - struct drm_i915_gem_object *obj; - intel_wakeref_t wakeref; unsigned long count, n; u32 *offsets, *values; + I915_RND_STATE(prng); + struct context ctx; int err = 0; - /* We repeatedly write, overwrite and read from a sequence of + /* + * We repeatedly write, overwrite and read from a sequence of * cachelines in order to try and detect incoherency (unflushed writes * from either the CPU or GPU). Each setter/getter uses our cache * domain API which should prevent incoherency. @@ -292,34 +324,40 @@ static int igt_gem_coherency(void *arg) values = offsets + ncachelines; - mutex_lock(&i915->drm.struct_mutex); - wakeref = intel_runtime_pm_get(&i915->runtime_pm); + ctx.engine = random_engine(i915, &prng); + if (!ctx.engine) { + err = -ENODEV; + goto out_free; + } + pr_info("%s: using %s\n", __func__, ctx.engine->name); + intel_engine_pm_get(ctx.engine); + for (over = igt_coherency_mode; over->name; over++) { if (!over->set) continue; - if (!over->valid(i915)) + if (!over->valid(&ctx)) continue; for (write = igt_coherency_mode; write->name; write++) { if (!write->set) continue; - if (!write->valid(i915)) + if (!write->valid(&ctx)) continue; for (read = igt_coherency_mode; read->name; read++) { if (!read->get) continue; - if (!read->valid(i915)) + if (!read->valid(&ctx)) continue; for_each_prime_number_from(count, 1, ncachelines) { - obj = i915_gem_object_create_internal(i915, PAGE_SIZE); - if (IS_ERR(obj)) { - err = PTR_ERR(obj); - goto unlock; + ctx.obj = i915_gem_object_create_internal(i915, PAGE_SIZE); + if (IS_ERR(ctx.obj)) { + err = PTR_ERR(ctx.obj); + goto out_pm; } i915_random_reorder(offsets, ncachelines, &prng); @@ -327,7 +365,7 @@ static int igt_gem_coherency(void *arg) values[n] = prandom_u32_state(&prng); for (n = 0; n < count; n++) { - err = over->set(obj, offsets[n], ~values[n]); + err = over->set(&ctx, offsets[n], ~values[n]); if (err) { pr_err("Failed to set stale value[%ld/%ld] in object using %s, err=%d\n", n, count, over->name, err); @@ -336,7 +374,7 @@ static int igt_gem_coherency(void *arg) } for (n = 0; n < count; n++) { - err = write->set(obj, offsets[n], values[n]); + err = write->set(&ctx, offsets[n], values[n]); if (err) { pr_err("Failed to set value[%ld/%ld] in object using %s, err=%d\n", n, count, write->name, err); @@ -347,7 +385,7 @@ static int igt_gem_coherency(void *arg) for (n = 0; n < count; n++) { u32 found; - err = read->get(obj, offsets[n], &found); + err = read->get(&ctx, offsets[n], &found); if (err) { pr_err("Failed to get value[%ld/%ld] in object using %s, err=%d\n", n, count, read->name, err); @@ -365,20 +403,20 @@ static int igt_gem_coherency(void *arg) } } - i915_gem_object_put(obj); + i915_gem_object_put(ctx.obj); } } } } -unlock: - intel_runtime_pm_put(&i915->runtime_pm, wakeref); - mutex_unlock(&i915->drm.struct_mutex); 
+out_pm: + intel_engine_pm_put(ctx.engine); +out_free: kfree(offsets); return err; put_object: - i915_gem_object_put(obj); - goto unlock; + i915_gem_object_put(ctx.obj); + goto out_pm; } int i915_gem_coherency_live_selftests(struct drm_i915_private *i915) diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c index eaa2b16574c7..7fc46861a54d 100644 --- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c +++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c @@ -7,6 +7,9 @@ #include <linux/prime_numbers.h> #include "gem/i915_gem_pm.h" +#include "gt/intel_engine_pm.h" +#include "gt/intel_gt.h" +#include "gt/intel_gt_requests.h" #include "gt/intel_reset.h" #include "i915_selftest.h" @@ -24,16 +27,20 @@ #define DW_PER_PAGE (PAGE_SIZE / sizeof(u32)) +static inline struct i915_address_space *ctx_vm(struct i915_gem_context *ctx) +{ + /* single threaded, private ctx */ + return rcu_dereference_protected(ctx->vm, true); +} + static int live_nop_switch(void *arg) { const unsigned int nctx = 1024; struct drm_i915_private *i915 = arg; struct intel_engine_cs *engine; struct i915_gem_context **ctx; - enum intel_engine_id id; - intel_wakeref_t wakeref; struct igt_live_test t; - struct drm_file *file; + struct file *file; unsigned long n; int err = -ENODEV; @@ -52,43 +59,49 @@ static int live_nop_switch(void *arg) if (IS_ERR(file)) return PTR_ERR(file); - mutex_lock(&i915->drm.struct_mutex); - wakeref = intel_runtime_pm_get(&i915->runtime_pm); - ctx = kcalloc(nctx, sizeof(*ctx), GFP_KERNEL); if (!ctx) { err = -ENOMEM; - goto out_unlock; + goto out_file; } for (n = 0; n < nctx; n++) { ctx[n] = live_context(i915, file); if (IS_ERR(ctx[n])) { err = PTR_ERR(ctx[n]); - goto out_unlock; + goto out_file; } } - for_each_engine(engine, i915, id) { - struct i915_request *rq; + for_each_uabi_engine(engine, i915) { + struct i915_request *rq = NULL; unsigned long end_time, prime; ktime_t times[2] = {}; times[0] = ktime_get_raw(); for (n = 0; n < nctx; n++) { - rq = igt_request_alloc(ctx[n], engine); - if (IS_ERR(rq)) { - err = PTR_ERR(rq); - goto out_unlock; + struct i915_request *this; + + this = igt_request_alloc(ctx[n], engine); + if (IS_ERR(this)) { + err = PTR_ERR(this); + goto out_file; } - i915_request_add(rq); + if (rq) { + i915_request_await_dma_fence(this, &rq->fence); + i915_request_put(rq); + } + rq = i915_request_get(this); + i915_request_add(this); } if (i915_request_wait(rq, 0, HZ / 5) < 0) { pr_err("Failed to populated %d contexts\n", nctx); - i915_gem_set_wedged(i915); + intel_gt_set_wedged(&i915->gt); + i915_request_put(rq); err = -EIO; - goto out_unlock; + goto out_file; } + i915_request_put(rq); times[1] = ktime_get_raw(); @@ -97,17 +110,25 @@ static int live_nop_switch(void *arg) err = igt_live_test_begin(&t, i915, __func__, engine->name); if (err) - goto out_unlock; + goto out_file; end_time = jiffies + i915_selftest.timeout_jiffies; for_each_prime_number_from(prime, 2, 8192) { times[1] = ktime_get_raw(); + rq = NULL; for (n = 0; n < prime; n++) { - rq = igt_request_alloc(ctx[n % nctx], engine); - if (IS_ERR(rq)) { - err = PTR_ERR(rq); - goto out_unlock; + struct i915_request *this; + + this = igt_request_alloc(ctx[n % nctx], engine); + if (IS_ERR(this)) { + err = PTR_ERR(this); + goto out_file; + } + + if (rq) { /* Force submission order */ + i915_request_await_dma_fence(this, &rq->fence); + i915_request_put(rq); } /* @@ -124,14 +145,18 @@ static int live_nop_switch(void *arg) * for latency. 
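ctx->vm is now an RCU-managed pointer, so these tests reach it in one of two ways: ctx_vm() above asserts single-threaded ownership of a private context and dereferences without taking a reference, while paths that submit work (the scratch read/write helpers later in this file) pin the vm with i915_gem_context_get_vm_rcu() and drop it with i915_vm_put(). A small sketch of both styles; the *_total helper names are illustrative only:

static u64 ctx_vm_total(struct i915_gem_context *ctx)
{
	/* Private, single-threaded context: no reference needed. */
	return ctx_vm(ctx)->total;
}

static u64 ctx_vm_total_ref(struct i915_gem_context *ctx)
{
	struct i915_address_space *vm = i915_gem_context_get_vm_rcu(ctx);
	u64 total = vm->total;

	i915_vm_put(vm);	/* drop the reference taken above */
	return total;
}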
*/ - i915_request_add(rq); + rq = i915_request_get(this); + i915_request_add(this); } + GEM_BUG_ON(!rq); if (i915_request_wait(rq, 0, HZ / 5) < 0) { pr_err("Switching between %ld contexts timed out\n", prime); - i915_gem_set_wedged(i915); + intel_gt_set_wedged(&i915->gt); + i915_request_put(rq); break; } + i915_request_put(rq); times[1] = ktime_sub(ktime_get_raw(), times[1]); if (prime == 2) @@ -143,7 +168,7 @@ static int live_nop_switch(void *arg) err = igt_live_test_end(&t); if (err) - goto out_unlock; + goto out_file; pr_info("Switch latencies on %s: 1 = %lluns, %lu = %lluns\n", engine->name, @@ -151,75 +176,237 @@ static int live_nop_switch(void *arg) prime - 1, div64_u64(ktime_to_ns(times[1]), prime - 1)); } -out_unlock: - intel_runtime_pm_put(&i915->runtime_pm, wakeref); - mutex_unlock(&i915->drm.struct_mutex); - mock_file_free(i915, file); +out_file: + fput(file); return err; } -static struct i915_vma * -gpu_fill_dw(struct i915_vma *vma, u64 offset, unsigned long count, u32 value) +struct parallel_switch { + struct task_struct *tsk; + struct intel_context *ce[2]; +}; + +static int __live_parallel_switch1(void *data) { - struct drm_i915_gem_object *obj; - const int gen = INTEL_GEN(vma->vm->i915); - unsigned long n, size; - u32 *cmd; - int err; + struct parallel_switch *arg = data; + IGT_TIMEOUT(end_time); + unsigned long count; - size = (4 * count + 1) * sizeof(u32); - size = round_up(size, PAGE_SIZE); - obj = i915_gem_object_create_internal(vma->vm->i915, size); - if (IS_ERR(obj)) - return ERR_CAST(obj); + count = 0; + do { + struct i915_request *rq = NULL; + int err, n; - cmd = i915_gem_object_pin_map(obj, I915_MAP_WB); - if (IS_ERR(cmd)) { - err = PTR_ERR(cmd); - goto err; + err = 0; + for (n = 0; !err && n < ARRAY_SIZE(arg->ce); n++) { + struct i915_request *prev = rq; + + rq = i915_request_create(arg->ce[n]); + if (IS_ERR(rq)) { + i915_request_put(prev); + return PTR_ERR(rq); + } + + i915_request_get(rq); + if (prev) { + err = i915_request_await_dma_fence(rq, &prev->fence); + i915_request_put(prev); + } + + i915_request_add(rq); + } + if (i915_request_wait(rq, 0, HZ / 5) < 0) + err = -ETIME; + i915_request_put(rq); + if (err) + return err; + + count++; + } while (!__igt_timeout(end_time, NULL)); + + pr_info("%s: %lu switches (sync)\n", arg->ce[0]->engine->name, count); + return 0; +} + +static int __live_parallel_switchN(void *data) +{ + struct parallel_switch *arg = data; + struct i915_request *rq = NULL; + IGT_TIMEOUT(end_time); + unsigned long count; + int n; + + count = 0; + do { + for (n = 0; n < ARRAY_SIZE(arg->ce); n++) { + struct i915_request *prev = rq; + int err = 0; + + rq = i915_request_create(arg->ce[n]); + if (IS_ERR(rq)) { + i915_request_put(prev); + return PTR_ERR(rq); + } + + i915_request_get(rq); + if (prev) { + err = i915_request_await_dma_fence(rq, &prev->fence); + i915_request_put(prev); + } + + i915_request_add(rq); + if (err) { + i915_request_put(rq); + return err; + } + } + + count++; + } while (!__igt_timeout(end_time, NULL)); + i915_request_put(rq); + + pr_info("%s: %lu switches (many)\n", arg->ce[0]->engine->name, count); + return 0; +} + +static int live_parallel_switch(void *arg) +{ + struct drm_i915_private *i915 = arg; + static int (* const func[])(void *arg) = { + __live_parallel_switch1, + __live_parallel_switchN, + NULL, + }; + struct parallel_switch *data = NULL; + struct i915_gem_engines *engines; + struct i915_gem_engines_iter it; + int (* const *fn)(void *arg); + struct i915_gem_context *ctx; + struct intel_context *ce; + struct file *file; 
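With the struct_mutex serialisation gone, live_nop_switch() keeps its submissions ordered explicitly: every new request awaits the previous request's fence, only the newest reference is kept, and a single bounded wait on the tail then covers the whole chain. Condensed from the hunks above (n, nctx, ctx[], engine and err come from the surrounding test):

	struct i915_request *rq = NULL;

	for (n = 0; n < nctx; n++) {
		struct i915_request *this;

		this = igt_request_alloc(ctx[n], engine);
		if (IS_ERR(this))
			return PTR_ERR(this);

		if (rq) {
			/* Force submission order: this must run after rq. */
			i915_request_await_dma_fence(this, &rq->fence);
			i915_request_put(rq);
		}
		rq = i915_request_get(this);	/* keep a reference to the tail only */
		i915_request_add(this);
	}

	/* The tail completing implies every earlier request completed. */
	if (i915_request_wait(rq, 0, HZ / 5) < 0)
		err = -EIO;
	i915_request_put(rq);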
+ int n, m, count; + int err = 0; + + /* + * Check we can process switches on all engines simultaneously. + */ + + if (!DRIVER_CAPS(i915)->has_logical_contexts) + return 0; + + file = mock_file(i915); + if (IS_ERR(file)) + return PTR_ERR(file); + + ctx = live_context(i915, file); + if (IS_ERR(ctx)) { + err = PTR_ERR(ctx); + goto out_file; } - GEM_BUG_ON(offset + (count - 1) * PAGE_SIZE > vma->node.size); - offset += vma->node.start; + engines = i915_gem_context_lock_engines(ctx); + count = engines->num_engines; - for (n = 0; n < count; n++) { - if (gen >= 8) { - *cmd++ = MI_STORE_DWORD_IMM_GEN4; - *cmd++ = lower_32_bits(offset); - *cmd++ = upper_32_bits(offset); - *cmd++ = value; - } else if (gen >= 4) { - *cmd++ = MI_STORE_DWORD_IMM_GEN4 | - (gen < 6 ? MI_USE_GGTT : 0); - *cmd++ = 0; - *cmd++ = offset; - *cmd++ = value; - } else { - *cmd++ = MI_STORE_DWORD_IMM | MI_MEM_VIRTUAL; - *cmd++ = offset; - *cmd++ = value; + data = kcalloc(count, sizeof(*data), GFP_KERNEL); + if (!data) { + i915_gem_context_unlock_engines(ctx); + err = -ENOMEM; + goto out_file; + } + + m = 0; /* Use the first context as our template for the engines */ + for_each_gem_engine(ce, engines, it) { + err = intel_context_pin(ce); + if (err) { + i915_gem_context_unlock_engines(ctx); + goto out; } - offset += PAGE_SIZE; + data[m++].ce[0] = intel_context_get(ce); } - *cmd = MI_BATCH_BUFFER_END; - i915_gem_object_flush_map(obj); - i915_gem_object_unpin_map(obj); + i915_gem_context_unlock_engines(ctx); - vma = i915_vma_instance(obj, vma->vm, NULL); - if (IS_ERR(vma)) { - err = PTR_ERR(vma); - goto err; + /* Clone the same set of engines into the other contexts */ + for (n = 1; n < ARRAY_SIZE(data->ce); n++) { + ctx = live_context(i915, file); + if (IS_ERR(ctx)) { + err = PTR_ERR(ctx); + goto out; + } + + for (m = 0; m < count; m++) { + if (!data[m].ce[0]) + continue; + + ce = intel_context_create(data[m].ce[0]->engine); + if (IS_ERR(ce)) + goto out; + + err = intel_context_pin(ce); + if (err) { + intel_context_put(ce); + goto out; + } + + data[m].ce[n] = ce; + } } - err = i915_vma_pin(vma, 0, 0, PIN_USER); - if (err) - goto err; + for (fn = func; !err && *fn; fn++) { + struct igt_live_test t; + int n; - return vma; + err = igt_live_test_begin(&t, i915, __func__, ""); + if (err) + break; -err: - i915_gem_object_put(obj); - return ERR_PTR(err); + for (n = 0; n < count; n++) { + if (!data[n].ce[0]) + continue; + + data[n].tsk = kthread_run(*fn, &data[n], + "igt/parallel:%s", + data[n].ce[0]->engine->name); + if (IS_ERR(data[n].tsk)) { + err = PTR_ERR(data[n].tsk); + break; + } + get_task_struct(data[n].tsk); + } + + yield(); /* start all threads before we kthread_stop() */ + + for (n = 0; n < count; n++) { + int status; + + if (IS_ERR_OR_NULL(data[n].tsk)) + continue; + + status = kthread_stop(data[n].tsk); + if (status && !err) + err = status; + + put_task_struct(data[n].tsk); + data[n].tsk = NULL; + } + + if (igt_live_test_end(&t)) + err = -EIO; + } + +out: + for (n = 0; n < count; n++) { + for (m = 0; m < ARRAY_SIZE(data->ce); m++) { + if (!data[n].ce[m]) + continue; + + intel_context_unpin(data[n].ce[m]); + intel_context_put(data[n].ce[m]); + } + } + kfree(data); +out_file: + fput(file); + return err; } static unsigned long real_page_count(struct drm_i915_gem_object *obj) @@ -232,100 +419,39 @@ static unsigned long fake_page_count(struct drm_i915_gem_object *obj) return huge_gem_object_dma_size(obj) >> PAGE_SHIFT; } -static int gpu_fill(struct drm_i915_gem_object *obj, - struct i915_gem_context *ctx, - struct intel_engine_cs 
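live_parallel_switch() above drives every engine from its own kernel thread. The lifecycle is the standard kthread_run()/kthread_stop() pairing, with an extra task reference taken so the parent can still join a worker that has already exited, and a yield() so all workers are launched before the first one is stopped. The relevant shape, condensed (data[], fn, count and err as in the hunks above):

	for (n = 0; n < count; n++) {
		data[n].tsk = kthread_run(*fn, &data[n], "igt/parallel:%s",
					  data[n].ce[0]->engine->name);
		if (IS_ERR(data[n].tsk)) {
			err = PTR_ERR(data[n].tsk);
			break;
		}
		get_task_struct(data[n].tsk);	/* keep the task valid for kthread_stop() */
	}

	yield();	/* let the workers start before we begin stopping them */

	for (n = 0; n < count; n++) {
		int status;

		if (IS_ERR_OR_NULL(data[n].tsk))
			continue;

		status = kthread_stop(data[n].tsk);	/* join; returns the worker's result */
		if (status && !err)
			err = status;

		put_task_struct(data[n].tsk);
		data[n].tsk = NULL;
	}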
*engine, +static int gpu_fill(struct intel_context *ce, + struct drm_i915_gem_object *obj, unsigned int dw) { - struct drm_i915_private *i915 = to_i915(obj->base.dev); - struct i915_address_space *vm = ctx->vm ?: &i915->ggtt.vm; - struct i915_request *rq; struct i915_vma *vma; - struct i915_vma *batch; - unsigned int flags; int err; - GEM_BUG_ON(obj->base.size > vm->total); - GEM_BUG_ON(!intel_engine_can_store_dword(engine)); + GEM_BUG_ON(obj->base.size > ce->vm->total); + GEM_BUG_ON(!intel_engine_can_store_dword(ce->engine)); - vma = i915_vma_instance(obj, vm, NULL); + vma = i915_vma_instance(obj, ce->vm, NULL); if (IS_ERR(vma)) return PTR_ERR(vma); - i915_gem_object_lock(obj); - err = i915_gem_object_set_to_gtt_domain(obj, false); - i915_gem_object_unlock(obj); - if (err) - return err; - err = i915_vma_pin(vma, 0, 0, PIN_HIGH | PIN_USER); if (err) return err; - /* Within the GTT the huge objects maps every page onto + /* + * Within the GTT the huge objects maps every page onto * its 1024 real pages (using phys_pfn = dma_pfn % 1024). * We set the nth dword within the page using the nth * mapping via the GTT - this should exercise the GTT mapping * whilst checking that each context provides a unique view * into the object. */ - batch = gpu_fill_dw(vma, - (dw * real_page_count(obj)) << PAGE_SHIFT | - (dw * sizeof(u32)), - real_page_count(obj), - dw); - if (IS_ERR(batch)) { - err = PTR_ERR(batch); - goto err_vma; - } - - rq = igt_request_alloc(ctx, engine); - if (IS_ERR(rq)) { - err = PTR_ERR(rq); - goto err_batch; - } - - flags = 0; - if (INTEL_GEN(vm->i915) <= 5) - flags |= I915_DISPATCH_SECURE; - - err = engine->emit_bb_start(rq, - batch->node.start, batch->node.size, - flags); - if (err) - goto err_request; - - i915_vma_lock(batch); - err = i915_vma_move_to_active(batch, rq, 0); - i915_vma_unlock(batch); - if (err) - goto skip_request; - - i915_vma_lock(vma); - err = i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE); - i915_vma_unlock(vma); - if (err) - goto skip_request; - - i915_request_add(rq); - - i915_vma_unpin(batch); - i915_vma_close(batch); - i915_vma_put(batch); - + err = igt_gpu_fill_dw(ce, vma, + (dw * real_page_count(obj)) << PAGE_SHIFT | + (dw * sizeof(u32)), + real_page_count(obj), + dw); i915_vma_unpin(vma); - return 0; - -skip_request: - i915_request_skip(rq, err); -err_request: - i915_request_add(rq); -err_batch: - i915_vma_unpin(batch); - i915_vma_put(batch); -err_vma: - i915_vma_unpin(vma); return err; } @@ -404,17 +530,17 @@ out_unmap: return err; } -static int file_add_object(struct drm_file *file, - struct drm_i915_gem_object *obj) +static int file_add_object(struct file *file, struct drm_i915_gem_object *obj) { int err; GEM_BUG_ON(obj->base.handle_count); /* tie the object to the drm_file for easy reaping */ - err = idr_alloc(&file->object_idr, &obj->base, 1, 0, GFP_KERNEL); + err = idr_alloc(&to_drm_file(file)->object_idr, + &obj->base, 1, 0, GFP_KERNEL); if (err < 0) - return err; + return err; i915_gem_object_get(obj); obj->base.handle_count++; @@ -422,19 +548,21 @@ static int file_add_object(struct drm_file *file, } static struct drm_i915_gem_object * -create_test_object(struct i915_gem_context *ctx, - struct drm_file *file, +create_test_object(struct i915_address_space *vm, + struct file *file, struct list_head *objects) { struct drm_i915_gem_object *obj; - struct i915_address_space *vm = ctx->vm ?: &ctx->i915->ggtt.vm; u64 size; int err; + /* Keep in GEM's good graces */ + intel_gt_retire_requests(vm->gt); + size = min(vm->total / 2, 1024ull * DW_PER_PAGE * 
PAGE_SIZE); size = round_down(size, DW_PER_PAGE * PAGE_SIZE); - obj = huge_gem_object(ctx->i915, DW_PER_PAGE * PAGE_SIZE, size); + obj = huge_gem_object(vm->i915, DW_PER_PAGE * PAGE_SIZE, size); if (IS_ERR(obj)) return obj; @@ -462,11 +590,49 @@ static unsigned long max_dwords(struct drm_i915_gem_object *obj) return npages / DW_PER_PAGE; } +static void throttle_release(struct i915_request **q, int count) +{ + int i; + + for (i = 0; i < count; i++) { + if (IS_ERR_OR_NULL(q[i])) + continue; + + i915_request_put(fetch_and_zero(&q[i])); + } +} + +static int throttle(struct intel_context *ce, + struct i915_request **q, int count) +{ + int i; + + if (!IS_ERR_OR_NULL(q[0])) { + if (i915_request_wait(q[0], + I915_WAIT_INTERRUPTIBLE, + MAX_SCHEDULE_TIMEOUT) < 0) + return -EINTR; + + i915_request_put(q[0]); + } + + for (i = 0; i < count - 1; i++) + q[i] = q[i + 1]; + + q[i] = intel_context_create_request(ce); + if (IS_ERR(q[i])) + return PTR_ERR(q[i]); + + i915_request_get(q[i]); + i915_request_add(q[i]); + + return 0; +} + static int igt_ctx_exec(void *arg) { struct drm_i915_private *i915 = arg; struct intel_engine_cs *engine; - enum intel_engine_id id; int err = -ENODEV; /* @@ -478,13 +644,14 @@ static int igt_ctx_exec(void *arg) if (!DRIVER_CAPS(i915)->has_logical_contexts) return 0; - for_each_engine(engine, i915, id) { + for_each_uabi_engine(engine, i915) { struct drm_i915_gem_object *obj = NULL; unsigned long ncontexts, ndwords, dw; + struct i915_request *tq[5] = {}; struct igt_live_test t; - struct drm_file *file; IGT_TIMEOUT(end_time); LIST_HEAD(objects); + struct file *file; if (!intel_engine_can_store_dword(engine)) continue; @@ -496,41 +663,53 @@ static int igt_ctx_exec(void *arg) if (IS_ERR(file)) return PTR_ERR(file); - mutex_lock(&i915->drm.struct_mutex); - err = igt_live_test_begin(&t, i915, __func__, engine->name); if (err) - goto out_unlock; + goto out_file; ncontexts = 0; ndwords = 0; dw = 0; while (!time_after(jiffies, end_time)) { struct i915_gem_context *ctx; - intel_wakeref_t wakeref; + struct intel_context *ce; - ctx = live_context(i915, file); + ctx = kernel_context(i915); if (IS_ERR(ctx)) { err = PTR_ERR(ctx); - goto out_unlock; + goto out_file; } + ce = i915_gem_context_get_engine(ctx, engine->legacy_idx); + GEM_BUG_ON(IS_ERR(ce)); + if (!obj) { - obj = create_test_object(ctx, file, &objects); + obj = create_test_object(ce->vm, file, &objects); if (IS_ERR(obj)) { err = PTR_ERR(obj); - goto out_unlock; + intel_context_put(ce); + kernel_context_close(ctx); + goto out_file; } } - with_intel_runtime_pm(&i915->runtime_pm, wakeref) - err = gpu_fill(obj, ctx, engine, dw); + err = gpu_fill(ce, obj, dw); if (err) { - pr_err("Failed to fill dword %lu [%lu/%lu] with gpu (%s) in ctx %u [full-ppgtt? %s], err=%d\n", + pr_err("Failed to fill dword %lu [%lu/%lu] with gpu (%s) [full-ppgtt? 
%s], err=%d\n", ndwords, dw, max_dwords(obj), - engine->name, ctx->hw_id, - yesno(!!ctx->vm), err); - goto out_unlock; + engine->name, + yesno(!!rcu_access_pointer(ctx->vm)), + err); + intel_context_put(ce); + kernel_context_close(ctx); + goto out_file; + } + + err = throttle(ce, tq, ARRAY_SIZE(tq)); + if (err) { + intel_context_put(ce); + kernel_context_close(ctx); + goto out_file; } if (++dw == max_dwords(obj)) { @@ -540,6 +719,9 @@ static int igt_ctx_exec(void *arg) ndwords++; ncontexts++; + + intel_context_put(ce); + kernel_context_close(ctx); } pr_info("Submitted %lu contexts to %s, filling %lu dwords\n", @@ -557,14 +739,16 @@ static int igt_ctx_exec(void *arg) dw += rem; } -out_unlock: +out_file: + throttle_release(tq, ARRAY_SIZE(tq)); if (igt_live_test_end(&t)) err = -EIO; - mutex_unlock(&i915->drm.struct_mutex); - mock_file_free(i915, file); + fput(file); if (err) return err; + + i915_gem_drain_freed_objects(i915); } return 0; @@ -573,11 +757,11 @@ out_unlock: static int igt_shared_ctx_exec(void *arg) { struct drm_i915_private *i915 = arg; + struct i915_request *tq[5] = {}; struct i915_gem_context *parent; struct intel_engine_cs *engine; - enum intel_engine_id id; struct igt_live_test t; - struct drm_file *file; + struct file *file; int err = 0; /* @@ -592,24 +776,22 @@ static int igt_shared_ctx_exec(void *arg) if (IS_ERR(file)) return PTR_ERR(file); - mutex_lock(&i915->drm.struct_mutex); - parent = live_context(i915, file); if (IS_ERR(parent)) { err = PTR_ERR(parent); - goto out_unlock; + goto out_file; } if (!parent->vm) { /* not full-ppgtt; nothing to share */ err = 0; - goto out_unlock; + goto out_file; } err = igt_live_test_begin(&t, i915, __func__, ""); if (err) - goto out_unlock; + goto out_file; - for_each_engine(engine, i915, id) { + for_each_uabi_engine(engine, i915) { unsigned long ncontexts, ndwords, dw; struct drm_i915_gem_object *obj = NULL; IGT_TIMEOUT(end_time); @@ -623,7 +805,7 @@ static int igt_shared_ctx_exec(void *arg) ncontexts = 0; while (!time_after(jiffies, end_time)) { struct i915_gem_context *ctx; - intel_wakeref_t wakeref; + struct intel_context *ce; ctx = kernel_context(i915); if (IS_ERR(ctx)) { @@ -631,25 +813,39 @@ static int igt_shared_ctx_exec(void *arg) goto out_test; } - __assign_ppgtt(ctx, parent->vm); + mutex_lock(&ctx->mutex); + __assign_ppgtt(ctx, ctx_vm(parent)); + mutex_unlock(&ctx->mutex); + + ce = i915_gem_context_get_engine(ctx, engine->legacy_idx); + GEM_BUG_ON(IS_ERR(ce)); if (!obj) { - obj = create_test_object(parent, file, &objects); + obj = create_test_object(ctx_vm(parent), + file, &objects); if (IS_ERR(obj)) { err = PTR_ERR(obj); + intel_context_put(ce); kernel_context_close(ctx); goto out_test; } } - err = 0; - with_intel_runtime_pm(&i915->runtime_pm, wakeref) - err = gpu_fill(obj, ctx, engine, dw); + err = gpu_fill(ce, obj, dw); if (err) { - pr_err("Failed to fill dword %lu [%lu/%lu] with gpu (%s) in ctx %u [full-ppgtt? %s], err=%d\n", + pr_err("Failed to fill dword %lu [%lu/%lu] with gpu (%s) [full-ppgtt? 
%s], err=%d\n", ndwords, dw, max_dwords(obj), - engine->name, ctx->hw_id, - yesno(!!ctx->vm), err); + engine->name, + yesno(!!rcu_access_pointer(ctx->vm)), + err); + intel_context_put(ce); + kernel_context_close(ctx); + goto out_test; + } + + err = throttle(ce, tq, ARRAY_SIZE(tq)); + if (err) { + intel_context_put(ce); kernel_context_close(ctx); goto out_test; } @@ -662,6 +858,7 @@ static int igt_shared_ctx_exec(void *arg) ndwords++; ncontexts++; + intel_context_put(ce); kernel_context_close(ctx); } pr_info("Submitted %lu contexts to %s, filling %lu dwords\n", @@ -678,14 +875,15 @@ static int igt_shared_ctx_exec(void *arg) dw += rem; } + + i915_gem_drain_freed_objects(i915); } out_test: + throttle_release(tq, ARRAY_SIZE(tq)); if (igt_live_test_end(&t)) err = -EIO; -out_unlock: - mutex_unlock(&i915->drm.struct_mutex); - - mock_file_free(i915, file); +out_file: + fput(file); return err; } @@ -717,6 +915,8 @@ static struct i915_vma *rpcs_query_batch(struct i915_vma *vma) __i915_gem_object_flush_map(obj, 0, 64); i915_gem_object_unpin_map(obj); + intel_gt_chipset_flush(vma->vm->gt); + vma = i915_vma_instance(obj, vma->vm, NULL); if (IS_ERR(vma)) { err = PTR_ERR(vma); @@ -746,7 +946,7 @@ emit_rpcs_query(struct drm_i915_gem_object *obj, GEM_BUG_ON(!intel_engine_can_store_dword(ce->engine)); - vma = i915_vma_instance(obj, ce->gem_context->vm, NULL); + vma = i915_vma_instance(obj, ce->vm, NULL); if (IS_ERR(vma)) return PTR_ERR(vma); @@ -779,21 +979,22 @@ emit_rpcs_query(struct drm_i915_gem_object *obj, goto err_request; i915_vma_lock(batch); - err = i915_vma_move_to_active(batch, rq, 0); + err = i915_request_await_object(rq, batch->obj, false); + if (err == 0) + err = i915_vma_move_to_active(batch, rq, 0); i915_vma_unlock(batch); if (err) goto skip_request; i915_vma_lock(vma); - err = i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE); + err = i915_request_await_object(rq, vma->obj, true); + if (err == 0) + err = i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE); i915_vma_unlock(vma); if (err) goto skip_request; - i915_vma_unpin(batch); - i915_vma_close(batch); - i915_vma_put(batch); - + i915_vma_unpin_and_release(&batch, 0); i915_vma_unpin(vma); *rq_out = i915_request_get(rq); @@ -807,8 +1008,7 @@ skip_request: err_request: i915_request_add(rq); err_batch: - i915_vma_unpin(batch); - i915_vma_put(batch); + i915_vma_unpin_and_release(&batch, 0); err_vma: i915_vma_unpin(vma); @@ -820,8 +1020,7 @@ err_vma: #define TEST_RESET BIT(2) static int -__sseu_prepare(struct drm_i915_private *i915, - const char *name, +__sseu_prepare(const char *name, unsigned int flags, struct intel_context *ce, struct igt_spinner **spin) @@ -837,14 +1036,11 @@ __sseu_prepare(struct drm_i915_private *i915, if (!*spin) return -ENOMEM; - ret = igt_spinner_init(*spin, i915); + ret = igt_spinner_init(*spin, ce->engine->gt); if (ret) goto err_free; - rq = igt_spinner_create_request(*spin, - ce->gem_context, - ce->engine, - MI_NOOP); + rq = igt_spinner_create_request(*spin, ce, MI_NOOP); if (IS_ERR(rq)) { ret = PTR_ERR(rq); goto err_fini; @@ -870,8 +1066,7 @@ err_free: } static int -__read_slice_count(struct drm_i915_private *i915, - struct intel_context *ce, +__read_slice_count(struct intel_context *ce, struct drm_i915_gem_object *obj, struct igt_spinner *spin, u32 *rpcs) @@ -900,7 +1095,7 @@ __read_slice_count(struct drm_i915_private *i915, return ret; } - if (INTEL_GEN(i915) >= 11) { + if (INTEL_GEN(ce->engine->i915) >= 11) { s_mask = GEN11_RPCS_S_CNT_MASK; s_shift = GEN11_RPCS_S_CNT_SHIFT; } else { @@ -943,8 +1138,7 @@ 
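Both execution tests above bound their pipeline with the small tq[] ring: throttle() waits (interruptibly) for the oldest request in the ring, shifts everything down, and appends a fresh request on the same context, so the unlocked submission loop can never run more than ARRAY_SIZE(tq) steps ahead of the hardware. A simplified usage sketch (gpu_fill(), ce, obj, dw and end_time as in the tests):

	struct i915_request *tq[5] = {};
	int err = 0;

	while (!time_after(jiffies, end_time)) {
		err = gpu_fill(ce, obj, dw);	/* queue the real work */
		if (err)
			break;

		/* Wait for the oldest entry, then append a fresh marker request. */
		err = throttle(ce, tq, ARRAY_SIZE(tq));
		if (err)	/* -EINTR if the wait on the oldest was interrupted */
			break;
	}

	throttle_release(tq, ARRAY_SIZE(tq));	/* drop whatever is still outstanding */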
__check_rpcs(const char *name, u32 rpcs, int slices, unsigned int expected, } static int -__sseu_finish(struct drm_i915_private *i915, - const char *name, +__sseu_finish(const char *name, unsigned int flags, struct intel_context *ce, struct drm_i915_gem_object *obj, @@ -956,19 +1150,18 @@ __sseu_finish(struct drm_i915_private *i915, int ret = 0; if (flags & TEST_RESET) { - ret = i915_reset_engine(ce->engine, "sseu"); + ret = intel_engine_reset(ce->engine, "sseu"); if (ret) goto out; } - ret = __read_slice_count(i915, ce, obj, + ret = __read_slice_count(ce, obj, flags & TEST_RESET ? NULL : spin, &rpcs); ret = __check_rpcs(name, rpcs, ret, expected, "Context", "!"); if (ret) goto out; - ret = __read_slice_count(i915, ce->engine->kernel_context, obj, - NULL, &rpcs); + ret = __read_slice_count(ce->engine->kernel_context, obj, NULL, &rpcs); ret = __check_rpcs(name, rpcs, ret, slices, "Kernel context", "!"); out: @@ -976,11 +1169,11 @@ out: igt_spinner_end(spin); if ((flags & TEST_IDLE) && ret == 0) { - ret = i915_gem_wait_for_idle(i915, 0, MAX_SCHEDULE_TIMEOUT); + ret = igt_flush_test(ce->engine->i915); if (ret) return ret; - ret = __read_slice_count(i915, ce, obj, NULL, &rpcs); + ret = __read_slice_count(ce, obj, NULL, &rpcs); ret = __check_rpcs(name, rpcs, ret, expected, "Context", " after idle!"); } @@ -989,8 +1182,7 @@ out: } static int -__sseu_test(struct drm_i915_private *i915, - const char *name, +__sseu_test(const char *name, unsigned int flags, struct intel_context *ce, struct drm_i915_gem_object *obj, @@ -999,15 +1191,17 @@ __sseu_test(struct drm_i915_private *i915, struct igt_spinner *spin = NULL; int ret; - ret = __sseu_prepare(i915, name, flags, ce, &spin); + intel_engine_pm_get(ce->engine); + + ret = __sseu_prepare(name, flags, ce, &spin); if (ret) - return ret; + goto out_pm; - ret = __intel_context_reconfigure_sseu(ce, sseu); + ret = intel_context_reconfigure_sseu(ce, sseu); if (ret) goto out_spin; - ret = __sseu_finish(i915, name, flags, ce, obj, + ret = __sseu_finish(name, flags, ce, obj, hweight32(sseu.slice_mask), spin); out_spin: @@ -1016,6 +1210,8 @@ out_spin: igt_spinner_fini(spin); kfree(spin); } +out_pm: + intel_engine_pm_put(ce->engine); return ret; } @@ -1024,53 +1220,15 @@ __igt_ctx_sseu(struct drm_i915_private *i915, const char *name, unsigned int flags) { - struct intel_engine_cs *engine = i915->engine[RCS0]; - struct intel_sseu default_sseu = engine->sseu; struct drm_i915_gem_object *obj; - struct i915_gem_context *ctx; - struct intel_context *ce; - struct intel_sseu pg_sseu; - intel_wakeref_t wakeref; - struct drm_file *file; - int ret; - - if (INTEL_GEN(i915) < 9) - return 0; - - if (!RUNTIME_INFO(i915)->sseu.has_slice_pg) - return 0; + int inst = 0; + int ret = 0; - if (hweight32(default_sseu.slice_mask) < 2) + if (INTEL_GEN(i915) < 9 || !RUNTIME_INFO(i915)->sseu.has_slice_pg) return 0; - /* - * Gen11 VME friendly power-gated configuration with half enabled - * sub-slices. 
- */ - pg_sseu = default_sseu; - pg_sseu.slice_mask = 1; - pg_sseu.subslice_mask = - ~(~0 << (hweight32(default_sseu.subslice_mask) / 2)); - - pr_info("SSEU subtest '%s', flags=%x, def_slices=%u, pg_slices=%u\n", - name, flags, hweight32(default_sseu.slice_mask), - hweight32(pg_sseu.slice_mask)); - - file = mock_file(i915); - if (IS_ERR(file)) - return PTR_ERR(file); - if (flags & TEST_RESET) - igt_global_reset_lock(i915); - - mutex_lock(&i915->drm.struct_mutex); - - ctx = live_context(i915, file); - if (IS_ERR(ctx)) { - ret = PTR_ERR(ctx); - goto out_unlock; - } - i915_gem_context_clear_bannable(ctx); /* to reset and beyond! */ + igt_global_reset_lock(&i915->gt); obj = i915_gem_object_create_internal(i915, PAGE_SIZE); if (IS_ERR(obj)) { @@ -1078,56 +1236,79 @@ __igt_ctx_sseu(struct drm_i915_private *i915, goto out_unlock; } - wakeref = intel_runtime_pm_get(&i915->runtime_pm); + do { + struct intel_engine_cs *engine; + struct intel_context *ce; + struct intel_sseu pg_sseu; - ce = i915_gem_context_get_engine(ctx, RCS0); - if (IS_ERR(ce)) { - ret = PTR_ERR(ce); - goto out_rpm; - } + engine = intel_engine_lookup_user(i915, + I915_ENGINE_CLASS_RENDER, + inst++); + if (!engine) + break; - ret = intel_context_pin(ce); - if (ret) - goto out_context; + if (hweight32(engine->sseu.slice_mask) < 2) + continue; - /* First set the default mask. */ - ret = __sseu_test(i915, name, flags, ce, obj, default_sseu); - if (ret) - goto out_fail; + /* + * Gen11 VME friendly power-gated configuration with + * half enabled sub-slices. + */ + pg_sseu = engine->sseu; + pg_sseu.slice_mask = 1; + pg_sseu.subslice_mask = + ~(~0 << (hweight32(engine->sseu.subslice_mask) / 2)); + + pr_info("%s: SSEU subtest '%s', flags=%x, def_slices=%u, pg_slices=%u\n", + engine->name, name, flags, + hweight32(engine->sseu.slice_mask), + hweight32(pg_sseu.slice_mask)); + + ce = intel_context_create(engine); + if (IS_ERR(ce)) { + ret = PTR_ERR(ce); + goto out_put; + } - /* Then set a power-gated configuration. */ - ret = __sseu_test(i915, name, flags, ce, obj, pg_sseu); - if (ret) - goto out_fail; + ret = intel_context_pin(ce); + if (ret) + goto out_ce; - /* Back to defaults. */ - ret = __sseu_test(i915, name, flags, ce, obj, default_sseu); - if (ret) - goto out_fail; + /* First set the default mask. */ + ret = __sseu_test(name, flags, ce, obj, engine->sseu); + if (ret) + goto out_unpin; - /* One last power-gated configuration for the road. */ - ret = __sseu_test(i915, name, flags, ce, obj, pg_sseu); - if (ret) - goto out_fail; + /* Then set a power-gated configuration. */ + ret = __sseu_test(name, flags, ce, obj, pg_sseu); + if (ret) + goto out_unpin; + + /* Back to defaults. */ + ret = __sseu_test(name, flags, ce, obj, engine->sseu); + if (ret) + goto out_unpin; + + /* One last power-gated configuration for the road. 
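The power-gated configuration is now derived from each render engine's own SSEU rather than from a device-wide default: keep a single slice and enable half of that engine's sub-slices. Since the mask expression is terse, a worked example may help (the 0xff mask below is purely illustrative):

	/*
	 * engine->sseu.subslice_mask == 0xff  ->  hweight32() == 8 sub-slices
	 * half of them                        ->  8 / 2 == 4
	 * ~(~0 << 4)                          ->  0x0000000f
	 *
	 * i.e. pg_sseu keeps slice 0 and the lowest four sub-slices.
	 */
	pg_sseu = engine->sseu;
	pg_sseu.slice_mask = 1;
	pg_sseu.subslice_mask =
		~(~0 << (hweight32(engine->sseu.subslice_mask) / 2));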
*/ + ret = __sseu_test(name, flags, ce, obj, pg_sseu); + if (ret) + goto out_unpin; -out_fail: - if (igt_flush_test(i915, I915_WAIT_LOCKED)) +out_unpin: + intel_context_unpin(ce); +out_ce: + intel_context_put(ce); + } while (!ret); + + if (igt_flush_test(i915)) ret = -EIO; - intel_context_unpin(ce); -out_context: - intel_context_put(ce); -out_rpm: - intel_runtime_pm_put(&i915->runtime_pm, wakeref); +out_put: i915_gem_object_put(obj); out_unlock: - mutex_unlock(&i915->drm.struct_mutex); - if (flags & TEST_RESET) - igt_global_reset_unlock(i915); - - mock_file_free(i915, file); + igt_global_reset_unlock(&i915->gt); if (ret) pr_err("%s: Failed with %d!\n", name, ret); @@ -1161,15 +1342,18 @@ static int igt_ctx_sseu(void *arg) static int igt_ctx_readonly(void *arg) { struct drm_i915_private *i915 = arg; + unsigned long idx, ndwords, dw, num_engines; struct drm_i915_gem_object *obj = NULL; + struct i915_request *tq[5] = {}; + struct i915_gem_engines_iter it; struct i915_address_space *vm; struct i915_gem_context *ctx; - unsigned long idx, ndwords, dw; + struct intel_context *ce; struct igt_live_test t; - struct drm_file *file; I915_RND_STATE(prng); IGT_TIMEOUT(end_time); LIST_HEAD(objects); + struct file *file; int err = -ENODEV; /* @@ -1182,56 +1366,63 @@ static int igt_ctx_readonly(void *arg) if (IS_ERR(file)) return PTR_ERR(file); - mutex_lock(&i915->drm.struct_mutex); - err = igt_live_test_begin(&t, i915, __func__, ""); if (err) - goto out_unlock; + goto out_file; ctx = live_context(i915, file); if (IS_ERR(ctx)) { err = PTR_ERR(ctx); - goto out_unlock; + goto out_file; } - vm = ctx->vm ?: &i915->mm.aliasing_ppgtt->vm; + vm = ctx_vm(ctx) ?: &i915->ggtt.alias->vm; if (!vm || !vm->has_read_only) { err = 0; - goto out_unlock; + goto out_file; } + num_engines = 0; + for_each_gem_engine(ce, i915_gem_context_lock_engines(ctx), it) + if (intel_engine_can_store_dword(ce->engine)) + num_engines++; + i915_gem_context_unlock_engines(ctx); + ndwords = 0; dw = 0; while (!time_after(jiffies, end_time)) { - struct intel_engine_cs *engine; - unsigned int id; - - for_each_engine(engine, i915, id) { - intel_wakeref_t wakeref; - - if (!intel_engine_can_store_dword(engine)) + for_each_gem_engine(ce, + i915_gem_context_lock_engines(ctx), it) { + if (!intel_engine_can_store_dword(ce->engine)) continue; if (!obj) { - obj = create_test_object(ctx, file, &objects); + obj = create_test_object(ce->vm, file, &objects); if (IS_ERR(obj)) { err = PTR_ERR(obj); - goto out_unlock; + i915_gem_context_unlock_engines(ctx); + goto out_file; } if (prandom_u32_state(&prng) & 1) i915_gem_object_set_readonly(obj); } - err = 0; - with_intel_runtime_pm(&i915->runtime_pm, wakeref) - err = gpu_fill(obj, ctx, engine, dw); + err = gpu_fill(ce, obj, dw); if (err) { - pr_err("Failed to fill dword %lu [%lu/%lu] with gpu (%s) in ctx %u [full-ppgtt? %s], err=%d\n", + pr_err("Failed to fill dword %lu [%lu/%lu] with gpu (%s) [full-ppgtt? 
%s], err=%d\n", ndwords, dw, max_dwords(obj), - engine->name, ctx->hw_id, - yesno(!!ctx->vm), err); - goto out_unlock; + ce->engine->name, + yesno(!!ctx_vm(ctx)), + err); + i915_gem_context_unlock_engines(ctx); + goto out_file; + } + + err = throttle(ce, tq, ARRAY_SIZE(tq)); + if (err) { + i915_gem_context_unlock_engines(ctx); + goto out_file; } if (++dw == max_dwords(obj)) { @@ -1240,9 +1431,10 @@ static int igt_ctx_readonly(void *arg) } ndwords++; } + i915_gem_context_unlock_engines(ctx); } - pr_info("Submitted %lu dwords (across %u engines)\n", - ndwords, RUNTIME_INFO(i915)->num_engines); + pr_info("Submitted %lu dwords (across %lu engines)\n", + ndwords, num_engines); dw = 0; idx = 0; @@ -1262,19 +1454,19 @@ static int igt_ctx_readonly(void *arg) dw += rem; } -out_unlock: +out_file: + throttle_release(tq, ARRAY_SIZE(tq)); if (igt_live_test_end(&t)) err = -EIO; - mutex_unlock(&i915->drm.struct_mutex); - mock_file_free(i915, file); + fput(file); return err; } -static int check_scratch(struct i915_gem_context *ctx, u64 offset) +static int check_scratch(struct i915_address_space *vm, u64 offset) { struct drm_mm_node *node = - __drm_mm_interval_first(&ctx->vm->mm, + __drm_mm_interval_first(&vm->mm, offset, offset + sizeof(u32) - 1); if (!node || node->start > offset) return 0; @@ -1292,6 +1484,7 @@ static int write_to_scratch(struct i915_gem_context *ctx, { struct drm_i915_private *i915 = ctx->i915; struct drm_i915_gem_object *obj; + struct i915_address_space *vm; struct i915_request *rq; struct i915_vma *vma; u32 *cmd; @@ -1306,7 +1499,7 @@ static int write_to_scratch(struct i915_gem_context *ctx, cmd = i915_gem_object_pin_map(obj, I915_MAP_WB); if (IS_ERR(cmd)) { err = PTR_ERR(cmd); - goto err; + goto out; } *cmd++ = MI_STORE_DWORD_IMM_GEN4; @@ -1322,17 +1515,20 @@ static int write_to_scratch(struct i915_gem_context *ctx, __i915_gem_object_flush_map(obj, 0, 64); i915_gem_object_unpin_map(obj); - vma = i915_vma_instance(obj, ctx->vm, NULL); + intel_gt_chipset_flush(engine->gt); + + vm = i915_gem_context_get_vm_rcu(ctx); + vma = i915_vma_instance(obj, vm, NULL); if (IS_ERR(vma)) { err = PTR_ERR(vma); - goto err; + goto out_vm; } err = i915_vma_pin(vma, 0, 0, PIN_USER | PIN_OFFSET_FIXED); if (err) - goto err; + goto out_vm; - err = check_scratch(ctx, offset); + err = check_scratch(vm, offset); if (err) goto err_unpin; @@ -1347,26 +1543,27 @@ static int write_to_scratch(struct i915_gem_context *ctx, goto err_request; i915_vma_lock(vma); - err = i915_vma_move_to_active(vma, rq, 0); + err = i915_request_await_object(rq, vma->obj, false); + if (err == 0) + err = i915_vma_move_to_active(vma, rq, 0); i915_vma_unlock(vma); if (err) goto skip_request; i915_vma_unpin(vma); - i915_vma_close(vma); - i915_vma_put(vma); i915_request_add(rq); - return 0; - + goto out_vm; skip_request: i915_request_skip(rq, err); err_request: i915_request_add(rq); err_unpin: i915_vma_unpin(vma); -err: +out_vm: + i915_vm_put(vm); +out: i915_gem_object_put(obj); return err; } @@ -1377,6 +1574,7 @@ static int read_from_scratch(struct i915_gem_context *ctx, { struct drm_i915_private *i915 = ctx->i915; struct drm_i915_gem_object *obj; + struct i915_address_space *vm; const u32 RCS_GPR0 = 0x2600; /* not all engines have their own GPR! 
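write_to_scratch() above assembles its batch on the CPU through a write-back map, and now follows the flush of that map with an intel_gt_chipset_flush() so the freshly written commands are visible to the GPU before the request runs. Roughly, with the elided middle of the hunk filled in as the gen8+ form of the store and the offset/value naming only loosely following the helper:

	cmd = i915_gem_object_pin_map(obj, I915_MAP_WB);
	if (IS_ERR(cmd))
		return PTR_ERR(cmd);

	*cmd++ = MI_STORE_DWORD_IMM_GEN4;
	*cmd++ = lower_32_bits(offset);
	*cmd++ = upper_32_bits(offset);
	*cmd++ = value;
	*cmd = MI_BATCH_BUFFER_END;

	__i915_gem_object_flush_map(obj, 0, 64);	/* flush the CPU writes */
	i915_gem_object_unpin_map(obj);

	intel_gt_chipset_flush(engine->gt);	/* and any chipset write buffers */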
*/ const u32 result = 0x100; struct i915_request *rq; @@ -1393,7 +1591,7 @@ static int read_from_scratch(struct i915_gem_context *ctx, cmd = i915_gem_object_pin_map(obj, I915_MAP_WB); if (IS_ERR(cmd)) { err = PTR_ERR(cmd); - goto err; + goto out; } memset(cmd, POISON_INUSE, PAGE_SIZE); @@ -1419,17 +1617,20 @@ static int read_from_scratch(struct i915_gem_context *ctx, i915_gem_object_flush_map(obj); i915_gem_object_unpin_map(obj); - vma = i915_vma_instance(obj, ctx->vm, NULL); + intel_gt_chipset_flush(engine->gt); + + vm = i915_gem_context_get_vm_rcu(ctx); + vma = i915_vma_instance(obj, vm, NULL); if (IS_ERR(vma)) { err = PTR_ERR(vma); - goto err; + goto out_vm; } err = i915_vma_pin(vma, 0, 0, PIN_USER | PIN_OFFSET_FIXED); if (err) - goto err; + goto out_vm; - err = check_scratch(ctx, offset); + err = check_scratch(vm, offset); if (err) goto err_unpin; @@ -1444,7 +1645,9 @@ static int read_from_scratch(struct i915_gem_context *ctx, goto err_request; i915_vma_lock(vma); - err = i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE); + err = i915_request_await_object(rq, vma->obj, true); + if (err == 0) + err = i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE); i915_vma_unlock(vma); if (err) goto skip_request; @@ -1458,27 +1661,27 @@ static int read_from_scratch(struct i915_gem_context *ctx, err = i915_gem_object_set_to_cpu_domain(obj, false); i915_gem_object_unlock(obj); if (err) - goto err; + goto out_vm; cmd = i915_gem_object_pin_map(obj, I915_MAP_WB); if (IS_ERR(cmd)) { err = PTR_ERR(cmd); - goto err; + goto out_vm; } *value = cmd[result / sizeof(*cmd)]; i915_gem_object_unpin_map(obj); - i915_gem_object_put(obj); - - return 0; + goto out_vm; skip_request: i915_request_skip(rq, err); err_request: i915_request_add(rq); err_unpin: i915_vma_unpin(vma); -err: +out_vm: + i915_vm_put(vm); +out: i915_gem_object_put(obj); return err; } @@ -1487,13 +1690,11 @@ static int igt_vm_isolation(void *arg) { struct drm_i915_private *i915 = arg; struct i915_gem_context *ctx_a, *ctx_b; + unsigned long num_engines, count; struct intel_engine_cs *engine; - intel_wakeref_t wakeref; struct igt_live_test t; - struct drm_file *file; I915_RND_STATE(prng); - unsigned long count; - unsigned int id; + struct file *file; u64 vm_total; int err; @@ -1509,36 +1710,33 @@ static int igt_vm_isolation(void *arg) if (IS_ERR(file)) return PTR_ERR(file); - mutex_lock(&i915->drm.struct_mutex); - err = igt_live_test_begin(&t, i915, __func__, ""); if (err) - goto out_unlock; + goto out_file; ctx_a = live_context(i915, file); if (IS_ERR(ctx_a)) { err = PTR_ERR(ctx_a); - goto out_unlock; + goto out_file; } ctx_b = live_context(i915, file); if (IS_ERR(ctx_b)) { err = PTR_ERR(ctx_b); - goto out_unlock; + goto out_file; } /* We can only test vm isolation, if the vm are distinct */ - if (ctx_a->vm == ctx_b->vm) - goto out_unlock; + if (ctx_vm(ctx_a) == ctx_vm(ctx_b)) + goto out_file; - vm_total = ctx_a->vm->total; - GEM_BUG_ON(ctx_b->vm->total != vm_total); + vm_total = ctx_vm(ctx_a)->total; + GEM_BUG_ON(ctx_vm(ctx_b)->total != vm_total); vm_total -= I915_GTT_PAGE_SIZE; - wakeref = intel_runtime_pm_get(&i915->runtime_pm); - count = 0; - for_each_engine(engine, i915, id) { + num_engines = 0; + for_each_uabi_engine(engine, i915) { IGT_TIMEOUT(end_time); unsigned long this = 0; @@ -1551,7 +1749,7 @@ static int igt_vm_isolation(void *arg) div64_u64_rem(i915_prandom_u64_state(&prng), vm_total, &offset); - offset &= -sizeof(u32); + offset = round_down(offset, alignof_dword); offset += I915_GTT_PAGE_SIZE; err = write_to_scratch(ctx_a, engine, @@ 
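One pattern recurs in nearly every hunk here that submits work (gpu_set(), emit_rpcs_query(), the scratch helpers above, make_obj_busy() below): i915_vma_move_to_active() is now preceded by an explicit i915_request_await_object() under the vma lock, so the request first waits on the object's existing fences and only then is recorded as its new, possibly exclusive, user. The idiom, condensed with a stand-in 'write' flag in place of each call site's choice:

	i915_vma_lock(vma);
	/* Serialise against current users of the object (exclusive if writing). */
	err = i915_request_await_object(rq, vma->obj, write);
	if (err == 0)
		/* Publish this request as the vma's active user. */
		err = i915_vma_move_to_active(vma, rq,
					      write ? EXEC_OBJECT_WRITE : 0);
	i915_vma_unlock(vma);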
-1560,7 +1758,7 @@ static int igt_vm_isolation(void *arg) err = read_from_scratch(ctx_b, engine, offset, &value); if (err) - goto out_rpm; + goto out_file; if (value) { pr_err("%s: Read %08x from scratch (offset 0x%08x_%08x), after %lu reads!\n", @@ -1569,42 +1767,24 @@ static int igt_vm_isolation(void *arg) lower_32_bits(offset), this); err = -EINVAL; - goto out_rpm; + goto out_file; } this++; } count += this; + num_engines++; } - pr_info("Checked %lu scratch offsets across %d engines\n", - count, RUNTIME_INFO(i915)->num_engines); + pr_info("Checked %lu scratch offsets across %lu engines\n", + count, num_engines); -out_rpm: - intel_runtime_pm_put(&i915->runtime_pm, wakeref); -out_unlock: +out_file: if (igt_live_test_end(&t)) err = -EIO; - mutex_unlock(&i915->drm.struct_mutex); - - mock_file_free(i915, file); + fput(file); return err; } -static __maybe_unused const char * -__engine_name(struct drm_i915_private *i915, intel_engine_mask_t engines) -{ - struct intel_engine_cs *engine; - intel_engine_mask_t tmp; - - if (engines == ALL_ENGINES) - return "all"; - - for_each_engine_masked(engine, i915, engines, tmp) - return engine->name; - - return "none"; -} - static bool skip_unused_engines(struct intel_context *ce, void *data) { return !ce->state; @@ -1632,13 +1812,9 @@ static int mock_context_barrier(void *arg) * a request; useful for retiring old state after loading new. */ - mutex_lock(&i915->drm.struct_mutex); - ctx = mock_context(i915, "mock"); - if (!ctx) { - err = -ENOMEM; - goto unlock; - } + if (!ctx) + return -ENOMEM; counter = 0; err = context_barrier_task(ctx, 0, @@ -1711,8 +1887,6 @@ static int mock_context_barrier(void *arg) out: mock_context_close(ctx); -unlock: - mutex_unlock(&i915->drm.struct_mutex); return err; #undef pr_fmt #define pr_fmt(x) x @@ -1736,10 +1910,11 @@ int i915_gem_context_mock_selftests(void) return err; } -int i915_gem_context_live_selftests(struct drm_i915_private *dev_priv) +int i915_gem_context_live_selftests(struct drm_i915_private *i915) { static const struct i915_subtest tests[] = { SUBTEST(live_nop_switch), + SUBTEST(live_parallel_switch), SUBTEST(igt_ctx_exec), SUBTEST(igt_ctx_readonly), SUBTEST(igt_ctx_sseu), @@ -1747,8 +1922,8 @@ int i915_gem_context_live_selftests(struct drm_i915_private *dev_priv) SUBTEST(igt_vm_isolation), }; - if (i915_terminally_wedged(dev_priv)) + if (intel_gt_is_wedged(&i915->gt)) return 0; - return i915_subtests(tests, dev_priv); + return i915_live_subtests(tests, i915); } diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_dmabuf.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_dmabuf.c index e3a64edef918..2a52b92586b9 100644 --- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_dmabuf.c +++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_dmabuf.c @@ -20,7 +20,7 @@ static int igt_dmabuf_export(void *arg) if (IS_ERR(obj)) return PTR_ERR(obj); - dmabuf = i915_gem_prime_export(&i915->drm, &obj->base, 0); + dmabuf = i915_gem_prime_export(&obj->base, 0); i915_gem_object_put(obj); if (IS_ERR(dmabuf)) { pr_err("i915_gem_prime_export failed with err=%d\n", @@ -44,7 +44,7 @@ static int igt_dmabuf_import_self(void *arg) if (IS_ERR(obj)) return PTR_ERR(obj); - dmabuf = i915_gem_prime_export(&i915->drm, &obj->base, 0); + dmabuf = i915_gem_prime_export(&obj->base, 0); if (IS_ERR(dmabuf)) { pr_err("i915_gem_prime_export failed with err=%d\n", (int)PTR_ERR(dmabuf)); @@ -219,7 +219,7 @@ static int igt_dmabuf_export_vmap(void *arg) if (IS_ERR(obj)) return PTR_ERR(obj); - dmabuf = i915_gem_prime_export(&i915->drm, &obj->base, 0); + dmabuf = 
i915_gem_prime_export(&obj->base, 0); if (IS_ERR(dmabuf)) { pr_err("i915_gem_prime_export failed with err=%d\n", (int)PTR_ERR(dmabuf)); @@ -254,106 +254,6 @@ err_obj: return err; } -static int igt_dmabuf_export_kmap(void *arg) -{ - struct drm_i915_private *i915 = arg; - struct drm_i915_gem_object *obj; - struct dma_buf *dmabuf; - void *ptr; - int err; - - obj = i915_gem_object_create_shmem(i915, 2 * PAGE_SIZE); - if (IS_ERR(obj)) - return PTR_ERR(obj); - - dmabuf = i915_gem_prime_export(&i915->drm, &obj->base, 0); - i915_gem_object_put(obj); - if (IS_ERR(dmabuf)) { - err = PTR_ERR(dmabuf); - pr_err("i915_gem_prime_export failed with err=%d\n", err); - return err; - } - - ptr = dma_buf_kmap(dmabuf, 0); - if (!ptr) { - pr_err("dma_buf_kmap failed\n"); - err = -ENOMEM; - goto err; - } - - if (memchr_inv(ptr, 0, PAGE_SIZE)) { - dma_buf_kunmap(dmabuf, 0, ptr); - pr_err("Exported page[0] not initialiased to zero!\n"); - err = -EINVAL; - goto err; - } - - memset(ptr, 0xc5, PAGE_SIZE); - dma_buf_kunmap(dmabuf, 0, ptr); - - ptr = i915_gem_object_pin_map(obj, I915_MAP_WB); - if (IS_ERR(ptr)) { - err = PTR_ERR(ptr); - pr_err("i915_gem_object_pin_map failed with err=%d\n", err); - goto err; - } - memset(ptr + PAGE_SIZE, 0xaa, PAGE_SIZE); - i915_gem_object_flush_map(obj); - i915_gem_object_unpin_map(obj); - - ptr = dma_buf_kmap(dmabuf, 1); - if (!ptr) { - pr_err("dma_buf_kmap failed\n"); - err = -ENOMEM; - goto err; - } - - if (memchr_inv(ptr, 0xaa, PAGE_SIZE)) { - dma_buf_kunmap(dmabuf, 1, ptr); - pr_err("Exported page[1] not set to 0xaa!\n"); - err = -EINVAL; - goto err; - } - - memset(ptr, 0xc5, PAGE_SIZE); - dma_buf_kunmap(dmabuf, 1, ptr); - - ptr = dma_buf_kmap(dmabuf, 0); - if (!ptr) { - pr_err("dma_buf_kmap failed\n"); - err = -ENOMEM; - goto err; - } - if (memchr_inv(ptr, 0xc5, PAGE_SIZE)) { - dma_buf_kunmap(dmabuf, 0, ptr); - pr_err("Exported page[0] did not retain 0xc5!\n"); - err = -EINVAL; - goto err; - } - dma_buf_kunmap(dmabuf, 0, ptr); - - ptr = dma_buf_kmap(dmabuf, 2); - if (ptr) { - pr_err("Erroneously kmapped beyond the end of the object!\n"); - dma_buf_kunmap(dmabuf, 2, ptr); - err = -EINVAL; - goto err; - } - - ptr = dma_buf_kmap(dmabuf, -1); - if (ptr) { - pr_err("Erroneously kmapped before the start of the object!\n"); - dma_buf_kunmap(dmabuf, -1, ptr); - err = -EINVAL; - goto err; - } - - err = 0; -err: - dma_buf_put(dmabuf); - return err; -} - int i915_gem_dmabuf_mock_selftests(void) { static const struct i915_subtest tests[] = { @@ -362,7 +262,6 @@ int i915_gem_dmabuf_mock_selftests(void) SUBTEST(igt_dmabuf_import), SUBTEST(igt_dmabuf_import_ownership), SUBTEST(igt_dmabuf_export_vmap), - SUBTEST(igt_dmabuf_export_kmap), }; struct drm_i915_private *i915; int err; diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c index 5c81f4b4813a..ef7c74cff28a 100644 --- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c +++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c @@ -6,10 +6,15 @@ #include <linux/prime_numbers.h> +#include "gt/intel_engine_pm.h" +#include "gt/intel_gt.h" #include "gt/intel_gt_pm.h" +#include "gem/i915_gem_region.h" #include "huge_gem_object.h" #include "i915_selftest.h" +#include "selftests/i915_random.h" #include "selftests/igt_flush_test.h" +#include "selftests/igt_mmap.h" struct tile { unsigned int width; @@ -75,18 +80,103 @@ static u64 tiled_offset(const struct tile *tile, u64 v) static int check_partial_mapping(struct drm_i915_gem_object *obj, const struct tile *tile, - unsigned long 
end_time) + struct rnd_state *prng) { - const unsigned int nreal = obj->scratch / PAGE_SIZE; const unsigned long npages = obj->base.size / PAGE_SIZE; + struct i915_ggtt_view view; struct i915_vma *vma; unsigned long page; + u32 __iomem *io; + struct page *p; + unsigned int n; + u64 offset; + u32 *cpu; int err; - if (igt_timeout(end_time, - "%s: timed out before tiling=%d stride=%d\n", - __func__, tile->tiling, tile->stride)) - return -EINTR; + err = i915_gem_object_set_tiling(obj, tile->tiling, tile->stride); + if (err) { + pr_err("Failed to set tiling mode=%u, stride=%u, err=%d\n", + tile->tiling, tile->stride, err); + return err; + } + + GEM_BUG_ON(i915_gem_object_get_tiling(obj) != tile->tiling); + GEM_BUG_ON(i915_gem_object_get_stride(obj) != tile->stride); + + i915_gem_object_lock(obj); + err = i915_gem_object_set_to_gtt_domain(obj, true); + i915_gem_object_unlock(obj); + if (err) { + pr_err("Failed to flush to GTT write domain; err=%d\n", err); + return err; + } + + page = i915_prandom_u32_max_state(npages, prng); + view = compute_partial_view(obj, page, MIN_CHUNK_PAGES); + + vma = i915_gem_object_ggtt_pin(obj, &view, 0, 0, PIN_MAPPABLE); + if (IS_ERR(vma)) { + pr_err("Failed to pin partial view: offset=%lu; err=%d\n", + page, (int)PTR_ERR(vma)); + return PTR_ERR(vma); + } + + n = page - view.partial.offset; + GEM_BUG_ON(n >= view.partial.size); + + io = i915_vma_pin_iomap(vma); + i915_vma_unpin(vma); + if (IS_ERR(io)) { + pr_err("Failed to iomap partial view: offset=%lu; err=%d\n", + page, (int)PTR_ERR(io)); + err = PTR_ERR(io); + goto out; + } + + iowrite32(page, io + n * PAGE_SIZE / sizeof(*io)); + i915_vma_unpin_iomap(vma); + + offset = tiled_offset(tile, page << PAGE_SHIFT); + if (offset >= obj->base.size) + goto out; + + intel_gt_flush_ggtt_writes(&to_i915(obj->base.dev)->gt); + + p = i915_gem_object_get_page(obj, offset >> PAGE_SHIFT); + cpu = kmap(p) + offset_in_page(offset); + drm_clflush_virt_range(cpu, sizeof(*cpu)); + if (*cpu != (u32)page) { + pr_err("Partial view for %lu [%u] (offset=%llu, size=%u [%llu, row size %u], fence=%d, tiling=%d, stride=%d) misalignment, expected write to page (%llu + %u [0x%llx]) of 0x%x, found 0x%x\n", + page, n, + view.partial.offset, + view.partial.size, + vma->size >> PAGE_SHIFT, + tile->tiling ? tile_row_pages(obj) : 0, + vma->fence ? 
vma->fence->id : -1, tile->tiling, tile->stride, + offset >> PAGE_SHIFT, + (unsigned int)offset_in_page(offset), + offset, + (u32)page, *cpu); + err = -EINVAL; + } + *cpu = 0; + drm_clflush_virt_range(cpu, sizeof(*cpu)); + kunmap(p); + +out: + __i915_vma_put(vma); + return err; +} + +static int check_partial_mappings(struct drm_i915_gem_object *obj, + const struct tile *tile, + unsigned long end_time) +{ + const unsigned int nreal = obj->scratch / PAGE_SIZE; + const unsigned long npages = obj->base.size / PAGE_SIZE; + struct i915_vma *vma; + unsigned long page; + int err; err = i915_gem_object_set_tiling(obj, tile->tiling, tile->stride); if (err) { @@ -143,7 +233,7 @@ static int check_partial_mapping(struct drm_i915_gem_object *obj, if (offset >= obj->base.size) continue; - i915_gem_flush_ggtt_writes(to_i915(obj->base.dev)); + intel_gt_flush_ggtt_writes(&to_i915(obj->base.dev)->gt); p = i915_gem_object_get_page(obj, offset >> PAGE_SHIFT); cpu = kmap(p) + offset_in_page(offset); @@ -168,12 +258,43 @@ static int check_partial_mapping(struct drm_i915_gem_object *obj, if (err) return err; - i915_vma_destroy(vma); + __i915_vma_put(vma); + + if (igt_timeout(end_time, + "%s: timed out after tiling=%d stride=%d\n", + __func__, tile->tiling, tile->stride)) + return -EINTR; } return 0; } +static unsigned int +setup_tile_size(struct tile *tile, struct drm_i915_private *i915) +{ + if (INTEL_GEN(i915) <= 2) { + tile->height = 16; + tile->width = 128; + tile->size = 11; + } else if (tile->tiling == I915_TILING_Y && + HAS_128_BYTE_Y_TILING(i915)) { + tile->height = 32; + tile->width = 128; + tile->size = 12; + } else { + tile->height = 8; + tile->width = 512; + tile->size = 12; + } + + if (INTEL_GEN(i915) < 4) + return 8192 / tile->width; + else if (INTEL_GEN(i915) < 7) + return 128 * I965_FENCE_MAX_PITCH_VAL / tile->width; + else + return 128 * GEN7_FENCE_MAX_PITCH_VAL / tile->width; +} + static int igt_partial_tiling(void *arg) { const unsigned int nreal = 1 << 12; /* largest tile row x2 */ @@ -183,6 +304,9 @@ static int igt_partial_tiling(void *arg) int tiling; int err; + if (!i915_ggtt_has_aperture(&i915->ggtt)) + return 0; + /* We want to check the page mapping and fencing of a large object * mmapped through the GTT. The object we create is larger than can * possibly be mmaped as a whole, and so we must use partial GGTT vma. 
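setup_tile_size() above both fills in the tile geometry for the chosen tiling mode and returns the largest legal pitch measured in tiles: 2KiB 128x16 tiles before gen3, 4KiB 128x32 tiles for 128-byte Y tiling, and 4KiB 512x8 tiles otherwise, capped by the platform's maximum fence pitch. The exhaustive test then walks pitches down from that limit, as in this fragment lifted from igt_partial_tiling() (labels and variables as in that function):

	max_pitch = setup_tile_size(&tile, i915);

	for (pitch = max_pitch; pitch; pitch >>= 1) {
		tile.stride = tile.width * pitch;	/* stride in bytes */
		err = check_partial_mappings(obj, &tile, end);
		if (err == -EINTR)
			goto next_tiling;
		if (err)
			goto out_unlock;
	}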
@@ -204,7 +328,6 @@ static int igt_partial_tiling(void *arg) goto out; } - mutex_lock(&i915->drm.struct_mutex); wakeref = intel_runtime_pm_get(&i915->runtime_pm); if (1) { @@ -218,7 +341,7 @@ static int igt_partial_tiling(void *arg) tile.swizzle = I915_BIT_6_SWIZZLE_NONE; tile.tiling = I915_TILING_NONE; - err = check_partial_mapping(obj, &tile, end); + err = check_partial_mappings(obj, &tile, end); if (err && err != -EINTR) goto out_unlock; } @@ -240,10 +363,10 @@ static int igt_partial_tiling(void *arg) tile.tiling = tiling; switch (tiling) { case I915_TILING_X: - tile.swizzle = i915->mm.bit_6_swizzle_x; + tile.swizzle = i915->ggtt.bit_6_swizzle_x; break; case I915_TILING_Y: - tile.swizzle = i915->mm.bit_6_swizzle_y; + tile.swizzle = i915->ggtt.bit_6_swizzle_y; break; } @@ -252,31 +375,11 @@ static int igt_partial_tiling(void *arg) tile.swizzle == I915_BIT_6_SWIZZLE_9_10_17) continue; - if (INTEL_GEN(i915) <= 2) { - tile.height = 16; - tile.width = 128; - tile.size = 11; - } else if (tile.tiling == I915_TILING_Y && - HAS_128_BYTE_Y_TILING(i915)) { - tile.height = 32; - tile.width = 128; - tile.size = 12; - } else { - tile.height = 8; - tile.width = 512; - tile.size = 12; - } - - if (INTEL_GEN(i915) < 4) - max_pitch = 8192 / tile.width; - else if (INTEL_GEN(i915) < 7) - max_pitch = 128 * I965_FENCE_MAX_PITCH_VAL / tile.width; - else - max_pitch = 128 * GEN7_FENCE_MAX_PITCH_VAL / tile.width; + max_pitch = setup_tile_size(&tile, i915); for (pitch = max_pitch; pitch; pitch >>= 1) { tile.stride = tile.width * pitch; - err = check_partial_mapping(obj, &tile, end); + err = check_partial_mappings(obj, &tile, end); if (err == -EINTR) goto next_tiling; if (err) @@ -284,7 +387,7 @@ static int igt_partial_tiling(void *arg) if (pitch > 2 && INTEL_GEN(i915) >= 4) { tile.stride = tile.width * (pitch - 1); - err = check_partial_mapping(obj, &tile, end); + err = check_partial_mappings(obj, &tile, end); if (err == -EINTR) goto next_tiling; if (err) @@ -293,7 +396,7 @@ static int igt_partial_tiling(void *arg) if (pitch < max_pitch && INTEL_GEN(i915) >= 4) { tile.stride = tile.width * (pitch + 1); - err = check_partial_mapping(obj, &tile, end); + err = check_partial_mappings(obj, &tile, end); if (err == -EINTR) goto next_tiling; if (err) @@ -304,7 +407,7 @@ static int igt_partial_tiling(void *arg) if (INTEL_GEN(i915) >= 4) { for_each_prime_number(pitch, max_pitch) { tile.stride = tile.width * pitch; - err = check_partial_mapping(obj, &tile, end); + err = check_partial_mappings(obj, &tile, end); if (err == -EINTR) goto next_tiling; if (err) @@ -317,82 +420,188 @@ next_tiling: ; out_unlock: intel_runtime_pm_put(&i915->runtime_pm, wakeref); - mutex_unlock(&i915->drm.struct_mutex); i915_gem_object_unpin_pages(obj); out: i915_gem_object_put(obj); return err; } -static int make_obj_busy(struct drm_i915_gem_object *obj) +static int igt_smoke_tiling(void *arg) { - struct drm_i915_private *i915 = to_i915(obj->base.dev); - struct i915_request *rq; - struct i915_vma *vma; + const unsigned int nreal = 1 << 12; /* largest tile row x2 */ + struct drm_i915_private *i915 = arg; + struct drm_i915_gem_object *obj; + intel_wakeref_t wakeref; + I915_RND_STATE(prng); + unsigned long count; + IGT_TIMEOUT(end); int err; - vma = i915_vma_instance(obj, &i915->ggtt.vm, NULL); - if (IS_ERR(vma)) - return PTR_ERR(vma); + if (!i915_ggtt_has_aperture(&i915->ggtt)) + return 0; - err = i915_vma_pin(vma, 0, 0, PIN_USER); - if (err) - return err; + /* + * igt_partial_tiling() does an exhastive check of partial tiling + * chunking, but will 
undoubtably run out of time. Here, we do a + * randomised search and hope over many runs of 1s with different + * seeds we will do a thorough check. + * + * Remember to look at the st_seed if we see a flip-flop in BAT! + */ - rq = i915_request_create(i915->engine[RCS0]->kernel_context); - if (IS_ERR(rq)) { - i915_vma_unpin(vma); - return PTR_ERR(rq); + if (i915->quirks & QUIRK_PIN_SWIZZLED_PAGES) + return 0; + + obj = huge_gem_object(i915, + nreal << PAGE_SHIFT, + (1 + next_prime_number(i915->ggtt.vm.total >> PAGE_SHIFT)) << PAGE_SHIFT); + if (IS_ERR(obj)) + return PTR_ERR(obj); + + err = i915_gem_object_pin_pages(obj); + if (err) { + pr_err("Failed to allocate %u pages (%lu total), err=%d\n", + nreal, obj->base.size / PAGE_SIZE, err); + goto out; } - i915_vma_lock(vma); - err = i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE); - i915_vma_unlock(vma); + wakeref = intel_runtime_pm_get(&i915->runtime_pm); - i915_request_add(rq); + count = 0; + do { + struct tile tile; - i915_vma_unpin(vma); - i915_gem_object_put(obj); /* leave it only alive via its active ref */ + tile.tiling = + i915_prandom_u32_max_state(I915_TILING_Y + 1, &prng); + switch (tile.tiling) { + case I915_TILING_NONE: + tile.height = 1; + tile.width = 1; + tile.size = 0; + tile.stride = 0; + tile.swizzle = I915_BIT_6_SWIZZLE_NONE; + break; + + case I915_TILING_X: + tile.swizzle = i915->ggtt.bit_6_swizzle_x; + break; + case I915_TILING_Y: + tile.swizzle = i915->ggtt.bit_6_swizzle_y; + break; + } + + if (tile.swizzle == I915_BIT_6_SWIZZLE_9_17 || + tile.swizzle == I915_BIT_6_SWIZZLE_9_10_17) + continue; + + if (tile.tiling != I915_TILING_NONE) { + unsigned int max_pitch = setup_tile_size(&tile, i915); + + tile.stride = + i915_prandom_u32_max_state(max_pitch, &prng); + tile.stride = (1 + tile.stride) * tile.width; + if (INTEL_GEN(i915) < 4) + tile.stride = rounddown_pow_of_two(tile.stride); + } + + err = check_partial_mapping(obj, &tile, &prng); + if (err) + break; + + count++; + } while (!__igt_timeout(end, NULL)); + pr_info("%s: Completed %lu trials\n", __func__, count); + + intel_runtime_pm_put(&i915->runtime_pm, wakeref); + i915_gem_object_unpin_pages(obj); +out: + i915_gem_object_put(obj); return err; } +static int make_obj_busy(struct drm_i915_gem_object *obj) +{ + struct drm_i915_private *i915 = to_i915(obj->base.dev); + struct intel_engine_cs *engine; + + for_each_uabi_engine(engine, i915) { + struct i915_request *rq; + struct i915_vma *vma; + int err; + + vma = i915_vma_instance(obj, &engine->gt->ggtt->vm, NULL); + if (IS_ERR(vma)) + return PTR_ERR(vma); + + err = i915_vma_pin(vma, 0, 0, PIN_USER); + if (err) + return err; + + rq = intel_engine_create_kernel_request(engine); + if (IS_ERR(rq)) { + i915_vma_unpin(vma); + return PTR_ERR(rq); + } + + i915_vma_lock(vma); + err = i915_request_await_object(rq, vma->obj, true); + if (err == 0) + err = i915_vma_move_to_active(vma, rq, + EXEC_OBJECT_WRITE); + i915_vma_unlock(vma); + + i915_request_add(rq); + i915_vma_unpin(vma); + if (err) + return err; + } + + i915_gem_object_put(obj); /* leave it only alive via its active ref */ + return 0; +} + static bool assert_mmap_offset(struct drm_i915_private *i915, unsigned long size, int expected) { struct drm_i915_gem_object *obj; - int err; + struct i915_mmap_offset *mmo; obj = i915_gem_object_create_internal(i915, size); if (IS_ERR(obj)) return PTR_ERR(obj); - err = create_mmap_offset(obj); + mmo = mmap_offset_attach(obj, I915_MMAP_OFFSET_GTT, NULL); i915_gem_object_put(obj); - return err == expected; + return PTR_ERR_OR_ZERO(mmo) 
== expected; } static void disable_retire_worker(struct drm_i915_private *i915) { - i915_gem_shrinker_unregister(i915); - - intel_gt_pm_get(i915); - - cancel_delayed_work_sync(&i915->gem.retire_work); - flush_work(&i915->gem.idle_work); + i915_gem_driver_unregister__shrinker(i915); + intel_gt_pm_get(&i915->gt); + cancel_delayed_work_sync(&i915->gt.requests.retire_work); } static void restore_retire_worker(struct drm_i915_private *i915) { - intel_gt_pm_put(i915); + igt_flush_test(i915); + intel_gt_pm_put(&i915->gt); + i915_gem_driver_register__shrinker(i915); +} - mutex_lock(&i915->drm.struct_mutex); - igt_flush_test(i915, I915_WAIT_LOCKED); - mutex_unlock(&i915->drm.struct_mutex); +static void mmap_offset_lock(struct drm_i915_private *i915) + __acquires(&i915->drm.vma_offset_manager->vm_lock) +{ + write_lock(&i915->drm.vma_offset_manager->vm_lock); +} - i915_gem_shrinker_register(i915); +static void mmap_offset_unlock(struct drm_i915_private *i915) + __releases(&i915->drm.vma_offset_manager->vm_lock) +{ + write_unlock(&i915->drm.vma_offset_manager->vm_lock); } static int igt_mmap_offset_exhaustion(void *arg) @@ -400,26 +609,50 @@ static int igt_mmap_offset_exhaustion(void *arg) struct drm_i915_private *i915 = arg; struct drm_mm *mm = &i915->drm.vma_offset_manager->vm_addr_space_mm; struct drm_i915_gem_object *obj; - struct drm_mm_node resv, *hole; - u64 hole_start, hole_end; - int loop, err; + struct drm_mm_node *hole, *next; + struct i915_mmap_offset *mmo; + int loop, err = 0; /* Disable background reaper */ disable_retire_worker(i915); GEM_BUG_ON(!i915->gt.awake); + intel_gt_retire_requests(&i915->gt); + i915_gem_drain_freed_objects(i915); /* Trim the device mmap space to only a page */ - memset(&resv, 0, sizeof(resv)); - drm_mm_for_each_hole(hole, mm, hole_start, hole_end) { - resv.start = hole_start; - resv.size = hole_end - hole_start - 1; /* PAGE_SIZE units */ - err = drm_mm_reserve_node(mm, &resv); + mmap_offset_lock(i915); + loop = 1; /* PAGE_SIZE units */ + list_for_each_entry_safe(hole, next, &mm->hole_stack, hole_stack) { + struct drm_mm_node *resv; + + resv = kzalloc(sizeof(*resv), GFP_NOWAIT); + if (!resv) { + err = -ENOMEM; + goto out_park; + } + + resv->start = drm_mm_hole_node_start(hole) + loop; + resv->size = hole->hole_size - loop; + resv->color = -1ul; + loop = 0; + + if (!resv->size) { + kfree(resv); + continue; + } + + pr_debug("Reserving hole [%llx + %llx]\n", + resv->start, resv->size); + + err = drm_mm_reserve_node(mm, resv); if (err) { pr_err("Failed to trim VMA manager, err=%d\n", err); + kfree(resv); goto out_park; } - break; } + GEM_BUG_ON(!list_is_singular(&mm->hole_stack)); + mmap_offset_unlock(i915); /* Just fits! 
*/ if (!assert_mmap_offset(i915, PAGE_SIZE, 0)) { @@ -442,9 +675,10 @@ static int igt_mmap_offset_exhaustion(void *arg) goto out; } - err = create_mmap_offset(obj); - if (err) { + mmo = mmap_offset_attach(obj, I915_MMAP_OFFSET_GTT, NULL); + if (IS_ERR(mmo)) { pr_err("Unable to insert object into reclaimed hole\n"); + err = PTR_ERR(mmo); goto err_obj; } @@ -458,7 +692,7 @@ static int igt_mmap_offset_exhaustion(void *arg) /* Now fill with busy dead objects that we expect to reap */ for (loop = 0; loop < 3; loop++) { - if (i915_terminally_wedged(i915)) + if (intel_gt_is_wedged(&i915->gt)) break; obj = i915_gem_object_create_internal(i915, PAGE_SIZE); @@ -467,27 +701,24 @@ static int igt_mmap_offset_exhaustion(void *arg) goto out; } - mutex_lock(&i915->drm.struct_mutex); err = make_obj_busy(obj); - mutex_unlock(&i915->drm.struct_mutex); if (err) { pr_err("[loop %d] Failed to busy the object\n", loop); goto err_obj; } - - /* NB we rely on the _active_ reference to access obj now */ - GEM_BUG_ON(!i915_gem_object_is_active(obj)); - err = create_mmap_offset(obj); - if (err) { - pr_err("[loop %d] create_mmap_offset failed with err=%d\n", - loop, err); - goto out; - } } out: - drm_mm_remove_node(&resv); + mmap_offset_lock(i915); out_park: + drm_mm_for_each_node_safe(hole, next, mm) { + if (hole->color != -1ul) + continue; + + drm_mm_remove_node(hole); + kfree(hole); + } + mmap_offset_unlock(i915); restore_retire_worker(i915); return err; err_obj: @@ -495,11 +726,515 @@ err_obj: goto out; } +static int gtt_set(struct drm_i915_gem_object *obj) +{ + struct i915_vma *vma; + void __iomem *map; + int err = 0; + + vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0, PIN_MAPPABLE); + if (IS_ERR(vma)) + return PTR_ERR(vma); + + intel_gt_pm_get(vma->vm->gt); + map = i915_vma_pin_iomap(vma); + i915_vma_unpin(vma); + if (IS_ERR(map)) { + err = PTR_ERR(map); + goto out; + } + + memset_io(map, POISON_INUSE, obj->base.size); + i915_vma_unpin_iomap(vma); + +out: + intel_gt_pm_put(vma->vm->gt); + return err; +} + +static int gtt_check(struct drm_i915_gem_object *obj) +{ + struct i915_vma *vma; + void __iomem *map; + int err = 0; + + vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0, PIN_MAPPABLE); + if (IS_ERR(vma)) + return PTR_ERR(vma); + + intel_gt_pm_get(vma->vm->gt); + map = i915_vma_pin_iomap(vma); + i915_vma_unpin(vma); + if (IS_ERR(map)) { + err = PTR_ERR(map); + goto out; + } + + if (memchr_inv((void __force *)map, POISON_FREE, obj->base.size)) { + pr_err("%s: Write via mmap did not land in backing store (GTT)\n", + obj->mm.region->name); + err = -EINVAL; + } + i915_vma_unpin_iomap(vma); + +out: + intel_gt_pm_put(vma->vm->gt); + return err; +} + +static int wc_set(struct drm_i915_gem_object *obj) +{ + void *vaddr; + + vaddr = i915_gem_object_pin_map(obj, I915_MAP_WC); + if (IS_ERR(vaddr)) + return PTR_ERR(vaddr); + + memset(vaddr, POISON_INUSE, obj->base.size); + i915_gem_object_flush_map(obj); + i915_gem_object_unpin_map(obj); + + return 0; +} + +static int wc_check(struct drm_i915_gem_object *obj) +{ + void *vaddr; + int err = 0; + + vaddr = i915_gem_object_pin_map(obj, I915_MAP_WC); + if (IS_ERR(vaddr)) + return PTR_ERR(vaddr); + + if (memchr_inv(vaddr, POISON_FREE, obj->base.size)) { + pr_err("%s: Write via mmap did not land in backing store (WC)\n", + obj->mm.region->name); + err = -EINVAL; + } + i915_gem_object_unpin_map(obj); + + return err; +} + +static bool can_mmap(struct drm_i915_gem_object *obj, enum i915_mmap_type type) +{ + if (type == I915_MMAP_TYPE_GTT && + 
!i915_ggtt_has_aperture(&to_i915(obj->base.dev)->ggtt)) + return false; + + if (type != I915_MMAP_TYPE_GTT && + !i915_gem_object_type_has(obj, + I915_GEM_OBJECT_HAS_STRUCT_PAGE | + I915_GEM_OBJECT_HAS_IOMEM)) + return false; + + return true; +} + +#define expand32(x) (((x) << 0) | ((x) << 8) | ((x) << 16) | ((x) << 24)) +static int __igt_mmap(struct drm_i915_private *i915, + struct drm_i915_gem_object *obj, + enum i915_mmap_type type) +{ + struct i915_mmap_offset *mmo; + struct vm_area_struct *area; + unsigned long addr; + int err, i; + + if (!can_mmap(obj, type)) + return 0; + + err = wc_set(obj); + if (err == -ENXIO) + err = gtt_set(obj); + if (err) + return err; + + mmo = mmap_offset_attach(obj, type, NULL); + if (IS_ERR(mmo)) + return PTR_ERR(mmo); + + addr = igt_mmap_node(i915, &mmo->vma_node, 0, PROT_WRITE, MAP_SHARED); + if (IS_ERR_VALUE(addr)) + return addr; + + pr_debug("igt_mmap(%s, %d) @ %lx\n", obj->mm.region->name, type, addr); + + area = find_vma(current->mm, addr); + if (!area) { + pr_err("%s: Did not create a vm_area_struct for the mmap\n", + obj->mm.region->name); + err = -EINVAL; + goto out_unmap; + } + + if (area->vm_private_data != mmo) { + pr_err("%s: vm_area_struct did not point back to our mmap_offset object!\n", + obj->mm.region->name); + err = -EINVAL; + goto out_unmap; + } + + for (i = 0; i < obj->base.size / sizeof(u32); i++) { + u32 __user *ux = u64_to_user_ptr((u64)(addr + i * sizeof(*ux))); + u32 x; + + if (get_user(x, ux)) { + pr_err("%s: Unable to read from mmap, offset:%zd\n", + obj->mm.region->name, i * sizeof(x)); + err = -EFAULT; + goto out_unmap; + } + + if (x != expand32(POISON_INUSE)) { + pr_err("%s: Read incorrect value from mmap, offset:%zd, found:%x, expected:%x\n", + obj->mm.region->name, + i * sizeof(x), x, expand32(POISON_INUSE)); + err = -EINVAL; + goto out_unmap; + } + + x = expand32(POISON_FREE); + if (put_user(x, ux)) { + pr_err("%s: Unable to write to mmap, offset:%zd\n", + obj->mm.region->name, i * sizeof(x)); + err = -EFAULT; + goto out_unmap; + } + } + + if (type == I915_MMAP_TYPE_GTT) + intel_gt_flush_ggtt_writes(&i915->gt); + + err = wc_check(obj); + if (err == -ENXIO) + err = gtt_check(obj); +out_unmap: + vm_munmap(addr, obj->base.size); + return err; +} + +static int igt_mmap(void *arg) +{ + struct drm_i915_private *i915 = arg; + struct intel_memory_region *mr; + enum intel_region_id id; + + for_each_memory_region(mr, i915, id) { + unsigned long sizes[] = { + PAGE_SIZE, + mr->min_page_size, + SZ_4M, + }; + int i; + + for (i = 0; i < ARRAY_SIZE(sizes); i++) { + struct drm_i915_gem_object *obj; + int err; + + obj = i915_gem_object_create_region(mr, sizes[i], 0); + if (obj == ERR_PTR(-ENODEV)) + continue; + + if (IS_ERR(obj)) + return PTR_ERR(obj); + + err = __igt_mmap(i915, obj, I915_MMAP_TYPE_GTT); + if (err == 0) + err = __igt_mmap(i915, obj, I915_MMAP_TYPE_WC); + + i915_gem_object_put(obj); + if (err) + return err; + } + } + + return 0; +} + +static int __igt_mmap_gpu(struct drm_i915_private *i915, + struct drm_i915_gem_object *obj, + enum i915_mmap_type type) +{ + struct intel_engine_cs *engine; + struct i915_mmap_offset *mmo; + unsigned long addr; + u32 __user *ux; + u32 bbe; + int err; + + /* + * Verify that the mmap access into the backing store aligns with + * that of the GPU, i.e. that mmap is indeed writing into the same + * page as being read by the GPU. 
+ */ + + if (!can_mmap(obj, type)) + return 0; + + err = wc_set(obj); + if (err == -ENXIO) + err = gtt_set(obj); + if (err) + return err; + + mmo = mmap_offset_attach(obj, type, NULL); + if (IS_ERR(mmo)) + return PTR_ERR(mmo); + + addr = igt_mmap_node(i915, &mmo->vma_node, 0, PROT_WRITE, MAP_SHARED); + if (IS_ERR_VALUE(addr)) + return addr; + + ux = u64_to_user_ptr((u64)addr); + bbe = MI_BATCH_BUFFER_END; + if (put_user(bbe, ux)) { + pr_err("%s: Unable to write to mmap\n", obj->mm.region->name); + err = -EFAULT; + goto out_unmap; + } + + if (type == I915_MMAP_TYPE_GTT) + intel_gt_flush_ggtt_writes(&i915->gt); + + for_each_uabi_engine(engine, i915) { + struct i915_request *rq; + struct i915_vma *vma; + + vma = i915_vma_instance(obj, engine->kernel_context->vm, NULL); + if (IS_ERR(vma)) { + err = PTR_ERR(vma); + goto out_unmap; + } + + err = i915_vma_pin(vma, 0, 0, PIN_USER); + if (err) + goto out_unmap; + + rq = i915_request_create(engine->kernel_context); + if (IS_ERR(rq)) { + err = PTR_ERR(rq); + goto out_unpin; + } + + i915_vma_lock(vma); + err = i915_request_await_object(rq, vma->obj, false); + if (err == 0) + err = i915_vma_move_to_active(vma, rq, 0); + i915_vma_unlock(vma); + + err = engine->emit_bb_start(rq, vma->node.start, 0, 0); + i915_request_get(rq); + i915_request_add(rq); + + if (i915_request_wait(rq, 0, HZ / 5) < 0) { + struct drm_printer p = + drm_info_printer(engine->i915->drm.dev); + + pr_err("%s(%s, %s): Failed to execute batch\n", + __func__, engine->name, obj->mm.region->name); + intel_engine_dump(engine, &p, + "%s\n", engine->name); + + intel_gt_set_wedged(engine->gt); + err = -EIO; + } + i915_request_put(rq); + +out_unpin: + i915_vma_unpin(vma); + if (err) + goto out_unmap; + } + +out_unmap: + vm_munmap(addr, obj->base.size); + return err; +} + +static int igt_mmap_gpu(void *arg) +{ + struct drm_i915_private *i915 = arg; + struct intel_memory_region *mr; + enum intel_region_id id; + + for_each_memory_region(mr, i915, id) { + struct drm_i915_gem_object *obj; + int err; + + obj = i915_gem_object_create_region(mr, PAGE_SIZE, 0); + if (obj == ERR_PTR(-ENODEV)) + continue; + + if (IS_ERR(obj)) + return PTR_ERR(obj); + + err = __igt_mmap_gpu(i915, obj, I915_MMAP_TYPE_GTT); + if (err == 0) + err = __igt_mmap_gpu(i915, obj, I915_MMAP_TYPE_WC); + + i915_gem_object_put(obj); + if (err) + return err; + } + + return 0; +} + +static int check_present_pte(pte_t *pte, unsigned long addr, void *data) +{ + if (!pte_present(*pte) || pte_none(*pte)) { + pr_err("missing PTE:%lx\n", + (addr - (unsigned long)data) >> PAGE_SHIFT); + return -EINVAL; + } + + return 0; +} + +static int check_absent_pte(pte_t *pte, unsigned long addr, void *data) +{ + if (pte_present(*pte) && !pte_none(*pte)) { + pr_err("present PTE:%lx; expected to be revoked\n", + (addr - (unsigned long)data) >> PAGE_SHIFT); + return -EINVAL; + } + + return 0; +} + +static int check_present(unsigned long addr, unsigned long len) +{ + return apply_to_page_range(current->mm, addr, len, + check_present_pte, (void *)addr); +} + +static int check_absent(unsigned long addr, unsigned long len) +{ + return apply_to_page_range(current->mm, addr, len, + check_absent_pte, (void *)addr); +} + +static int prefault_range(u64 start, u64 len) +{ + const char __user *addr, *end; + char __maybe_unused c; + int err; + + addr = u64_to_user_ptr(start); + end = addr + len; + + for (; addr < end; addr += PAGE_SIZE) { + err = __get_user(c, addr); + if (err) + return err; + } + + return __get_user(c, end - 1); +} + +static int 
__igt_mmap_revoke(struct drm_i915_private *i915, + struct drm_i915_gem_object *obj, + enum i915_mmap_type type) +{ + struct i915_mmap_offset *mmo; + unsigned long addr; + int err; + + if (!can_mmap(obj, type)) + return 0; + + mmo = mmap_offset_attach(obj, type, NULL); + if (IS_ERR(mmo)) + return PTR_ERR(mmo); + + addr = igt_mmap_node(i915, &mmo->vma_node, 0, PROT_WRITE, MAP_SHARED); + if (IS_ERR_VALUE(addr)) + return addr; + + err = prefault_range(addr, obj->base.size); + if (err) + goto out_unmap; + + GEM_BUG_ON(mmo->mmap_type == I915_MMAP_TYPE_GTT && + !atomic_read(&obj->bind_count)); + + err = check_present(addr, obj->base.size); + if (err) { + pr_err("%s: was not present\n", obj->mm.region->name); + goto out_unmap; + } + + /* + * After unbinding the object from the GGTT, its address may be reused + * for other objects. Ergo we have to revoke the previous mmap PTE + * access as it no longer points to the same object. + */ + err = i915_gem_object_unbind(obj, I915_GEM_OBJECT_UNBIND_ACTIVE); + if (err) { + pr_err("Failed to unbind object!\n"); + goto out_unmap; + } + GEM_BUG_ON(atomic_read(&obj->bind_count)); + + if (type != I915_MMAP_TYPE_GTT) { + __i915_gem_object_put_pages(obj); + if (i915_gem_object_has_pages(obj)) { + pr_err("Failed to put-pages object!\n"); + err = -EINVAL; + goto out_unmap; + } + } + + err = check_absent(addr, obj->base.size); + if (err) { + pr_err("%s: was not absent\n", obj->mm.region->name); + goto out_unmap; + } + +out_unmap: + vm_munmap(addr, obj->base.size); + return err; +} + +static int igt_mmap_revoke(void *arg) +{ + struct drm_i915_private *i915 = arg; + struct intel_memory_region *mr; + enum intel_region_id id; + + for_each_memory_region(mr, i915, id) { + struct drm_i915_gem_object *obj; + int err; + + obj = i915_gem_object_create_region(mr, PAGE_SIZE, 0); + if (obj == ERR_PTR(-ENODEV)) + continue; + + if (IS_ERR(obj)) + return PTR_ERR(obj); + + err = __igt_mmap_revoke(i915, obj, I915_MMAP_TYPE_GTT); + if (err == 0) + err = __igt_mmap_revoke(i915, obj, I915_MMAP_TYPE_WC); + + i915_gem_object_put(obj); + if (err) + return err; + } + + return 0; +} + int i915_gem_mman_live_selftests(struct drm_i915_private *i915) { static const struct i915_subtest tests[] = { SUBTEST(igt_partial_tiling), + SUBTEST(igt_smoke_tiling), SUBTEST(igt_mmap_offset_exhaustion), + SUBTEST(igt_mmap), + SUBTEST(igt_mmap_revoke), + SUBTEST(igt_mmap_gpu), }; return i915_subtests(tests, i915); diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_object_blt.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_object_blt.c index e23d8c9e9298..62077fe46715 100644 --- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_object_blt.c +++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_object_blt.c @@ -3,34 +3,254 @@ * Copyright © 2019 Intel Corporation */ +#include <linux/sort.h> + +#include "gt/intel_gt.h" +#include "gt/intel_engine_user.h" + #include "i915_selftest.h" +#include "gem/i915_gem_context.h" #include "selftests/igt_flush_test.h" +#include "selftests/i915_random.h" #include "selftests/mock_drm.h" +#include "huge_gem_object.h" #include "mock_context.h" -static int igt_fill_blt(void *arg) +static int wrap_ktime_compare(const void *A, const void *B) { - struct intel_context *ce = arg; - struct drm_i915_private *i915 = ce->gem_context->i915; - struct drm_i915_gem_object *obj; + const ktime_t *a = A, *b = B; + + return ktime_compare(*a, *b); +} + +static int __perf_fill_blt(struct drm_i915_gem_object *obj) +{ + struct drm_i915_private *i915 = to_i915(obj->base.dev); + int inst = 0; + + do { + 
struct intel_engine_cs *engine; + ktime_t t[5]; + int pass; + int err; + + engine = intel_engine_lookup_user(i915, + I915_ENGINE_CLASS_COPY, + inst++); + if (!engine) + return 0; + + intel_engine_pm_get(engine); + for (pass = 0; pass < ARRAY_SIZE(t); pass++) { + struct intel_context *ce = engine->kernel_context; + ktime_t t0, t1; + + t0 = ktime_get(); + + err = i915_gem_object_fill_blt(obj, ce, 0); + if (err) + break; + + err = i915_gem_object_wait(obj, + I915_WAIT_ALL, + MAX_SCHEDULE_TIMEOUT); + if (err) + break; + + t1 = ktime_get(); + t[pass] = ktime_sub(t1, t0); + } + intel_engine_pm_put(engine); + if (err) + return err; + + sort(t, ARRAY_SIZE(t), sizeof(*t), wrap_ktime_compare, NULL); + pr_info("%s: blt %zd KiB fill: %lld MiB/s\n", + engine->name, + obj->base.size >> 10, + div64_u64(mul_u32_u32(4 * obj->base.size, + 1000 * 1000 * 1000), + t[1] + 2 * t[2] + t[3]) >> 20); + } while (1); +} + +static int perf_fill_blt(void *arg) +{ + struct drm_i915_private *i915 = arg; + static const unsigned long sizes[] = { + SZ_4K, + SZ_64K, + SZ_2M, + SZ_64M + }; + int i; + + for (i = 0; i < ARRAY_SIZE(sizes); i++) { + struct drm_i915_gem_object *obj; + int err; + + obj = i915_gem_object_create_internal(i915, sizes[i]); + if (IS_ERR(obj)) + return PTR_ERR(obj); + + err = __perf_fill_blt(obj); + i915_gem_object_put(obj); + if (err) + return err; + } + + return 0; +} + +static int __perf_copy_blt(struct drm_i915_gem_object *src, + struct drm_i915_gem_object *dst) +{ + struct drm_i915_private *i915 = to_i915(src->base.dev); + int inst = 0; + + do { + struct intel_engine_cs *engine; + ktime_t t[5]; + int pass; + int err = 0; + + engine = intel_engine_lookup_user(i915, + I915_ENGINE_CLASS_COPY, + inst++); + if (!engine) + return 0; + + intel_engine_pm_get(engine); + for (pass = 0; pass < ARRAY_SIZE(t); pass++) { + struct intel_context *ce = engine->kernel_context; + ktime_t t0, t1; + + t0 = ktime_get(); + + err = i915_gem_object_copy_blt(src, dst, ce); + if (err) + break; + + err = i915_gem_object_wait(dst, + I915_WAIT_ALL, + MAX_SCHEDULE_TIMEOUT); + if (err) + break; + + t1 = ktime_get(); + t[pass] = ktime_sub(t1, t0); + } + intel_engine_pm_put(engine); + if (err) + return err; + + sort(t, ARRAY_SIZE(t), sizeof(*t), wrap_ktime_compare, NULL); + pr_info("%s: blt %zd KiB copy: %lld MiB/s\n", + engine->name, + src->base.size >> 10, + div64_u64(mul_u32_u32(4 * src->base.size, + 1000 * 1000 * 1000), + t[1] + 2 * t[2] + t[3]) >> 20); + } while (1); +} + +static int perf_copy_blt(void *arg) +{ + struct drm_i915_private *i915 = arg; + static const unsigned long sizes[] = { + SZ_4K, + SZ_64K, + SZ_2M, + SZ_64M + }; + int i; + + for (i = 0; i < ARRAY_SIZE(sizes); i++) { + struct drm_i915_gem_object *src, *dst; + int err; + + src = i915_gem_object_create_internal(i915, sizes[i]); + if (IS_ERR(src)) + return PTR_ERR(src); + + dst = i915_gem_object_create_internal(i915, sizes[i]); + if (IS_ERR(dst)) { + err = PTR_ERR(dst); + goto err_src; + } + + err = __perf_copy_blt(src, dst); + + i915_gem_object_put(dst); +err_src: + i915_gem_object_put(src); + if (err) + return err; + } + + return 0; +} + +struct igt_thread_arg { + struct drm_i915_private *i915; + struct i915_gem_context *ctx; + struct file *file; struct rnd_state prng; + unsigned int n_cpus; +}; + +static int igt_fill_blt_thread(void *arg) +{ + struct igt_thread_arg *thread = arg; + struct drm_i915_private *i915 = thread->i915; + struct rnd_state *prng = &thread->prng; + struct drm_i915_gem_object *obj; + struct i915_gem_context *ctx; + struct intel_context 
*ce; + unsigned int prio; IGT_TIMEOUT(end); - u32 *vaddr; - int err = 0; + int err; + + ctx = thread->ctx; + if (!ctx) { + ctx = live_context(i915, thread->file); + if (IS_ERR(ctx)) + return PTR_ERR(ctx); + + prio = i915_prandom_u32_max_state(I915_PRIORITY_MAX, prng); + ctx->sched.priority = I915_USER_PRIORITY(prio); + } - prandom_seed_state(&prng, i915_selftest.random_seed); + ce = i915_gem_context_get_engine(ctx, BCS0); + GEM_BUG_ON(IS_ERR(ce)); do { - u32 sz = prandom_u32_state(&prng) % SZ_32M; - u32 val = prandom_u32_state(&prng); + const u32 max_block_size = S16_MAX * PAGE_SIZE; + u32 val = prandom_u32_state(prng); + u64 total = ce->vm->total; + u32 phys_sz; + u32 sz; + u32 *vaddr; u32 i; + /* + * If we have a tiny shared address space, like for the GGTT + * then we can't be too greedy. + */ + if (i915_is_ggtt(ce->vm)) + total = div64_u64(total, thread->n_cpus); + + sz = min_t(u64, total >> 4, prandom_u32_state(prng)); + phys_sz = sz % (max_block_size + 1); + sz = round_up(sz, PAGE_SIZE); + phys_sz = round_up(phys_sz, PAGE_SIZE); - pr_debug("%s with sz=%x, val=%x\n", __func__, sz, val); + pr_debug("%s with phys_sz= %x, sz=%x, val=%x\n", __func__, + phys_sz, sz, val); - obj = i915_gem_object_create_internal(i915, sz); + obj = huge_gem_object(i915, phys_sz, sz); if (IS_ERR(obj)) { err = PTR_ERR(obj); goto err_flush; @@ -46,14 +266,13 @@ static int igt_fill_blt(void *arg) * Make sure the potentially async clflush does its job, if * required. */ - memset32(vaddr, val ^ 0xdeadbeaf, obj->base.size / sizeof(u32)); + memset32(vaddr, val ^ 0xdeadbeaf, + huge_gem_object_phys_size(obj) / sizeof(u32)); if (!(obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_WRITE)) obj->cache_dirty = true; - mutex_lock(&i915->drm.struct_mutex); err = i915_gem_object_fill_blt(obj, ce, val); - mutex_unlock(&i915->drm.struct_mutex); if (err) goto err_unpin; @@ -63,7 +282,7 @@ static int igt_fill_blt(void *arg) if (err) goto err_unpin; - for (i = 0; i < obj->base.size / sizeof(u32); ++i) { + for (i = 0; i < huge_gem_object_phys_size(obj) / sizeof(u32); ++i) { if (vaddr[i] != val) { pr_err("vaddr[%u]=%x, expected=%x\n", i, vaddr[i], val); @@ -83,28 +302,261 @@ err_unpin: err_put: i915_gem_object_put(obj); err_flush: - mutex_lock(&i915->drm.struct_mutex); - if (igt_flush_test(i915, I915_WAIT_LOCKED)) - err = -EIO; - mutex_unlock(&i915->drm.struct_mutex); + if (err == -ENOMEM) + err = 0; + + intel_context_put(ce); + return err; +} + +static int igt_copy_blt_thread(void *arg) +{ + struct igt_thread_arg *thread = arg; + struct drm_i915_private *i915 = thread->i915; + struct rnd_state *prng = &thread->prng; + struct drm_i915_gem_object *src, *dst; + struct i915_gem_context *ctx; + struct intel_context *ce; + unsigned int prio; + IGT_TIMEOUT(end); + int err; + + ctx = thread->ctx; + if (!ctx) { + ctx = live_context(i915, thread->file); + if (IS_ERR(ctx)) + return PTR_ERR(ctx); + + prio = i915_prandom_u32_max_state(I915_PRIORITY_MAX, prng); + ctx->sched.priority = I915_USER_PRIORITY(prio); + } + + ce = i915_gem_context_get_engine(ctx, BCS0); + GEM_BUG_ON(IS_ERR(ce)); + + do { + const u32 max_block_size = S16_MAX * PAGE_SIZE; + u32 val = prandom_u32_state(prng); + u64 total = ce->vm->total; + u32 phys_sz; + u32 sz; + u32 *vaddr; + u32 i; + + if (i915_is_ggtt(ce->vm)) + total = div64_u64(total, thread->n_cpus); + + sz = min_t(u64, total >> 4, prandom_u32_state(prng)); + phys_sz = sz % (max_block_size + 1); + + sz = round_up(sz, PAGE_SIZE); + phys_sz = round_up(phys_sz, PAGE_SIZE); + + pr_debug("%s with phys_sz= %x, sz=%x, 
val=%x\n", __func__, + phys_sz, sz, val); + + src = huge_gem_object(i915, phys_sz, sz); + if (IS_ERR(src)) { + err = PTR_ERR(src); + goto err_flush; + } + + vaddr = i915_gem_object_pin_map(src, I915_MAP_WB); + if (IS_ERR(vaddr)) { + err = PTR_ERR(vaddr); + goto err_put_src; + } + + memset32(vaddr, val, + huge_gem_object_phys_size(src) / sizeof(u32)); + + i915_gem_object_unpin_map(src); + + if (!(src->cache_coherent & I915_BO_CACHE_COHERENT_FOR_READ)) + src->cache_dirty = true; + + dst = huge_gem_object(i915, phys_sz, sz); + if (IS_ERR(dst)) { + err = PTR_ERR(dst); + goto err_put_src; + } + + vaddr = i915_gem_object_pin_map(dst, I915_MAP_WB); + if (IS_ERR(vaddr)) { + err = PTR_ERR(vaddr); + goto err_put_dst; + } + + memset32(vaddr, val ^ 0xdeadbeaf, + huge_gem_object_phys_size(dst) / sizeof(u32)); + + if (!(dst->cache_coherent & I915_BO_CACHE_COHERENT_FOR_WRITE)) + dst->cache_dirty = true; + + err = i915_gem_object_copy_blt(src, dst, ce); + if (err) + goto err_unpin; + + i915_gem_object_lock(dst); + err = i915_gem_object_set_to_cpu_domain(dst, false); + i915_gem_object_unlock(dst); + if (err) + goto err_unpin; + for (i = 0; i < huge_gem_object_phys_size(dst) / sizeof(u32); ++i) { + if (vaddr[i] != val) { + pr_err("vaddr[%u]=%x, expected=%x\n", i, + vaddr[i], val); + err = -EINVAL; + goto err_unpin; + } + } + + i915_gem_object_unpin_map(dst); + + i915_gem_object_put(src); + i915_gem_object_put(dst); + } while (!time_after(jiffies, end)); + + goto err_flush; + +err_unpin: + i915_gem_object_unpin_map(dst); +err_put_dst: + i915_gem_object_put(dst); +err_put_src: + i915_gem_object_put(src); +err_flush: if (err == -ENOMEM) err = 0; + intel_context_put(ce); + return err; +} + +static int igt_threaded_blt(struct drm_i915_private *i915, + int (*blt_fn)(void *arg), + unsigned int flags) +#define SINGLE_CTX BIT(0) +{ + struct igt_thread_arg *thread; + struct task_struct **tsk; + unsigned int n_cpus, i; + I915_RND_STATE(prng); + int err = 0; + + n_cpus = num_online_cpus() + 1; + + tsk = kcalloc(n_cpus, sizeof(struct task_struct *), GFP_KERNEL); + if (!tsk) + return 0; + + thread = kcalloc(n_cpus, sizeof(struct igt_thread_arg), GFP_KERNEL); + if (!thread) + goto out_tsk; + + thread[0].file = mock_file(i915); + if (IS_ERR(thread[0].file)) { + err = PTR_ERR(thread[0].file); + goto out_thread; + } + + if (flags & SINGLE_CTX) { + thread[0].ctx = live_context(i915, thread[0].file); + if (IS_ERR(thread[0].ctx)) { + err = PTR_ERR(thread[0].ctx); + goto out_file; + } + } + + for (i = 0; i < n_cpus; ++i) { + thread[i].i915 = i915; + thread[i].file = thread[0].file; + thread[i].ctx = thread[0].ctx; + thread[i].n_cpus = n_cpus; + thread[i].prng = + I915_RND_STATE_INITIALIZER(prandom_u32_state(&prng)); + + tsk[i] = kthread_run(blt_fn, &thread[i], "igt/blt-%d", i); + if (IS_ERR(tsk[i])) { + err = PTR_ERR(tsk[i]); + break; + } + + get_task_struct(tsk[i]); + } + + yield(); /* start all threads before we kthread_stop() */ + + for (i = 0; i < n_cpus; ++i) { + int status; + + if (IS_ERR_OR_NULL(tsk[i])) + continue; + + status = kthread_stop(tsk[i]); + if (status && !err) + err = status; + + put_task_struct(tsk[i]); + } + +out_file: + fput(thread[0].file); +out_thread: + kfree(thread); +out_tsk: + kfree(tsk); return err; } +static int igt_fill_blt(void *arg) +{ + return igt_threaded_blt(arg, igt_fill_blt_thread, 0); +} + +static int igt_fill_blt_ctx0(void *arg) +{ + return igt_threaded_blt(arg, igt_fill_blt_thread, SINGLE_CTX); +} + +static int igt_copy_blt(void *arg) +{ + return igt_threaded_blt(arg, igt_copy_blt_thread, 
0); +} + +static int igt_copy_blt_ctx0(void *arg) +{ + return igt_threaded_blt(arg, igt_copy_blt_thread, SINGLE_CTX); +} + int i915_gem_object_blt_live_selftests(struct drm_i915_private *i915) { static const struct i915_subtest tests[] = { SUBTEST(igt_fill_blt), + SUBTEST(igt_fill_blt_ctx0), + SUBTEST(igt_copy_blt), + SUBTEST(igt_copy_blt_ctx0), }; - if (i915_terminally_wedged(i915)) + if (intel_gt_is_wedged(&i915->gt)) return 0; if (!HAS_ENGINE(i915, BCS0)) return 0; - return i915_subtests(tests, i915->engine[BCS0]->kernel_context); + return i915_live_subtests(tests, i915); +} + +int i915_gem_object_blt_perf_selftests(struct drm_i915_private *i915) +{ + static const struct i915_subtest tests[] = { + SUBTEST(perf_fill_blt), + SUBTEST(perf_copy_blt), + }; + + if (intel_gt_is_wedged(&i915->gt)) + return 0; + + return i915_live_subtests(tests, i915); } diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_phys.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_phys.c index 94a15e3f6db8..34932871b3a5 100644 --- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_phys.c +++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_phys.c @@ -25,9 +25,7 @@ static int mock_phys_object(void *arg) goto out; } - mutex_lock(&i915->drm.struct_mutex); err = i915_gem_object_attach_phys(obj, PAGE_SIZE); - mutex_unlock(&i915->drm.struct_mutex); if (err) { pr_err("i915_gem_object_attach_phys failed, err=%d\n", err); goto out_obj; diff --git a/drivers/gpu/drm/i915/gem/selftests/igt_gem_utils.c b/drivers/gpu/drm/i915/gem/selftests/igt_gem_utils.c index b232e6d2cd92..6718da20f35d 100644 --- a/drivers/gpu/drm/i915/gem/selftests/igt_gem_utils.c +++ b/drivers/gpu/drm/i915/gem/selftests/igt_gem_utils.c @@ -9,6 +9,9 @@ #include "gem/i915_gem_context.h" #include "gem/i915_gem_pm.h" #include "gt/intel_context.h" +#include "gt/intel_gt.h" +#include "i915_vma.h" +#include "i915_drv.h" #include "i915_request.h" @@ -23,7 +26,7 @@ igt_request_alloc(struct i915_gem_context *ctx, struct intel_engine_cs *engine) * GGTT space, so do this first before we reserve a seqno for * ourselves. */ - ce = i915_gem_context_get_engine(ctx, engine->id); + ce = i915_gem_context_get_engine(ctx, engine->legacy_idx); if (IS_ERR(ce)) return ERR_CAST(ce); @@ -32,3 +35,134 @@ igt_request_alloc(struct i915_gem_context *ctx, struct intel_engine_cs *engine) return rq; } + +struct i915_vma * +igt_emit_store_dw(struct i915_vma *vma, + u64 offset, + unsigned long count, + u32 val) +{ + struct drm_i915_gem_object *obj; + const int gen = INTEL_GEN(vma->vm->i915); + unsigned long n, size; + u32 *cmd; + int err; + + size = (4 * count + 1) * sizeof(u32); + size = round_up(size, PAGE_SIZE); + obj = i915_gem_object_create_internal(vma->vm->i915, size); + if (IS_ERR(obj)) + return ERR_CAST(obj); + + cmd = i915_gem_object_pin_map(obj, I915_MAP_WC); + if (IS_ERR(cmd)) { + err = PTR_ERR(cmd); + goto err; + } + + GEM_BUG_ON(offset + (count - 1) * PAGE_SIZE > vma->node.size); + offset += vma->node.start; + + for (n = 0; n < count; n++) { + if (gen >= 8) { + *cmd++ = MI_STORE_DWORD_IMM_GEN4; + *cmd++ = lower_32_bits(offset); + *cmd++ = upper_32_bits(offset); + *cmd++ = val; + } else if (gen >= 4) { + *cmd++ = MI_STORE_DWORD_IMM_GEN4 | + (gen < 6 ? 
MI_USE_GGTT : 0); + *cmd++ = 0; + *cmd++ = offset; + *cmd++ = val; + } else { + *cmd++ = MI_STORE_DWORD_IMM | MI_MEM_VIRTUAL; + *cmd++ = offset; + *cmd++ = val; + } + offset += PAGE_SIZE; + } + *cmd = MI_BATCH_BUFFER_END; + i915_gem_object_unpin_map(obj); + + intel_gt_chipset_flush(vma->vm->gt); + + vma = i915_vma_instance(obj, vma->vm, NULL); + if (IS_ERR(vma)) { + err = PTR_ERR(vma); + goto err; + } + + err = i915_vma_pin(vma, 0, 0, PIN_USER); + if (err) + goto err; + + return vma; + +err: + i915_gem_object_put(obj); + return ERR_PTR(err); +} + +int igt_gpu_fill_dw(struct intel_context *ce, + struct i915_vma *vma, u64 offset, + unsigned long count, u32 val) +{ + struct i915_request *rq; + struct i915_vma *batch; + unsigned int flags; + int err; + + GEM_BUG_ON(!intel_engine_can_store_dword(ce->engine)); + GEM_BUG_ON(!i915_vma_is_pinned(vma)); + + batch = igt_emit_store_dw(vma, offset, count, val); + if (IS_ERR(batch)) + return PTR_ERR(batch); + + rq = intel_context_create_request(ce); + if (IS_ERR(rq)) { + err = PTR_ERR(rq); + goto err_batch; + } + + flags = 0; + if (INTEL_GEN(ce->vm->i915) <= 5) + flags |= I915_DISPATCH_SECURE; + + err = rq->engine->emit_bb_start(rq, + batch->node.start, batch->node.size, + flags); + if (err) + goto err_request; + + i915_vma_lock(batch); + err = i915_request_await_object(rq, batch->obj, false); + if (err == 0) + err = i915_vma_move_to_active(batch, rq, 0); + i915_vma_unlock(batch); + if (err) + goto skip_request; + + i915_vma_lock(vma); + err = i915_request_await_object(rq, vma->obj, true); + if (err == 0) + err = i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE); + i915_vma_unlock(vma); + if (err) + goto skip_request; + + i915_request_add(rq); + + i915_vma_unpin_and_release(&batch, 0); + + return 0; + +skip_request: + i915_request_skip(rq, err); +err_request: + i915_request_add(rq); +err_batch: + i915_vma_unpin_and_release(&batch, 0); + return err; +} diff --git a/drivers/gpu/drm/i915/gem/selftests/igt_gem_utils.h b/drivers/gpu/drm/i915/gem/selftests/igt_gem_utils.h index 0f17251cf75d..4221cf84d175 100644 --- a/drivers/gpu/drm/i915/gem/selftests/igt_gem_utils.h +++ b/drivers/gpu/drm/i915/gem/selftests/igt_gem_utils.h @@ -7,11 +7,26 @@ #ifndef __IGT_GEM_UTILS_H__ #define __IGT_GEM_UTILS_H__ +#include <linux/types.h> + struct i915_request; struct i915_gem_context; +struct i915_vma; + +struct intel_context; struct intel_engine_cs; struct i915_request * igt_request_alloc(struct i915_gem_context *ctx, struct intel_engine_cs *engine); +struct i915_vma * +igt_emit_store_dw(struct i915_vma *vma, + u64 offset, + unsigned long count, + u32 val); + +int igt_gpu_fill_dw(struct intel_context *ce, + struct i915_vma *vma, u64 offset, + unsigned long count, u32 val); + #endif /* __IGT_GEM_UTILS_H__ */ diff --git a/drivers/gpu/drm/i915/gem/selftests/mock_context.c b/drivers/gpu/drm/i915/gem/selftests/mock_context.c index be8974ccff24..384143aa7776 100644 --- a/drivers/gpu/drm/i915/gem/selftests/mock_context.c +++ b/drivers/gpu/drm/i915/gem/selftests/mock_context.c @@ -5,6 +5,7 @@ */ #include "mock_context.h" +#include "selftests/mock_drm.h" #include "selftests/mock_gtt.h" struct i915_gem_context * @@ -13,7 +14,6 @@ mock_context(struct drm_i915_private *i915, { struct i915_gem_context *ctx; struct i915_gem_engines *e; - int ret; ctx = kzalloc(sizeof(*ctx), GFP_KERNEL); if (!ctx) @@ -23,6 +23,8 @@ mock_context(struct drm_i915_private *i915, INIT_LIST_HEAD(&ctx->link); ctx->i915 = i915; + i915_gem_context_set_persistence(ctx); + mutex_init(&ctx->engines_mutex); e = 
default_engines(ctx); if (IS_ERR(e)) @@ -30,32 +32,26 @@ mock_context(struct drm_i915_private *i915, RCU_INIT_POINTER(ctx->engines, e); INIT_RADIX_TREE(&ctx->handles_vma, GFP_KERNEL); - INIT_LIST_HEAD(&ctx->hw_id_link); mutex_init(&ctx->mutex); - ret = i915_gem_context_pin_hw_id(ctx); - if (ret < 0) - goto err_engines; - if (name) { struct i915_ppgtt *ppgtt; - ctx->name = kstrdup(name, GFP_KERNEL); - if (!ctx->name) - goto err_put; + strncpy(ctx->name, name, sizeof(ctx->name)); ppgtt = mock_ppgtt(i915, name); if (!ppgtt) goto err_put; + mutex_lock(&ctx->mutex); __set_ppgtt(ctx, &ppgtt->vm); + mutex_unlock(&ctx->mutex); + i915_vm_put(&ppgtt->vm); } return ctx; -err_engines: - free_engines(rcu_access_pointer(ctx->engines)); err_free: kfree(ctx); return NULL; @@ -73,22 +69,21 @@ void mock_context_close(struct i915_gem_context *ctx) void mock_init_contexts(struct drm_i915_private *i915) { - init_contexts(i915); + init_contexts(&i915->gem.contexts); } struct i915_gem_context * -live_context(struct drm_i915_private *i915, struct drm_file *file) +live_context(struct drm_i915_private *i915, struct file *file) { struct i915_gem_context *ctx; int err; - - lockdep_assert_held(&i915->drm.struct_mutex); + u32 id; ctx = i915_gem_create_context(i915, 0); if (IS_ERR(ctx)) return ctx; - err = gem_context_register(ctx, file->driver_priv); + err = gem_context_register(ctx, to_drm_file(file)->driver_priv, &id); if (err < 0) goto err_ctx; @@ -102,7 +97,16 @@ err_ctx: struct i915_gem_context * kernel_context(struct drm_i915_private *i915) { - return i915_gem_context_create_kernel(i915, I915_PRIORITY_NORMAL); + struct i915_gem_context *ctx; + + ctx = i915_gem_create_context(i915, 0); + if (IS_ERR(ctx)) + return ctx; + + i915_gem_context_clear_bannable(ctx); + i915_gem_context_set_persistence(ctx); + + return ctx; } void kernel_context_close(struct i915_gem_context *ctx) diff --git a/drivers/gpu/drm/i915/gem/selftests/mock_context.h b/drivers/gpu/drm/i915/gem/selftests/mock_context.h index 0b926653914f..fb83d2f09212 100644 --- a/drivers/gpu/drm/i915/gem/selftests/mock_context.h +++ b/drivers/gpu/drm/i915/gem/selftests/mock_context.h @@ -7,6 +7,9 @@ #ifndef __MOCK_CONTEXT_H #define __MOCK_CONTEXT_H +struct file; +struct drm_i915_private; + void mock_init_contexts(struct drm_i915_private *i915); struct i915_gem_context * @@ -16,7 +19,7 @@ mock_context(struct drm_i915_private *i915, void mock_context_close(struct i915_gem_context *ctx); struct i915_gem_context * -live_context(struct drm_i915_private *i915, struct drm_file *file); +live_context(struct drm_i915_private *i915, struct file *file); struct i915_gem_context *kernel_context(struct drm_i915_private *i915); void kernel_context_close(struct i915_gem_context *ctx); diff --git a/drivers/gpu/drm/i915/gem/selftests/mock_dmabuf.c b/drivers/gpu/drm/i915/gem/selftests/mock_dmabuf.c index b9e059d4328a..9272bef57092 100644 --- a/drivers/gpu/drm/i915/gem/selftests/mock_dmabuf.c +++ b/drivers/gpu/drm/i915/gem/selftests/mock_dmabuf.c @@ -76,20 +76,6 @@ static void mock_dmabuf_vunmap(struct dma_buf *dma_buf, void *vaddr) vm_unmap_ram(vaddr, mock->npages); } -static void *mock_dmabuf_kmap(struct dma_buf *dma_buf, unsigned long page_num) -{ - struct mock_dmabuf *mock = to_mock(dma_buf); - - return kmap(mock->pages[page_num]); -} - -static void mock_dmabuf_kunmap(struct dma_buf *dma_buf, unsigned long page_num, void *addr) -{ - struct mock_dmabuf *mock = to_mock(dma_buf); - - return kunmap(mock->pages[page_num]); -} - static int mock_dmabuf_mmap(struct dma_buf *dma_buf, 
struct vm_area_struct *vma) { return -ENODEV; @@ -99,8 +85,6 @@ static const struct dma_buf_ops mock_dmabuf_ops = { .map_dma_buf = mock_map_dma_buf, .unmap_dma_buf = mock_unmap_dma_buf, .release = mock_dmabuf_release, - .map = mock_dmabuf_kmap, - .unmap = mock_dmabuf_kunmap, .mmap = mock_dmabuf_mmap, .vmap = mock_dmabuf_vmap, .vunmap = mock_dmabuf_vunmap, diff --git a/drivers/gpu/drm/i915/gem/selftests/mock_dmabuf.h b/drivers/gpu/drm/i915/gem/selftests/mock_dmabuf.h index f0f8bbd82dfc..22818bbb139d 100644 --- a/drivers/gpu/drm/i915/gem/selftests/mock_dmabuf.h +++ b/drivers/gpu/drm/i915/gem/selftests/mock_dmabuf.h @@ -14,7 +14,7 @@ struct mock_dmabuf { struct page *pages[]; }; -static struct mock_dmabuf *to_mock(struct dma_buf *buf) +static inline struct mock_dmabuf *to_mock(struct dma_buf *buf) { return buf->priv; } diff --git a/drivers/gpu/drm/i915/gem/selftests/mock_gem_object.h b/drivers/gpu/drm/i915/gem/selftests/mock_gem_object.h index 370360b4a148..688511afa883 100644 --- a/drivers/gpu/drm/i915/gem/selftests/mock_gem_object.h +++ b/drivers/gpu/drm/i915/gem/selftests/mock_gem_object.h @@ -7,6 +7,8 @@ #ifndef __MOCK_GEM_OBJECT_H__ #define __MOCK_GEM_OBJECT_H__ +#include "gem/i915_gem_object_types.h" + struct mock_object { struct drm_i915_gem_object base; };