Diffstat (limited to 'drivers/gpu/drm/i915/gem')
-rw-r--r--  drivers/gpu/drm/i915/gem/Makefile | 1
-rw-r--r--  drivers/gpu/drm/i915/gem/Makefile.header-test | 16
-rw-r--r--  drivers/gpu/drm/i915/gem/i915_gem_busy.c | 16
-rw-r--r--  drivers/gpu/drm/i915/gem/i915_gem_clflush.c | 125
-rw-r--r--  drivers/gpu/drm/i915/gem/i915_gem_client_blt.c | 65
-rw-r--r--  drivers/gpu/drm/i915/gem/i915_gem_context.c | 1100
-rw-r--r--  drivers/gpu/drm/i915/gem/i915_gem_context.h | 74
-rw-r--r--  drivers/gpu/drm/i915/gem/i915_gem_context_types.h | 54
-rw-r--r--  drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c | 46
-rw-r--r--  drivers/gpu/drm/i915/gem/i915_gem_domain.c | 211
-rw-r--r--  drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c | 642
-rw-r--r--  drivers/gpu/drm/i915/gem/i915_gem_fence.c | 5
-rw-r--r--  drivers/gpu/drm/i915/gem/i915_gem_internal.c | 20
-rw-r--r--  drivers/gpu/drm/i915/gem/i915_gem_ioctls.h | 4
-rw-r--r--  drivers/gpu/drm/i915/gem/i915_gem_lmem.c | 56
-rw-r--r--  drivers/gpu/drm/i915/gem/i915_gem_lmem.h | 29
-rw-r--r--  drivers/gpu/drm/i915/gem/i915_gem_mman.c | 661
-rw-r--r--  drivers/gpu/drm/i915/gem/i915_gem_mman.h | 31
-rw-r--r--  drivers/gpu/drm/i915/gem/i915_gem_object.c | 248
-rw-r--r--  drivers/gpu/drm/i915/gem/i915_gem_object.h | 144
-rw-r--r--  drivers/gpu/drm/i915/gem/i915_gem_object_blt.c | 377
-rw-r--r--  drivers/gpu/drm/i915/gem/i915_gem_object_blt.h | 25
-rw-r--r--  drivers/gpu/drm/i915/gem/i915_gem_object_types.h | 70
-rw-r--r--  drivers/gpu/drm/i915/gem/i915_gem_pages.c | 126
-rw-r--r--  drivers/gpu/drm/i915/gem/i915_gem_phys.c | 20
-rw-r--r--  drivers/gpu/drm/i915/gem/i915_gem_pm.c | 201
-rw-r--r--  drivers/gpu/drm/i915/gem/i915_gem_pm.h | 3
-rw-r--r--  drivers/gpu/drm/i915/gem/i915_gem_region.c | 177
-rw-r--r--  drivers/gpu/drm/i915/gem/i915_gem_region.h | 29
-rw-r--r--  drivers/gpu/drm/i915/gem/i915_gem_shmem.c | 92
-rw-r--r--  drivers/gpu/drm/i915/gem/i915_gem_shrinker.c | 202
-rw-r--r--  drivers/gpu/drm/i915/gem/i915_gem_shrinker.h | 31
-rw-r--r--  drivers/gpu/drm/i915/gem/i915_gem_stolen.c | 324
-rw-r--r--  drivers/gpu/drm/i915/gem/i915_gem_stolen.h | 34
-rw-r--r--  drivers/gpu/drm/i915/gem/i915_gem_throttle.c | 6
-rw-r--r--  drivers/gpu/drm/i915/gem/i915_gem_tiling.c | 43
-rw-r--r--  drivers/gpu/drm/i915/gem/i915_gem_userptr.c | 91
-rw-r--r--  drivers/gpu/drm/i915/gem/i915_gem_wait.c | 24
-rw-r--r--  drivers/gpu/drm/i915/gem/i915_gemfs.c | 31
-rw-r--r--  drivers/gpu/drm/i915/gem/selftests/huge_gem_object.c | 14
-rw-r--r--  drivers/gpu/drm/i915/gem/selftests/huge_gem_object.h | 6
-rw-r--r--  drivers/gpu/drm/i915/gem/selftests/huge_pages.c | 730
-rw-r--r--  drivers/gpu/drm/i915/gem/selftests/i915_gem_client_blt.c | 68
-rw-r--r--  drivers/gpu/drm/i915/gem/selftests/i915_gem_coherency.c | 224
-rw-r--r--  drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c | 1013
-rw-r--r--  drivers/gpu/drm/i915/gem/selftests/i915_gem_dmabuf.c | 107
-rw-r--r--  drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c | 927
-rw-r--r--  drivers/gpu/drm/i915/gem/selftests/i915_gem_object_blt.c | 494
-rw-r--r--  drivers/gpu/drm/i915/gem/selftests/i915_gem_phys.c | 2
-rw-r--r--  drivers/gpu/drm/i915/gem/selftests/igt_gem_utils.c | 136
-rw-r--r--  drivers/gpu/drm/i915/gem/selftests/igt_gem_utils.h | 15
-rw-r--r--  drivers/gpu/drm/i915/gem/selftests/mock_context.c | 38
-rw-r--r--  drivers/gpu/drm/i915/gem/selftests/mock_context.h | 5
-rw-r--r--  drivers/gpu/drm/i915/gem/selftests/mock_dmabuf.c | 16
-rw-r--r--  drivers/gpu/drm/i915/gem/selftests/mock_dmabuf.h | 2
-rw-r--r--  drivers/gpu/drm/i915/gem/selftests/mock_gem_object.h | 2
56 files changed, 5992 insertions, 3261 deletions
diff --git a/drivers/gpu/drm/i915/gem/Makefile b/drivers/gpu/drm/i915/gem/Makefile
deleted file mode 100644
index 07e7b8b840ea..000000000000
--- a/drivers/gpu/drm/i915/gem/Makefile
+++ /dev/null
@@ -1 +0,0 @@
-include $(src)/Makefile.header-test # Extra header tests
diff --git a/drivers/gpu/drm/i915/gem/Makefile.header-test b/drivers/gpu/drm/i915/gem/Makefile.header-test
deleted file mode 100644
index 61e06cbb4b32..000000000000
--- a/drivers/gpu/drm/i915/gem/Makefile.header-test
+++ /dev/null
@@ -1,16 +0,0 @@
-# SPDX-License-Identifier: MIT
-# Copyright © 2019 Intel Corporation
-
-# Test the headers are compilable as standalone units
-header_test := $(notdir $(wildcard $(src)/*.h))
-
-quiet_cmd_header_test = HDRTEST $@
- cmd_header_test = echo "\#include \"$(<F)\"" > $@
-
-header_test_%.c: %.h
- $(call cmd,header_test)
-
-extra-$(CONFIG_DRM_I915_WERROR) += \
- $(foreach h,$(header_test),$(patsubst %.h,header_test_%.o,$(h)))
-
-clean-files += $(foreach h,$(header_test),$(patsubst %.h,header_test_%.c,$(h)))
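
For reference, the removed rules above generated, for every header in the directory, a trivial translation unit that does nothing but include it, built only under CONFIG_DRM_I915_WERROR so each header had to compile standalone. An illustrative generated file (name and header are just examples) would contain only:

	/* header_test_i915_gem_object.c -- generated by the rule above, illustrative */
	#include "i915_gem_object.h"

Only the per-directory test scaffolding is dropped here; the headers themselves are untouched by this hunk.
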
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_busy.c b/drivers/gpu/drm/i915/gem/i915_gem_busy.c
index 6ad93a09968c..25235ef630c1 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_busy.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_busy.c
@@ -9,16 +9,16 @@
#include "i915_gem_ioctls.h"
#include "i915_gem_object.h"
-static __always_inline u32 __busy_read_flag(u8 id)
+static __always_inline u32 __busy_read_flag(u16 id)
{
- if (id == (u8)I915_ENGINE_CLASS_INVALID)
+ if (id == (u16)I915_ENGINE_CLASS_INVALID)
return 0xffff0000u;
GEM_BUG_ON(id >= 16);
return 0x10000u << id;
}
-static __always_inline u32 __busy_write_id(u8 id)
+static __always_inline u32 __busy_write_id(u16 id)
{
/*
* The uABI guarantees an active writer is also amongst the read
@@ -29,14 +29,14 @@ static __always_inline u32 __busy_write_id(u8 id)
* last_read - hence we always set both read and write busy for
* last_write.
*/
- if (id == (u8)I915_ENGINE_CLASS_INVALID)
+ if (id == (u16)I915_ENGINE_CLASS_INVALID)
return 0xffffffffu;
return (id + 1) | __busy_read_flag(id);
}
static __always_inline unsigned int
-__busy_set_if_active(const struct dma_fence *fence, u32 (*flag)(u8 id))
+__busy_set_if_active(const struct dma_fence *fence, u32 (*flag)(u16 id))
{
const struct i915_request *rq;
@@ -57,7 +57,7 @@ __busy_set_if_active(const struct dma_fence *fence, u32 (*flag)(u8 id))
return 0;
/* Beware type-expansion follies! */
- BUILD_BUG_ON(!typecheck(u8, rq->engine->uabi_class));
+ BUILD_BUG_ON(!typecheck(u16, rq->engine->uabi_class));
return flag(rq->engine->uabi_class);
}
@@ -82,7 +82,7 @@ i915_gem_busy_ioctl(struct drm_device *dev, void *data,
{
struct drm_i915_gem_busy *args = data;
struct drm_i915_gem_object *obj;
- struct reservation_object_list *list;
+ struct dma_resv_list *list;
unsigned int seq;
int err;
@@ -105,7 +105,7 @@ i915_gem_busy_ioctl(struct drm_device *dev, void *data,
* Alternatively, we can trade that extra information on read/write
* activity with
* args->busy =
- * !reservation_object_test_signaled_rcu(obj->resv, true);
+ * !dma_resv_test_signaled_rcu(obj->resv, true);
* to report the overall busyness. This is what the wait-ioctl does.
*
*/
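
The encoding implemented by __busy_read_flag()/__busy_write_id() above lands in the two halves of the value returned through drm_i915_gem_busy.busy: the low 16 bits hold the uabi class of the single active writer plus one (0 meaning "no writer"), and each bit of the high 16 bits marks a uabi class with an active reader. A hedged userspace-side decode, not taken from the uapi headers:

	/* Illustrative helpers; uabi_class is one of the I915_ENGINE_CLASS_* values. */
	static inline bool busy_class_is_writing(__u32 busy, __u16 uabi_class)
	{
		return (busy & 0xffffu) == (__u32)uabi_class + 1;
	}

	static inline bool busy_class_is_reading(__u32 busy, __u16 uabi_class)
	{
		return busy & (0x10000u << uabi_class);
	}

The all-ones patterns returned for an unknown engine class (0xffff0000 for readers, 0xffffffff for the writer) deliberately read as "busy everywhere", so userspace errs on the side of caution.
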
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_clflush.c b/drivers/gpu/drm/i915/gem/i915_gem_clflush.c
index 5295285d5843..34be4c0ee7c5 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_clflush.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_clflush.c
@@ -8,87 +8,65 @@
#include "i915_drv.h"
#include "i915_gem_clflush.h"
-
-static DEFINE_SPINLOCK(clflush_lock);
+#include "i915_sw_fence_work.h"
+#include "i915_trace.h"
struct clflush {
- struct dma_fence dma; /* Must be first for dma_fence_free() */
- struct i915_sw_fence wait;
- struct work_struct work;
+ struct dma_fence_work base;
struct drm_i915_gem_object *obj;
};
-static const char *i915_clflush_get_driver_name(struct dma_fence *fence)
-{
- return DRIVER_NAME;
-}
-
-static const char *i915_clflush_get_timeline_name(struct dma_fence *fence)
-{
- return "clflush";
-}
-
-static void i915_clflush_release(struct dma_fence *fence)
-{
- struct clflush *clflush = container_of(fence, typeof(*clflush), dma);
-
- i915_sw_fence_fini(&clflush->wait);
-
- BUILD_BUG_ON(offsetof(typeof(*clflush), dma));
- dma_fence_free(&clflush->dma);
-}
-
-static const struct dma_fence_ops i915_clflush_ops = {
- .get_driver_name = i915_clflush_get_driver_name,
- .get_timeline_name = i915_clflush_get_timeline_name,
- .release = i915_clflush_release,
-};
-
-static void __i915_do_clflush(struct drm_i915_gem_object *obj)
+static void __do_clflush(struct drm_i915_gem_object *obj)
{
GEM_BUG_ON(!i915_gem_object_has_pages(obj));
drm_clflush_sg(obj->mm.pages);
- intel_fb_obj_flush(obj, ORIGIN_CPU);
+
+ i915_gem_object_flush_frontbuffer(obj, ORIGIN_CPU);
}
-static void i915_clflush_work(struct work_struct *work)
+static int clflush_work(struct dma_fence_work *base)
{
- struct clflush *clflush = container_of(work, typeof(*clflush), work);
+ struct clflush *clflush = container_of(base, typeof(*clflush), base);
struct drm_i915_gem_object *obj = clflush->obj;
+ int err;
- if (i915_gem_object_pin_pages(obj)) {
- DRM_ERROR("Failed to acquire obj->pages for clflushing\n");
- goto out;
- }
-
- __i915_do_clflush(obj);
+ err = i915_gem_object_pin_pages(obj);
+ if (err)
+ return err;
+ __do_clflush(obj);
i915_gem_object_unpin_pages(obj);
-out:
- i915_gem_object_put(obj);
+ return 0;
+}
+
+static void clflush_release(struct dma_fence_work *base)
+{
+ struct clflush *clflush = container_of(base, typeof(*clflush), base);
- dma_fence_signal(&clflush->dma);
- dma_fence_put(&clflush->dma);
+ i915_gem_object_put(clflush->obj);
}
-static int __i915_sw_fence_call
-i915_clflush_notify(struct i915_sw_fence *fence,
- enum i915_sw_fence_notify state)
+static const struct dma_fence_work_ops clflush_ops = {
+ .name = "clflush",
+ .work = clflush_work,
+ .release = clflush_release,
+};
+
+static struct clflush *clflush_work_create(struct drm_i915_gem_object *obj)
{
- struct clflush *clflush = container_of(fence, typeof(*clflush), wait);
+ struct clflush *clflush;
- switch (state) {
- case FENCE_COMPLETE:
- schedule_work(&clflush->work);
- break;
+ GEM_BUG_ON(!obj->cache_dirty);
- case FENCE_FREE:
- dma_fence_put(&clflush->dma);
- break;
- }
+ clflush = kmalloc(sizeof(*clflush), GFP_KERNEL);
+ if (!clflush)
+ return NULL;
- return NOTIFY_DONE;
+ dma_fence_work_init(&clflush->base, &clflush_ops);
+ clflush->obj = i915_gem_object_get(obj); /* obj <-> clflush cycle */
+
+ return clflush;
}
bool i915_gem_clflush_object(struct drm_i915_gem_object *obj,
@@ -126,33 +104,16 @@ bool i915_gem_clflush_object(struct drm_i915_gem_object *obj,
clflush = NULL;
if (!(flags & I915_CLFLUSH_SYNC))
- clflush = kmalloc(sizeof(*clflush), GFP_KERNEL);
+ clflush = clflush_work_create(obj);
if (clflush) {
- GEM_BUG_ON(!obj->cache_dirty);
-
- dma_fence_init(&clflush->dma,
- &i915_clflush_ops,
- &clflush_lock,
- to_i915(obj->base.dev)->mm.unordered_timeline,
- 0);
- i915_sw_fence_init(&clflush->wait, i915_clflush_notify);
-
- clflush->obj = i915_gem_object_get(obj);
- INIT_WORK(&clflush->work, i915_clflush_work);
-
- dma_fence_get(&clflush->dma);
-
- i915_sw_fence_await_reservation(&clflush->wait,
- obj->base.resv, NULL,
- true, I915_FENCE_TIMEOUT,
+ i915_sw_fence_await_reservation(&clflush->base.chain,
+ obj->base.resv, NULL, true,
+ I915_FENCE_TIMEOUT,
I915_FENCE_GFP);
-
- reservation_object_add_excl_fence(obj->base.resv,
- &clflush->dma);
-
- i915_sw_fence_commit(&clflush->wait);
+ dma_resv_add_excl_fence(obj->base.resv, &clflush->base.dma);
+ dma_fence_work_commit(&clflush->base);
} else if (obj->mm.pages) {
- __i915_do_clflush(obj);
+ __do_clflush(obj);
} else {
GEM_BUG_ON(obj->write_domain != I915_GEM_DOMAIN_CPU);
}
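
The rework above folds the open-coded dma_fence + i915_sw_fence + work_struct trio into the new dma_fence_work helper. A minimal sketch of another, hypothetical user of that helper, mirroring the clflush code (struct my_op and its callbacks are placeholders, not driver API):

	struct my_op {
		struct dma_fence_work base;
		struct drm_i915_gem_object *obj;
	};

	static int my_op_work(struct dma_fence_work *base)
	{
		struct my_op *op = container_of(base, typeof(*op), base);
		int err;

		/* Runs from process context once everything awaited on base.chain has signalled */
		err = i915_gem_object_pin_pages(op->obj);
		if (err)
			return err;

		/* ... operate on op->obj->mm.pages here ... */

		i915_gem_object_unpin_pages(op->obj);
		return 0;
	}

	static void my_op_release(struct dma_fence_work *base)
	{
		struct my_op *op = container_of(base, typeof(*op), base);

		i915_gem_object_put(op->obj); /* drop the reference taken at creation */
	}

	static const struct dma_fence_work_ops my_op_ops = {
		.name = "my_op",
		.work = my_op_work,
		.release = my_op_release,
	};

The caller then mirrors i915_gem_clflush_object(): dma_fence_work_init(&op->base, &my_op_ops) with op->obj holding a reference, optionally i915_sw_fence_await_reservation(&op->base.chain, ...) to order the work behind existing fences, dma_resv_add_excl_fence(obj->base.resv, &op->base.dma) to publish it, and finally dma_fence_work_commit(&op->base) to arm it.
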
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_client_blt.c b/drivers/gpu/drm/i915/gem/i915_gem_client_blt.c
index 1fdab0767a47..81366aa4812b 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_client_blt.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_client_blt.c
@@ -2,10 +2,13 @@
/*
* Copyright © 2019 Intel Corporation
*/
-#include "i915_gem_client_blt.h"
+#include "i915_drv.h"
+#include "gt/intel_context.h"
+#include "gt/intel_engine_pm.h"
+#include "gt/intel_engine_pool.h"
+#include "i915_gem_client_blt.h"
#include "i915_gem_object_blt.h"
-#include "intel_drv.h"
struct i915_sleeve {
struct i915_vma *vma;
@@ -72,7 +75,6 @@ static struct i915_sleeve *create_sleeve(struct i915_address_space *vm,
vma->ops = &proxy_vma_ops;
sleeve->vma = vma;
- sleeve->obj = i915_gem_object_get(obj);
sleeve->pages = pages;
sleeve->page_sizes = *page_sizes;
@@ -85,7 +87,6 @@ err_free:
static void destroy_sleeve(struct i915_sleeve *sleeve)
{
- i915_gem_object_put(sleeve->obj);
kfree(sleeve);
}
@@ -154,32 +155,37 @@ static void clear_pages_dma_fence_cb(struct dma_fence *fence,
static void clear_pages_worker(struct work_struct *work)
{
struct clear_pages_work *w = container_of(work, typeof(*w), work);
- struct drm_i915_private *i915 = w->ce->gem_context->i915;
- struct drm_i915_gem_object *obj = w->sleeve->obj;
+ struct drm_i915_gem_object *obj = w->sleeve->vma->obj;
struct i915_vma *vma = w->sleeve->vma;
struct i915_request *rq;
+ struct i915_vma *batch;
int err = w->dma.error;
if (unlikely(err))
goto out_signal;
if (obj->cache_dirty) {
- obj->write_domain = 0;
if (i915_gem_object_has_struct_page(obj))
drm_clflush_sg(w->sleeve->pages);
obj->cache_dirty = false;
}
+ obj->read_domains = I915_GEM_GPU_DOMAINS;
+ obj->write_domain = 0;
- /* XXX: we need to kill this */
- mutex_lock(&i915->drm.struct_mutex);
err = i915_vma_pin(vma, 0, 0, PIN_USER);
if (unlikely(err))
- goto out_unlock;
+ goto out_signal;
+
+ batch = intel_emit_vma_fill_blt(w->ce, vma, w->value);
+ if (IS_ERR(batch)) {
+ err = PTR_ERR(batch);
+ goto out_unpin;
+ }
- rq = i915_request_create(w->ce);
+ rq = intel_context_create_request(w->ce);
if (IS_ERR(rq)) {
err = PTR_ERR(rq);
- goto out_unpin;
+ goto out_batch;
}
/* There's no way the fence has signalled */
@@ -187,20 +193,28 @@ static void clear_pages_worker(struct work_struct *work)
clear_pages_dma_fence_cb))
GEM_BUG_ON(1);
+ err = intel_emit_vma_mark_active(batch, rq);
+ if (unlikely(err))
+ goto out_request;
+
if (w->ce->engine->emit_init_breadcrumb) {
err = w->ce->engine->emit_init_breadcrumb(rq);
if (unlikely(err))
goto out_request;
}
- /* XXX: more feverish nightmares await */
- i915_vma_lock(vma);
- err = i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE);
- i915_vma_unlock(vma);
+ /*
+ * w->dma is already exported via (vma|obj)->resv we need only
+ * keep track of the GPU activity within this vma/request, and
+ * propagate the signal from the request to w->dma.
+ */
+ err = __i915_vma_move_to_active(vma, rq);
if (err)
goto out_request;
- err = intel_emit_vma_fill_blt(rq, vma, w->value);
+ err = w->ce->engine->emit_bb_start(rq,
+ batch->node.start, batch->node.size,
+ 0);
out_request:
if (unlikely(err)) {
i915_request_skip(rq, err);
@@ -208,10 +222,10 @@ out_request:
}
i915_request_add(rq);
+out_batch:
+ intel_emit_vma_release(w->ce, batch);
out_unpin:
i915_vma_unpin(vma);
-out_unlock:
- mutex_unlock(&i915->drm.struct_mutex);
out_signal:
if (unlikely(err)) {
dma_fence_set_error(&w->dma, err);
@@ -248,14 +262,11 @@ int i915_gem_schedule_fill_pages_blt(struct drm_i915_gem_object *obj,
struct i915_page_sizes *page_sizes,
u32 value)
{
- struct drm_i915_private *i915 = to_i915(obj->base.dev);
- struct i915_gem_context *ctx = ce->gem_context;
- struct i915_address_space *vm = ctx->vm ?: &i915->ggtt.vm;
struct clear_pages_work *work;
struct i915_sleeve *sleeve;
int err;
- sleeve = create_sleeve(vm, obj, pages, page_sizes);
+ sleeve = create_sleeve(ce->vm, obj, pages, page_sizes);
if (IS_ERR(sleeve))
return PTR_ERR(sleeve);
@@ -273,11 +284,7 @@ int i915_gem_schedule_fill_pages_blt(struct drm_i915_gem_object *obj,
init_irq_work(&work->irq_work, clear_pages_signal_irq_worker);
- dma_fence_init(&work->dma,
- &clear_pages_work_ops,
- &fence_lock,
- i915->mm.unordered_timeline,
- 0);
+ dma_fence_init(&work->dma, &clear_pages_work_ops, &fence_lock, 0, 0);
i915_sw_fence_init(&work->wait, clear_pages_work_notify);
i915_gem_object_lock(obj);
@@ -288,7 +295,7 @@ int i915_gem_schedule_fill_pages_blt(struct drm_i915_gem_object *obj,
if (err < 0) {
dma_fence_set_error(&work->dma, err);
} else {
- reservation_object_add_excl_fence(obj->base.resv, &work->dma);
+ dma_resv_add_excl_fence(obj->base.resv, &work->dma);
err = 0;
}
i915_gem_object_unlock(obj);
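
The tail of i915_gem_schedule_fill_pages_blt() above shows the ordering idiom this series uses repeatedly for asynchronous work on an object: under the object lock, the work's fence first awaits everything already tracked in obj->base.resv and is then published as the new exclusive fence, so later CPU or GPU users wait for it in turn. A condensed sketch, with `work` standing in for any operation that owns an i915_sw_fence (work->wait) and a dma_fence (work->dma):

	i915_gem_object_lock(obj);
	err = i915_sw_fence_await_reservation(&work->wait, obj->base.resv,
					      NULL, true, I915_FENCE_TIMEOUT,
					      I915_FENCE_GFP);
	if (err < 0)
		dma_fence_set_error(&work->dma, err);	/* propagate the failure */
	else
		dma_resv_add_excl_fence(obj->base.resv, &work->dma);
	i915_gem_object_unlock(obj);

The real function then takes a reference on work->dma and commits work->wait so the worker can run once its dependencies signal.
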
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c b/drivers/gpu/drm/i915/gem/i915_gem_context.c
index 0f2c22a3bcb6..a2e57e62af30 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c
@@ -69,7 +69,13 @@
#include <drm/i915_drm.h>
+#include "gt/gen6_ppgtt.h"
+#include "gt/intel_context.h"
+#include "gt/intel_engine_heartbeat.h"
+#include "gt/intel_engine_pm.h"
+#include "gt/intel_engine_user.h"
#include "gt/intel_lrc_reg.h"
+#include "gt/intel_ring.h"
#include "i915_gem_context.h"
#include "i915_globals.h"
@@ -158,7 +164,7 @@ lookup_user_engine(struct i915_gem_context *ctx,
if (!engine)
return ERR_PTR(-EINVAL);
- idx = engine->id;
+ idx = engine->legacy_idx;
} else {
idx = ci->engine_instance;
}
@@ -166,93 +172,71 @@ lookup_user_engine(struct i915_gem_context *ctx,
return i915_gem_context_get_engine(ctx, idx);
}
-static inline int new_hw_id(struct drm_i915_private *i915, gfp_t gfp)
+static struct i915_address_space *
+context_get_vm_rcu(struct i915_gem_context *ctx)
{
- unsigned int max;
+ GEM_BUG_ON(!rcu_access_pointer(ctx->vm));
- lockdep_assert_held(&i915->contexts.mutex);
+ do {
+ struct i915_address_space *vm;
- if (INTEL_GEN(i915) >= 11)
- max = GEN11_MAX_CONTEXT_HW_ID;
- else if (USES_GUC_SUBMISSION(i915))
/*
- * When using GuC in proxy submission, GuC consumes the
- * highest bit in the context id to indicate proxy submission.
+ * We do not allow downgrading from full-ppgtt [to a shared
+ * global gtt], so ctx->vm cannot become NULL.
*/
- max = MAX_GUC_CONTEXT_HW_ID;
- else
- max = MAX_CONTEXT_HW_ID;
-
- return ida_simple_get(&i915->contexts.hw_ida, 0, max, gfp);
-}
-
-static int steal_hw_id(struct drm_i915_private *i915)
-{
- struct i915_gem_context *ctx, *cn;
- LIST_HEAD(pinned);
- int id = -ENOSPC;
-
- lockdep_assert_held(&i915->contexts.mutex);
-
- list_for_each_entry_safe(ctx, cn,
- &i915->contexts.hw_id_list, hw_id_link) {
- if (atomic_read(&ctx->hw_id_pin_count)) {
- list_move_tail(&ctx->hw_id_link, &pinned);
+ vm = rcu_dereference(ctx->vm);
+ if (!kref_get_unless_zero(&vm->ref))
continue;
- }
- GEM_BUG_ON(!ctx->hw_id); /* perma-pinned kernel context */
- list_del_init(&ctx->hw_id_link);
- id = ctx->hw_id;
- break;
- }
+ /*
+ * This ppgtt may have be reallocated between
+ * the read and the kref, and reassigned to a third
+ * context. In order to avoid inadvertent sharing
+ * of this ppgtt with that third context (and not
+ * src), we have to confirm that we have the same
+ * ppgtt after passing through the strong memory
+ * barrier implied by a successful
+ * kref_get_unless_zero().
+ *
+ * Once we have acquired the current ppgtt of ctx,
+ * we no longer care if it is released from ctx, as
+ * it cannot be reallocated elsewhere.
+ */
- /*
- * Remember how far we got up on the last repossesion scan, so the
- * list is kept in a "least recently scanned" order.
- */
- list_splice_tail(&pinned, &i915->contexts.hw_id_list);
- return id;
+ if (vm == rcu_access_pointer(ctx->vm))
+ return rcu_pointer_handoff(vm);
+
+ i915_vm_put(vm);
+ } while (1);
}
-static int assign_hw_id(struct drm_i915_private *i915, unsigned int *out)
+static void intel_context_set_gem(struct intel_context *ce,
+ struct i915_gem_context *ctx)
{
- int ret;
+ GEM_BUG_ON(rcu_access_pointer(ce->gem_context));
+ RCU_INIT_POINTER(ce->gem_context, ctx);
- lockdep_assert_held(&i915->contexts.mutex);
+ if (!test_bit(CONTEXT_ALLOC_BIT, &ce->flags))
+ ce->ring = __intel_context_ring_size(SZ_16K);
- /*
- * We prefer to steal/stall ourselves and our users over that of the
- * entire system. That may be a little unfair to our users, and
- * even hurt high priority clients. The choice is whether to oomkill
- * something else, or steal a context id.
- */
- ret = new_hw_id(i915, GFP_KERNEL | __GFP_RETRY_MAYFAIL | __GFP_NOWARN);
- if (unlikely(ret < 0)) {
- ret = steal_hw_id(i915);
- if (ret < 0) /* once again for the correct errno code */
- ret = new_hw_id(i915, GFP_KERNEL);
- if (ret < 0)
- return ret;
- }
+ if (rcu_access_pointer(ctx->vm)) {
+ struct i915_address_space *vm;
- *out = ret;
- return 0;
-}
+ rcu_read_lock();
+ vm = context_get_vm_rcu(ctx); /* hmm */
+ rcu_read_unlock();
-static void release_hw_id(struct i915_gem_context *ctx)
-{
- struct drm_i915_private *i915 = ctx->i915;
+ i915_vm_put(ce->vm);
+ ce->vm = vm;
+ }
- if (list_empty(&ctx->hw_id_link))
- return;
+ GEM_BUG_ON(ce->timeline);
+ if (ctx->timeline)
+ ce->timeline = intel_timeline_get(ctx->timeline);
- mutex_lock(&i915->contexts.mutex);
- if (!list_empty(&ctx->hw_id_link)) {
- ida_simple_remove(&i915->contexts.hw_ida, ctx->hw_id);
- list_del_init(&ctx->hw_id_link);
- }
- mutex_unlock(&i915->contexts.mutex);
+ if (ctx->sched.priority >= I915_PRIORITY_NORMAL &&
+ intel_engine_has_semaphores(ce->engine))
+ __set_bit(CONTEXT_USE_SEMAPHORES, &ce->flags);
}
static void __free_engines(struct i915_gem_engines *e, unsigned int count)
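
The kref_get_unless_zero()-and-recheck loop in context_get_vm_rcu() above is a general RCU lookup idiom rather than anything i915-specific. A self-contained sketch of the same pattern on a hypothetical refcounted object (struct foo, foo_release() and the __rcu slot are placeholders):

	#include <linux/kref.h>
	#include <linux/rcupdate.h>
	#include <linux/slab.h>

	struct foo {
		struct kref ref;
		struct rcu_head rcu;
	};

	static void foo_release(struct kref *ref)
	{
		struct foo *obj = container_of(ref, struct foo, ref);

		kfree_rcu(obj, rcu);
	}

	static struct foo *foo_get_rcu(struct foo __rcu **slot)
	{
		struct foo *obj;

		rcu_read_lock();
		do {
			obj = rcu_dereference(*slot);
			if (!obj)
				break;

			if (!kref_get_unless_zero(&obj->ref))
				continue;	/* already on its way out; look again */

			/*
			 * The slot may have been reassigned between the read and
			 * the kref; the barrier implied by a successful
			 * kref_get_unless_zero() lets us confirm the reference we
			 * hold is for the object still published in the slot.
			 */
			if (obj == rcu_access_pointer(*slot))
				break;

			kref_put(&obj->ref, foo_release);	/* raced; retry */
		} while (1);
		rcu_read_unlock();

		return obj;
	}

context_get_vm_rcu() is exactly this shape, with ctx->vm as the slot and i915_vm_put() as the put on the retry path; it additionally hands the pointer out via rcu_pointer_handoff().
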
@@ -261,6 +245,7 @@ static void __free_engines(struct i915_gem_engines *e, unsigned int count)
if (!e->engines[count])
continue;
+ RCU_INIT_POINTER(e->engines[count]->gem_context, NULL);
intel_context_put(e->engines[count]);
}
kfree(e);
@@ -278,6 +263,7 @@ static void free_engines_rcu(struct rcu_head *rcu)
static struct i915_gem_engines *default_engines(struct i915_gem_context *ctx)
{
+ const struct intel_gt *gt = &ctx->i915->gt;
struct intel_engine_cs *engine;
struct i915_gem_engines *e;
enum intel_engine_id id;
@@ -287,104 +273,254 @@ static struct i915_gem_engines *default_engines(struct i915_gem_context *ctx)
return ERR_PTR(-ENOMEM);
init_rcu_head(&e->rcu);
- for_each_engine(engine, ctx->i915, id) {
+ for_each_engine(engine, gt, id) {
struct intel_context *ce;
- ce = intel_context_create(ctx, engine);
+ if (engine->legacy_idx == INVALID_ENGINE)
+ continue;
+
+ GEM_BUG_ON(engine->legacy_idx >= I915_NUM_ENGINES);
+ GEM_BUG_ON(e->engines[engine->legacy_idx]);
+
+ ce = intel_context_create(engine);
if (IS_ERR(ce)) {
- __free_engines(e, id);
+ __free_engines(e, e->num_engines + 1);
return ERR_CAST(ce);
}
- e->engines[id] = ce;
+ intel_context_set_gem(ce, ctx);
+
+ e->engines[engine->legacy_idx] = ce;
+ e->num_engines = max(e->num_engines, engine->legacy_idx);
}
- e->num_engines = id;
+ e->num_engines++;
return e;
}
static void i915_gem_context_free(struct i915_gem_context *ctx)
{
- lockdep_assert_held(&ctx->i915->drm.struct_mutex);
GEM_BUG_ON(!i915_gem_context_is_closed(ctx));
- release_hw_id(ctx);
- if (ctx->vm)
- i915_vm_put(ctx->vm);
+ spin_lock(&ctx->i915->gem.contexts.lock);
+ list_del(&ctx->link);
+ spin_unlock(&ctx->i915->gem.contexts.lock);
free_engines(rcu_access_pointer(ctx->engines));
mutex_destroy(&ctx->engines_mutex);
if (ctx->timeline)
- i915_timeline_put(ctx->timeline);
+ intel_timeline_put(ctx->timeline);
- kfree(ctx->name);
put_pid(ctx->pid);
-
- list_del(&ctx->link);
mutex_destroy(&ctx->mutex);
kfree_rcu(ctx, rcu);
}
-static void contexts_free(struct drm_i915_private *i915)
+static void contexts_free_all(struct llist_node *list)
{
- struct llist_node *freed = llist_del_all(&i915->contexts.free_list);
struct i915_gem_context *ctx, *cn;
- lockdep_assert_held(&i915->drm.struct_mutex);
-
- llist_for_each_entry_safe(ctx, cn, freed, free_link)
+ llist_for_each_entry_safe(ctx, cn, list, free_link)
i915_gem_context_free(ctx);
}
-static void contexts_free_first(struct drm_i915_private *i915)
+static void contexts_flush_free(struct i915_gem_contexts *gc)
{
- struct i915_gem_context *ctx;
- struct llist_node *freed;
-
- lockdep_assert_held(&i915->drm.struct_mutex);
-
- freed = llist_del_first(&i915->contexts.free_list);
- if (!freed)
- return;
-
- ctx = container_of(freed, typeof(*ctx), free_link);
- i915_gem_context_free(ctx);
+ contexts_free_all(llist_del_all(&gc->free_list));
}
static void contexts_free_worker(struct work_struct *work)
{
- struct drm_i915_private *i915 =
- container_of(work, typeof(*i915), contexts.free_work);
+ struct i915_gem_contexts *gc =
+ container_of(work, typeof(*gc), free_work);
- mutex_lock(&i915->drm.struct_mutex);
- contexts_free(i915);
- mutex_unlock(&i915->drm.struct_mutex);
+ contexts_flush_free(gc);
}
void i915_gem_context_release(struct kref *ref)
{
struct i915_gem_context *ctx = container_of(ref, typeof(*ctx), ref);
- struct drm_i915_private *i915 = ctx->i915;
+ struct i915_gem_contexts *gc = &ctx->i915->gem.contexts;
trace_i915_context_free(ctx);
- if (llist_add(&ctx->free_link, &i915->contexts.free_list))
- queue_work(i915->wq, &i915->contexts.free_work);
+ if (llist_add(&ctx->free_link, &gc->free_list))
+ schedule_work(&gc->free_work);
}
-static void context_close(struct i915_gem_context *ctx)
+static inline struct i915_gem_engines *
+__context_engines_static(const struct i915_gem_context *ctx)
{
- mutex_lock(&ctx->mutex);
+ return rcu_dereference_protected(ctx->engines, true);
+}
- i915_gem_context_set_closed(ctx);
- ctx->file_priv = ERR_PTR(-EBADF);
+static bool __reset_engine(struct intel_engine_cs *engine)
+{
+ struct intel_gt *gt = engine->gt;
+ bool success = false;
+
+ if (!intel_has_reset_engine(gt))
+ return false;
+
+ if (!test_and_set_bit(I915_RESET_ENGINE + engine->id,
+ &gt->reset.flags)) {
+ success = intel_engine_reset(engine, NULL) == 0;
+ clear_and_wake_up_bit(I915_RESET_ENGINE + engine->id,
+ &gt->reset.flags);
+ }
+
+ return success;
+}
+
+static void __reset_context(struct i915_gem_context *ctx,
+ struct intel_engine_cs *engine)
+{
+ intel_gt_handle_error(engine->gt, engine->mask, 0,
+ "context closure in %s", ctx->name);
+}
+
+static bool __cancel_engine(struct intel_engine_cs *engine)
+{
+ /*
+ * Send a "high priority pulse" down the engine to cause the
+ * current request to be momentarily preempted. (If it fails to
+ * be preempted, it will be reset). As we have marked our context
+ * as banned, any incomplete request, including any running, will
+ * be skipped following the preemption.
+ *
+ * If there is no hangchecking (one of the reasons why we try to
+ * cancel the context) and no forced preemption, there may be no
+ * means by which we reset the GPU and evict the persistent hog.
+ * Ergo if we are unable to inject a preemptive pulse that can
+ * kill the banned context, we fallback to doing a local reset
+ * instead.
+ */
+ if (IS_ACTIVE(CONFIG_DRM_I915_PREEMPT_TIMEOUT) &&
+ !intel_engine_pulse(engine))
+ return true;
+
+ /* If we are unable to send a pulse, try resetting this engine. */
+ return __reset_engine(engine);
+}
+
+static struct intel_engine_cs *__active_engine(struct i915_request *rq)
+{
+ struct intel_engine_cs *engine, *locked;
/*
- * This context will never again be assinged to HW, so we can
- * reuse its ID for the next context.
+ * Serialise with __i915_request_submit() so that it sees
+ * is-banned?, or we know the request is already inflight.
*/
- release_hw_id(ctx);
+ locked = READ_ONCE(rq->engine);
+ spin_lock_irq(&locked->active.lock);
+ while (unlikely(locked != (engine = READ_ONCE(rq->engine)))) {
+ spin_unlock(&locked->active.lock);
+ spin_lock(&engine->active.lock);
+ locked = engine;
+ }
+
+ engine = NULL;
+ if (i915_request_is_active(rq) && !rq->fence.error)
+ engine = rq->engine;
+
+ spin_unlock_irq(&locked->active.lock);
+
+ return engine;
+}
+
+static struct intel_engine_cs *active_engine(struct intel_context *ce)
+{
+ struct intel_engine_cs *engine = NULL;
+ struct i915_request *rq;
+
+ if (!ce->timeline)
+ return NULL;
+
+ mutex_lock(&ce->timeline->mutex);
+ list_for_each_entry_reverse(rq, &ce->timeline->requests, link) {
+ if (i915_request_completed(rq))
+ break;
+
+ /* Check with the backend if the request is inflight */
+ engine = __active_engine(rq);
+ if (engine)
+ break;
+ }
+ mutex_unlock(&ce->timeline->mutex);
+
+ return engine;
+}
+
+static void kill_context(struct i915_gem_context *ctx)
+{
+ struct i915_gem_engines_iter it;
+ struct intel_context *ce;
+
+ /*
+ * Map the user's engine back to the actual engines; one virtual
+ * engine will be mapped to multiple engines, and using ctx->engine[]
+ * the same engine may have multiple instances in the user's map.
+ * However, we only care about pending requests, so only include
+ * engines on which there are incomplete requests.
+ */
+ for_each_gem_engine(ce, __context_engines_static(ctx), it) {
+ struct intel_engine_cs *engine;
+
+ if (intel_context_set_banned(ce))
+ continue;
+
+ /*
+ * Check the current active state of this context; if we
+ * are currently executing on the GPU we need to evict
+ * ourselves. On the other hand, if we haven't yet been
+ * submitted to the GPU or if everything is complete,
+ * we have nothing to do.
+ */
+ engine = active_engine(ce);
+
+ /* First attempt to gracefully cancel the context */
+ if (engine && !__cancel_engine(engine))
+ /*
+ * If we are unable to send a preemptive pulse to bump
+ * the context from the GPU, we have to resort to a full
+ * reset. We hope the collateral damage is worth it.
+ */
+ __reset_context(ctx, engine);
+ }
+}
+
+static void set_closed_name(struct i915_gem_context *ctx)
+{
+ char *s;
+
+ /* Replace '[]' with '<>' to indicate closed in debug prints */
+
+ s = strrchr(ctx->name, '[');
+ if (!s)
+ return;
+
+ *s = '<';
+
+ s = strchr(s + 1, ']');
+ if (s)
+ *s = '>';
+}
+
+static void context_close(struct i915_gem_context *ctx)
+{
+ struct i915_address_space *vm;
+
+ i915_gem_context_set_closed(ctx);
+ set_closed_name(ctx);
+
+ mutex_lock(&ctx->mutex);
+
+ vm = i915_gem_context_vm(ctx);
+ if (vm)
+ i915_vm_close(vm);
+
+ ctx->file_priv = ERR_PTR(-EBADF);
/*
* The LUT uses the VMA as a backpointer to unref the object,
@@ -394,31 +530,45 @@ static void context_close(struct i915_gem_context *ctx)
lut_close(ctx);
mutex_unlock(&ctx->mutex);
+
+ /*
+ * If the user has disabled hangchecking, we can not be sure that
+ * the batches will ever complete after the context is closed,
+ * keeping the context and all resources pinned forever. So in this
+ * case we opt to forcibly kill off all remaining requests on
+ * context close.
+ */
+ if (!i915_gem_context_is_persistent(ctx) ||
+ !i915_modparams.enable_hangcheck)
+ kill_context(ctx);
+
i915_gem_context_put(ctx);
}
-static u32 default_desc_template(const struct drm_i915_private *i915,
- const struct i915_address_space *vm)
+static int __context_set_persistence(struct i915_gem_context *ctx, bool state)
{
- u32 address_mode;
- u32 desc;
-
- desc = GEN8_CTX_VALID | GEN8_CTX_PRIVILEGE;
+ if (i915_gem_context_is_persistent(ctx) == state)
+ return 0;
- address_mode = INTEL_LEGACY_32B_CONTEXT;
- if (vm && i915_vm_is_4lvl(vm))
- address_mode = INTEL_LEGACY_64B_CONTEXT;
- desc |= address_mode << GEN8_CTX_ADDRESSING_MODE_SHIFT;
+ if (state) {
+ /*
+ * Only contexts that are short-lived [that will expire or be
+ * reset] are allowed to survive past termination. We require
+ * hangcheck to ensure that the persistent requests are healthy.
+ */
+ if (!i915_modparams.enable_hangcheck)
+ return -EINVAL;
- if (IS_GEN(i915, 8))
- desc |= GEN8_CTX_L3LLC_COHERENT;
+ i915_gem_context_set_persistence(ctx);
+ } else {
+ /* To cancel a context we use "preempt-to-idle" */
+ if (!(ctx->i915->caps.scheduler & I915_SCHEDULER_CAP_PREEMPTION))
+ return -ENODEV;
- /* TODO: WaDisableLiteRestore when we start using semaphore
- * signalling between Command Streamers
- * ring->ctx_desc_template |= GEN8_CTX_FORCE_RESTORE;
- */
+ i915_gem_context_clear_persistence(ctx);
+ }
- return desc;
+ return 0;
}
static struct i915_gem_context *
@@ -434,7 +584,6 @@ __create_context(struct drm_i915_private *i915)
return ERR_PTR(-ENOMEM);
kref_init(&ctx->ref);
- list_add_tail(&ctx->link, &i915->contexts.list);
ctx->i915 = i915;
ctx->sched.priority = I915_USER_PRIORITY(I915_PRIORITY_NORMAL);
mutex_init(&ctx->mutex);
@@ -448,7 +597,6 @@ __create_context(struct drm_i915_private *i915)
RCU_INIT_POINTER(ctx->engines, e);
INIT_RADIX_TREE(&ctx->handles_vma, GFP_KERNEL);
- INIT_LIST_HEAD(&ctx->hw_id_link);
/* NB: Mark all slices as needing a remap so that when the context first
* loads it will restore whatever remap state already exists. If there
@@ -457,14 +605,15 @@ __create_context(struct drm_i915_private *i915)
i915_gem_context_set_bannable(ctx);
i915_gem_context_set_recoverable(ctx);
-
- ctx->ring_size = 4 * PAGE_SIZE;
- ctx->desc_template =
- default_desc_template(i915, &i915->mm.aliasing_ppgtt->vm);
+ __context_set_persistence(ctx, true /* cgroup hook? */);
for (i = 0; i < ARRAY_SIZE(ctx->hang_timestamp); i++)
ctx->hang_timestamp[i] = jiffies - CONTEXT_FAST_HANG_JIFFIES;
+ spin_lock(&i915->gem.contexts.lock);
+ list_add_tail(&ctx->link, &i915->gem.contexts.list);
+ spin_unlock(&i915->gem.contexts.lock);
+
return ctx;
err_free:
@@ -472,13 +621,34 @@ err_free:
return ERR_PTR(err);
}
+static void
+context_apply_all(struct i915_gem_context *ctx,
+ void (*fn)(struct intel_context *ce, void *data),
+ void *data)
+{
+ struct i915_gem_engines_iter it;
+ struct intel_context *ce;
+
+ for_each_gem_engine(ce, i915_gem_context_lock_engines(ctx), it)
+ fn(ce, data);
+ i915_gem_context_unlock_engines(ctx);
+}
+
+static void __apply_ppgtt(struct intel_context *ce, void *vm)
+{
+ i915_vm_put(ce->vm);
+ ce->vm = i915_vm_get(vm);
+}
+
static struct i915_address_space *
__set_ppgtt(struct i915_gem_context *ctx, struct i915_address_space *vm)
{
- struct i915_address_space *old = ctx->vm;
+ struct i915_address_space *old = i915_gem_context_vm(ctx);
+
+ GEM_BUG_ON(old && i915_vm_is_4lvl(vm) != i915_vm_is_4lvl(old));
- ctx->vm = i915_vm_get(vm);
- ctx->desc_template = default_desc_template(ctx->i915, vm);
+ rcu_assign_pointer(ctx->vm, i915_vm_open(vm));
+ context_apply_all(ctx, __apply_ppgtt, vm);
return old;
}
@@ -486,36 +656,57 @@ __set_ppgtt(struct i915_gem_context *ctx, struct i915_address_space *vm)
static void __assign_ppgtt(struct i915_gem_context *ctx,
struct i915_address_space *vm)
{
- if (vm == ctx->vm)
+ if (vm == rcu_access_pointer(ctx->vm))
return;
vm = __set_ppgtt(ctx, vm);
if (vm)
- i915_vm_put(vm);
+ i915_vm_close(vm);
+}
+
+static void __set_timeline(struct intel_timeline **dst,
+ struct intel_timeline *src)
+{
+ struct intel_timeline *old = *dst;
+
+ *dst = src ? intel_timeline_get(src) : NULL;
+
+ if (old)
+ intel_timeline_put(old);
+}
+
+static void __apply_timeline(struct intel_context *ce, void *timeline)
+{
+ __set_timeline(&ce->timeline, timeline);
+}
+
+static void __assign_timeline(struct i915_gem_context *ctx,
+ struct intel_timeline *timeline)
+{
+ __set_timeline(&ctx->timeline, timeline);
+ context_apply_all(ctx, __apply_timeline, timeline);
}
static struct i915_gem_context *
-i915_gem_create_context(struct drm_i915_private *dev_priv, unsigned int flags)
+i915_gem_create_context(struct drm_i915_private *i915, unsigned int flags)
{
struct i915_gem_context *ctx;
- lockdep_assert_held(&dev_priv->drm.struct_mutex);
-
if (flags & I915_CONTEXT_CREATE_FLAGS_SINGLE_TIMELINE &&
- !HAS_EXECLISTS(dev_priv))
+ !HAS_EXECLISTS(i915))
return ERR_PTR(-EINVAL);
- /* Reap the most stale context */
- contexts_free_first(dev_priv);
+ /* Reap the stale contexts */
+ contexts_flush_free(&i915->gem.contexts);
- ctx = __create_context(dev_priv);
+ ctx = __create_context(i915);
if (IS_ERR(ctx))
return ctx;
- if (HAS_FULL_PPGTT(dev_priv)) {
+ if (HAS_FULL_PPGTT(i915)) {
struct i915_ppgtt *ppgtt;
- ppgtt = i915_ppgtt_create(dev_priv);
+ ppgtt = i915_ppgtt_create(&i915->gt);
if (IS_ERR(ppgtt)) {
DRM_DEBUG_DRIVER("PPGTT setup failed (%ld)\n",
PTR_ERR(ppgtt));
@@ -523,20 +714,24 @@ i915_gem_create_context(struct drm_i915_private *dev_priv, unsigned int flags)
return ERR_CAST(ppgtt);
}
+ mutex_lock(&ctx->mutex);
__assign_ppgtt(ctx, &ppgtt->vm);
+ mutex_unlock(&ctx->mutex);
+
i915_vm_put(&ppgtt->vm);
}
if (flags & I915_CONTEXT_CREATE_FLAGS_SINGLE_TIMELINE) {
- struct i915_timeline *timeline;
+ struct intel_timeline *timeline;
- timeline = i915_timeline_create(dev_priv, NULL);
+ timeline = intel_timeline_create(&i915->gt, NULL);
if (IS_ERR(timeline)) {
context_close(ctx);
return ERR_CAST(timeline);
}
- ctx->timeline = timeline;
+ __assign_timeline(ctx, timeline);
+ intel_timeline_put(timeline);
}
trace_i915_context_create(ctx);
@@ -544,171 +739,26 @@ i915_gem_create_context(struct drm_i915_private *dev_priv, unsigned int flags)
return ctx;
}
-/**
- * i915_gem_context_create_gvt - create a GVT GEM context
- * @dev: drm device *
- *
- * This function is used to create a GVT specific GEM context.
- *
- * Returns:
- * pointer to i915_gem_context on success, error pointer if failed
- *
- */
-struct i915_gem_context *
-i915_gem_context_create_gvt(struct drm_device *dev)
-{
- struct i915_gem_context *ctx;
- int ret;
-
- if (!IS_ENABLED(CONFIG_DRM_I915_GVT))
- return ERR_PTR(-ENODEV);
-
- ret = i915_mutex_lock_interruptible(dev);
- if (ret)
- return ERR_PTR(ret);
-
- ctx = i915_gem_create_context(to_i915(dev), 0);
- if (IS_ERR(ctx))
- goto out;
-
- ret = i915_gem_context_pin_hw_id(ctx);
- if (ret) {
- context_close(ctx);
- ctx = ERR_PTR(ret);
- goto out;
- }
-
- ctx->file_priv = ERR_PTR(-EBADF);
- i915_gem_context_set_closed(ctx); /* not user accessible */
- i915_gem_context_clear_bannable(ctx);
- i915_gem_context_set_force_single_submission(ctx);
- if (!USES_GUC_SUBMISSION(to_i915(dev)))
- ctx->ring_size = 512 * PAGE_SIZE; /* Max ring buffer size */
-
- GEM_BUG_ON(i915_gem_context_is_kernel(ctx));
-out:
- mutex_unlock(&dev->struct_mutex);
- return ctx;
-}
-
-static void
-destroy_kernel_context(struct i915_gem_context **ctxp)
-{
- struct i915_gem_context *ctx;
-
- /* Keep the context ref so that we can free it immediately ourselves */
- ctx = i915_gem_context_get(fetch_and_zero(ctxp));
- GEM_BUG_ON(!i915_gem_context_is_kernel(ctx));
-
- context_close(ctx);
- i915_gem_context_free(ctx);
-}
-
-struct i915_gem_context *
-i915_gem_context_create_kernel(struct drm_i915_private *i915, int prio)
-{
- struct i915_gem_context *ctx;
- int err;
-
- ctx = i915_gem_create_context(i915, 0);
- if (IS_ERR(ctx))
- return ctx;
-
- err = i915_gem_context_pin_hw_id(ctx);
- if (err) {
- destroy_kernel_context(&ctx);
- return ERR_PTR(err);
- }
-
- i915_gem_context_clear_bannable(ctx);
- ctx->sched.priority = I915_USER_PRIORITY(prio);
- ctx->ring_size = PAGE_SIZE;
-
- GEM_BUG_ON(!i915_gem_context_is_kernel(ctx));
-
- return ctx;
-}
-
-static void init_contexts(struct drm_i915_private *i915)
+static void init_contexts(struct i915_gem_contexts *gc)
{
- mutex_init(&i915->contexts.mutex);
- INIT_LIST_HEAD(&i915->contexts.list);
+ spin_lock_init(&gc->lock);
+ INIT_LIST_HEAD(&gc->list);
- /* Using the simple ida interface, the max is limited by sizeof(int) */
- BUILD_BUG_ON(MAX_CONTEXT_HW_ID > INT_MAX);
- BUILD_BUG_ON(GEN11_MAX_CONTEXT_HW_ID > INT_MAX);
- ida_init(&i915->contexts.hw_ida);
- INIT_LIST_HEAD(&i915->contexts.hw_id_list);
-
- INIT_WORK(&i915->contexts.free_work, contexts_free_worker);
- init_llist_head(&i915->contexts.free_list);
+ INIT_WORK(&gc->free_work, contexts_free_worker);
+ init_llist_head(&gc->free_list);
}
-static bool needs_preempt_context(struct drm_i915_private *i915)
+void i915_gem_init__contexts(struct drm_i915_private *i915)
{
- return HAS_EXECLISTS(i915);
-}
-
-int i915_gem_contexts_init(struct drm_i915_private *dev_priv)
-{
- struct i915_gem_context *ctx;
-
- /* Reassure ourselves we are only called once */
- GEM_BUG_ON(dev_priv->kernel_context);
- GEM_BUG_ON(dev_priv->preempt_context);
-
- intel_engine_init_ctx_wa(dev_priv->engine[RCS0]);
- init_contexts(dev_priv);
-
- /* lowest priority; idle task */
- ctx = i915_gem_context_create_kernel(dev_priv, I915_PRIORITY_MIN);
- if (IS_ERR(ctx)) {
- DRM_ERROR("Failed to create default global context\n");
- return PTR_ERR(ctx);
- }
- /*
- * For easy recognisablity, we want the kernel context to be 0 and then
- * all user contexts will have non-zero hw_id. Kernel contexts are
- * permanently pinned, so that we never suffer a stall and can
- * use them from any allocation context (e.g. for evicting other
- * contexts and from inside the shrinker).
- */
- GEM_BUG_ON(ctx->hw_id);
- GEM_BUG_ON(!atomic_read(&ctx->hw_id_pin_count));
- dev_priv->kernel_context = ctx;
-
- /* highest priority; preempting task */
- if (needs_preempt_context(dev_priv)) {
- ctx = i915_gem_context_create_kernel(dev_priv, INT_MAX);
- if (!IS_ERR(ctx))
- dev_priv->preempt_context = ctx;
- else
- DRM_ERROR("Failed to create preempt context; disabling preemption\n");
- }
-
+ init_contexts(&i915->gem.contexts);
DRM_DEBUG_DRIVER("%s context support initialized\n",
- DRIVER_CAPS(dev_priv)->has_logical_contexts ?
+ DRIVER_CAPS(i915)->has_logical_contexts ?
"logical" : "fake");
- return 0;
}
-void i915_gem_contexts_fini(struct drm_i915_private *i915)
+void i915_gem_driver_release__contexts(struct drm_i915_private *i915)
{
- lockdep_assert_held(&i915->drm.struct_mutex);
-
- if (i915->preempt_context)
- destroy_kernel_context(&i915->preempt_context);
- destroy_kernel_context(&i915->kernel_context);
-
- /* Must free all deferred contexts (via flush_workqueue) first */
- GEM_BUG_ON(!list_empty(&i915->contexts.hw_id_list));
- ida_destroy(&i915->contexts.hw_ida);
-}
-
-static int context_idr_cleanup(int id, void *p, void *data)
-{
- context_close(p);
- return 0;
+ flush_work(&i915->gem.contexts.free_work);
}
static int vm_idr_cleanup(int id, void *p, void *data)
@@ -718,33 +768,29 @@ static int vm_idr_cleanup(int id, void *p, void *data)
}
static int gem_context_register(struct i915_gem_context *ctx,
- struct drm_i915_file_private *fpriv)
+ struct drm_i915_file_private *fpriv,
+ u32 *id)
{
+ struct i915_address_space *vm;
int ret;
ctx->file_priv = fpriv;
- if (ctx->vm)
- ctx->vm->file = fpriv;
+
+ mutex_lock(&ctx->mutex);
+ vm = i915_gem_context_vm(ctx);
+ if (vm)
+ WRITE_ONCE(vm->file, fpriv); /* XXX */
+ mutex_unlock(&ctx->mutex);
ctx->pid = get_task_pid(current, PIDTYPE_PID);
- ctx->name = kasprintf(GFP_KERNEL, "%s[%d]",
- current->comm, pid_nr(ctx->pid));
- if (!ctx->name) {
- ret = -ENOMEM;
- goto err_pid;
- }
+ snprintf(ctx->name, sizeof(ctx->name), "%s[%d]",
+ current->comm, pid_nr(ctx->pid));
/* And finally expose ourselves to userspace via the idr */
- mutex_lock(&fpriv->context_idr_lock);
- ret = idr_alloc(&fpriv->context_idr, ctx, 0, 0, GFP_KERNEL);
- mutex_unlock(&fpriv->context_idr_lock);
- if (ret >= 0)
- goto out;
+ ret = xa_alloc(&fpriv->context_xa, id, ctx, xa_limit_32b, GFP_KERNEL);
+ if (ret)
+ put_pid(fetch_and_zero(&ctx->pid));
- kfree(fetch_and_zero(&ctx->name));
-err_pid:
- put_pid(fetch_and_zero(&ctx->pid));
-out:
return ret;
}
@@ -754,51 +800,51 @@ int i915_gem_context_open(struct drm_i915_private *i915,
struct drm_i915_file_private *file_priv = file->driver_priv;
struct i915_gem_context *ctx;
int err;
+ u32 id;
- mutex_init(&file_priv->context_idr_lock);
- mutex_init(&file_priv->vm_idr_lock);
+ xa_init_flags(&file_priv->context_xa, XA_FLAGS_ALLOC);
- idr_init(&file_priv->context_idr);
+ mutex_init(&file_priv->vm_idr_lock);
idr_init_base(&file_priv->vm_idr, 1);
- mutex_lock(&i915->drm.struct_mutex);
ctx = i915_gem_create_context(i915, 0);
- mutex_unlock(&i915->drm.struct_mutex);
if (IS_ERR(ctx)) {
err = PTR_ERR(ctx);
goto err;
}
- err = gem_context_register(ctx, file_priv);
+ err = gem_context_register(ctx, file_priv, &id);
if (err < 0)
goto err_ctx;
- GEM_BUG_ON(i915_gem_context_is_kernel(ctx));
- GEM_BUG_ON(err > 0);
-
+ GEM_BUG_ON(id);
return 0;
err_ctx:
context_close(ctx);
err:
idr_destroy(&file_priv->vm_idr);
- idr_destroy(&file_priv->context_idr);
+ xa_destroy(&file_priv->context_xa);
mutex_destroy(&file_priv->vm_idr_lock);
- mutex_destroy(&file_priv->context_idr_lock);
return err;
}
void i915_gem_context_close(struct drm_file *file)
{
struct drm_i915_file_private *file_priv = file->driver_priv;
+ struct drm_i915_private *i915 = file_priv->dev_priv;
+ struct i915_gem_context *ctx;
+ unsigned long idx;
- idr_for_each(&file_priv->context_idr, context_idr_cleanup, NULL);
- idr_destroy(&file_priv->context_idr);
- mutex_destroy(&file_priv->context_idr_lock);
+ xa_for_each(&file_priv->context_xa, idx, ctx)
+ context_close(ctx);
+ xa_destroy(&file_priv->context_xa);
idr_for_each(&file_priv->vm_idr, vm_idr_cleanup, NULL);
idr_destroy(&file_priv->vm_idr);
mutex_destroy(&file_priv->vm_idr_lock);
+
+ contexts_flush_free(&i915->gem.contexts);
}
int i915_gem_vm_create_ioctl(struct drm_device *dev, void *data,
@@ -816,7 +862,7 @@ int i915_gem_vm_create_ioctl(struct drm_device *dev, void *data,
if (args->flags)
return -EINVAL;
- ppgtt = i915_ppgtt_create(i915);
+ ppgtt = i915_ppgtt_create(&i915->gt);
if (IS_ERR(ppgtt))
return PTR_ERR(ppgtt);
@@ -891,6 +937,7 @@ struct context_barrier_task {
void *data;
};
+__i915_active_call
static void cb_retire(struct i915_active *base)
{
struct context_barrier_task *cb = container_of(base, typeof(*cb), base);
@@ -910,21 +957,23 @@ static int context_barrier_task(struct i915_gem_context *ctx,
void (*task)(void *data),
void *data)
{
- struct drm_i915_private *i915 = ctx->i915;
struct context_barrier_task *cb;
struct i915_gem_engines_iter it;
struct intel_context *ce;
int err = 0;
- lockdep_assert_held(&i915->drm.struct_mutex);
GEM_BUG_ON(!task);
cb = kmalloc(sizeof(*cb), GFP_KERNEL);
if (!cb)
return -ENOMEM;
- i915_active_init(i915, &cb->base, cb_retire);
- i915_active_acquire(&cb->base);
+ i915_active_init(&cb->base, NULL, cb_retire);
+ err = i915_active_acquire(&cb->base);
+ if (err) {
+ kfree(cb);
+ return err;
+ }
for_each_gem_engine(ce, i915_gem_context_lock_engines(ctx), it) {
struct i915_request *rq;
@@ -951,7 +1000,7 @@ static int context_barrier_task(struct i915_gem_context *ctx,
if (emit)
err = emit(rq, data);
if (err == 0)
- err = i915_active_ref(&cb->base, rq->fence.context, rq);
+ err = i915_active_add_request(&cb->base, rq);
i915_request_add(rq);
if (err)
@@ -974,16 +1023,12 @@ static int get_ppgtt(struct drm_i915_file_private *file_priv,
struct i915_address_space *vm;
int ret;
- if (!ctx->vm)
+ if (!rcu_access_pointer(ctx->vm))
return -ENODEV;
- /* XXX rcu acquire? */
- ret = mutex_lock_interruptible(&ctx->i915->drm.struct_mutex);
- if (ret)
- return ret;
-
- vm = i915_vm_get(ctx->vm);
- mutex_unlock(&ctx->i915->drm.struct_mutex);
+ rcu_read_lock();
+ vm = context_get_vm_rcu(ctx);
+ rcu_read_unlock();
ret = mutex_lock_interruptible(&file_priv->vm_idr_lock);
if (ret)
@@ -994,7 +1039,7 @@ static int get_ppgtt(struct drm_i915_file_private *file_priv,
if (ret < 0)
goto err_unlock;
- i915_vm_get(vm);
+ i915_vm_open(vm);
args->size = 0;
args->value = ret;
@@ -1014,12 +1059,12 @@ static void set_ppgtt_barrier(void *data)
if (INTEL_GEN(old->i915) < 8)
gen6_ppgtt_unpin_all(i915_vm_to_ppgtt(old));
- i915_vm_put(old);
+ i915_vm_close(old);
}
static int emit_ppgtt_update(struct i915_request *rq, void *data)
{
- struct i915_address_space *vm = rq->gem_context->vm;
+ struct i915_address_space *vm = rq->context->vm;
struct intel_engine_cs *engine = rq->engine;
u32 base = engine->mmio_base;
u32 *cs;
@@ -1044,12 +1089,18 @@ static int emit_ppgtt_update(struct i915_request *rq, void *data)
intel_ring_advance(rq, cs);
} else if (HAS_LOGICAL_RING_CONTEXTS(engine->i915)) {
struct i915_ppgtt *ppgtt = i915_vm_to_ppgtt(vm);
+ int err;
+
+ /* Magic required to prevent forcewake errors! */
+ err = engine->emit_flush(rq, EMIT_INVALIDATE);
+ if (err)
+ return err;
cs = intel_ring_begin(rq, 4 * GEN8_3LVL_PDPES + 2);
if (IS_ERR(cs))
return PTR_ERR(cs);
- *cs++ = MI_LOAD_REGISTER_IMM(2 * GEN8_3LVL_PDPES);
+ *cs++ = MI_LOAD_REGISTER_IMM(2 * GEN8_3LVL_PDPES) | MI_LRI_FORCE_POSTED;
for (i = GEN8_3LVL_PDPES; i--; ) {
const dma_addr_t pd_daddr = i915_page_dir_dma_addr(ppgtt, i);
@@ -1060,9 +1111,6 @@ static int emit_ppgtt_update(struct i915_request *rq, void *data)
}
*cs++ = MI_NOOP;
intel_ring_advance(rq, cs);
- } else {
- /* ppGTT is not part of the legacy context image */
- gen6_ppgtt_pin(i915_vm_to_ppgtt(vm));
}
return 0;
@@ -1070,10 +1118,20 @@ static int emit_ppgtt_update(struct i915_request *rq, void *data)
static bool skip_ppgtt_update(struct intel_context *ce, void *data)
{
+ if (!test_bit(CONTEXT_ALLOC_BIT, &ce->flags))
+ return true;
+
if (HAS_LOGICAL_RING_CONTEXTS(ce->engine->i915))
- return !ce->state;
- else
- return !atomic_read(&ce->pin_count);
+ return false;
+
+ if (!atomic_read(&ce->pin_count))
+ return true;
+
+ /* ppGTT is not part of the legacy context image */
+ if (gen6_ppgtt_pin(i915_vm_to_ppgtt(ce->vm)))
+ return true;
+
+ return false;
}
static int set_ppgtt(struct drm_i915_file_private *file_priv,
@@ -1086,34 +1144,34 @@ static int set_ppgtt(struct drm_i915_file_private *file_priv,
if (args->size)
return -EINVAL;
- if (!ctx->vm)
+ if (!rcu_access_pointer(ctx->vm))
return -ENODEV;
if (upper_32_bits(args->value))
return -ENOENT;
- err = mutex_lock_interruptible(&file_priv->vm_idr_lock);
- if (err)
- return err;
-
+ rcu_read_lock();
vm = idr_find(&file_priv->vm_idr, args->value);
- if (vm)
- i915_vm_get(vm);
- mutex_unlock(&file_priv->vm_idr_lock);
+ if (vm && !kref_get_unless_zero(&vm->ref))
+ vm = NULL;
+ rcu_read_unlock();
if (!vm)
return -ENOENT;
- err = mutex_lock_interruptible(&ctx->i915->drm.struct_mutex);
+ err = mutex_lock_interruptible(&ctx->mutex);
if (err)
goto out;
- if (vm == ctx->vm)
+ if (i915_gem_context_is_closed(ctx)) {
+ err = -ENOENT;
+ goto unlock;
+ }
+
+ if (vm == rcu_access_pointer(ctx->vm))
goto unlock;
/* Teardown the existing obj:vma cache, it will have to be rebuilt. */
- mutex_lock(&ctx->mutex);
lut_close(ctx);
- mutex_unlock(&ctx->mutex);
old = __set_ppgtt(ctx, vm);
@@ -1128,14 +1186,12 @@ static int set_ppgtt(struct drm_i915_file_private *file_priv,
set_ppgtt_barrier,
old);
if (err) {
- ctx->vm = old;
- ctx->desc_template = default_desc_template(ctx->i915, old);
- i915_vm_put(vm);
+ i915_vm_close(__set_ppgtt(ctx, old));
+ i915_vm_close(old);
}
unlock:
- mutex_unlock(&ctx->i915->drm.struct_mutex);
-
+ mutex_unlock(&ctx->mutex);
out:
i915_vm_put(vm);
return err;
@@ -1154,7 +1210,7 @@ static int gen8_emit_rpcs_config(struct i915_request *rq,
offset = i915_ggtt_offset(ce->state) +
LRC_STATE_PN * PAGE_SIZE +
- (CTX_R_PWR_CLK_STATE + 1) * 4;
+ CTX_R_PWR_CLK_STATE * 4;
*cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
*cs++ = lower_32_bits(offset);
@@ -1180,44 +1236,32 @@ gen8_modify_rpcs(struct intel_context *ce, struct intel_sseu sseu)
* image, or into the registers directory, does not stick). Pristine
* and idle contexts will be configured on pinning.
*/
- if (!intel_context_is_pinned(ce))
+ if (!intel_context_pin_if_active(ce))
return 0;
- rq = i915_request_create(ce->engine->kernel_context);
- if (IS_ERR(rq))
- return PTR_ERR(rq);
-
- /* Queue this switch after all other activity by this context. */
- ret = i915_active_request_set(&ce->ring->timeline->last_request, rq);
- if (ret)
- goto out_add;
-
- /*
- * Guarantee context image and the timeline remains pinned until the
- * modifying request is retired by setting the ce activity tracker.
- *
- * But we only need to take one pin on the account of it. Or in other
- * words transfer the pinned ce object to tracked active request.
- */
- GEM_BUG_ON(i915_active_is_idle(&ce->active));
- ret = i915_active_ref(&ce->active, rq->fence.context, rq);
- if (ret)
- goto out_add;
+ rq = intel_engine_create_kernel_request(ce->engine);
+ if (IS_ERR(rq)) {
+ ret = PTR_ERR(rq);
+ goto out_unpin;
+ }
- ret = gen8_emit_rpcs_config(rq, ce, sseu);
+ /* Serialise with the remote context */
+ ret = intel_context_prepare_remote_request(ce, rq);
+ if (ret == 0)
+ ret = gen8_emit_rpcs_config(rq, ce, sseu);
-out_add:
i915_request_add(rq);
+out_unpin:
+ intel_context_unpin(ce);
return ret;
}
static int
-__intel_context_reconfigure_sseu(struct intel_context *ce,
- struct intel_sseu sseu)
+intel_context_reconfigure_sseu(struct intel_context *ce, struct intel_sseu sseu)
{
int ret;
- GEM_BUG_ON(INTEL_GEN(ce->gem_context->i915) < 8);
+ GEM_BUG_ON(INTEL_GEN(ce->engine->i915) < 8);
ret = intel_context_lock_pinned(ce);
if (ret)
@@ -1237,23 +1281,6 @@ unlock:
}
static int
-intel_context_reconfigure_sseu(struct intel_context *ce, struct intel_sseu sseu)
-{
- struct drm_i915_private *i915 = ce->gem_context->i915;
- int ret;
-
- ret = mutex_lock_interruptible(&i915->drm.struct_mutex);
- if (ret)
- return ret;
-
- ret = __intel_context_reconfigure_sseu(ce, sseu);
-
- mutex_unlock(&i915->drm.struct_mutex);
-
- return ret;
-}
-
-static int
user_to_context_sseu(struct drm_i915_private *i915,
const struct drm_i915_gem_context_param_sseu *user,
struct intel_sseu *context)
@@ -1484,12 +1511,14 @@ set_engines__load_balance(struct i915_user_extension __user *base, void *data)
}
}
- ce = intel_execlists_create_virtual(set->ctx, siblings, n);
+ ce = intel_execlists_create_virtual(siblings, n);
if (IS_ERR(ce)) {
err = PTR_ERR(ce);
goto out_siblings;
}
+ intel_context_set_gem(ce, set->ctx);
+
if (cmpxchg(&set->engines->engines[idx], NULL, ce)) {
intel_context_put(ce);
err = -EEXIST;
@@ -1636,6 +1665,7 @@ set_engines(struct i915_gem_context *ctx,
for (n = 0; n < num_engines; n++) {
struct i915_engine_class_instance ci;
struct intel_engine_cs *engine;
+ struct intel_context *ce;
if (copy_from_user(&ci, &user->engines[n], sizeof(ci))) {
__free_engines(set.engines, n);
@@ -1658,11 +1688,15 @@ set_engines(struct i915_gem_context *ctx,
return -ENOENT;
}
- set.engines->engines[n] = intel_context_create(ctx, engine);
- if (!set.engines->engines[n]) {
+ ce = intel_context_create(engine);
+ if (IS_ERR(ce)) {
__free_engines(set.engines, n);
- return -ENOMEM;
+ return PTR_ERR(ce);
}
+
+ intel_context_set_gem(ce, ctx);
+
+ set.engines->engines[n] = ce;
}
set.engines->num_engines = num_engines;
@@ -1683,7 +1717,7 @@ replace:
i915_gem_context_set_user_engines(ctx);
else
i915_gem_context_clear_user_engines(ctx);
- rcu_swap_protected(ctx->engines, set.engines, 1);
+ set.engines = rcu_replace_pointer(ctx->engines, set.engines, 1);
mutex_unlock(&ctx->engines_mutex);
call_rcu(&set.engines->rcu, free_engines_rcu);
@@ -1776,7 +1810,7 @@ get_engines(struct i915_gem_context *ctx,
if (e->engines[n]) {
ci.engine_class = e->engines[n]->engine->uabi_class;
- ci.engine_instance = e->engines[n]->engine->instance;
+ ci.engine_instance = e->engines[n]->engine->uabi_instance;
}
if (copy_to_user(&user->engines[n], &ci, sizeof(ci))) {
@@ -1792,6 +1826,54 @@ err_free:
return err;
}
+static int
+set_persistence(struct i915_gem_context *ctx,
+ const struct drm_i915_gem_context_param *args)
+{
+ if (args->size)
+ return -EINVAL;
+
+ return __context_set_persistence(ctx, args->value);
+}
+
+static void __apply_priority(struct intel_context *ce, void *arg)
+{
+ struct i915_gem_context *ctx = arg;
+
+ if (!intel_engine_has_semaphores(ce->engine))
+ return;
+
+ if (ctx->sched.priority >= I915_PRIORITY_NORMAL)
+ intel_context_set_use_semaphores(ce);
+ else
+ intel_context_clear_use_semaphores(ce);
+}
+
+static int set_priority(struct i915_gem_context *ctx,
+ const struct drm_i915_gem_context_param *args)
+{
+ s64 priority = args->value;
+
+ if (args->size)
+ return -EINVAL;
+
+ if (!(ctx->i915->caps.scheduler & I915_SCHEDULER_CAP_PRIORITY))
+ return -ENODEV;
+
+ if (priority > I915_CONTEXT_MAX_USER_PRIORITY ||
+ priority < I915_CONTEXT_MIN_USER_PRIORITY)
+ return -EINVAL;
+
+ if (priority > I915_CONTEXT_DEFAULT_PRIORITY &&
+ !capable(CAP_SYS_NICE))
+ return -EPERM;
+
+ ctx->sched.priority = I915_USER_PRIORITY(priority);
+ context_apply_all(ctx, __apply_priority, ctx);
+
+ return 0;
+}
+
static int ctx_setparam(struct drm_i915_file_private *fpriv,
struct i915_gem_context *ctx,
struct drm_i915_gem_context_param *args)
@@ -1838,23 +1920,7 @@ static int ctx_setparam(struct drm_i915_file_private *fpriv,
break;
case I915_CONTEXT_PARAM_PRIORITY:
- {
- s64 priority = args->value;
-
- if (args->size)
- ret = -EINVAL;
- else if (!(ctx->i915->caps.scheduler & I915_SCHEDULER_CAP_PRIORITY))
- ret = -ENODEV;
- else if (priority > I915_CONTEXT_MAX_USER_PRIORITY ||
- priority < I915_CONTEXT_MIN_USER_PRIORITY)
- ret = -EINVAL;
- else if (priority > I915_CONTEXT_DEFAULT_PRIORITY &&
- !capable(CAP_SYS_NICE))
- ret = -EPERM;
- else
- ctx->sched.priority =
- I915_USER_PRIORITY(priority);
- }
+ ret = set_priority(ctx, args);
break;
case I915_CONTEXT_PARAM_SSEU:
@@ -1869,6 +1935,10 @@ static int ctx_setparam(struct drm_i915_file_private *fpriv,
ret = set_engines(ctx, args);
break;
+ case I915_CONTEXT_PARAM_PERSISTENCE:
+ ret = set_persistence(ctx, args);
+ break;
+
case I915_CONTEXT_PARAM_BAN_PERIOD:
default:
ret = -EINVAL;
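
With the new I915_CONTEXT_PARAM_PERSISTENCE case wired into ctx_setparam() above, userspace can mark a context non-persistent so that its outstanding requests are cancelled when the context is closed (see kill_context() earlier in this file). A minimal sketch of the ioctl usage, assuming libdrm, an open i915 fd and an existing context id:

	struct drm_i915_gem_context_param p = {
		.ctx_id = ctx_id,
		/* .size must stay 0 */
		.param = I915_CONTEXT_PARAM_PERSISTENCE,
		.value = 0,	/* 0: cancel work on close; 1: keep running (default) */
	};

	if (drmIoctl(fd, DRM_IOCTL_I915_GEM_CONTEXT_SETPARAM, &p))
		/* e.g. -ENODEV without preemption support, -EINVAL without hangcheck */
		perror("I915_CONTEXT_PARAM_PERSISTENCE");
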
@@ -1930,20 +2000,23 @@ static int clone_engines(struct i915_gem_context *dst,
*/
if (intel_engine_is_virtual(engine))
clone->engines[n] =
- intel_execlists_clone_virtual(dst, engine);
+ intel_execlists_clone_virtual(engine);
else
- clone->engines[n] = intel_context_create(dst, engine);
+ clone->engines[n] = intel_context_create(engine);
if (IS_ERR_OR_NULL(clone->engines[n])) {
__free_engines(clone, n);
goto err_unlock;
}
+
+ intel_context_set_gem(clone->engines[n], dst);
}
clone->num_engines = n;
user_engines = i915_gem_context_user_engines(src);
i915_gem_context_unlock_engines(src);
- free_engines(dst->engines);
+ /* Serialised by constructor */
+ free_engines(__context_engines_static(dst));
RCU_INIT_POINTER(dst->engines, clone);
if (user_engines)
i915_gem_context_set_user_engines(dst);
@@ -1978,7 +2051,8 @@ static int clone_sseu(struct i915_gem_context *dst,
unsigned long n;
int err;
- clone = dst->engines; /* no locking required; sole access */
+ /* no locking required; sole access under constructor */
+ clone = __context_engines_static(dst);
if (e->num_engines != clone->num_engines) {
err = -EINVAL;
goto unlock;
@@ -2011,13 +2085,8 @@ unlock:
static int clone_timeline(struct i915_gem_context *dst,
struct i915_gem_context *src)
{
- if (src->timeline) {
- GEM_BUG_ON(src->timeline == dst->timeline);
-
- if (dst->timeline)
- i915_timeline_put(dst->timeline);
- dst->timeline = i915_timeline_get(src->timeline);
- }
+ if (src->timeline)
+ __assign_timeline(dst, src->timeline);
return 0;
}
@@ -2026,44 +2095,24 @@ static int clone_vm(struct i915_gem_context *dst,
struct i915_gem_context *src)
{
struct i915_address_space *vm;
+ int err = 0;
- rcu_read_lock();
- do {
- vm = READ_ONCE(src->vm);
- if (!vm)
- break;
-
- if (!kref_get_unless_zero(&vm->ref))
- continue;
-
- /*
- * This ppgtt may have be reallocated between
- * the read and the kref, and reassigned to a third
- * context. In order to avoid inadvertent sharing
- * of this ppgtt with that third context (and not
- * src), we have to confirm that we have the same
- * ppgtt after passing through the strong memory
- * barrier implied by a successful
- * kref_get_unless_zero().
- *
- * Once we have acquired the current ppgtt of src,
- * we no longer care if it is released from src, as
- * it cannot be reallocated elsewhere.
- */
-
- if (vm == READ_ONCE(src->vm))
- break;
+ if (!rcu_access_pointer(src->vm))
+ return 0;
- i915_vm_put(vm);
- } while (1);
+ rcu_read_lock();
+ vm = context_get_vm_rcu(src);
rcu_read_unlock();
- if (vm) {
+ if (!mutex_lock_interruptible(&dst->mutex)) {
__assign_ppgtt(dst, vm);
- i915_vm_put(vm);
+ mutex_unlock(&dst->mutex);
+ } else {
+ err = -EINTR;
}
- return 0;
+ i915_vm_put(vm);
+ return err;
}
static int create_clone(struct i915_user_extension __user *ext, void *data)
@@ -2134,6 +2183,7 @@ int i915_gem_context_create_ioctl(struct drm_device *dev, void *data,
struct drm_i915_gem_context_create_ext *args = data;
struct create_ext ext_data;
int ret;
+ u32 id;
if (!DRIVER_CAPS(i915)->has_logical_contexts)
return -ENODEV;
@@ -2141,24 +2191,18 @@ int i915_gem_context_create_ioctl(struct drm_device *dev, void *data,
if (args->flags & I915_CONTEXT_CREATE_FLAGS_UNKNOWN)
return -EINVAL;
- ret = i915_terminally_wedged(i915);
+ ret = intel_gt_terminally_wedged(&i915->gt);
if (ret)
return ret;
ext_data.fpriv = file->driver_priv;
if (client_is_banned(ext_data.fpriv)) {
DRM_DEBUG("client %s[%d] banned from creating ctx\n",
- current->comm,
- pid_nr(get_task_pid(current, PIDTYPE_PID)));
+ current->comm, task_pid_nr(current));
return -EIO;
}
- ret = i915_mutex_lock_interruptible(dev);
- if (ret)
- return ret;
-
ext_data.ctx = i915_gem_create_context(i915, args->flags);
- mutex_unlock(&dev->struct_mutex);
if (IS_ERR(ext_data.ctx))
return PTR_ERR(ext_data.ctx);
@@ -2171,11 +2215,11 @@ int i915_gem_context_create_ioctl(struct drm_device *dev, void *data,
goto err_ctx;
}
- ret = gem_context_register(ext_data.ctx, ext_data.fpriv);
+ ret = gem_context_register(ext_data.ctx, ext_data.fpriv, &id);
if (ret < 0)
goto err_ctx;
- args->ctx_id = ret;
+ args->ctx_id = id;
DRM_DEBUG("HW context %d created\n", args->ctx_id);
return 0;
@@ -2198,11 +2242,7 @@ int i915_gem_context_destroy_ioctl(struct drm_device *dev, void *data,
if (!args->ctx_id)
return -ENOENT;
- if (mutex_lock_interruptible(&file_priv->context_idr_lock))
- return -EINTR;
-
- ctx = idr_remove(&file_priv->context_idr, args->ctx_id);
- mutex_unlock(&file_priv->context_idr_lock);
+ ctx = xa_erase(&file_priv->context_xa, args->ctx_id);
if (!ctx)
return -ENOENT;
@@ -2285,12 +2325,12 @@ int i915_gem_context_getparam_ioctl(struct drm_device *dev, void *data,
case I915_CONTEXT_PARAM_GTT_SIZE:
args->size = 0;
- if (ctx->vm)
- args->value = ctx->vm->total;
- else if (to_i915(dev)->mm.aliasing_ppgtt)
- args->value = to_i915(dev)->mm.aliasing_ppgtt->vm.total;
+ rcu_read_lock();
+ if (rcu_access_pointer(ctx->vm))
+ args->value = rcu_dereference(ctx->vm)->total;
else
args->value = to_i915(dev)->ggtt.vm.total;
+ rcu_read_unlock();
break;
case I915_CONTEXT_PARAM_NO_ERROR_CAPTURE:
@@ -2325,6 +2365,11 @@ int i915_gem_context_getparam_ioctl(struct drm_device *dev, void *data,
ret = get_engines(ctx, args);
break;
+ case I915_CONTEXT_PARAM_PERSISTENCE:
+ args->size = 0;
+ args->value = i915_gem_context_is_persistent(ctx);
+ break;
+
case I915_CONTEXT_PARAM_BAN_PERIOD:
default:
ret = -EINVAL;
@@ -2356,7 +2401,7 @@ int i915_gem_context_setparam_ioctl(struct drm_device *dev, void *data,
int i915_gem_context_reset_stats_ioctl(struct drm_device *dev,
void *data, struct drm_file *file)
{
- struct drm_i915_private *dev_priv = to_i915(dev);
+ struct drm_i915_private *i915 = to_i915(dev);
struct drm_i915_reset_stats *args = data;
struct i915_gem_context *ctx;
int ret;
@@ -2378,7 +2423,7 @@ int i915_gem_context_reset_stats_ioctl(struct drm_device *dev,
*/
if (capable(CAP_SYS_ADMIN))
- args->reset_count = i915_reset_count(&dev_priv->gpu_error);
+ args->reset_count = i915_reset_count(&i915->gpu_error);
else
args->reset_count = 0;
@@ -2391,33 +2436,6 @@ out:
return ret;
}
-int __i915_gem_context_pin_hw_id(struct i915_gem_context *ctx)
-{
- struct drm_i915_private *i915 = ctx->i915;
- int err = 0;
-
- mutex_lock(&i915->contexts.mutex);
-
- GEM_BUG_ON(i915_gem_context_is_closed(ctx));
-
- if (list_empty(&ctx->hw_id_link)) {
- GEM_BUG_ON(atomic_read(&ctx->hw_id_pin_count));
-
- err = assign_hw_id(i915, &ctx->hw_id);
- if (err)
- goto out_unlock;
-
- list_add_tail(&ctx->hw_id_link, &i915->contexts.hw_id_list);
- }
-
- GEM_BUG_ON(atomic_read(&ctx->hw_id_pin_count) == ~0u);
- atomic_inc(&ctx->hw_id_pin_count);
-
-out_unlock:
- mutex_unlock(&i915->contexts.mutex);
- return err;
-}
-
/* GEM context-engines iterator: for_each_gem_engine() */
struct intel_context *
i915_gem_engines_iter_next(struct i915_gem_engines_iter *it)
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.h b/drivers/gpu/drm/i915/gem/i915_gem_context.h
index 9691dd062f72..3ae61a355d87 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_context.h
+++ b/drivers/gpu/drm/i915/gem/i915_gem_context.h
@@ -11,6 +11,7 @@
#include "gt/intel_context.h"
+#include "i915_drv.h"
#include "i915_gem.h"
#include "i915_scheduler.h"
#include "intel_device_info.h"
@@ -74,24 +75,19 @@ static inline void i915_gem_context_clear_recoverable(struct i915_gem_context *c
clear_bit(UCONTEXT_RECOVERABLE, &ctx->user_flags);
}
-static inline bool i915_gem_context_is_banned(const struct i915_gem_context *ctx)
+static inline bool i915_gem_context_is_persistent(const struct i915_gem_context *ctx)
{
- return test_bit(CONTEXT_BANNED, &ctx->flags);
+ return test_bit(UCONTEXT_PERSISTENCE, &ctx->user_flags);
}
-static inline void i915_gem_context_set_banned(struct i915_gem_context *ctx)
+static inline void i915_gem_context_set_persistence(struct i915_gem_context *ctx)
{
- set_bit(CONTEXT_BANNED, &ctx->flags);
+ set_bit(UCONTEXT_PERSISTENCE, &ctx->user_flags);
}
-static inline bool i915_gem_context_force_single_submission(const struct i915_gem_context *ctx)
+static inline void i915_gem_context_clear_persistence(struct i915_gem_context *ctx)
{
- return test_bit(CONTEXT_FORCE_SINGLE_SUBMISSION, &ctx->flags);
-}
-
-static inline void i915_gem_context_set_force_single_submission(struct i915_gem_context *ctx)
-{
- __set_bit(CONTEXT_FORCE_SINGLE_SUBMISSION, &ctx->flags);
+ clear_bit(UCONTEXT_PERSISTENCE, &ctx->user_flags);
}
static inline bool
@@ -112,37 +108,15 @@ i915_gem_context_clear_user_engines(struct i915_gem_context *ctx)
clear_bit(CONTEXT_USER_ENGINES, &ctx->flags);
}
-int __i915_gem_context_pin_hw_id(struct i915_gem_context *ctx);
-static inline int i915_gem_context_pin_hw_id(struct i915_gem_context *ctx)
-{
- if (atomic_inc_not_zero(&ctx->hw_id_pin_count))
- return 0;
-
- return __i915_gem_context_pin_hw_id(ctx);
-}
-
-static inline void i915_gem_context_unpin_hw_id(struct i915_gem_context *ctx)
-{
- GEM_BUG_ON(atomic_read(&ctx->hw_id_pin_count) == 0u);
- atomic_dec(&ctx->hw_id_pin_count);
-}
-
-static inline bool i915_gem_context_is_kernel(struct i915_gem_context *ctx)
-{
- return !ctx->file_priv;
-}
-
/* i915_gem_context.c */
-int __must_check i915_gem_contexts_init(struct drm_i915_private *dev_priv);
-void i915_gem_contexts_fini(struct drm_i915_private *dev_priv);
+void i915_gem_init__contexts(struct drm_i915_private *i915);
+void i915_gem_driver_release__contexts(struct drm_i915_private *i915);
int i915_gem_context_open(struct drm_i915_private *i915,
struct drm_file *file);
void i915_gem_context_close(struct drm_file *file);
void i915_gem_context_release(struct kref *ctx_ref);
-struct i915_gem_context *
-i915_gem_context_create_gvt(struct drm_device *dev);
int i915_gem_vm_create_ioctl(struct drm_device *dev, void *data,
struct drm_file *file);
@@ -160,9 +134,6 @@ int i915_gem_context_setparam_ioctl(struct drm_device *dev, void *data,
int i915_gem_context_reset_stats_ioctl(struct drm_device *dev, void *data,
struct drm_file *file);
-struct i915_gem_context *
-i915_gem_context_create_kernel(struct drm_i915_private *i915, int prio);
-
static inline struct i915_gem_context *
i915_gem_context_get(struct i915_gem_context *ctx)
{
@@ -175,6 +146,27 @@ static inline void i915_gem_context_put(struct i915_gem_context *ctx)
kref_put(&ctx->ref, i915_gem_context_release);
}
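+/* Peek at the context's private VM; the caller must hold ctx->mutex */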
+static inline struct i915_address_space *
+i915_gem_context_vm(struct i915_gem_context *ctx)
+{
+ return rcu_dereference_protected(ctx->vm, lockdep_is_held(&ctx->mutex));
+}
+
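+/* Take a reference on the context's VM, falling back to the global GTT if it has none */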
+static inline struct i915_address_space *
+i915_gem_context_get_vm_rcu(struct i915_gem_context *ctx)
+{
+ struct i915_address_space *vm;
+
+ rcu_read_lock();
+ vm = rcu_dereference(ctx->vm);
+ if (!vm)
+ vm = &ctx->i915->ggtt.vm;
+ vm = i915_vm_get(vm);
+ rcu_read_unlock();
+
+ return vm;
+}
+
static inline struct i915_gem_engines *
i915_gem_context_engines(struct i915_gem_context *ctx)
{
@@ -198,12 +190,6 @@ i915_gem_context_unlock_engines(struct i915_gem_context *ctx)
}
static inline struct intel_context *
-i915_gem_context_lookup_engine(struct i915_gem_context *ctx, unsigned int idx)
-{
- return i915_gem_context_engines(ctx)->engines[idx];
-}
-
-static inline struct intel_context *
i915_gem_context_get_engine(struct i915_gem_context *ctx, unsigned int idx)
{
struct intel_context *ce = ERR_PTR(-EINVAL);
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context_types.h b/drivers/gpu/drm/i915/gem/i915_gem_context_types.h
index cc513410eeef..017ca803ab47 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_context_types.h
+++ b/drivers/gpu/drm/i915/gem/i915_gem_context_types.h
@@ -26,7 +26,7 @@ struct pid;
struct drm_i915_private;
struct drm_i915_file_private;
struct i915_address_space;
-struct i915_timeline;
+struct intel_timeline;
struct intel_ring;
struct i915_gem_engines {
@@ -77,7 +77,7 @@ struct i915_gem_context {
struct i915_gem_engines __rcu *engines;
struct mutex engines_mutex; /* guards writes to engines */
- struct i915_timeline *timeline;
+ struct intel_timeline *timeline;
/**
* @vm: unique address space (GTT)
@@ -88,7 +88,7 @@ struct i915_gem_context {
* In other modes, this is a NULL pointer with the expectation that
* the caller uses the shared global GTT.
*/
- struct i915_address_space *vm;
+ struct i915_address_space __rcu *vm;
/**
* @pid: process id of creator
@@ -100,15 +100,6 @@ struct i915_gem_context {
*/
struct pid *pid;
- /**
- * @name: arbitrary name
- *
- * A name is constructed for the context from the creator's process
- * name, pid and user handle in order to uniquely identify the
- * context in messages.
- */
- const char *name;
-
/** link: place with &drm_i915_private.context_list */
struct list_head link;
struct llist_node free_link;
@@ -137,43 +128,19 @@ struct i915_gem_context {
#define UCONTEXT_NO_ERROR_CAPTURE 1
#define UCONTEXT_BANNABLE 2
#define UCONTEXT_RECOVERABLE 3
+#define UCONTEXT_PERSISTENCE 4
/**
* @flags: small set of booleans
*/
unsigned long flags;
-#define CONTEXT_BANNED 0
-#define CONTEXT_CLOSED 1
-#define CONTEXT_FORCE_SINGLE_SUBMISSION 2
-#define CONTEXT_USER_ENGINES 3
-
- /**
- * @hw_id: - unique identifier for the context
- *
- * The hardware needs to uniquely identify the context for a few
- * functions like fault reporting, PASID, scheduling. The
- * &drm_i915_private.context_hw_ida is used to assign a unqiue
- * id for the lifetime of the context.
- *
- * @hw_id_pin_count: - number of times this context had been pinned
- * for use (should be, at most, once per engine).
- *
- * @hw_id_link: - all contexts with an assigned id are tracked
- * for possible repossession.
- */
- unsigned int hw_id;
- atomic_t hw_id_pin_count;
- struct list_head hw_id_link;
+#define CONTEXT_CLOSED 0
+#define CONTEXT_USER_ENGINES 1
struct mutex mutex;
struct i915_sched_attr sched;
- /** ring_size: size for allocating the per-engine ring buffer */
- u32 ring_size;
- /** desc_template: invariant fields for the HW context descriptor */
- u32 desc_template;
-
/** guilty_count: How many times this context has caused a GPU hang. */
atomic_t guilty_count;
/**
@@ -197,6 +164,15 @@ struct i915_gem_context {
* per vm, which may be one per context or shared with the global GTT)
*/
struct radix_tree_root handles_vma;
+
+ /**
+ * @name: arbitrary name, used for user debug
+ *
+ * A name is constructed for the context from the creator's process
+ * name, pid and user handle in order to uniquely identify the
+ * context in messages.
+ */
+ char name[TASK_COMM_LEN + 8];
};
#endif /* __I915_GEM_CONTEXT_TYPES_H__ */
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c b/drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c
index cbf1701d3acc..372b57ca0efc 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c
@@ -6,7 +6,7 @@
#include <linux/dma-buf.h>
#include <linux/highmem.h>
-#include <linux/reservation.h>
+#include <linux/dma-resv.h>
#include "i915_drv.h"
#include "i915_gem_object.h"
@@ -93,40 +93,6 @@ static void i915_gem_dmabuf_vunmap(struct dma_buf *dma_buf, void *vaddr)
i915_gem_object_unpin_map(obj);
}
-static void *i915_gem_dmabuf_kmap(struct dma_buf *dma_buf, unsigned long page_num)
-{
- struct drm_i915_gem_object *obj = dma_buf_to_obj(dma_buf);
- struct page *page;
-
- if (page_num >= obj->base.size >> PAGE_SHIFT)
- return NULL;
-
- if (!i915_gem_object_has_struct_page(obj))
- return NULL;
-
- if (i915_gem_object_pin_pages(obj))
- return NULL;
-
- /* Synchronisation is left to the caller (via .begin_cpu_access()) */
- page = i915_gem_object_get_page(obj, page_num);
- if (IS_ERR(page))
- goto err_unpin;
-
- return kmap(page);
-
-err_unpin:
- i915_gem_object_unpin_pages(obj);
- return NULL;
-}
-
-static void i915_gem_dmabuf_kunmap(struct dma_buf *dma_buf, unsigned long page_num, void *addr)
-{
- struct drm_i915_gem_object *obj = dma_buf_to_obj(dma_buf);
-
- kunmap(virt_to_page(addr));
- i915_gem_object_unpin_pages(obj);
-}
-
static int i915_gem_dmabuf_mmap(struct dma_buf *dma_buf, struct vm_area_struct *vma)
{
struct drm_i915_gem_object *obj = dma_buf_to_obj(dma_buf);
@@ -195,8 +161,6 @@ static const struct dma_buf_ops i915_dmabuf_ops = {
.map_dma_buf = i915_gem_map_dma_buf,
.unmap_dma_buf = i915_gem_unmap_dma_buf,
.release = drm_gem_dmabuf_release,
- .map = i915_gem_dmabuf_kmap,
- .unmap = i915_gem_dmabuf_kunmap,
.mmap = i915_gem_dmabuf_mmap,
.vmap = i915_gem_dmabuf_vmap,
.vunmap = i915_gem_dmabuf_vunmap,
@@ -204,8 +168,7 @@ static const struct dma_buf_ops i915_dmabuf_ops = {
.end_cpu_access = i915_gem_end_cpu_access,
};
-struct dma_buf *i915_gem_prime_export(struct drm_device *dev,
- struct drm_gem_object *gem_obj, int flags)
+struct dma_buf *i915_gem_prime_export(struct drm_gem_object *gem_obj, int flags)
{
struct drm_i915_gem_object *obj = to_intel_bo(gem_obj);
DEFINE_DMA_BUF_EXPORT_INFO(exp_info);
@@ -222,7 +185,7 @@ struct dma_buf *i915_gem_prime_export(struct drm_device *dev,
return ERR_PTR(ret);
}
- return drm_gem_dmabuf_export(dev, &exp_info);
+ return drm_gem_dmabuf_export(gem_obj->dev, &exp_info);
}
static int i915_gem_object_get_pages_dmabuf(struct drm_i915_gem_object *obj)
@@ -257,6 +220,7 @@ static const struct drm_i915_gem_object_ops i915_gem_object_dmabuf_ops = {
struct drm_gem_object *i915_gem_prime_import(struct drm_device *dev,
struct dma_buf *dma_buf)
{
+ static struct lock_class_key lock_class;
struct dma_buf_attachment *attach;
struct drm_i915_gem_object *obj;
int ret;
@@ -288,7 +252,7 @@ struct drm_gem_object *i915_gem_prime_import(struct drm_device *dev,
}
drm_gem_private_object_init(dev, &obj->base, dma_buf->size);
- i915_gem_object_init(obj, &i915_gem_object_dmabuf_ops);
+ i915_gem_object_init(obj, &i915_gem_object_dmabuf_ops, &lock_class);
obj->base.import_attach = attach;
obj->base.resv = dma_buf->resv;
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_domain.c b/drivers/gpu/drm/i915/gem/i915_gem_domain.c
index 2e3ce2a69653..0cc40e77bbd2 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_domain.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_domain.c
@@ -12,6 +12,8 @@
#include "i915_gem_ioctls.h"
#include "i915_gem_object.h"
#include "i915_vma.h"
+#include "i915_gem_lmem.h"
+#include "i915_gem_mman.h"
static void __i915_gem_object_flush_for_display(struct drm_i915_gem_object *obj)
{
@@ -27,7 +29,7 @@ static void __i915_gem_object_flush_for_display(struct drm_i915_gem_object *obj)
void i915_gem_object_flush_if_display(struct drm_i915_gem_object *obj)
{
- if (!READ_ONCE(obj->pin_global))
+ if (!i915_gem_object_is_framebuffer(obj))
return;
i915_gem_object_lock(obj);
@@ -148,9 +150,17 @@ i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write)
GEM_BUG_ON((obj->write_domain & ~I915_GEM_DOMAIN_GTT) != 0);
obj->read_domains |= I915_GEM_DOMAIN_GTT;
if (write) {
+ struct i915_vma *vma;
+
obj->read_domains = I915_GEM_DOMAIN_GTT;
obj->write_domain = I915_GEM_DOMAIN_GTT;
obj->mm.dirty = true;
+
+ spin_lock(&obj->vma.lock);
+ for_each_ggtt_vma(vma, obj)
+ if (i915_vma_is_bound(vma, I915_VMA_GLOBAL_BIND))
+ i915_vma_set_ggtt_write(vma);
+ spin_unlock(&obj->vma.lock);
}
i915_gem_object_unpin_pages(obj);
@@ -175,112 +185,34 @@ i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write)
int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj,
enum i915_cache_level cache_level)
{
- struct i915_vma *vma;
int ret;
- assert_object_held(obj);
-
if (obj->cache_level == cache_level)
return 0;
- /* Inspect the list of currently bound VMA and unbind any that would
- * be invalid given the new cache-level. This is principally to
- * catch the issue of the CS prefetch crossing page boundaries and
- * reading an invalid PTE on older architectures.
- */
-restart:
- list_for_each_entry(vma, &obj->vma.list, obj_link) {
- if (!drm_mm_node_allocated(&vma->node))
- continue;
-
- if (i915_vma_is_pinned(vma)) {
- DRM_DEBUG("can not change the cache level of pinned objects\n");
- return -EBUSY;
- }
-
- if (!i915_vma_is_closed(vma) &&
- i915_gem_valid_gtt_space(vma, cache_level))
- continue;
-
- ret = i915_vma_unbind(vma);
- if (ret)
- return ret;
+ ret = i915_gem_object_wait(obj,
+ I915_WAIT_INTERRUPTIBLE |
+ I915_WAIT_ALL,
+ MAX_SCHEDULE_TIMEOUT);
+ if (ret)
+ return ret;
- /* As unbinding may affect other elements in the
- * obj->vma_list (due to side-effects from retiring
- * an active vma), play safe and restart the iterator.
- */
- goto restart;
- }
+ ret = i915_gem_object_lock_interruptible(obj);
+ if (ret)
+ return ret;
- /* We can reuse the existing drm_mm nodes but need to change the
- * cache-level on the PTE. We could simply unbind them all and
- * rebind with the correct cache-level on next use. However since
- * we already have a valid slot, dma mapping, pages etc, we may as
- * rewrite the PTE in the belief that doing so tramples upon less
- * state and so involves less work.
- */
- if (atomic_read(&obj->bind_count)) {
- /* Before we change the PTE, the GPU must not be accessing it.
- * If we wait upon the object, we know that all the bound
- * VMA are no longer active.
- */
- ret = i915_gem_object_wait(obj,
- I915_WAIT_INTERRUPTIBLE |
- I915_WAIT_ALL,
- MAX_SCHEDULE_TIMEOUT);
- if (ret)
- return ret;
-
- if (!HAS_LLC(to_i915(obj->base.dev)) &&
- cache_level != I915_CACHE_NONE) {
- /* Access to snoopable pages through the GTT is
- * incoherent and on some machines causes a hard
- * lockup. Relinquish the CPU mmaping to force
- * userspace to refault in the pages and we can
- * then double check if the GTT mapping is still
- * valid for that pointer access.
- */
- i915_gem_object_release_mmap(obj);
-
- /* As we no longer need a fence for GTT access,
- * we can relinquish it now (and so prevent having
- * to steal a fence from someone else on the next
- * fence request). Note GPU activity would have
- * dropped the fence as all snoopable access is
- * supposed to be linear.
- */
- for_each_ggtt_vma(vma, obj) {
- ret = i915_vma_put_fence(vma);
- if (ret)
- return ret;
- }
- } else {
- /* We either have incoherent backing store and
- * so no GTT access or the architecture is fully
- * coherent. In such cases, existing GTT mmaps
- * ignore the cache bit in the PTE and we can
- * rewrite it without confusing the GPU or having
- * to force userspace to fault back in its mmaps.
- */
- }
-
- list_for_each_entry(vma, &obj->vma.list, obj_link) {
- if (!drm_mm_node_allocated(&vma->node))
- continue;
-
- ret = i915_vma_bind(vma, cache_level, PIN_UPDATE);
- if (ret)
- return ret;
- }
+ /* Always invalidate stale cachelines */
+ if (obj->cache_level != cache_level) {
+ i915_gem_object_set_cache_coherency(obj, cache_level);
+ obj->cache_dirty = true;
}
- list_for_each_entry(vma, &obj->vma.list, obj_link)
- vma->node.color = cache_level;
- i915_gem_object_set_cache_coherency(obj, cache_level);
- obj->cache_dirty = true; /* Always invalidate stale cachelines */
+ i915_gem_object_unlock(obj);
- return 0;
+ /* The cache-level will be applied when each vma is rebound. */
+ return i915_gem_object_unbind(obj,
+ I915_GEM_OBJECT_UNBIND_ACTIVE |
+ I915_GEM_OBJECT_UNBIND_BARRIER);
}
int i915_gem_get_caching_ioctl(struct drm_device *dev, void *data,
@@ -361,25 +293,7 @@ int i915_gem_set_caching_ioctl(struct drm_device *dev, void *data,
goto out;
}
- if (obj->cache_level == level)
- goto out;
-
- ret = i915_gem_object_wait(obj,
- I915_WAIT_INTERRUPTIBLE,
- MAX_SCHEDULE_TIMEOUT);
- if (ret)
- goto out;
-
- ret = mutex_lock_interruptible(&i915->drm.struct_mutex);
- if (ret)
- goto out;
-
- ret = i915_gem_object_lock_interruptible(obj);
- if (ret == 0) {
- ret = i915_gem_object_set_cache_level(obj, level);
- i915_gem_object_unlock(obj);
- }
- mutex_unlock(&i915->drm.struct_mutex);
+ ret = i915_gem_object_set_cache_level(obj, level);
out:
i915_gem_object_put(obj);
@@ -398,17 +312,16 @@ i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj,
const struct i915_ggtt_view *view,
unsigned int flags)
{
+ struct drm_i915_private *i915 = to_i915(obj->base.dev);
struct i915_vma *vma;
int ret;
- assert_object_held(obj);
-
- /* Mark the global pin early so that we account for the
- * display coherency whilst setting up the cache domains.
- */
- obj->pin_global++;
+ /* Frame buffer must be in LMEM (no migration yet) */
+ if (HAS_LMEM(i915) && !i915_gem_object_is_lmem(obj))
+ return ERR_PTR(-EINVAL);
- /* The display engine is not coherent with the LLC cache on gen6. As
+ /*
+ * The display engine is not coherent with the LLC cache on gen6. As
* a result, we make sure that the pinning that is about to occur is
* done with uncached PTEs. This is lowest common denominator for all
* chipsets.
@@ -418,14 +331,13 @@ i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj,
* with that bit in the PTE to main memory with just one PIPE_CONTROL.
*/
ret = i915_gem_object_set_cache_level(obj,
- HAS_WT(to_i915(obj->base.dev)) ?
+ HAS_WT(i915) ?
I915_CACHE_WT : I915_CACHE_NONE);
- if (ret) {
- vma = ERR_PTR(ret);
- goto err_unpin_global;
- }
+ if (ret)
+ return ERR_PTR(ret);
- /* As the user may map the buffer once pinned in the display plane
+ /*
+ * As the user may map the buffer once pinned in the display plane
* (e.g. libkms for the bootup splash), we have to ensure that we
* always use map_and_fenceable for all scanout buffers. However,
* it may simply be too big to fit into mappable, in which case
@@ -442,21 +354,12 @@ i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj,
if (IS_ERR(vma))
vma = i915_gem_object_ggtt_pin(obj, view, 0, alignment, flags);
if (IS_ERR(vma))
- goto err_unpin_global;
+ return vma;
vma->display_alignment = max_t(u64, vma->display_alignment, alignment);
- __i915_gem_object_flush_for_display(obj);
-
- /* It should now be out of any other write domains, and we can update
- * the domain values for our changes.
- */
- obj->read_domains |= I915_GEM_DOMAIN_GTT;
-
- return vma;
+ i915_gem_object_flush_if_display(obj);
-err_unpin_global:
- obj->pin_global--;
return vma;
}
@@ -466,14 +369,19 @@ static void i915_gem_object_bump_inactive_ggtt(struct drm_i915_gem_object *obj)
struct i915_vma *vma;
GEM_BUG_ON(!i915_gem_object_has_pinned_pages(obj));
+ if (!atomic_read(&obj->bind_count))
+ return;
mutex_lock(&i915->ggtt.vm.mutex);
+ spin_lock(&obj->vma.lock);
for_each_ggtt_vma(vma, obj) {
if (!drm_mm_node_allocated(&vma->node))
continue;
+ GEM_BUG_ON(vma->vm != &i915->ggtt.vm);
list_move_tail(&vma->vm_link, &vma->vm->bound_list);
}
+ spin_unlock(&obj->vma.lock);
mutex_unlock(&i915->ggtt.vm.mutex);
if (i915_gem_object_is_shrinkable(obj)) {
@@ -481,7 +389,8 @@ static void i915_gem_object_bump_inactive_ggtt(struct drm_i915_gem_object *obj)
spin_lock_irqsave(&i915->mm.obj_lock, flags);
- if (obj->mm.madv == I915_MADV_WILLNEED)
+ if (obj->mm.madv == I915_MADV_WILLNEED &&
+ !atomic_read(&obj->mm.shrink_pin))
list_move_tail(&obj->mm.link, &i915->mm.shrink_list);
spin_unlock_irqrestore(&i915->mm.obj_lock, flags);
@@ -495,12 +404,6 @@ i915_gem_object_unpin_from_display_plane(struct i915_vma *vma)
assert_object_held(obj);
- if (WARN_ON(obj->pin_global == 0))
- return;
-
- if (--obj->pin_global == 0)
- vma->display_alignment = I915_GTT_MIN_ALIGNMENT;
-
/* Bump the LRU to try and avoid premature eviction whilst flipping */
i915_gem_object_bump_inactive_ggtt(obj);
@@ -551,13 +454,6 @@ i915_gem_object_set_to_cpu_domain(struct drm_i915_gem_object *obj, bool write)
return 0;
}
-static inline enum fb_op_origin
-fb_write_origin(struct drm_i915_gem_object *obj, unsigned int domain)
-{
- return (domain == I915_GEM_DOMAIN_GTT ?
- obj->frontbuffer_ggtt_origin : ORIGIN_CPU);
-}
-
/**
* Called when user space prepares to use an object with the CPU, either
* through the mmap ioctl's mapping or a GTT mapping.
@@ -661,9 +557,8 @@ i915_gem_set_domain_ioctl(struct drm_device *dev, void *data,
i915_gem_object_unlock(obj);
- if (write_domain != 0)
- intel_fb_obj_invalidate(obj,
- fb_write_origin(obj, write_domain));
+ if (write_domain)
+ i915_gem_object_invalidate_frontbuffer(obj, ORIGIN_CPU);
out_unpin:
i915_gem_object_unpin_pages(obj);
@@ -783,7 +678,7 @@ int i915_gem_object_prepare_write(struct drm_i915_gem_object *obj,
}
out:
- intel_fb_obj_invalidate(obj, ORIGIN_CPU);
+ i915_gem_object_invalidate_frontbuffer(obj, ORIGIN_CPU);
obj->mm.dirty = true;
/* return with the pages pinned */
return 0;
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
index 41dab9ea33cd..60c984e10c4a 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
@@ -5,7 +5,7 @@
*/
#include <linux/intel-iommu.h>
-#include <linux/reservation.h>
+#include <linux/dma-resv.h>
#include <linux/sync_file.h>
#include <linux/uaccess.h>
@@ -16,13 +16,17 @@
#include "gem/i915_gem_ioctls.h"
#include "gt/intel_context.h"
+#include "gt/intel_engine_pool.h"
+#include "gt/intel_gt.h"
#include "gt/intel_gt_pm.h"
+#include "gt/intel_ring.h"
-#include "i915_gem_ioctls.h"
+#include "i915_drv.h"
#include "i915_gem_clflush.h"
#include "i915_gem_context.h"
+#include "i915_gem_ioctls.h"
+#include "i915_sw_fence_work.h"
#include "i915_trace.h"
-#include "intel_drv.h"
enum {
FORCE_CPU_RELOC = 1,
@@ -222,10 +226,10 @@ struct i915_execbuffer {
struct intel_engine_cs *engine; /** engine to queue the request to */
struct intel_context *context; /* logical state for the request */
struct i915_gem_context *gem_context; /** caller's context */
- struct i915_address_space *vm; /** GTT and vma for the request */
struct i915_request *request; /** our request to build */
struct i915_vma *batch; /** identity of the batch obj/vma */
+ struct i915_vma *trampoline; /** trampoline used for chaining */
/** actual size of execobj[] as we may extend it for the cmdparser */
unsigned int buffer_count;
@@ -274,28 +278,11 @@ struct i915_execbuffer {
#define exec_entry(EB, VMA) (&(EB)->exec[(VMA)->exec_flags - (EB)->flags])
-/*
- * Used to convert any address to canonical form.
- * Starting from gen8, some commands (e.g. STATE_BASE_ADDRESS,
- * MI_LOAD_REGISTER_MEM and others, see Broadwell PRM Vol2a) require the
- * addresses to be in a canonical form:
- * "GraphicsAddress[63:48] are ignored by the HW and assumed to be in correct
- * canonical form [63:48] == [47]."
- */
-#define GEN8_HIGH_ADDRESS_BIT 47
-static inline u64 gen8_canonical_addr(u64 address)
-{
- return sign_extend64(address, GEN8_HIGH_ADDRESS_BIT);
-}
-
-static inline u64 gen8_noncanonical_addr(u64 address)
-{
- return address & GENMASK_ULL(GEN8_HIGH_ADDRESS_BIT, 0);
-}
-
static inline bool eb_use_cmdparser(const struct i915_execbuffer *eb)
{
- return intel_engine_needs_cmd_parser(eb->engine) && eb->batch_len;
+ return intel_engine_requires_cmd_parser(eb->engine) ||
+ (intel_engine_using_cmd_parser(eb->engine) &&
+ eb->args->batch_len);
}
static int eb_create(struct i915_execbuffer *eb)
@@ -696,7 +683,9 @@ static int eb_reserve(struct i915_execbuffer *eb)
case 1:
/* Too fragmented, unbind everything and retry */
- err = i915_gem_evict_vm(eb->vm);
+ mutex_lock(&eb->context->vm->mutex);
+ err = i915_gem_evict_vm(eb->context->vm);
+ mutex_unlock(&eb->context->vm->mutex);
if (err)
return err;
break;
@@ -724,12 +713,8 @@ static int eb_select_context(struct i915_execbuffer *eb)
return -ENOENT;
eb->gem_context = ctx;
- if (ctx->vm) {
- eb->vm = ctx->vm;
+ if (rcu_access_pointer(ctx->vm))
eb->invalid_flags |= EXEC_OBJECT_NEEDS_GTT;
- } else {
- eb->vm = &eb->i915->ggtt.vm;
- }
eb->context_flags = 0;
if (test_bit(UCONTEXT_NO_ZEROMAP, &ctx->user_flags))
@@ -738,63 +723,6 @@ static int eb_select_context(struct i915_execbuffer *eb)
return 0;
}
-static struct i915_request *__eb_wait_for_ring(struct intel_ring *ring)
-{
- struct i915_request *rq;
-
- /*
- * Completely unscientific finger-in-the-air estimates for suitable
- * maximum user request size (to avoid blocking) and then backoff.
- */
- if (intel_ring_update_space(ring) >= PAGE_SIZE)
- return NULL;
-
- /*
- * Find a request that after waiting upon, there will be at least half
- * the ring available. The hysteresis allows us to compete for the
- * shared ring and should mean that we sleep less often prior to
- * claiming our resources, but not so long that the ring completely
- * drains before we can submit our next request.
- */
- list_for_each_entry(rq, &ring->request_list, ring_link) {
- if (__intel_ring_space(rq->postfix,
- ring->emit, ring->size) > ring->size / 2)
- break;
- }
- if (&rq->ring_link == &ring->request_list)
- return NULL; /* weird, we will check again later for real */
-
- return i915_request_get(rq);
-}
-
-static int eb_wait_for_ring(const struct i915_execbuffer *eb)
-{
- struct i915_request *rq;
- int ret = 0;
-
- /*
- * Apply a light amount of backpressure to prevent excessive hogs
- * from blocking waiting for space whilst holding struct_mutex and
- * keeping all of their resources pinned.
- */
-
- rq = __eb_wait_for_ring(eb->context->ring);
- if (rq) {
- mutex_unlock(&eb->i915->drm.struct_mutex);
-
- if (i915_request_wait(rq,
- I915_WAIT_INTERRUPTIBLE,
- MAX_SCHEDULE_TIMEOUT) < 0)
- ret = -EINTR;
-
- i915_request_put(rq);
-
- mutex_lock(&eb->i915->drm.struct_mutex);
- }
-
- return ret;
-}
-
static int eb_lookup_vmas(struct i915_execbuffer *eb)
{
struct radix_tree_root *handles_vma = &eb->gem_context->handles_vma;
@@ -802,9 +730,6 @@ static int eb_lookup_vmas(struct i915_execbuffer *eb)
unsigned int i, batch;
int err;
- if (unlikely(i915_gem_context_is_banned(eb->gem_context)))
- return -EIO;
-
INIT_LIST_HEAD(&eb->relocs);
INIT_LIST_HEAD(&eb->unbound);
@@ -831,7 +756,7 @@ static int eb_lookup_vmas(struct i915_execbuffer *eb)
goto err_vma;
}
- vma = i915_vma_instance(obj, eb->vm, NULL);
+ vma = i915_vma_instance(obj, eb->context->vm, NULL);
if (IS_ERR(vma)) {
err = PTR_ERR(vma);
goto err_obj;
@@ -962,7 +887,7 @@ static void reloc_cache_init(struct reloc_cache *cache,
cache->use_64bit_reloc = HAS_64BIT_RELOC(i915);
cache->has_fence = cache->gen < 4;
cache->needs_unfenced = INTEL_INFO(i915)->unfenced_needs_alignment;
- cache->node.allocated = false;
+ cache->node.flags = 0;
cache->rq = NULL;
cache->rq_size = 0;
}
@@ -994,7 +919,7 @@ static void reloc_gpu_flush(struct reloc_cache *cache)
__i915_gem_object_flush_map(cache->rq->batch->obj, 0, cache->rq_size);
i915_gem_object_unpin_map(cache->rq->batch->obj);
- i915_gem_chipset_flush(cache->rq->i915);
+ intel_gt_chipset_flush(cache->rq->engine->gt);
i915_request_add(cache->rq);
cache->rq = NULL;
@@ -1018,15 +943,18 @@ static void reloc_cache_reset(struct reloc_cache *cache)
kunmap_atomic(vaddr);
i915_gem_object_finish_access((struct drm_i915_gem_object *)cache->node.mm);
} else {
- wmb();
+ struct i915_ggtt *ggtt = cache_to_ggtt(cache);
+
+ intel_gt_flush_ggtt_writes(ggtt->vm.gt);
io_mapping_unmap_atomic((void __iomem *)vaddr);
- if (cache->node.allocated) {
- struct i915_ggtt *ggtt = cache_to_ggtt(cache);
+ if (drm_mm_node_allocated(&cache->node)) {
ggtt->vm.clear_range(&ggtt->vm,
cache->node.start,
cache->node.size);
+ mutex_lock(&ggtt->vm.mutex);
drm_mm_remove_node(&cache->node);
+ mutex_unlock(&ggtt->vm.mutex);
} else {
i915_vma_unpin((struct i915_vma *)cache->node.mm);
}
@@ -1077,11 +1005,15 @@ static void *reloc_iomap(struct drm_i915_gem_object *obj,
void *vaddr;
if (cache->vaddr) {
+ intel_gt_flush_ggtt_writes(ggtt->vm.gt);
io_mapping_unmap_atomic((void __force __iomem *) unmask_page(cache->vaddr));
} else {
struct i915_vma *vma;
int err;
+ if (i915_gem_object_is_tiled(obj))
+ return ERR_PTR(-EINVAL);
+
if (use_cpu_reloc(cache, obj))
return NULL;
@@ -1093,32 +1025,27 @@ static void *reloc_iomap(struct drm_i915_gem_object *obj,
vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0,
PIN_MAPPABLE |
- PIN_NONBLOCK |
- PIN_NONFAULT);
+ PIN_NONBLOCK /* NOWARN */ |
+ PIN_NOEVICT);
if (IS_ERR(vma)) {
memset(&cache->node, 0, sizeof(cache->node));
+ mutex_lock(&ggtt->vm.mutex);
err = drm_mm_insert_node_in_range
(&ggtt->vm.mm, &cache->node,
PAGE_SIZE, 0, I915_COLOR_UNEVICTABLE,
0, ggtt->mappable_end,
DRM_MM_INSERT_LOW);
+ mutex_unlock(&ggtt->vm.mutex);
if (err) /* no inactive aperture space, use cpu reloc */
return NULL;
} else {
- err = i915_vma_put_fence(vma);
- if (err) {
- i915_vma_unpin(vma);
- return ERR_PTR(err);
- }
-
cache->node.start = vma->node.start;
cache->node.mm = (void *)vma;
}
}
offset = cache->node.start;
- if (cache->node.allocated) {
- wmb();
+ if (drm_mm_node_allocated(&cache->node)) {
ggtt->vm.insert_page(&ggtt->vm,
i915_gem_object_get_dma_address(obj, page),
offset, I915_CACHE_NONE, 0);
@@ -1201,25 +1128,26 @@ static int __reloc_gpu_alloc(struct i915_execbuffer *eb,
unsigned int len)
{
struct reloc_cache *cache = &eb->reloc_cache;
- struct drm_i915_gem_object *obj;
+ struct intel_engine_pool_node *pool;
struct i915_request *rq;
struct i915_vma *batch;
u32 *cmd;
int err;
- obj = i915_gem_batch_pool_get(&eb->engine->batch_pool, PAGE_SIZE);
- if (IS_ERR(obj))
- return PTR_ERR(obj);
+ pool = intel_engine_get_pool(eb->engine, PAGE_SIZE);
+ if (IS_ERR(pool))
+ return PTR_ERR(pool);
- cmd = i915_gem_object_pin_map(obj,
+ cmd = i915_gem_object_pin_map(pool->obj,
cache->has_llc ?
I915_MAP_FORCE_WB :
I915_MAP_FORCE_WC);
- i915_gem_object_unpin_pages(obj);
- if (IS_ERR(cmd))
- return PTR_ERR(cmd);
+ if (IS_ERR(cmd)) {
+ err = PTR_ERR(cmd);
+ goto out_pool;
+ }
- batch = i915_vma_instance(obj, vma->vm, NULL);
+ batch = i915_vma_instance(pool->obj, vma->vm, NULL);
if (IS_ERR(batch)) {
err = PTR_ERR(batch);
goto err_unmap;
@@ -1235,6 +1163,10 @@ static int __reloc_gpu_alloc(struct i915_execbuffer *eb,
goto err_unpin;
}
+ err = intel_engine_pool_mark_active(pool, rq);
+ if (err)
+ goto err_request;
+
err = reloc_move_to_gpu(rq, vma);
if (err)
goto err_request;
@@ -1246,8 +1178,9 @@ static int __reloc_gpu_alloc(struct i915_execbuffer *eb,
goto skip_request;
i915_vma_lock(batch);
- GEM_BUG_ON(!reservation_object_test_signaled_rcu(batch->resv, true));
- err = i915_vma_move_to_active(batch, rq, 0);
+ err = i915_request_await_object(rq, batch->obj, false);
+ if (err == 0)
+ err = i915_vma_move_to_active(batch, rq, 0);
i915_vma_unlock(batch);
if (err)
goto skip_request;
@@ -1260,7 +1193,7 @@ static int __reloc_gpu_alloc(struct i915_execbuffer *eb,
cache->rq_size = 0;
/* Return with batch mapping (cmd) still pinned */
- return 0;
+ goto out_pool;
skip_request:
i915_request_skip(rq, err);
@@ -1269,7 +1202,9 @@ err_request:
err_unpin:
i915_vma_unpin(batch);
err_unmap:
- i915_gem_object_unpin_map(obj);
+ i915_gem_object_unpin_map(pool->obj);
+out_pool:
+ intel_engine_pool_put(pool);
return err;
}
@@ -1286,10 +1221,6 @@ static u32 *reloc_gpu(struct i915_execbuffer *eb,
if (unlikely(!cache->rq)) {
int err;
- /* If we need to copy for the cmdparser, we will stall anyway */
- if (eb_use_cmdparser(eb))
- return ERR_PTR(-EWOULDBLOCK);
-
if (!intel_engine_can_store_dword(eb->engine))
return ERR_PTR(-ENODEV);
@@ -1317,7 +1248,7 @@ relocate_entry(struct i915_vma *vma,
if (!eb->reloc_cache.vaddr &&
(DBG_FORCE_RELOC == FORCE_GPU_RELOC ||
- !reservation_object_test_signaled_rcu(vma->resv, true))) {
+ !dma_resv_test_signaled_rcu(vma->resv, true))) {
const unsigned int gen = eb->reloc_cache.gen;
unsigned int len;
u32 *batch;
@@ -1442,7 +1373,7 @@ eb_relocate_entry(struct i915_execbuffer *eb,
if (reloc->write_domain == I915_GEM_DOMAIN_INSTRUCTION &&
IS_GEN(eb->i915, 6)) {
err = i915_vma_bind(target, target->obj->cache_level,
- PIN_GLOBAL);
+ PIN_GLOBAL, NULL);
if (WARN_ONCE(err,
"Unexpected failure to bind target VMA!"))
return err;
@@ -1952,7 +1883,7 @@ static int eb_move_to_gpu(struct i915_execbuffer *eb)
eb->exec = NULL;
/* Unconditionally flush any chipset caches (for streaming writes). */
- i915_gem_chipset_flush(eb->i915);
+ intel_gt_chipset_flush(eb->engine->gt);
return 0;
err_skip:
@@ -1960,15 +1891,15 @@ err_skip:
return err;
}
-static bool i915_gem_check_execbuffer(struct drm_i915_gem_execbuffer2 *exec)
+static int i915_gem_check_execbuffer(struct drm_i915_gem_execbuffer2 *exec)
{
if (exec->flags & __I915_EXEC_ILLEGAL_FLAGS)
- return false;
+ return -EINVAL;
/* Kernel clipping was a DRI1 misfeature */
if (!(exec->flags & I915_EXEC_FENCE_ARRAY)) {
if (exec->num_cliprects || exec->cliprects_ptr)
- return false;
+ return -EINVAL;
}
if (exec->DR4 == 0xffffffff) {
@@ -1976,12 +1907,12 @@ static bool i915_gem_check_execbuffer(struct drm_i915_gem_execbuffer2 *exec)
exec->DR4 = 0;
}
if (exec->DR1 || exec->DR4)
- return false;
+ return -EINVAL;
if ((exec->batch_start_offset | exec->batch_len) & 0x7)
- return false;
+ return -EINVAL;
- return true;
+ return 0;
}
static int i915_reset_gen7_sol_offsets(struct i915_request *rq)
@@ -2009,51 +1940,227 @@ static int i915_reset_gen7_sol_offsets(struct i915_request *rq)
return 0;
}
-static struct i915_vma *eb_parse(struct i915_execbuffer *eb, bool is_master)
+static struct i915_vma *
+shadow_batch_pin(struct drm_i915_gem_object *obj,
+ struct i915_address_space *vm,
+ unsigned int flags)
{
- struct drm_i915_gem_object *shadow_batch_obj;
struct i915_vma *vma;
int err;
- shadow_batch_obj = i915_gem_batch_pool_get(&eb->engine->batch_pool,
- PAGE_ALIGN(eb->batch_len));
- if (IS_ERR(shadow_batch_obj))
- return ERR_CAST(shadow_batch_obj);
-
- err = intel_engine_cmd_parser(eb->engine,
- eb->batch->obj,
- shadow_batch_obj,
- eb->batch_start_offset,
- eb->batch_len,
- is_master);
- if (err) {
- if (err == -EACCES) /* unhandled chained batch */
- vma = NULL;
- else
- vma = ERR_PTR(err);
- goto out;
+ vma = i915_vma_instance(obj, vm, NULL);
+ if (IS_ERR(vma))
+ return vma;
+
+ err = i915_vma_pin(vma, 0, 0, flags);
+ if (err)
+ return ERR_PTR(err);
+
+ return vma;
+}
+
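+/* Deferred command-parser job, run asynchronously as a dma-fence work item */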
+struct eb_parse_work {
+ struct dma_fence_work base;
+ struct intel_engine_cs *engine;
+ struct i915_vma *batch;
+ struct i915_vma *shadow;
+ struct i915_vma *trampoline;
+ unsigned int batch_offset;
+ unsigned int batch_length;
+};
+
+static int __eb_parse(struct dma_fence_work *work)
+{
+ struct eb_parse_work *pw = container_of(work, typeof(*pw), base);
+
+ return intel_engine_cmd_parser(pw->engine,
+ pw->batch,
+ pw->batch_offset,
+ pw->batch_length,
+ pw->shadow,
+ pw->trampoline);
+}
+
+static void __eb_parse_release(struct dma_fence_work *work)
+{
+ struct eb_parse_work *pw = container_of(work, typeof(*pw), base);
+
+ if (pw->trampoline)
+ i915_active_release(&pw->trampoline->active);
+ i915_active_release(&pw->shadow->active);
+ i915_active_release(&pw->batch->active);
+}
+
+static const struct dma_fence_work_ops eb_parse_ops = {
+ .name = "eb_parse",
+ .work = __eb_parse,
+ .release = __eb_parse_release,
+};
+
+static int eb_parse_pipeline(struct i915_execbuffer *eb,
+ struct i915_vma *shadow,
+ struct i915_vma *trampoline)
+{
+ struct eb_parse_work *pw;
+ int err;
+
+ pw = kzalloc(sizeof(*pw), GFP_KERNEL);
+ if (!pw)
+ return -ENOMEM;
+
+ err = i915_active_acquire(&eb->batch->active);
+ if (err)
+ goto err_free;
+
+ err = i915_active_acquire(&shadow->active);
+ if (err)
+ goto err_batch;
+
+ if (trampoline) {
+ err = i915_active_acquire(&trampoline->active);
+ if (err)
+ goto err_shadow;
}
- vma = i915_gem_object_ggtt_pin(shadow_batch_obj, NULL, 0, 0, 0);
- if (IS_ERR(vma))
- goto out;
+ dma_fence_work_init(&pw->base, &eb_parse_ops);
+
+ pw->engine = eb->engine;
+ pw->batch = eb->batch;
+ pw->batch_offset = eb->batch_start_offset;
+ pw->batch_length = eb->batch_len;
+ pw->shadow = shadow;
+ pw->trampoline = trampoline;
+
+ err = dma_resv_lock_interruptible(pw->batch->resv, NULL);
+ if (err)
+ goto err_trampoline;
+
+ err = dma_resv_reserve_shared(pw->batch->resv, 1);
+ if (err)
+ goto err_batch_unlock;
+
+ /* Wait for all writes (and relocs) into the batch to complete */
+ err = i915_sw_fence_await_reservation(&pw->base.chain,
+ pw->batch->resv, NULL, false,
+ 0, I915_FENCE_GFP);
+ if (err < 0)
+ goto err_batch_unlock;
+
+ /* Keep the batch alive and unwritten as we parse */
+ dma_resv_add_shared_fence(pw->batch->resv, &pw->base.dma);
+
+ dma_resv_unlock(pw->batch->resv);
+
+ /* Force execution to wait for completion of the parser */
+ dma_resv_lock(shadow->resv, NULL);
+ dma_resv_add_excl_fence(shadow->resv, &pw->base.dma);
+ dma_resv_unlock(shadow->resv);
+
+ dma_fence_work_commit(&pw->base);
+ return 0;
+
+err_batch_unlock:
+ dma_resv_unlock(pw->batch->resv);
+err_trampoline:
+ if (trampoline)
+ i915_active_release(&trampoline->active);
+err_shadow:
+ i915_active_release(&shadow->active);
+err_batch:
+ i915_active_release(&eb->batch->active);
+err_free:
+ kfree(pw);
+ return err;
+}
+
+static int eb_parse(struct i915_execbuffer *eb)
+{
+ struct intel_engine_pool_node *pool;
+ struct i915_vma *shadow, *trampoline;
+ unsigned int len;
+ int err;
+
+ if (!eb_use_cmdparser(eb))
+ return 0;
+
+ len = eb->batch_len;
+ if (!CMDPARSER_USES_GGTT(eb->i915)) {
+ /*
+ * ppGTT backed shadow buffers must be mapped RO, to prevent
+ * post-scan tampering
+ */
+ if (!eb->context->vm->has_read_only) {
+ DRM_DEBUG("Cannot prevent post-scan tampering without RO capable vm\n");
+ return -EINVAL;
+ }
+ } else {
+ len += I915_CMD_PARSER_TRAMPOLINE_SIZE;
+ }
+
+ pool = intel_engine_get_pool(eb->engine, len);
+ if (IS_ERR(pool))
+ return PTR_ERR(pool);
+
+ shadow = shadow_batch_pin(pool->obj, eb->context->vm, PIN_USER);
+ if (IS_ERR(shadow)) {
+ err = PTR_ERR(shadow);
+ goto err;
+ }
+ i915_gem_object_set_readonly(shadow->obj);
+
+ trampoline = NULL;
+ if (CMDPARSER_USES_GGTT(eb->i915)) {
+ trampoline = shadow;
+
+ shadow = shadow_batch_pin(pool->obj,
+ &eb->engine->gt->ggtt->vm,
+ PIN_GLOBAL);
+ if (IS_ERR(shadow)) {
+ err = PTR_ERR(shadow);
+ shadow = trampoline;
+ goto err_shadow;
+ }
+
+ eb->batch_flags |= I915_DISPATCH_SECURE;
+ }
+
+ err = eb_parse_pipeline(eb, shadow, trampoline);
+ if (err)
+ goto err_trampoline;
- eb->vma[eb->buffer_count] = i915_vma_get(vma);
+ eb->vma[eb->buffer_count] = i915_vma_get(shadow);
eb->flags[eb->buffer_count] =
__EXEC_OBJECT_HAS_PIN | __EXEC_OBJECT_HAS_REF;
- vma->exec_flags = &eb->flags[eb->buffer_count];
+ shadow->exec_flags = &eb->flags[eb->buffer_count];
eb->buffer_count++;
-out:
- i915_gem_object_unpin_pages(shadow_batch_obj);
- return vma;
+ eb->trampoline = trampoline;
+ eb->batch_start_offset = 0;
+ eb->batch = shadow;
+
+ shadow->private = pool;
+ return 0;
+
+err_trampoline:
+ if (trampoline)
+ i915_vma_unpin(trampoline);
+err_shadow:
+ i915_vma_unpin(shadow);
+err:
+ intel_engine_pool_put(pool);
+ return err;
}
static void
add_to_client(struct i915_request *rq, struct drm_file *file)
{
- rq->file_priv = file->driver_priv;
- list_add_tail(&rq->client_link, &rq->file_priv->mm.request_list);
+ struct drm_i915_file_private *file_priv = file->driver_priv;
+
+ rq->file_priv = file_priv;
+
+ spin_lock(&file_priv->mm.lock);
+ list_add_tail(&rq->client_link, &file_priv->mm.request_list);
+ spin_unlock(&file_priv->mm.lock);
}
static int eb_submit(struct i915_execbuffer *eb)
@@ -2090,9 +2197,28 @@ static int eb_submit(struct i915_execbuffer *eb)
if (err)
return err;
+ if (eb->trampoline) {
+ GEM_BUG_ON(eb->batch_start_offset);
+ err = eb->engine->emit_bb_start(eb->request,
+ eb->trampoline->node.start +
+ eb->batch_len,
+ 0, 0);
+ if (err)
+ return err;
+ }
+
+ if (intel_context_nopreempt(eb->context))
+ __set_bit(I915_FENCE_FLAG_NOPREEMPT, &eb->request->fence.flags);
+
return 0;
}
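+/* Number of video command streamer (VCS/BSD) engines present on this device */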
+static int num_vcs_engines(const struct drm_i915_private *i915)
+{
+ return hweight64(INTEL_INFO(i915)->engine_mask &
+ GENMASK_ULL(VCS0 + I915_MAX_VCS - 1, VCS0));
+}
+
/*
* Find one BSD ring to dispatch the corresponding BSD command.
* The engine index is returned.
@@ -2105,8 +2231,8 @@ gen8_dispatch_bsd_engine(struct drm_i915_private *dev_priv,
/* Check whether the file_priv has already selected one ring. */
if ((int)file_priv->bsd_engine < 0)
- file_priv->bsd_engine = atomic_fetch_xor(1,
- &dev_priv->mm.bsd_engine_dispatch_index);
+ file_priv->bsd_engine =
+ get_random_int() % num_vcs_engines(dev_priv);
return file_priv->bsd_engine;
}
@@ -2119,18 +2245,57 @@ static const enum intel_engine_id user_ring_map[] = {
[I915_EXEC_VEBOX] = VECS0
};
-static int eb_pin_context(struct i915_execbuffer *eb, struct intel_context *ce)
+static struct i915_request *eb_throttle(struct intel_context *ce)
+{
+ struct intel_ring *ring = ce->ring;
+ struct intel_timeline *tl = ce->timeline;
+ struct i915_request *rq;
+
+ /*
+ * Completely unscientific finger-in-the-air estimates for suitable
+ * maximum user request size (to avoid blocking) and then backoff.
+ */
+ if (intel_ring_update_space(ring) >= PAGE_SIZE)
+ return NULL;
+
+ /*
+ * Find a request such that, after waiting upon it, there is at least half
+ * the ring available. The hysteresis allows us to compete for the
+ * shared ring and should mean that we sleep less often prior to
+ * claiming our resources, but not so long that the ring completely
+ * drains before we can submit our next request.
+ */
+ list_for_each_entry(rq, &tl->requests, link) {
+ if (rq->ring != ring)
+ continue;
+
+ if (__intel_ring_space(rq->postfix,
+ ring->emit, ring->size) > ring->size / 2)
+ break;
+ }
+ if (&rq->link == &tl->requests)
+ return NULL; /* weird, we will check again later for real */
+
+ return i915_request_get(rq);
+}
+
+static int __eb_pin_engine(struct i915_execbuffer *eb, struct intel_context *ce)
{
+ struct intel_timeline *tl;
+ struct i915_request *rq;
int err;
/*
* ABI: Before userspace accesses the GPU (e.g. execbuffer), report
* EIO if the GPU is already wedged.
*/
- err = i915_terminally_wedged(eb->i915);
+ err = intel_gt_terminally_wedged(ce->engine->gt);
if (err)
return err;
+ if (unlikely(intel_context_is_banned(ce)))
+ return -EIO;
+
/*
* Pinning the contexts may generate requests in order to acquire
* GGTT space, so do this first before we reserve a seqno for
@@ -2140,14 +2305,60 @@ static int eb_pin_context(struct i915_execbuffer *eb, struct intel_context *ce)
if (err)
return err;
+ /*
+ * Take a local wakeref for preparing to dispatch the execbuf as
+ * we expect to access the hardware fairly frequently in the
+ * process, and require the engine to be kept awake between accesses.
+ * Upon dispatch, we acquire another prolonged wakeref that we hold
+ * until the timeline is idle, which in turn releases the wakeref
+ * taken on the engine, and the parent device.
+ */
+ tl = intel_context_timeline_lock(ce);
+ if (IS_ERR(tl)) {
+ err = PTR_ERR(tl);
+ goto err_unpin;
+ }
+
+ intel_context_enter(ce);
+ rq = eb_throttle(ce);
+
+ intel_context_timeline_unlock(tl);
+
+ if (rq) {
+ if (i915_request_wait(rq,
+ I915_WAIT_INTERRUPTIBLE,
+ MAX_SCHEDULE_TIMEOUT) < 0) {
+ i915_request_put(rq);
+ err = -EINTR;
+ goto err_exit;
+ }
+
+ i915_request_put(rq);
+ }
+
eb->engine = ce->engine;
eb->context = ce;
return 0;
+
+err_exit:
+ mutex_lock(&tl->mutex);
+ intel_context_exit(ce);
+ intel_context_timeline_unlock(tl);
+err_unpin:
+ intel_context_unpin(ce);
+ return err;
}
-static void eb_unpin_context(struct i915_execbuffer *eb)
+static void eb_unpin_engine(struct i915_execbuffer *eb)
{
- intel_context_unpin(eb->context);
+ struct intel_context *ce = eb->context;
+ struct intel_timeline *tl = ce->timeline;
+
+ mutex_lock(&tl->mutex);
+ intel_context_exit(ce);
+ mutex_unlock(&tl->mutex);
+
+ intel_context_unpin(ce);
}
static unsigned int
@@ -2165,7 +2376,7 @@ eb_select_legacy_ring(struct i915_execbuffer *eb,
return -1;
}
- if (user_ring_id == I915_EXEC_BSD && HAS_ENGINE(i915, VCS1)) {
+ if (user_ring_id == I915_EXEC_BSD && num_vcs_engines(i915) > 1) {
unsigned int bsd_idx = args->flags & I915_EXEC_BSD_MASK;
if (bsd_idx == I915_EXEC_BSD_DEFAULT) {
@@ -2192,9 +2403,9 @@ eb_select_legacy_ring(struct i915_execbuffer *eb,
}
static int
-eb_select_engine(struct i915_execbuffer *eb,
- struct drm_file *file,
- struct drm_i915_gem_execbuffer2 *args)
+eb_pin_engine(struct i915_execbuffer *eb,
+ struct drm_file *file,
+ struct drm_i915_gem_execbuffer2 *args)
{
struct intel_context *ce;
unsigned int idx;
@@ -2209,7 +2420,7 @@ eb_select_engine(struct i915_execbuffer *eb,
if (IS_ERR(ce))
return PTR_ERR(ce);
- err = eb_pin_context(eb, ce);
+ err = __eb_pin_engine(eb, ce);
intel_context_put(ce);
return err;
@@ -2351,6 +2562,7 @@ i915_gem_do_execbuffer(struct drm_device *dev,
struct drm_i915_gem_exec_object2 *exec,
struct drm_syncobj **fences)
{
+ struct drm_i915_private *i915 = to_i915(dev);
struct i915_execbuffer eb;
struct dma_fence *in_fence = NULL;
struct dma_fence *exec_fence = NULL;
@@ -2362,7 +2574,7 @@ i915_gem_do_execbuffer(struct drm_device *dev,
BUILD_BUG_ON(__EXEC_OBJECT_INTERNAL_FLAGS &
~__EXEC_OBJECT_UNKNOWN_FLAGS);
- eb.i915 = to_i915(dev);
+ eb.i915 = i915;
eb.file = file;
eb.args = args;
if (DBG_FORCE_RELOC || !(args->flags & I915_EXEC_NO_RELOC))
@@ -2379,11 +2591,19 @@ i915_gem_do_execbuffer(struct drm_device *dev,
eb.buffer_count = args->buffer_count;
eb.batch_start_offset = args->batch_start_offset;
eb.batch_len = args->batch_len;
+ eb.trampoline = NULL;
eb.batch_flags = 0;
if (args->flags & I915_EXEC_SECURE) {
+ if (INTEL_GEN(i915) >= 11)
+ return -ENODEV;
+
+ /* Return -EPERM to trigger fallback code on old binaries. */
+ if (!HAS_SECURE_BATCHES(i915))
+ return -EPERM;
+
if (!drm_is_current_master(file) || !capable(CAP_SYS_ADMIN))
- return -EPERM;
+ return -EPERM;
eb.batch_flags |= I915_DISPATCH_SECURE;
}
@@ -2427,25 +2647,12 @@ i915_gem_do_execbuffer(struct drm_device *dev,
if (unlikely(err))
goto err_destroy;
- /*
- * Take a local wakeref for preparing to dispatch the execbuf as
- * we expect to access the hardware fairly frequently in the
- * process. Upon first dispatch, we acquire another prolonged
- * wakeref that we hold until the GPU has been idle for at least
- * 100ms.
- */
- intel_gt_pm_get(eb.i915);
+ err = eb_pin_engine(&eb, file, args);
+ if (unlikely(err))
+ goto err_context;
err = i915_mutex_lock_interruptible(dev);
if (err)
- goto err_rpm;
-
- err = eb_select_engine(&eb, file, args);
- if (unlikely(err))
- goto err_unlock;
-
- err = eb_wait_for_ring(&eb); /* may temporarily drop struct_mutex */
- if (unlikely(err))
goto err_engine;
err = eb_relocate(&eb);
@@ -2473,34 +2680,13 @@ i915_gem_do_execbuffer(struct drm_device *dev,
goto err_vma;
}
- if (eb_use_cmdparser(&eb)) {
- struct i915_vma *vma;
-
- vma = eb_parse(&eb, drm_is_current_master(file));
- if (IS_ERR(vma)) {
- err = PTR_ERR(vma);
- goto err_vma;
- }
-
- if (vma) {
- /*
- * Batch parsed and accepted:
- *
- * Set the DISPATCH_SECURE bit to remove the NON_SECURE
- * bit from MI_BATCH_BUFFER_START commands issued in
- * the dispatch_execbuffer implementations. We
- * specifically don't want that set on batches the
- * command parser has accepted.
- */
- eb.batch_flags |= I915_DISPATCH_SECURE;
- eb.batch_start_offset = 0;
- eb.batch = vma;
- }
- }
-
if (eb.batch_len == 0)
eb.batch_len = eb.batch->size - eb.batch_start_offset;
+ err = eb_parse(&eb);
+ if (err)
+ goto err_vma;
+
/*
* snb/ivb/vlv conflate the "batch in ppgtt" bit with the "non-secure
* batch" bit. Hence we need to pin secure batches into the global gtt.
@@ -2572,11 +2758,14 @@ i915_gem_do_execbuffer(struct drm_device *dev,
* to explicitly hold another reference here.
*/
eb.request->batch = eb.batch;
+ if (eb.batch->private)
+ intel_engine_pool_mark_active(eb.batch->private, eb.request);
trace_i915_request_queue(eb.request, eb.batch_flags);
err = eb_submit(&eb);
err_request:
add_to_client(eb.request, file);
+ i915_request_get(eb.request);
i915_request_add(eb.request);
if (fences)
@@ -2592,19 +2781,22 @@ err_request:
fput(out_fence->file);
}
}
+ i915_request_put(eb.request);
err_batch_unpin:
if (eb.batch_flags & I915_DISPATCH_SECURE)
i915_vma_unpin(eb.batch);
+ if (eb.batch->private)
+ intel_engine_pool_put(eb.batch->private);
err_vma:
if (eb.exec)
eb_release_vmas(&eb);
-err_engine:
- eb_unpin_context(&eb);
-err_unlock:
+ if (eb.trampoline)
+ i915_vma_unpin(eb.trampoline);
mutex_unlock(&dev->struct_mutex);
-err_rpm:
- intel_gt_pm_put(eb.i915);
+err_engine:
+ eb_unpin_engine(&eb);
+err_context:
i915_gem_context_put(eb.gem_context);
err_destroy:
eb_destroy(&eb);
@@ -2670,8 +2862,9 @@ i915_gem_execbuffer_ioctl(struct drm_device *dev, void *data,
exec2.flags = I915_EXEC_RENDER;
i915_execbuffer2_set_context_id(exec2, 0);
- if (!i915_gem_check_execbuffer(&exec2))
- return -EINVAL;
+ err = i915_gem_check_execbuffer(&exec2);
+ if (err)
+ return err;
/* Copy in the exec list from userland */
exec_list = kvmalloc_array(count, sizeof(*exec_list),
@@ -2748,8 +2941,9 @@ i915_gem_execbuffer2_ioctl(struct drm_device *dev, void *data,
return -EINVAL;
}
- if (!i915_gem_check_execbuffer(args))
- return -EINVAL;
+ err = i915_gem_check_execbuffer(args);
+ if (err)
+ return err;
/* Allocate an extra slot for use by the command parser */
exec2_list = kvmalloc_array(count + 1, eb_element_size(),
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_fence.c b/drivers/gpu/drm/i915/gem/i915_gem_fence.c
index cf0439e6be83..2f6100ec2608 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_fence.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_fence.c
@@ -69,8 +69,7 @@ i915_gem_object_lock_fence(struct drm_i915_gem_object *obj)
i915_sw_fence_init(&stub->chain, stub_notify);
dma_fence_init(&stub->dma, &stub_fence_ops, &stub->chain.wait.lock,
- to_i915(obj->base.dev)->mm.unordered_timeline,
- 0);
+ 0, 0);
if (i915_sw_fence_await_reservation(&stub->chain,
obj->base.resv, NULL,
@@ -78,7 +77,7 @@ i915_gem_object_lock_fence(struct drm_i915_gem_object *obj)
I915_FENCE_GFP) < 0)
goto err;
- reservation_object_add_excl_fence(obj->base.resv, &stub->dma);
+ dma_resv_add_excl_fence(obj->base.resv, &stub->dma);
return &stub->dma;
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_internal.c b/drivers/gpu/drm/i915/gem/i915_gem_internal.c
index 0c41e04ab8fa..9cfb0e41ff06 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_internal.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_internal.c
@@ -117,13 +117,6 @@ create_st:
goto err;
}
- /* Mark the pages as dontneed whilst they are still pinned. As soon
- * as they are unpinned they are allowed to be reaped by the shrinker,
- * and the caller is expected to repopulate - the contents of this
- * object are only valid whilst active and pinned.
- */
- obj->mm.madv = I915_MADV_DONTNEED;
-
__i915_gem_object_set_pages(obj, st, sg_page_sizes);
return 0;
@@ -143,7 +136,6 @@ static void i915_gem_object_put_pages_internal(struct drm_i915_gem_object *obj,
internal_free_pages(pages);
obj->mm.dirty = false;
- obj->mm.madv = I915_MADV_WILLNEED;
}
static const struct drm_i915_gem_object_ops i915_gem_object_internal_ops = {
@@ -172,6 +164,7 @@ struct drm_i915_gem_object *
i915_gem_object_create_internal(struct drm_i915_private *i915,
phys_addr_t size)
{
+ static struct lock_class_key lock_class;
struct drm_i915_gem_object *obj;
unsigned int cache_level;
@@ -186,7 +179,16 @@ i915_gem_object_create_internal(struct drm_i915_private *i915,
return ERR_PTR(-ENOMEM);
drm_gem_private_object_init(&i915->drm, &obj->base, size);
- i915_gem_object_init(obj, &i915_gem_object_internal_ops);
+ i915_gem_object_init(obj, &i915_gem_object_internal_ops, &lock_class);
+
+ /*
+ * Mark the object as volatile, such that the pages are marked as
+ * dontneed whilst they are still pinned. As soon as they are unpinned
+ * they are allowed to be reaped by the shrinker, and the caller is
+ * expected to repopulate - the contents of this object are only valid
+ * whilst active and pinned.
+ */
+ i915_gem_object_set_volatile(obj);
obj->read_domains = I915_GEM_DOMAIN_CPU;
obj->write_domain = I915_GEM_DOMAIN_CPU;
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_ioctls.h b/drivers/gpu/drm/i915/gem/i915_gem_ioctls.h
index ddc7f2a52b3e..87d8b27f426d 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_ioctls.h
+++ b/drivers/gpu/drm/i915/gem/i915_gem_ioctls.h
@@ -28,8 +28,8 @@ int i915_gem_madvise_ioctl(struct drm_device *dev, void *data,
struct drm_file *file);
int i915_gem_mmap_ioctl(struct drm_device *dev, void *data,
struct drm_file *file);
-int i915_gem_mmap_gtt_ioctl(struct drm_device *dev, void *data,
- struct drm_file *file);
+int i915_gem_mmap_offset_ioctl(struct drm_device *dev, void *data,
+ struct drm_file *file);
int i915_gem_pread_ioctl(struct drm_device *dev, void *data,
struct drm_file *file);
int i915_gem_pwrite_ioctl(struct drm_device *dev, void *data,
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_lmem.c b/drivers/gpu/drm/i915/gem/i915_gem_lmem.c
new file mode 100644
index 000000000000..70543c83df06
--- /dev/null
+++ b/drivers/gpu/drm/i915/gem/i915_gem_lmem.c
@@ -0,0 +1,56 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2019 Intel Corporation
+ */
+
+#include "intel_memory_region.h"
+#include "gem/i915_gem_region.h"
+#include "gem/i915_gem_lmem.h"
+#include "i915_drv.h"
+
+const struct drm_i915_gem_object_ops i915_gem_lmem_obj_ops = {
+ .flags = I915_GEM_OBJECT_HAS_IOMEM,
+
+ .get_pages = i915_gem_object_get_pages_buddy,
+ .put_pages = i915_gem_object_put_pages_buddy,
+ .release = i915_gem_object_release_memory_region,
+};
+
+bool i915_gem_object_is_lmem(struct drm_i915_gem_object *obj)
+{
+ return obj->ops == &i915_gem_lmem_obj_ops;
+}
+
+struct drm_i915_gem_object *
+i915_gem_object_create_lmem(struct drm_i915_private *i915,
+ resource_size_t size,
+ unsigned int flags)
+{
+ return i915_gem_object_create_region(i915->mm.regions[INTEL_REGION_LMEM],
+ size, flags);
+}
+
+struct drm_i915_gem_object *
+__i915_gem_lmem_object_create(struct intel_memory_region *mem,
+ resource_size_t size,
+ unsigned int flags)
+{
+ static struct lock_class_key lock_class;
+ struct drm_i915_private *i915 = mem->i915;
+ struct drm_i915_gem_object *obj;
+
+ obj = i915_gem_object_alloc();
+ if (!obj)
+ return ERR_PTR(-ENOMEM);
+
+ drm_gem_private_object_init(&i915->drm, &obj->base, size);
+ i915_gem_object_init(obj, &i915_gem_lmem_obj_ops, &lock_class);
+
+ obj->read_domains = I915_GEM_DOMAIN_WC | I915_GEM_DOMAIN_GTT;
+
+ i915_gem_object_set_cache_coherency(obj, I915_CACHE_NONE);
+
+ i915_gem_object_init_memory_region(obj, mem, flags);
+
+ return obj;
+}
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_lmem.h b/drivers/gpu/drm/i915/gem/i915_gem_lmem.h
new file mode 100644
index 000000000000..fc3f15580fe3
--- /dev/null
+++ b/drivers/gpu/drm/i915/gem/i915_gem_lmem.h
@@ -0,0 +1,29 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2019 Intel Corporation
+ */
+
+#ifndef __I915_GEM_LMEM_H
+#define __I915_GEM_LMEM_H
+
+#include <linux/types.h>
+
+struct drm_i915_private;
+struct drm_i915_gem_object;
+struct intel_memory_region;
+
+extern const struct drm_i915_gem_object_ops i915_gem_lmem_obj_ops;
+
+bool i915_gem_object_is_lmem(struct drm_i915_gem_object *obj);
+
+struct drm_i915_gem_object *
+i915_gem_object_create_lmem(struct drm_i915_private *i915,
+ resource_size_t size,
+ unsigned int flags);
+
+struct drm_i915_gem_object *
+__i915_gem_lmem_object_create(struct intel_memory_region *mem,
+ resource_size_t size,
+ unsigned int flags);
+
+#endif /* !__I915_GEM_LMEM_H */
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_mman.c b/drivers/gpu/drm/i915/gem/i915_gem_mman.c
index 39a661927d8e..0b6a442108de 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_mman.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_mman.c
@@ -4,15 +4,22 @@
* Copyright © 2014-2016 Intel Corporation
*/
+#include <linux/anon_inodes.h>
#include <linux/mman.h>
+#include <linux/pfn_t.h>
#include <linux/sizes.h>
+#include "gt/intel_gt.h"
+#include "gt/intel_gt_requests.h"
+
#include "i915_drv.h"
#include "i915_gem_gtt.h"
#include "i915_gem_ioctls.h"
#include "i915_gem_object.h"
+#include "i915_gem_mman.h"
+#include "i915_trace.h"
+#include "i915_user_extensions.h"
#include "i915_vma.h"
-#include "intel_drv.h"
static inline bool
__vma_matches(struct vm_area_struct *vma, struct file *filp,
@@ -99,9 +106,6 @@ i915_gem_mmap_ioctl(struct drm_device *dev, void *data,
up_write(&mm->mmap_sem);
if (IS_ERR_VALUE(addr))
goto err;
-
- /* This may race, but that's ok, it only gets set */
- WRITE_ONCE(obj->frontbuffer_ggtt_origin, ORIGIN_CPU);
}
i915_gem_object_put(obj);
@@ -144,6 +148,9 @@ static unsigned int tile_row_pages(const struct drm_i915_gem_object *obj)
* 3 - Remove implicit set-domain(GTT) and synchronisation on initial
* pagefault; swapin remains transparent.
*
+ * 4 - Support multiple fault handlers per object depending on object's
+ * backing storage (a.k.a. MMAP_OFFSET).
+ *
* Restrictions:
*
* * snoopable objects cannot be accessed via the GTT. It can cause machine
@@ -171,7 +178,7 @@ static unsigned int tile_row_pages(const struct drm_i915_gem_object *obj)
*/
int i915_gem_mmap_gtt_version(void)
{
- return 3;
+ return 4;
}
static inline struct i915_ggtt_view
@@ -197,29 +204,80 @@ compute_partial_view(const struct drm_i915_gem_object *obj,
return view;
}
-/**
- * i915_gem_fault - fault a page into the GTT
- * @vmf: fault info
- *
- * The fault handler is set up by drm_gem_mmap() when a object is GTT mapped
- * from userspace. The fault handler takes care of binding the object to
- * the GTT (if needed), allocating and programming a fence register (again,
- * only if needed based on whether the old reg is still valid or the object
- * is tiled) and inserting a new PTE into the faulting process.
- *
- * Note that the faulting process may involve evicting existing objects
- * from the GTT and/or fence registers to make room. So performance may
- * suffer if the GTT working set is large or there are few fence registers
- * left.
- *
- * The current feature set supported by i915_gem_fault() and thus GTT mmaps
- * is exposed via I915_PARAM_MMAP_GTT_VERSION (see i915_gem_mmap_gtt_version).
- */
-vm_fault_t i915_gem_fault(struct vm_fault *vmf)
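+/*
+ * Translate an error code from the fault handlers below into the
+ * vm_fault_t expected by the core MM.
+ */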
+static vm_fault_t i915_error_to_vmf_fault(int err)
+{
+ switch (err) {
+ default:
+ WARN_ONCE(err, "unhandled error in %s: %i\n", __func__, err);
+ /* fallthrough */
+ case -EIO: /* shmemfs failure from swap device */
+ case -EFAULT: /* purged object */
+ case -ENODEV: /* bad object, how did you get here! */
+ case -ENXIO: /* unable to access backing store (on device) */
+ return VM_FAULT_SIGBUS;
+
+ case -ENOSPC: /* shmemfs allocation failure */
+ case -ENOMEM: /* our allocation failure */
+ return VM_FAULT_OOM;
+
+ case 0:
+ case -EAGAIN:
+ case -ERESTARTSYS:
+ case -EINTR:
+ case -EBUSY:
+ /*
+ * EBUSY is ok: this just means that another thread
+ * already did the job.
+ */
+ return VM_FAULT_NOPAGE;
+ }
+}
+
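+/*
+ * Fault handler for the CPU mmap paths (WB/WC/UC): pin the object's backing
+ * store and insert all of its pages (or iomem PFNs) into the faulting VMA
+ * in one go via remap_io_sg().
+ */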
+static vm_fault_t vm_fault_cpu(struct vm_fault *vmf)
+{
+ struct vm_area_struct *area = vmf->vma;
+ struct i915_mmap_offset *mmo = area->vm_private_data;
+ struct drm_i915_gem_object *obj = mmo->obj;
+ resource_size_t iomap;
+ int err;
+
+ /* Sanity check that we allow writing into this object */
+ if (unlikely(i915_gem_object_is_readonly(obj) &&
+ area->vm_flags & VM_WRITE))
+ return VM_FAULT_SIGBUS;
+
+ err = i915_gem_object_pin_pages(obj);
+ if (err)
+ goto out;
+
+ iomap = -1;
+ if (!i915_gem_object_type_has(obj, I915_GEM_OBJECT_HAS_STRUCT_PAGE)) {
+ iomap = obj->mm.region->iomap.base;
+ iomap -= obj->mm.region->region.start;
+ }
+
+ /* PTEs are revoked in obj->ops->put_pages() */
+ err = remap_io_sg(area,
+ area->vm_start, area->vm_end - area->vm_start,
+ obj->mm.pages->sgl, iomap);
+
+ if (area->vm_flags & VM_WRITE) {
+ GEM_BUG_ON(!i915_gem_object_has_pinned_pages(obj));
+ obj->mm.dirty = true;
+ }
+
+ i915_gem_object_unpin_pages(obj);
+
+out:
+ return i915_error_to_vmf_fault(err);
+}
+
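+/*
+ * Fault handler for GTT mmaps: pin the object into the mappable aperture
+ * (falling back to a partial view when space is tight), grab a fence and
+ * map the aperture range into the faulting VMA, tracking the object for
+ * later mmap revocation.
+ */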
+static vm_fault_t vm_fault_gtt(struct vm_fault *vmf)
{
#define MIN_CHUNK_PAGES (SZ_1M >> PAGE_SHIFT)
struct vm_area_struct *area = vmf->vma;
- struct drm_i915_gem_object *obj = to_intel_bo(area->vm_private_data);
+ struct i915_mmap_offset *mmo = area->vm_private_data;
+ struct drm_i915_gem_object *obj = mmo->obj;
struct drm_device *dev = obj->base.dev;
struct drm_i915_private *i915 = to_i915(dev);
struct intel_runtime_pm *rpm = &i915->runtime_pm;
@@ -246,34 +304,22 @@ vm_fault_t i915_gem_fault(struct vm_fault *vmf)
wakeref = intel_runtime_pm_get(rpm);
- srcu = i915_reset_trylock(i915);
- if (srcu < 0) {
- ret = srcu;
- goto err_rpm;
- }
-
- ret = i915_mutex_lock_interruptible(dev);
+ ret = intel_gt_reset_trylock(ggtt->vm.gt, &srcu);
if (ret)
- goto err_reset;
-
- /* Access to snoopable pages through the GTT is incoherent. */
- if (obj->cache_level != I915_CACHE_NONE && !HAS_LLC(i915)) {
- ret = -EFAULT;
- goto err_unlock;
- }
+ goto err_rpm;
/* Now pin it into the GTT as needed */
vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0,
PIN_MAPPABLE |
- PIN_NONBLOCK |
- PIN_NONFAULT);
+ PIN_NONBLOCK /* NOWARN */ |
+ PIN_NOEVICT);
if (IS_ERR(vma)) {
/* Use a partial view if it is bigger than available space */
struct i915_ggtt_view view =
compute_partial_view(obj, page_offset, MIN_CHUNK_PAGES);
unsigned int flags;
- flags = PIN_MAPPABLE;
+ flags = PIN_MAPPABLE | PIN_NOSEARCH;
if (view.type == I915_GGTT_VIEW_NORMAL)
flags |= PIN_NONBLOCK; /* avoid warnings for pinned */
@@ -281,18 +327,26 @@ vm_fault_t i915_gem_fault(struct vm_fault *vmf)
* Userspace is now writing through an untracked VMA, abandon
* all hope that the hardware is able to track future writes.
*/
- obj->frontbuffer_ggtt_origin = ORIGIN_CPU;
vma = i915_gem_object_ggtt_pin(obj, &view, 0, 0, flags);
- if (IS_ERR(vma) && !view.type) {
+ if (IS_ERR(vma)) {
flags = PIN_MAPPABLE;
view.type = I915_GGTT_VIEW_PARTIAL;
vma = i915_gem_object_ggtt_pin(obj, &view, 0, 0, flags);
}
+
+ /* The entire mappable GGTT is pinned? Unexpected! */
+ GEM_BUG_ON(vma == ERR_PTR(-ENOSPC));
}
if (IS_ERR(vma)) {
ret = PTR_ERR(vma);
- goto err_unlock;
+ goto err_reset;
+ }
+
+ /* Access to snoopable pages through the GTT is incoherent. */
+ if (obj->cache_level != I915_CACHE_NONE && !HAS_LLC(i915)) {
+ ret = -EFAULT;
+ goto err_unpin;
}
ret = i915_vma_pin_fence(vma);
@@ -308,101 +362,67 @@ vm_fault_t i915_gem_fault(struct vm_fault *vmf)
if (ret)
goto err_fence;
- /* Mark as being mmapped into userspace for later revocation */
assert_rpm_wakelock_held(rpm);
+
+ /* Mark as being mmapped into userspace for later revocation */
+ mutex_lock(&i915->ggtt.vm.mutex);
if (!i915_vma_set_userfault(vma) && !obj->userfault_count++)
list_add(&obj->userfault_link, &i915->ggtt.userfault_list);
- if (CONFIG_DRM_I915_USERFAULT_AUTOSUSPEND)
+ mutex_unlock(&i915->ggtt.vm.mutex);
+
+ /* Track the mmo associated with the fenced vma */
+ vma->mmo = mmo;
+
+ if (IS_ACTIVE(CONFIG_DRM_I915_USERFAULT_AUTOSUSPEND))
intel_wakeref_auto(&i915->ggtt.userfault_wakeref,
msecs_to_jiffies_timeout(CONFIG_DRM_I915_USERFAULT_AUTOSUSPEND));
- GEM_BUG_ON(!obj->userfault_count);
- i915_vma_set_ggtt_write(vma);
+ if (write) {
+ GEM_BUG_ON(!i915_gem_object_has_pinned_pages(obj));
+ i915_vma_set_ggtt_write(vma);
+ obj->mm.dirty = true;
+ }
err_fence:
i915_vma_unpin_fence(vma);
err_unpin:
__i915_vma_unpin(vma);
-err_unlock:
- mutex_unlock(&dev->struct_mutex);
err_reset:
- i915_reset_unlock(i915, srcu);
+ intel_gt_reset_unlock(ggtt->vm.gt, srcu);
err_rpm:
intel_runtime_pm_put(rpm, wakeref);
i915_gem_object_unpin_pages(obj);
err:
- switch (ret) {
- case -EIO:
- /*
- * We eat errors when the gpu is terminally wedged to avoid
- * userspace unduly crashing (gl has no provisions for mmaps to
- * fail). But any other -EIO isn't ours (e.g. swap in failure)
- * and so needs to be reported.
- */
- if (!i915_terminally_wedged(i915))
- return VM_FAULT_SIGBUS;
- /* else, fall through */
- case -EAGAIN:
- /*
- * EAGAIN means the gpu is hung and we'll wait for the error
- * handler to reset everything when re-faulting in
- * i915_mutex_lock_interruptible.
- */
- case 0:
- case -ERESTARTSYS:
- case -EINTR:
- case -EBUSY:
- /*
- * EBUSY is ok: this just means that another thread
- * already did the job.
- */
- return VM_FAULT_NOPAGE;
- case -ENOMEM:
- return VM_FAULT_OOM;
- case -ENOSPC:
- case -EFAULT:
- return VM_FAULT_SIGBUS;
- default:
- WARN_ONCE(ret, "unhandled error in %s: %i\n", __func__, ret);
- return VM_FAULT_SIGBUS;
- }
+ return i915_error_to_vmf_fault(ret);
}
-void __i915_gem_object_release_mmap(struct drm_i915_gem_object *obj)
+void __i915_gem_object_release_mmap_gtt(struct drm_i915_gem_object *obj)
{
struct i915_vma *vma;
GEM_BUG_ON(!obj->userfault_count);
- obj->userfault_count = 0;
- list_del(&obj->userfault_link);
- drm_vma_node_unmap(&obj->base.vma_node,
- obj->base.dev->anon_inode->i_mapping);
-
for_each_ggtt_vma(vma, obj)
- i915_vma_unset_userfault(vma);
+ i915_vma_revoke_mmap(vma);
+
+ GEM_BUG_ON(obj->userfault_count);
}
-/**
- * i915_gem_object_release_mmap - remove physical page mappings
- * @obj: obj in question
- *
- * Preserve the reservation of the mmapping with the DRM core code, but
- * relinquish ownership of the pages back to the system.
- *
+/*
* It is vital that we remove the page mapping if we have mapped a tiled
* object through the GTT and then lose the fence register due to
* resource pressure. Similarly if the object has been moved out of the
 * aperture, then pages mapped into userspace must be revoked. Removing the
* mapping will then trigger a page fault on the next user access, allowing
- * fixup by i915_gem_fault().
+ * fixup by vm_fault_gtt().
*/
-void i915_gem_object_release_mmap(struct drm_i915_gem_object *obj)
+static void i915_gem_object_release_mmap_gtt(struct drm_i915_gem_object *obj)
{
struct drm_i915_private *i915 = to_i915(obj->base.dev);
intel_wakeref_t wakeref;
- /* Serialisation between user GTT access and our code depends upon
+ /*
+ * Serialisation between user GTT access and our code depends upon
* revoking the CPU's PTE whilst the mutex is held. The next user
* pagefault then has to wait until we release the mutex.
*
@@ -410,15 +430,16 @@ void i915_gem_object_release_mmap(struct drm_i915_gem_object *obj)
* requirement that operations to the GGTT be made holding the RPM
* wakeref.
*/
- lockdep_assert_held(&i915->drm.struct_mutex);
wakeref = intel_runtime_pm_get(&i915->runtime_pm);
+ mutex_lock(&i915->ggtt.vm.mutex);
if (!obj->userfault_count)
goto out;
- __i915_gem_object_release_mmap(obj);
+ __i915_gem_object_release_mmap_gtt(obj);
- /* Ensure that the CPU's PTE are revoked and there are not outstanding
+ /*
+ * Ensure that the CPU's PTEs are revoked and there are no outstanding
* memory transactions from userspace before we return. The TLB
* flushing implied above by changing the PTE above *should* be
* sufficient, an extra barrier here just provides us with a bit
@@ -428,59 +449,217 @@ void i915_gem_object_release_mmap(struct drm_i915_gem_object *obj)
wmb();
out:
+ mutex_unlock(&i915->ggtt.vm.mutex);
intel_runtime_pm_put(&i915->runtime_pm, wakeref);
}
-static int create_mmap_offset(struct drm_i915_gem_object *obj)
+void i915_gem_object_release_mmap_offset(struct drm_i915_gem_object *obj)
+{
+ struct i915_mmap_offset *mmo, *mn;
+
+ spin_lock(&obj->mmo.lock);
+ rbtree_postorder_for_each_entry_safe(mmo, mn,
+ &obj->mmo.offsets, offset) {
+ /*
+ * vma_node_unmap for GTT mmaps handled already in
+ * __i915_gem_object_release_mmap_gtt
+ */
+ if (mmo->mmap_type == I915_MMAP_TYPE_GTT)
+ continue;
+
+ spin_unlock(&obj->mmo.lock);
+ drm_vma_node_unmap(&mmo->vma_node,
+ obj->base.dev->anon_inode->i_mapping);
+ spin_lock(&obj->mmo.lock);
+ }
+ spin_unlock(&obj->mmo.lock);
+}
+
+/**
+ * i915_gem_object_release_mmap - remove physical page mappings
+ * @obj: obj in question
+ *
+ * Preserve the reservation of the mmapping with the DRM core code, but
+ * relinquish ownership of the pages back to the system.
+ */
+void i915_gem_object_release_mmap(struct drm_i915_gem_object *obj)
+{
+ i915_gem_object_release_mmap_gtt(obj);
+ i915_gem_object_release_mmap_offset(obj);
+}
+
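+/* Find the mmap offset node of the given mmap_type in the object's rbtree. */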
+static struct i915_mmap_offset *
+lookup_mmo(struct drm_i915_gem_object *obj,
+ enum i915_mmap_type mmap_type)
+{
+ struct rb_node *rb;
+
+ spin_lock(&obj->mmo.lock);
+ rb = obj->mmo.offsets.rb_node;
+ while (rb) {
+ struct i915_mmap_offset *mmo =
+ rb_entry(rb, typeof(*mmo), offset);
+
+ if (mmo->mmap_type == mmap_type) {
+ spin_unlock(&obj->mmo.lock);
+ return mmo;
+ }
+
+ if (mmo->mmap_type < mmap_type)
+ rb = rb->rb_right;
+ else
+ rb = rb->rb_left;
+ }
+ spin_unlock(&obj->mmo.lock);
+
+ return NULL;
+}
+
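+/*
+ * Insert the new offset node into the object's rbtree, keyed by mmap_type.
+ * If a node of the same type already exists (we raced), free the new node
+ * and return the existing one instead.
+ */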
+static struct i915_mmap_offset *
+insert_mmo(struct drm_i915_gem_object *obj, struct i915_mmap_offset *mmo)
+{
+ struct rb_node *rb, **p;
+
+ spin_lock(&obj->mmo.lock);
+ rb = NULL;
+ p = &obj->mmo.offsets.rb_node;
+ while (*p) {
+ struct i915_mmap_offset *pos;
+
+ rb = *p;
+ pos = rb_entry(rb, typeof(*pos), offset);
+
+ if (pos->mmap_type == mmo->mmap_type) {
+ spin_unlock(&obj->mmo.lock);
+ drm_vma_offset_remove(obj->base.dev->vma_offset_manager,
+ &mmo->vma_node);
+ kfree(mmo);
+ return pos;
+ }
+
+ if (pos->mmap_type < mmo->mmap_type)
+ p = &rb->rb_right;
+ else
+ p = &rb->rb_left;
+ }
+ rb_link_node(&mmo->offset, rb, p);
+ rb_insert_color(&mmo->offset, &obj->mmo.offsets);
+ spin_unlock(&obj->mmo.lock);
+
+ return mmo;
+}
+
+static struct i915_mmap_offset *
+mmap_offset_attach(struct drm_i915_gem_object *obj,
+ enum i915_mmap_type mmap_type,
+ struct drm_file *file)
{
struct drm_i915_private *i915 = to_i915(obj->base.dev);
+ struct i915_mmap_offset *mmo;
int err;
- err = drm_gem_create_mmap_offset(&obj->base);
+ mmo = lookup_mmo(obj, mmap_type);
+ if (mmo)
+ goto out;
+
+ mmo = kmalloc(sizeof(*mmo), GFP_KERNEL);
+ if (!mmo)
+ return ERR_PTR(-ENOMEM);
+
+ mmo->obj = obj;
+ mmo->mmap_type = mmap_type;
+ drm_vma_node_reset(&mmo->vma_node);
+
+ err = drm_vma_offset_add(obj->base.dev->vma_offset_manager,
+ &mmo->vma_node, obj->base.size / PAGE_SIZE);
if (likely(!err))
- return 0;
+ goto insert;
/* Attempt to reap some mmap space from dead objects */
- do {
- err = i915_gem_wait_for_idle(i915,
- I915_WAIT_INTERRUPTIBLE,
- MAX_SCHEDULE_TIMEOUT);
- if (err)
- break;
+ err = intel_gt_retire_requests_timeout(&i915->gt, MAX_SCHEDULE_TIMEOUT);
+ if (err)
+ goto err;
- i915_gem_drain_freed_objects(i915);
- err = drm_gem_create_mmap_offset(&obj->base);
- if (!err)
- break;
+ i915_gem_drain_freed_objects(i915);
+ err = drm_vma_offset_add(obj->base.dev->vma_offset_manager,
+ &mmo->vma_node, obj->base.size / PAGE_SIZE);
+ if (err)
+ goto err;
- } while (flush_delayed_work(&i915->gem.retire_work));
+insert:
+ mmo = insert_mmo(obj, mmo);
+ GEM_BUG_ON(lookup_mmo(obj, mmap_type) != mmo);
+out:
+ if (file)
+ drm_vma_node_allow(&mmo->vma_node, file);
+ return mmo;
- return err;
+err:
+ kfree(mmo);
+ return ERR_PTR(err);
}
-int
-i915_gem_mmap_gtt(struct drm_file *file,
- struct drm_device *dev,
- u32 handle,
- u64 *offset)
+static int
+__assign_mmap_offset(struct drm_file *file,
+ u32 handle,
+ enum i915_mmap_type mmap_type,
+ u64 *offset)
{
struct drm_i915_gem_object *obj;
- int ret;
+ struct i915_mmap_offset *mmo;
+ int err;
obj = i915_gem_object_lookup(file, handle);
if (!obj)
return -ENOENT;
- ret = create_mmap_offset(obj);
- if (ret == 0)
- *offset = drm_vma_node_offset_addr(&obj->base.vma_node);
+ if (mmap_type == I915_MMAP_TYPE_GTT &&
+ i915_gem_object_never_bind_ggtt(obj)) {
+ err = -ENODEV;
+ goto out;
+ }
+
+ if (mmap_type != I915_MMAP_TYPE_GTT &&
+ !i915_gem_object_type_has(obj,
+ I915_GEM_OBJECT_HAS_STRUCT_PAGE |
+ I915_GEM_OBJECT_HAS_IOMEM)) {
+ err = -ENODEV;
+ goto out;
+ }
+
+ mmo = mmap_offset_attach(obj, mmap_type, file);
+ if (IS_ERR(mmo)) {
+ err = PTR_ERR(mmo);
+ goto out;
+ }
+ *offset = drm_vma_node_offset_addr(&mmo->vma_node);
+ err = 0;
+out:
i915_gem_object_put(obj);
- return ret;
+ return err;
+}
+
+int
+i915_gem_dumb_mmap_offset(struct drm_file *file,
+ struct drm_device *dev,
+ u32 handle,
+ u64 *offset)
+{
+ enum i915_mmap_type mmap_type;
+
+ if (boot_cpu_has(X86_FEATURE_PAT))
+ mmap_type = I915_MMAP_TYPE_WC;
+ else if (!i915_ggtt_has_aperture(&to_i915(dev)->ggtt))
+ return -ENODEV;
+ else
+ mmap_type = I915_MMAP_TYPE_GTT;
+
+ return __assign_mmap_offset(file, handle, mmap_type, offset);
}
/**
- * i915_gem_mmap_gtt_ioctl - prepare an object for GTT mmap'ing
+ * i915_gem_mmap_offset_ioctl - prepare an object for GTT mmap'ing
* @dev: DRM device
* @data: GTT mapping ioctl data
* @file: GEM object info
@@ -495,12 +674,220 @@ i915_gem_mmap_gtt(struct drm_file *file,
* userspace.
*/
int
-i915_gem_mmap_gtt_ioctl(struct drm_device *dev, void *data,
- struct drm_file *file)
+i915_gem_mmap_offset_ioctl(struct drm_device *dev, void *data,
+ struct drm_file *file)
+{
+ struct drm_i915_private *i915 = to_i915(dev);
+ struct drm_i915_gem_mmap_offset *args = data;
+ enum i915_mmap_type type;
+ int err;
+
+ /*
+ * Historically we failed to check args.pad and args.offset
+ * and so we cannot use those fields for user input and we cannot
+ * add -EINVAL for them as the ABI is fixed, i.e. old userspace
+ * may be feeding in garbage in those fields.
+ *
+ * if (args->pad) return -EINVAL; is verboten!
+ */
+
+ err = i915_user_extensions(u64_to_user_ptr(args->extensions),
+ NULL, 0, NULL);
+ if (err)
+ return err;
+
+ switch (args->flags) {
+ case I915_MMAP_OFFSET_GTT:
+ if (!i915_ggtt_has_aperture(&i915->ggtt))
+ return -ENODEV;
+ type = I915_MMAP_TYPE_GTT;
+ break;
+
+ case I915_MMAP_OFFSET_WC:
+ if (!boot_cpu_has(X86_FEATURE_PAT))
+ return -ENODEV;
+ type = I915_MMAP_TYPE_WC;
+ break;
+
+ case I915_MMAP_OFFSET_WB:
+ type = I915_MMAP_TYPE_WB;
+ break;
+
+ case I915_MMAP_OFFSET_UC:
+ if (!boot_cpu_has(X86_FEATURE_PAT))
+ return -ENODEV;
+ type = I915_MMAP_TYPE_UC;
+ break;
+
+ default:
+ return -EINVAL;
+ }
+
+ return __assign_mmap_offset(file, args->handle, type, &args->offset);
+}
+
+static void vm_open(struct vm_area_struct *vma)
+{
+ struct i915_mmap_offset *mmo = vma->vm_private_data;
+ struct drm_i915_gem_object *obj = mmo->obj;
+
+ GEM_BUG_ON(!obj);
+ i915_gem_object_get(obj);
+}
+
+static void vm_close(struct vm_area_struct *vma)
+{
+ struct i915_mmap_offset *mmo = vma->vm_private_data;
+ struct drm_i915_gem_object *obj = mmo->obj;
+
+ GEM_BUG_ON(!obj);
+ i915_gem_object_put(obj);
+}
+
+static const struct vm_operations_struct vm_ops_gtt = {
+ .fault = vm_fault_gtt,
+ .open = vm_open,
+ .close = vm_close,
+};
+
+static const struct vm_operations_struct vm_ops_cpu = {
+ .fault = vm_fault_cpu,
+ .open = vm_open,
+ .close = vm_close,
+};
+
+static int singleton_release(struct inode *inode, struct file *file)
+{
+ struct drm_i915_private *i915 = file->private_data;
+
+ cmpxchg(&i915->gem.mmap_singleton, file, NULL);
+ drm_dev_put(&i915->drm);
+
+ return 0;
+}
+
+static const struct file_operations singleton_fops = {
+ .owner = THIS_MODULE,
+ .release = singleton_release,
+};
+
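+/*
+ * Return a reference to the device-wide anonymous file that backs all GEM
+ * mmaps, creating it on first use. Every mapping shares the same
+ * address_space so that the PTEs can be revoked in a single place.
+ */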
+static struct file *mmap_singleton(struct drm_i915_private *i915)
+{
+ struct file *file;
+
+ rcu_read_lock();
+ file = i915->gem.mmap_singleton;
+ if (file && !get_file_rcu(file))
+ file = NULL;
+ rcu_read_unlock();
+ if (file)
+ return file;
+
+ file = anon_inode_getfile("i915.gem", &singleton_fops, i915, O_RDWR);
+ if (IS_ERR(file))
+ return file;
+
+ /* Everyone shares a single global address space */
+ file->f_mapping = i915->drm.anon_inode->i_mapping;
+
+ smp_store_mb(i915->gem.mmap_singleton, file);
+ drm_dev_get(&i915->drm);
+
+ return file;
+}
+
+/*
+ * This overcomes the limitation in drm_gem_mmap's assignment of a
+ * drm_gem_object as the vma->vm_private_data, since we need to be
+ * able to resolve multiple mmap offsets which could be tied to a
+ * single gem object.
+ */
+int i915_gem_mmap(struct file *filp, struct vm_area_struct *vma)
{
- struct drm_i915_gem_mmap_gtt *args = data;
+ struct drm_vma_offset_node *node;
+ struct drm_file *priv = filp->private_data;
+ struct drm_device *dev = priv->minor->dev;
+ struct drm_i915_gem_object *obj = NULL;
+ struct i915_mmap_offset *mmo = NULL;
+ struct file *anon;
+
+ if (drm_dev_is_unplugged(dev))
+ return -ENODEV;
+
+ rcu_read_lock();
+ drm_vma_offset_lock_lookup(dev->vma_offset_manager);
+ node = drm_vma_offset_exact_lookup_locked(dev->vma_offset_manager,
+ vma->vm_pgoff,
+ vma_pages(vma));
+ if (node && drm_vma_node_is_allowed(node, priv)) {
+ /*
+ * Skip 0-refcnted objects as it is in the process of being
+ * Skip 0-refcnted objects as they are in the process of being
+ * destroyed and will be invalid when the vma manager lock
+ */
+ mmo = container_of(node, struct i915_mmap_offset, vma_node);
+ obj = i915_gem_object_get_rcu(mmo->obj);
+ }
+ drm_vma_offset_unlock_lookup(dev->vma_offset_manager);
+ rcu_read_unlock();
+ if (!obj)
+ return node ? -EACCES : -EINVAL;
+
+ if (i915_gem_object_is_readonly(obj)) {
+ if (vma->vm_flags & VM_WRITE) {
+ i915_gem_object_put(obj);
+ return -EINVAL;
+ }
+ vma->vm_flags &= ~VM_MAYWRITE;
+ }
- return i915_gem_mmap_gtt(file, dev, args->handle, &args->offset);
+ anon = mmap_singleton(to_i915(dev));
+ if (IS_ERR(anon)) {
+ i915_gem_object_put(obj);
+ return PTR_ERR(anon);
+ }
+
+ vma->vm_flags |= VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP;
+ vma->vm_private_data = mmo;
+
+ /*
+ * We keep the ref on mmo->obj, not vm_file, but we require
+ * vma->vm_file->f_mapping, see vma_link(), for later revocation.
+ * Our userspace is accustomed to having per-file resource cleanup
+ * (i.e. contexts, objects and requests) on their close(fd), which
+ * requires avoiding extraneous references to their filp, which is why
+ * we prefer to use an anonymous file for their mmaps.
+ */
+ fput(vma->vm_file);
+ vma->vm_file = anon;
+
+ switch (mmo->mmap_type) {
+ case I915_MMAP_TYPE_WC:
+ vma->vm_page_prot =
+ pgprot_writecombine(vm_get_page_prot(vma->vm_flags));
+ vma->vm_ops = &vm_ops_cpu;
+ break;
+
+ case I915_MMAP_TYPE_WB:
+ vma->vm_page_prot = vm_get_page_prot(vma->vm_flags);
+ vma->vm_ops = &vm_ops_cpu;
+ break;
+
+ case I915_MMAP_TYPE_UC:
+ vma->vm_page_prot =
+ pgprot_noncached(vm_get_page_prot(vma->vm_flags));
+ vma->vm_ops = &vm_ops_cpu;
+ break;
+
+ case I915_MMAP_TYPE_GTT:
+ vma->vm_page_prot =
+ pgprot_writecombine(vm_get_page_prot(vma->vm_flags));
+ vma->vm_ops = &vm_ops_gtt;
+ break;
+ }
+ vma->vm_page_prot = pgprot_decrypted(vma->vm_page_prot);
+
+ return 0;
}
#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_mman.h b/drivers/gpu/drm/i915/gem/i915_gem_mman.h
new file mode 100644
index 000000000000..862e01b7cb69
--- /dev/null
+++ b/drivers/gpu/drm/i915/gem/i915_gem_mman.h
@@ -0,0 +1,31 @@
+/*
+ * SPDX-License-Identifier: MIT
+ *
+ * Copyright © 2019 Intel Corporation
+ */
+
+#ifndef __I915_GEM_MMAN_H__
+#define __I915_GEM_MMAN_H__
+
+#include <linux/mm_types.h>
+#include <linux/types.h>
+
+struct drm_device;
+struct drm_file;
+struct drm_i915_gem_object;
+struct file;
+struct i915_mmap_offset;
+struct mutex;
+
+int i915_gem_mmap_gtt_version(void);
+int i915_gem_mmap(struct file *filp, struct vm_area_struct *vma);
+
+int i915_gem_dumb_mmap_offset(struct drm_file *file_priv,
+ struct drm_device *dev,
+ u32 handle, u64 *offset);
+
+void __i915_gem_object_release_mmap_gtt(struct drm_i915_gem_object *obj);
+void i915_gem_object_release_mmap(struct drm_i915_gem_object *obj);
+void i915_gem_object_release_mmap_offset(struct drm_i915_gem_object *obj);
+
+#endif
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.c b/drivers/gpu/drm/i915/gem/i915_gem_object.c
index be6caccce0c5..35985218bd85 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_object.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_object.c
@@ -22,13 +22,17 @@
*
*/
-#include "display/intel_frontbuffer.h"
+#include <linux/sched/mm.h>
+#include "display/intel_frontbuffer.h"
+#include "gt/intel_gt.h"
#include "i915_drv.h"
#include "i915_gem_clflush.h"
#include "i915_gem_context.h"
+#include "i915_gem_mman.h"
#include "i915_gem_object.h"
#include "i915_globals.h"
+#include "i915_trace.h"
static struct i915_global_object {
struct i915_global base;
@@ -45,35 +49,26 @@ void i915_gem_object_free(struct drm_i915_gem_object *obj)
return kmem_cache_free(global.slab_objects, obj);
}
-static void
-frontbuffer_retire(struct i915_active_request *active,
- struct i915_request *request)
-{
- struct drm_i915_gem_object *obj =
- container_of(active, typeof(*obj), frontbuffer_write);
-
- intel_fb_obj_flush(obj, ORIGIN_CS);
-}
-
void i915_gem_object_init(struct drm_i915_gem_object *obj,
- const struct drm_i915_gem_object_ops *ops)
+ const struct drm_i915_gem_object_ops *ops,
+ struct lock_class_key *key)
{
- mutex_init(&obj->mm.lock);
+ __mutex_init(&obj->mm.lock, "obj->mm.lock", key);
spin_lock_init(&obj->vma.lock);
INIT_LIST_HEAD(&obj->vma.list);
+ INIT_LIST_HEAD(&obj->mm.link);
+
INIT_LIST_HEAD(&obj->lut_list);
- INIT_LIST_HEAD(&obj->batch_pool_link);
+
+ spin_lock_init(&obj->mmo.lock);
+ obj->mmo.offsets = RB_ROOT;
init_rcu_head(&obj->rcu);
obj->ops = ops;
- obj->frontbuffer_ggtt_origin = ORIGIN_GTT;
- i915_active_request_init(&obj->frontbuffer_write,
- NULL, frontbuffer_retire);
-
obj->mm.madv = I915_MADV_WILLNEED;
INIT_RADIX_TREE(&obj->mm.get_page.radix, GFP_KERNEL | __GFP_NOWARN);
mutex_init(&obj->mm.get_page.lock);
@@ -105,6 +100,7 @@ void i915_gem_close_object(struct drm_gem_object *gem, struct drm_file *file)
{
struct drm_i915_gem_object *obj = to_intel_bo(gem);
struct drm_i915_file_private *fpriv = file->driver_priv;
+ struct i915_mmap_offset *mmo, *mn;
struct i915_lut_handle *lut, *ln;
LIST_HEAD(close);
@@ -120,6 +116,11 @@ void i915_gem_close_object(struct drm_gem_object *gem, struct drm_file *file)
}
i915_gem_object_unlock(obj);
+ spin_lock(&obj->mmo.lock);
+ rbtree_postorder_for_each_entry_safe(mmo, mn, &obj->mmo.offsets, offset)
+ drm_vma_node_revoke(&mmo->vma_node, file);
+ spin_unlock(&obj->mmo.lock);
+
list_for_each_entry_safe(lut, ln, &close, obj_link) {
struct i915_gem_context *ctx = lut->ctx;
struct i915_vma *vma;
@@ -146,6 +147,19 @@ void i915_gem_close_object(struct drm_gem_object *gem, struct drm_file *file)
}
}
+static void __i915_gem_free_object_rcu(struct rcu_head *head)
+{
+ struct drm_i915_gem_object *obj =
+ container_of(head, typeof(*obj), rcu);
+ struct drm_i915_private *i915 = to_i915(obj->base.dev);
+
+ dma_resv_fini(&obj->base._resv);
+ i915_gem_object_free(obj);
+
+ GEM_BUG_ON(!atomic_read(&i915->mm.free_count));
+ atomic_dec(&i915->mm.free_count);
+}
+
static void __i915_gem_free_objects(struct drm_i915_private *i915,
struct llist_node *freed)
{
@@ -154,125 +168,106 @@ static void __i915_gem_free_objects(struct drm_i915_private *i915,
wakeref = intel_runtime_pm_get(&i915->runtime_pm);
llist_for_each_entry_safe(obj, on, freed, freed) {
- struct i915_vma *vma, *vn;
+ struct i915_mmap_offset *mmo, *mn;
trace_i915_gem_object_destroy(obj);
- mutex_lock(&i915->drm.struct_mutex);
-
- GEM_BUG_ON(i915_gem_object_is_active(obj));
- list_for_each_entry_safe(vma, vn, &obj->vma.list, obj_link) {
- GEM_BUG_ON(i915_vma_is_active(vma));
- vma->flags &= ~I915_VMA_PIN_MASK;
- i915_vma_destroy(vma);
+ if (!list_empty(&obj->vma.list)) {
+ struct i915_vma *vma;
+
+ /*
+ * Note that the vma keeps an object reference while
+ * it is active, so it *should* not sleep while we
+ * destroy it. Our debug code, however, insists it *might*.
+ * For the moment, play along.
+ */
+ spin_lock(&obj->vma.lock);
+ while ((vma = list_first_entry_or_null(&obj->vma.list,
+ struct i915_vma,
+ obj_link))) {
+ GEM_BUG_ON(vma->obj != obj);
+ spin_unlock(&obj->vma.lock);
+
+ __i915_vma_put(vma);
+
+ spin_lock(&obj->vma.lock);
+ }
+ spin_unlock(&obj->vma.lock);
}
- GEM_BUG_ON(!list_empty(&obj->vma.list));
- GEM_BUG_ON(!RB_EMPTY_ROOT(&obj->vma.tree));
- /*
- * This serializes freeing with the shrinker. Since the free
- * is delayed, first by RCU then by the workqueue, we want the
- * shrinker to be able to free pages of unreferenced objects,
- * or else we may oom whilst there are plenty of deferred
- * freed objects.
- */
- if (i915_gem_object_has_pages(obj) &&
- i915_gem_object_is_shrinkable(obj)) {
- unsigned long flags;
+ i915_gem_object_release_mmap(obj);
- spin_lock_irqsave(&i915->mm.obj_lock, flags);
- list_del_init(&obj->mm.link);
- spin_unlock_irqrestore(&i915->mm.obj_lock, flags);
+ rbtree_postorder_for_each_entry_safe(mmo, mn,
+ &obj->mmo.offsets,
+ offset) {
+ drm_vma_offset_remove(obj->base.dev->vma_offset_manager,
+ &mmo->vma_node);
+ kfree(mmo);
}
-
- mutex_unlock(&i915->drm.struct_mutex);
+ obj->mmo.offsets = RB_ROOT;
GEM_BUG_ON(atomic_read(&obj->bind_count));
GEM_BUG_ON(obj->userfault_count);
- GEM_BUG_ON(atomic_read(&obj->frontbuffer_bits));
GEM_BUG_ON(!list_empty(&obj->lut_list));
- if (obj->ops->release)
- obj->ops->release(obj);
-
atomic_set(&obj->mm.pages_pin_count, 0);
- __i915_gem_object_put_pages(obj, I915_MM_NORMAL);
+ __i915_gem_object_put_pages(obj);
GEM_BUG_ON(i915_gem_object_has_pages(obj));
+ bitmap_free(obj->bit_17);
if (obj->base.import_attach)
drm_prime_gem_destroy(&obj->base, NULL);
- drm_gem_object_release(&obj->base);
-
- bitmap_free(obj->bit_17);
- i915_gem_object_free(obj);
+ drm_gem_free_mmap_offset(&obj->base);
- GEM_BUG_ON(!atomic_read(&i915->mm.free_count));
- atomic_dec(&i915->mm.free_count);
+ if (obj->ops->release)
+ obj->ops->release(obj);
- cond_resched();
+ /* But keep the pointer alive for RCU-protected lookups */
+ call_rcu(&obj->rcu, __i915_gem_free_object_rcu);
}
intel_runtime_pm_put(&i915->runtime_pm, wakeref);
}
void i915_gem_flush_free_objects(struct drm_i915_private *i915)
{
- struct llist_node *freed;
-
- /* Free the oldest, most stale object to keep the free_list short */
- freed = NULL;
- if (!llist_empty(&i915->mm.free_list)) { /* quick test for hotpath */
- /* Only one consumer of llist_del_first() allowed */
- spin_lock(&i915->mm.free_lock);
- freed = llist_del_first(&i915->mm.free_list);
- spin_unlock(&i915->mm.free_lock);
- }
- if (unlikely(freed)) {
- freed->next = NULL;
+ struct llist_node *freed = llist_del_all(&i915->mm.free_list);
+
+ if (unlikely(freed))
__i915_gem_free_objects(i915, freed);
- }
}
static void __i915_gem_free_work(struct work_struct *work)
{
struct drm_i915_private *i915 =
container_of(work, struct drm_i915_private, mm.free_work);
- struct llist_node *freed;
-
- /*
- * All file-owned VMA should have been released by this point through
- * i915_gem_close_object(), or earlier by i915_gem_context_close().
- * However, the object may also be bound into the global GTT (e.g.
- * older GPUs without per-process support, or for direct access through
- * the GTT either for the user or for scanout). Those VMA still need to
- * unbound now.
- */
-
- spin_lock(&i915->mm.free_lock);
- while ((freed = llist_del_all(&i915->mm.free_list))) {
- spin_unlock(&i915->mm.free_lock);
- __i915_gem_free_objects(i915, freed);
- if (need_resched())
- return;
-
- spin_lock(&i915->mm.free_lock);
- }
- spin_unlock(&i915->mm.free_lock);
+ i915_gem_flush_free_objects(i915);
}
-static void __i915_gem_free_object_rcu(struct rcu_head *head)
+void i915_gem_free_object(struct drm_gem_object *gem_obj)
{
- struct drm_i915_gem_object *obj =
- container_of(head, typeof(*obj), rcu);
+ struct drm_i915_gem_object *obj = to_intel_bo(gem_obj);
struct drm_i915_private *i915 = to_i915(obj->base.dev);
+ GEM_BUG_ON(i915_gem_object_is_framebuffer(obj));
+
/*
- * We reuse obj->rcu for the freed list, so we had better not treat
- * it like a rcu_head from this point forwards. And we expect all
- * objects to be freed via this path.
+ * Before we free the object, make sure any pure RCU-only
+ * read-side critical sections are complete, e.g.
+ * i915_gem_busy_ioctl(). For the corresponding synchronized
+ * lookup see i915_gem_object_lookup_rcu().
*/
- destroy_rcu_head(&obj->rcu);
+ atomic_inc(&i915->mm.free_count);
+
+ /*
+ * This serializes freeing with the shrinker. Since the free
+ * is delayed, first by RCU then by the workqueue, we want the
+ * shrinker to be able to free pages of unreferenced objects,
+ * or else we may oom whilst there are plenty of deferred
+ * freed objects.
+ */
+ i915_gem_object_make_unshrinkable(obj);
/*
* Since we require blocking on struct_mutex to unbind the freed
@@ -288,27 +283,6 @@ static void __i915_gem_free_object_rcu(struct rcu_head *head)
queue_work(i915->wq, &i915->mm.free_work);
}
-void i915_gem_free_object(struct drm_gem_object *gem_obj)
-{
- struct drm_i915_gem_object *obj = to_intel_bo(gem_obj);
-
- /*
- * Before we free the object, make sure any pure RCU-only
- * read-side critical sections are complete, e.g.
- * i915_gem_busy_ioctl(). For the corresponding synchronized
- * lookup see i915_gem_object_lookup_rcu().
- */
- atomic_inc(&to_i915(obj->base.dev)->mm.free_count);
- call_rcu(&obj->rcu, __i915_gem_free_object_rcu);
-}
-
-static inline enum fb_op_origin
-fb_write_origin(struct drm_i915_gem_object *obj, unsigned int domain)
-{
- return (domain == I915_GEM_DOMAIN_GTT ?
- obj->frontbuffer_ggtt_origin : ORIGIN_CPU);
-}
-
static bool gpu_write_needs_clflush(struct drm_i915_gem_object *obj)
{
return !(obj->cache_level == I915_CACHE_NONE ||
@@ -319,7 +293,6 @@ void
i915_gem_object_flush_write_domain(struct drm_i915_gem_object *obj,
unsigned int flush_domains)
{
- struct drm_i915_private *dev_priv = to_i915(obj->base.dev);
struct i915_vma *vma;
assert_object_held(obj);
@@ -329,17 +302,14 @@ i915_gem_object_flush_write_domain(struct drm_i915_gem_object *obj,
switch (obj->write_domain) {
case I915_GEM_DOMAIN_GTT:
- i915_gem_flush_ggtt_writes(dev_priv);
-
- intel_fb_obj_flush(obj,
- fb_write_origin(obj, I915_GEM_DOMAIN_GTT));
-
+ spin_lock(&obj->vma.lock);
for_each_ggtt_vma(vma, obj) {
- if (vma->iomap)
- continue;
-
- i915_vma_unset_ggtt_write(vma);
+ if (i915_vma_unset_ggtt_write(vma))
+ intel_gt_flush_ggtt_writes(vma->vm->gt);
}
+ spin_unlock(&obj->vma.lock);
+
+ i915_gem_object_flush_frontbuffer(obj, ORIGIN_CPU);
break;
case I915_GEM_DOMAIN_WC:
@@ -359,6 +329,30 @@ i915_gem_object_flush_write_domain(struct drm_i915_gem_object *obj,
obj->write_domain = 0;
}
+void __i915_gem_object_flush_frontbuffer(struct drm_i915_gem_object *obj,
+ enum fb_op_origin origin)
+{
+ struct intel_frontbuffer *front;
+
+ front = __intel_frontbuffer_get(obj);
+ if (front) {
+ intel_frontbuffer_flush(front, origin);
+ intel_frontbuffer_put(front);
+ }
+}
+
+void __i915_gem_object_invalidate_frontbuffer(struct drm_i915_gem_object *obj,
+ enum fb_op_origin origin)
+{
+ struct intel_frontbuffer *front;
+
+ front = __intel_frontbuffer_get(obj);
+ if (front) {
+ intel_frontbuffer_invalidate(front, origin);
+ intel_frontbuffer_put(front);
+ }
+}
+
void i915_gem_init__objects(struct drm_i915_private *i915)
{
INIT_WORK(&i915->mm.free_work, __i915_gem_free_work);
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.h b/drivers/gpu/drm/i915/gem/i915_gem_object.h
index dfebd5706f16..9c86f2dea947 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_object.h
+++ b/drivers/gpu/drm/i915/gem/i915_gem_object.h
@@ -13,9 +13,10 @@
#include <drm/i915_drm.h>
+#include "display/intel_frontbuffer.h"
#include "i915_gem_object_types.h"
-
#include "i915_gem_gtt.h"
+#include "i915_vma_types.h"
void i915_gem_init__objects(struct drm_i915_private *i915);
@@ -23,12 +24,14 @@ struct drm_i915_gem_object *i915_gem_object_alloc(void);
void i915_gem_object_free(struct drm_i915_gem_object *obj);
void i915_gem_object_init(struct drm_i915_gem_object *obj,
- const struct drm_i915_gem_object_ops *ops);
+ const struct drm_i915_gem_object_ops *ops,
+ struct lock_class_key *key);
struct drm_i915_gem_object *
-i915_gem_object_create_shmem(struct drm_i915_private *i915, u64 size);
+i915_gem_object_create_shmem(struct drm_i915_private *i915,
+ resource_size_t size);
struct drm_i915_gem_object *
i915_gem_object_create_shmem_from_data(struct drm_i915_private *i915,
- const void *data, size_t size);
+ const void *data, resource_size_t size);
extern const struct drm_i915_gem_object_ops i915_gem_shmem_ops;
void __i915_gem_object_release_shmem(struct drm_i915_gem_object *obj,
@@ -67,21 +70,29 @@ i915_gem_object_lookup_rcu(struct drm_file *file, u32 handle)
}
static inline struct drm_i915_gem_object *
+i915_gem_object_get_rcu(struct drm_i915_gem_object *obj)
+{
+ if (obj && !kref_get_unless_zero(&obj->base.refcount))
+ obj = NULL;
+
+ return obj;
+}
+
+static inline struct drm_i915_gem_object *
i915_gem_object_lookup(struct drm_file *file, u32 handle)
{
struct drm_i915_gem_object *obj;
rcu_read_lock();
obj = i915_gem_object_lookup_rcu(file, handle);
- if (obj && !kref_get_unless_zero(&obj->base.refcount))
- obj = NULL;
+ obj = i915_gem_object_get_rcu(obj);
rcu_read_unlock();
return obj;
}
__deprecated
-extern struct drm_gem_object *
+struct drm_gem_object *
drm_gem_object_lookup(struct drm_file *file, u32 handle);
__attribute__((nonnull))
@@ -99,22 +110,27 @@ i915_gem_object_put(struct drm_i915_gem_object *obj)
__drm_gem_object_put(&obj->base);
}
-#define assert_object_held(obj) reservation_object_assert_held((obj)->base.resv)
+#define assert_object_held(obj) dma_resv_assert_held((obj)->base.resv)
static inline void i915_gem_object_lock(struct drm_i915_gem_object *obj)
{
- reservation_object_lock(obj->base.resv, NULL);
+ dma_resv_lock(obj->base.resv, NULL);
+}
+
+static inline bool i915_gem_object_trylock(struct drm_i915_gem_object *obj)
+{
+ return dma_resv_trylock(obj->base.resv);
}
static inline int
i915_gem_object_lock_interruptible(struct drm_i915_gem_object *obj)
{
- return reservation_object_lock_interruptible(obj->base.resv, NULL);
+ return dma_resv_lock_interruptible(obj->base.resv, NULL);
}
static inline void i915_gem_object_unlock(struct drm_i915_gem_object *obj)
{
- reservation_object_unlock(obj->base.resv);
+ dma_resv_unlock(obj->base.resv);
}
struct dma_fence *
@@ -125,49 +141,74 @@ void i915_gem_object_unlock_fence(struct drm_i915_gem_object *obj,
static inline void
i915_gem_object_set_readonly(struct drm_i915_gem_object *obj)
{
- obj->base.vma_node.readonly = true;
+ obj->flags |= I915_BO_READONLY;
}
static inline bool
i915_gem_object_is_readonly(const struct drm_i915_gem_object *obj)
{
- return obj->base.vma_node.readonly;
+ return obj->flags & I915_BO_READONLY;
+}
+
+static inline bool
+i915_gem_object_is_contiguous(const struct drm_i915_gem_object *obj)
+{
+ return obj->flags & I915_BO_ALLOC_CONTIGUOUS;
+}
+
+static inline bool
+i915_gem_object_is_volatile(const struct drm_i915_gem_object *obj)
+{
+ return obj->flags & I915_BO_ALLOC_VOLATILE;
+}
+
+static inline void
+i915_gem_object_set_volatile(struct drm_i915_gem_object *obj)
+{
+ obj->flags |= I915_BO_ALLOC_VOLATILE;
+}
+
+static inline bool
+i915_gem_object_type_has(const struct drm_i915_gem_object *obj,
+ unsigned long flags)
+{
+ return obj->ops->flags & flags;
}
static inline bool
i915_gem_object_has_struct_page(const struct drm_i915_gem_object *obj)
{
- return obj->ops->flags & I915_GEM_OBJECT_HAS_STRUCT_PAGE;
+ return i915_gem_object_type_has(obj, I915_GEM_OBJECT_HAS_STRUCT_PAGE);
}
static inline bool
i915_gem_object_is_shrinkable(const struct drm_i915_gem_object *obj)
{
- return obj->ops->flags & I915_GEM_OBJECT_IS_SHRINKABLE;
+ return i915_gem_object_type_has(obj, I915_GEM_OBJECT_IS_SHRINKABLE);
}
static inline bool
i915_gem_object_is_proxy(const struct drm_i915_gem_object *obj)
{
- return obj->ops->flags & I915_GEM_OBJECT_IS_PROXY;
+ return i915_gem_object_type_has(obj, I915_GEM_OBJECT_IS_PROXY);
}
static inline bool
-i915_gem_object_needs_async_cancel(const struct drm_i915_gem_object *obj)
+i915_gem_object_never_bind_ggtt(const struct drm_i915_gem_object *obj)
{
- return obj->ops->flags & I915_GEM_OBJECT_ASYNC_CANCEL;
+ return i915_gem_object_type_has(obj, I915_GEM_OBJECT_NO_GGTT);
}
static inline bool
-i915_gem_object_is_active(const struct drm_i915_gem_object *obj)
+i915_gem_object_needs_async_cancel(const struct drm_i915_gem_object *obj)
{
- return READ_ONCE(obj->active_count);
+ return i915_gem_object_type_has(obj, I915_GEM_OBJECT_ASYNC_CANCEL);
}
static inline bool
i915_gem_object_is_framebuffer(const struct drm_i915_gem_object *obj)
{
- return READ_ONCE(obj->framebuffer_references);
+ return READ_ONCE(obj->frontbuffer);
}
static inline unsigned int
@@ -239,10 +280,27 @@ void __i915_gem_object_set_pages(struct drm_i915_gem_object *obj,
int ____i915_gem_object_get_pages(struct drm_i915_gem_object *obj);
int __i915_gem_object_get_pages(struct drm_i915_gem_object *obj);
+enum i915_mm_subclass { /* lockdep subclass for obj->mm.lock/struct_mutex */
+ I915_MM_NORMAL = 0,
+ /*
+ * Only used by struct_mutex, when called "recursively" from
+ * direct-reclaim-esque. Safe because there is only every one
+ * direct-reclaim-esque paths. Safe because there is only ever one
+ */
+ I915_MM_SHRINKER = 1,
+ /*
+ * Used for obj->mm.lock when allocating pages. Safe because the object
+ * isn't yet on any LRU, and therefore the shrinker can't deadlock on
+ * it. As soon as the object has pages, obj->mm.lock nests within
+ * fs_reclaim.
+ */
+ I915_MM_GET_PAGES = 1,
+};
+
static inline int __must_check
i915_gem_object_pin_pages(struct drm_i915_gem_object *obj)
{
- might_lock(&obj->mm.lock);
+ might_lock_nested(&obj->mm.lock, I915_MM_GET_PAGES);
if (atomic_inc_not_zero(&obj->mm.pages_pin_count))
return 0;
@@ -285,13 +343,7 @@ i915_gem_object_unpin_pages(struct drm_i915_gem_object *obj)
__i915_gem_object_unpin_pages(obj);
}
-enum i915_mm_subclass { /* lockdep subclass for obj->mm.lock/struct_mutex */
- I915_MM_NORMAL = 0,
- I915_MM_SHRINKER /* called "recursively" from direct-reclaim-esque */
-};
-
-int __i915_gem_object_put_pages(struct drm_i915_gem_object *obj,
- enum i915_mm_subclass subclass);
+int __i915_gem_object_put_pages(struct drm_i915_gem_object *obj);
void i915_gem_object_truncate(struct drm_i915_gem_object *obj);
void i915_gem_object_writeback(struct drm_i915_gem_object *obj);
@@ -344,9 +396,6 @@ static inline void i915_gem_object_unpin_map(struct drm_i915_gem_object *obj)
i915_gem_object_unpin_pages(obj);
}
-void __i915_gem_object_release_mmap(struct drm_i915_gem_object *obj);
-void i915_gem_object_release_mmap(struct drm_i915_gem_object *obj);
-
void
i915_gem_object_flush_write_domain(struct drm_i915_gem_object *obj,
unsigned int flush_domains);
@@ -373,7 +422,7 @@ i915_gem_object_last_write_engine(struct drm_i915_gem_object *obj)
struct dma_fence *fence;
rcu_read_lock();
- fence = reservation_object_get_excl_rcu(obj->base.resv);
+ fence = dma_resv_get_excl_rcu(obj->base.resv);
rcu_read_unlock();
if (fence && dma_fence_is_i915(fence) && !dma_fence_is_signaled(fence))
@@ -400,6 +449,10 @@ i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj,
unsigned int flags);
void i915_gem_object_unpin_from_display_plane(struct i915_vma *vma);
+void i915_gem_object_make_unshrinkable(struct drm_i915_gem_object *obj);
+void i915_gem_object_make_shrinkable(struct drm_i915_gem_object *obj);
+void i915_gem_object_make_purgeable(struct drm_i915_gem_object *obj);
+
static inline bool cpu_write_needs_clflush(struct drm_i915_gem_object *obj)
{
if (obj->cache_dirty)
@@ -408,7 +461,8 @@ static inline bool cpu_write_needs_clflush(struct drm_i915_gem_object *obj)
if (!(obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_WRITE))
return true;
- return obj->pin_global; /* currently in use by HW, keep flushed */
+ /* Currently in use by HW (display engine)? Keep flushed. */
+ return i915_gem_object_is_framebuffer(obj);
}
static inline void __start_cpu_write(struct drm_i915_gem_object *obj)
@@ -425,6 +479,26 @@ int i915_gem_object_wait(struct drm_i915_gem_object *obj,
int i915_gem_object_wait_priority(struct drm_i915_gem_object *obj,
unsigned int flags,
const struct i915_sched_attr *attr);
-#define I915_PRIORITY_DISPLAY I915_USER_PRIORITY(I915_PRIORITY_MAX)
+
+void __i915_gem_object_flush_frontbuffer(struct drm_i915_gem_object *obj,
+ enum fb_op_origin origin);
+void __i915_gem_object_invalidate_frontbuffer(struct drm_i915_gem_object *obj,
+ enum fb_op_origin origin);
+
+static inline void
+i915_gem_object_flush_frontbuffer(struct drm_i915_gem_object *obj,
+ enum fb_op_origin origin)
+{
+ if (unlikely(rcu_access_pointer(obj->frontbuffer)))
+ __i915_gem_object_flush_frontbuffer(obj, origin);
+}
+
+static inline void
+i915_gem_object_invalidate_frontbuffer(struct drm_i915_gem_object *obj,
+ enum fb_op_origin origin)
+{
+ if (unlikely(rcu_access_pointer(obj->frontbuffer)))
+ __i915_gem_object_invalidate_frontbuffer(obj, origin);
+}
#endif
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object_blt.c b/drivers/gpu/drm/i915/gem/i915_gem_object_blt.c
index cb42e3a312e2..70809d8897cd 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_object_blt.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_object_blt.c
@@ -3,59 +3,137 @@
* Copyright © 2019 Intel Corporation
*/
-#include "i915_gem_object_blt.h"
-
+#include "i915_drv.h"
+#include "gt/intel_context.h"
+#include "gt/intel_engine_pm.h"
+#include "gt/intel_engine_pool.h"
+#include "gt/intel_gt.h"
+#include "gt/intel_ring.h"
#include "i915_gem_clflush.h"
-#include "intel_drv.h"
+#include "i915_gem_object_blt.h"
-int intel_emit_vma_fill_blt(struct i915_request *rq,
- struct i915_vma *vma,
- u32 value)
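+/*
+ * Build a pooled batch of XY_COLOR_BLT fill commands, split into block_size
+ * chunks with an MI_ARB_CHECK after each so the fill can be preempted, and
+ * return the pinned batch vma.
+ */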
+struct i915_vma *intel_emit_vma_fill_blt(struct intel_context *ce,
+ struct i915_vma *vma,
+ u32 value)
{
- u32 *cs;
-
- cs = intel_ring_begin(rq, 8);
- if (IS_ERR(cs))
- return PTR_ERR(cs);
-
- if (INTEL_GEN(rq->i915) >= 8) {
- *cs++ = XY_COLOR_BLT_CMD | BLT_WRITE_RGBA | (7 - 2);
- *cs++ = BLT_DEPTH_32 | BLT_ROP_COLOR_COPY | PAGE_SIZE;
- *cs++ = 0;
- *cs++ = vma->size >> PAGE_SHIFT << 16 | PAGE_SIZE / 4;
- *cs++ = lower_32_bits(vma->node.start);
- *cs++ = upper_32_bits(vma->node.start);
- *cs++ = value;
- *cs++ = MI_NOOP;
- } else {
- *cs++ = XY_COLOR_BLT_CMD | BLT_WRITE_RGBA | (6 - 2);
- *cs++ = BLT_DEPTH_32 | BLT_ROP_COLOR_COPY | PAGE_SIZE;
- *cs++ = 0;
- *cs++ = vma->size >> PAGE_SHIFT << 16 | PAGE_SIZE / 4;
- *cs++ = vma->node.start;
- *cs++ = value;
- *cs++ = MI_NOOP;
- *cs++ = MI_NOOP;
+ struct drm_i915_private *i915 = ce->vm->i915;
+ const u32 block_size = SZ_8M; /* ~1ms at 8GiB/s preemption delay */
+ struct intel_engine_pool_node *pool;
+ struct i915_vma *batch;
+ u64 offset;
+ u64 count;
+ u64 rem;
+ u32 size;
+ u32 *cmd;
+ int err;
+
+ GEM_BUG_ON(intel_engine_is_virtual(ce->engine));
+ intel_engine_pm_get(ce->engine);
+
+ count = div_u64(round_up(vma->size, block_size), block_size);
+ size = (1 + 8 * count) * sizeof(u32);
+ size = round_up(size, PAGE_SIZE);
+ pool = intel_engine_get_pool(ce->engine, size);
+ if (IS_ERR(pool)) {
+ err = PTR_ERR(pool);
+ goto out_pm;
+ }
+
+ cmd = i915_gem_object_pin_map(pool->obj, I915_MAP_WC);
+ if (IS_ERR(cmd)) {
+ err = PTR_ERR(cmd);
+ goto out_put;
+ }
+
+ rem = vma->size;
+ offset = vma->node.start;
+
+ do {
+ u32 size = min_t(u64, rem, block_size);
+
+ GEM_BUG_ON(size >> PAGE_SHIFT > S16_MAX);
+
+ if (INTEL_GEN(i915) >= 8) {
+ *cmd++ = XY_COLOR_BLT_CMD | BLT_WRITE_RGBA | (7 - 2);
+ *cmd++ = BLT_DEPTH_32 | BLT_ROP_COLOR_COPY | PAGE_SIZE;
+ *cmd++ = 0;
+ *cmd++ = size >> PAGE_SHIFT << 16 | PAGE_SIZE / 4;
+ *cmd++ = lower_32_bits(offset);
+ *cmd++ = upper_32_bits(offset);
+ *cmd++ = value;
+ } else {
+ *cmd++ = XY_COLOR_BLT_CMD | BLT_WRITE_RGBA | (6 - 2);
+ *cmd++ = BLT_DEPTH_32 | BLT_ROP_COLOR_COPY | PAGE_SIZE;
+ *cmd++ = 0;
+ *cmd++ = size >> PAGE_SHIFT << 16 | PAGE_SIZE / 4;
+ *cmd++ = offset;
+ *cmd++ = value;
+ }
+
+ /* Allow ourselves to be preempted in between blocks. */
+ *cmd++ = MI_ARB_CHECK;
+
+ offset += size;
+ rem -= size;
+ } while (rem);
+
+ *cmd = MI_BATCH_BUFFER_END;
+ intel_gt_chipset_flush(ce->vm->gt);
+
+ i915_gem_object_unpin_map(pool->obj);
+
+ batch = i915_vma_instance(pool->obj, ce->vm, NULL);
+ if (IS_ERR(batch)) {
+ err = PTR_ERR(batch);
+ goto out_put;
}
- intel_ring_advance(rq, cs);
+ err = i915_vma_pin(batch, 0, 0, PIN_USER);
+ if (unlikely(err))
+ goto out_put;
+
+ batch->private = pool;
+ return batch;
- return 0;
+out_put:
+ intel_engine_pool_put(pool);
+out_pm:
+ intel_engine_pm_put(ce->engine);
+ return ERR_PTR(err);
+}
+
+int intel_emit_vma_mark_active(struct i915_vma *vma, struct i915_request *rq)
+{
+ int err;
+
+ i915_vma_lock(vma);
+ err = i915_request_await_object(rq, vma->obj, false);
+ if (err == 0)
+ err = i915_vma_move_to_active(vma, rq, 0);
+ i915_vma_unlock(vma);
+ if (unlikely(err))
+ return err;
+
+ return intel_engine_pool_mark_active(vma->private, rq);
+}
+
+void intel_emit_vma_release(struct intel_context *ce, struct i915_vma *vma)
+{
+ i915_vma_unpin(vma);
+ intel_engine_pool_put(vma->private);
+ intel_engine_pm_put(ce->engine);
}
int i915_gem_object_fill_blt(struct drm_i915_gem_object *obj,
struct intel_context *ce,
u32 value)
{
- struct drm_i915_private *i915 = to_i915(obj->base.dev);
- struct i915_gem_context *ctx = ce->gem_context;
- struct i915_address_space *vm = ctx->vm ?: &i915->ggtt.vm;
struct i915_request *rq;
+ struct i915_vma *batch;
struct i915_vma *vma;
int err;
- /* XXX: ce->vm please */
- vma = i915_vma_instance(obj, vm, NULL);
+ vma = i915_vma_instance(obj, ce->vm, NULL);
if (IS_ERR(vma))
return PTR_ERR(vma);
@@ -69,12 +147,22 @@ int i915_gem_object_fill_blt(struct drm_i915_gem_object *obj,
i915_gem_object_unlock(obj);
}
- rq = i915_request_create(ce);
+ batch = intel_emit_vma_fill_blt(ce, vma, value);
+ if (IS_ERR(batch)) {
+ err = PTR_ERR(batch);
+ goto out_unpin;
+ }
+
+ rq = intel_context_create_request(ce);
if (IS_ERR(rq)) {
err = PTR_ERR(rq);
- goto out_unpin;
+ goto out_batch;
}
+ err = intel_emit_vma_mark_active(batch, rq);
+ if (unlikely(err))
+ goto out_request;
+
err = i915_request_await_object(rq, obj, true);
if (unlikely(err))
goto out_request;
@@ -86,22 +174,229 @@ int i915_gem_object_fill_blt(struct drm_i915_gem_object *obj,
}
i915_vma_lock(vma);
- err = i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE);
+ err = i915_request_await_object(rq, vma->obj, true);
+ if (err == 0)
+ err = i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE);
i915_vma_unlock(vma);
if (unlikely(err))
goto out_request;
- err = intel_emit_vma_fill_blt(rq, vma, value);
+ err = ce->engine->emit_bb_start(rq,
+ batch->node.start, batch->node.size,
+ 0);
out_request:
if (unlikely(err))
i915_request_skip(rq, err);
i915_request_add(rq);
+out_batch:
+ intel_emit_vma_release(ce, batch);
out_unpin:
i915_vma_unpin(vma);
return err;
}
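+/*
+ * Build a pooled batch of copy blits (fast-copy on gen9+, XY_SRC_COPY on
+ * gen8, SRC_COPY before that), again split into preemptible block_size
+ * chunks, and return the pinned batch vma.
+ */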
+struct i915_vma *intel_emit_vma_copy_blt(struct intel_context *ce,
+ struct i915_vma *src,
+ struct i915_vma *dst)
+{
+ struct drm_i915_private *i915 = ce->vm->i915;
+ const u32 block_size = SZ_8M; /* ~1ms at 8GiB/s preemption delay */
+ struct intel_engine_pool_node *pool;
+ struct i915_vma *batch;
+ u64 src_offset, dst_offset;
+ u64 count, rem;
+ u32 size, *cmd;
+ int err;
+
+ GEM_BUG_ON(src->size != dst->size);
+
+ GEM_BUG_ON(intel_engine_is_virtual(ce->engine));
+ intel_engine_pm_get(ce->engine);
+
+ count = div_u64(round_up(dst->size, block_size), block_size);
+ size = (1 + 11 * count) * sizeof(u32);
+ size = round_up(size, PAGE_SIZE);
+ pool = intel_engine_get_pool(ce->engine, size);
+ if (IS_ERR(pool)) {
+ err = PTR_ERR(pool);
+ goto out_pm;
+ }
+
+ cmd = i915_gem_object_pin_map(pool->obj, I915_MAP_WC);
+ if (IS_ERR(cmd)) {
+ err = PTR_ERR(cmd);
+ goto out_put;
+ }
+
+ rem = src->size;
+ src_offset = src->node.start;
+ dst_offset = dst->node.start;
+
+ do {
+ size = min_t(u64, rem, block_size);
+ GEM_BUG_ON(size >> PAGE_SHIFT > S16_MAX);
+
+ if (INTEL_GEN(i915) >= 9) {
+ *cmd++ = GEN9_XY_FAST_COPY_BLT_CMD | (10 - 2);
+ *cmd++ = BLT_DEPTH_32 | PAGE_SIZE;
+ *cmd++ = 0;
+ *cmd++ = size >> PAGE_SHIFT << 16 | PAGE_SIZE / 4;
+ *cmd++ = lower_32_bits(dst_offset);
+ *cmd++ = upper_32_bits(dst_offset);
+ *cmd++ = 0;
+ *cmd++ = PAGE_SIZE;
+ *cmd++ = lower_32_bits(src_offset);
+ *cmd++ = upper_32_bits(src_offset);
+ } else if (INTEL_GEN(i915) >= 8) {
+ *cmd++ = XY_SRC_COPY_BLT_CMD | BLT_WRITE_RGBA | (10 - 2);
+ *cmd++ = BLT_DEPTH_32 | BLT_ROP_SRC_COPY | PAGE_SIZE;
+ *cmd++ = 0;
+ *cmd++ = size >> PAGE_SHIFT << 16 | PAGE_SIZE / 4;
+ *cmd++ = lower_32_bits(dst_offset);
+ *cmd++ = upper_32_bits(dst_offset);
+ *cmd++ = 0;
+ *cmd++ = PAGE_SIZE;
+ *cmd++ = lower_32_bits(src_offset);
+ *cmd++ = upper_32_bits(src_offset);
+ } else {
+ *cmd++ = SRC_COPY_BLT_CMD | BLT_WRITE_RGBA | (6 - 2);
+ *cmd++ = BLT_DEPTH_32 | BLT_ROP_SRC_COPY | PAGE_SIZE;
+ *cmd++ = size >> PAGE_SHIFT << 16 | PAGE_SIZE;
+ *cmd++ = dst_offset;
+ *cmd++ = PAGE_SIZE;
+ *cmd++ = src_offset;
+ }
+
+ /* Allow ourselves to be preempted in between blocks. */
+ *cmd++ = MI_ARB_CHECK;
+
+ src_offset += size;
+ dst_offset += size;
+ rem -= size;
+ } while (rem);
+
+ *cmd = MI_BATCH_BUFFER_END;
+ intel_gt_chipset_flush(ce->vm->gt);
+
+ i915_gem_object_unpin_map(pool->obj);
+
+ batch = i915_vma_instance(pool->obj, ce->vm, NULL);
+ if (IS_ERR(batch)) {
+ err = PTR_ERR(batch);
+ goto out_put;
+ }
+
+ err = i915_vma_pin(batch, 0, 0, PIN_USER);
+ if (unlikely(err))
+ goto out_put;
+
+ batch->private = pool;
+ return batch;
+
+out_put:
+ intel_engine_pool_put(pool);
+out_pm:
+ intel_engine_pm_put(ce->engine);
+ return ERR_PTR(err);
+}
+
+static int move_to_gpu(struct i915_vma *vma, struct i915_request *rq, bool write)
+{
+ struct drm_i915_gem_object *obj = vma->obj;
+
+ if (obj->cache_dirty & ~obj->cache_coherent)
+ i915_gem_clflush_object(obj, 0);
+
+ return i915_request_await_object(rq, obj, write);
+}
+
+int i915_gem_object_copy_blt(struct drm_i915_gem_object *src,
+ struct drm_i915_gem_object *dst,
+ struct intel_context *ce)
+{
+ struct drm_gem_object *objs[] = { &src->base, &dst->base };
+ struct i915_address_space *vm = ce->vm;
+ struct i915_vma *vma[2], *batch;
+ struct ww_acquire_ctx acquire;
+ struct i915_request *rq;
+ int err, i;
+
+ vma[0] = i915_vma_instance(src, vm, NULL);
+ if (IS_ERR(vma[0]))
+ return PTR_ERR(vma[0]);
+
+ err = i915_vma_pin(vma[0], 0, 0, PIN_USER);
+ if (unlikely(err))
+ return err;
+
+ vma[1] = i915_vma_instance(dst, vm, NULL);
+ if (IS_ERR(vma[1])) {
+ err = PTR_ERR(vma[1]);
+ goto out_unpin_src;
+ }
+
+ err = i915_vma_pin(vma[1], 0, 0, PIN_USER);
+ if (unlikely(err))
+ goto out_unpin_src;
+
+ batch = intel_emit_vma_copy_blt(ce, vma[0], vma[1]);
+ if (IS_ERR(batch)) {
+ err = PTR_ERR(batch);
+ goto out_unpin_dst;
+ }
+
+ rq = intel_context_create_request(ce);
+ if (IS_ERR(rq)) {
+ err = PTR_ERR(rq);
+ goto out_batch;
+ }
+
+ err = intel_emit_vma_mark_active(batch, rq);
+ if (unlikely(err))
+ goto out_request;
+
+ err = drm_gem_lock_reservations(objs, ARRAY_SIZE(objs), &acquire);
+ if (unlikely(err))
+ goto out_request;
+
+ for (i = 0; i < ARRAY_SIZE(vma); i++) {
+ err = move_to_gpu(vma[i], rq, i);
+ if (unlikely(err))
+ goto out_unlock;
+ }
+
+ for (i = 0; i < ARRAY_SIZE(vma); i++) {
+ unsigned int flags = i ? EXEC_OBJECT_WRITE : 0;
+
+ err = i915_vma_move_to_active(vma[i], rq, flags);
+ if (unlikely(err))
+ goto out_unlock;
+ }
+
+ if (rq->engine->emit_init_breadcrumb) {
+ err = rq->engine->emit_init_breadcrumb(rq);
+ if (unlikely(err))
+ goto out_unlock;
+ }
+
+ err = rq->engine->emit_bb_start(rq,
+ batch->node.start, batch->node.size,
+ 0);
+out_unlock:
+ drm_gem_unlock_reservations(objs, ARRAY_SIZE(objs), &acquire);
+out_request:
+ if (unlikely(err))
+ i915_request_skip(rq, err);
+
+ i915_request_add(rq);
+out_batch:
+ intel_emit_vma_release(ce, batch);
+out_unpin_dst:
+ i915_vma_unpin(vma[1]);
+out_unpin_src:
+ i915_vma_unpin(vma[0]);
+ return err;
+}
+
#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
#include "selftests/i915_gem_object_blt.c"
#endif
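A note on the sizing above: intel_emit_vma_copy_blt() splits the copy into SZ_8M blocks so the engine can be preempted between blocks, and sizes the pool buffer for the worst case of 11 dwords per block (a 10-dword blit command plus MI_ARB_CHECK) and one trailing MI_BATCH_BUFFER_END. A minimal sketch of that arithmetic, with a made-up helper name:

static u32 copy_blt_batch_bytes(u64 total_bytes, u32 block_size)
{
	/* number of block_size chunks needed to cover the object */
	u64 count = div_u64(round_up(total_bytes, block_size), block_size);

	/* up to 11 dwords per chunk + 1 dword terminator, page aligned */
	return round_up((1 + 11 * count) * sizeof(u32), PAGE_SIZE);
}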
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object_blt.h b/drivers/gpu/drm/i915/gem/i915_gem_object_blt.h
index 7ec7de6ac0c0..243a43a87824 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_object_blt.h
+++ b/drivers/gpu/drm/i915/gem/i915_gem_object_blt.h
@@ -8,17 +8,30 @@
#include <linux/types.h>
+#include "gt/intel_context.h"
+#include "gt/intel_engine_pm.h"
+#include "gt/intel_engine_pool.h"
+#include "i915_vma.h"
+
struct drm_i915_gem_object;
-struct intel_context;
-struct i915_request;
-struct i915_vma;
-int intel_emit_vma_fill_blt(struct i915_request *rq,
- struct i915_vma *vma,
- u32 value);
+struct i915_vma *intel_emit_vma_fill_blt(struct intel_context *ce,
+ struct i915_vma *vma,
+ u32 value);
+
+struct i915_vma *intel_emit_vma_copy_blt(struct intel_context *ce,
+ struct i915_vma *src,
+ struct i915_vma *dst);
+
+int intel_emit_vma_mark_active(struct i915_vma *vma, struct i915_request *rq);
+void intel_emit_vma_release(struct intel_context *ce, struct i915_vma *vma);
int i915_gem_object_fill_blt(struct drm_i915_gem_object *obj,
struct intel_context *ce,
u32 value);
+int i915_gem_object_copy_blt(struct drm_i915_gem_object *src,
+ struct drm_i915_gem_object *dst,
+ struct intel_context *ce);
+
#endif
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
index 18bf4f8d6d80..f64ad77e6b1e 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
+++ b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
@@ -8,11 +8,13 @@
#define __I915_GEM_OBJECT_TYPES_H__
#include <drm/drm_gem.h>
+#include <uapi/drm/i915_drm.h>
#include "i915_active.h"
#include "i915_selftest.h"
struct drm_i915_gem_object;
+struct intel_frontbuffer;
/*
* struct i915_lut_handle tracks the fast lookups from handle to vma used
@@ -29,9 +31,11 @@ struct i915_lut_handle {
struct drm_i915_gem_object_ops {
unsigned int flags;
#define I915_GEM_OBJECT_HAS_STRUCT_PAGE BIT(0)
-#define I915_GEM_OBJECT_IS_SHRINKABLE BIT(1)
-#define I915_GEM_OBJECT_IS_PROXY BIT(2)
-#define I915_GEM_OBJECT_ASYNC_CANCEL BIT(3)
+#define I915_GEM_OBJECT_HAS_IOMEM BIT(1)
+#define I915_GEM_OBJECT_IS_SHRINKABLE BIT(2)
+#define I915_GEM_OBJECT_IS_PROXY BIT(3)
+#define I915_GEM_OBJECT_NO_GGTT BIT(4)
+#define I915_GEM_OBJECT_ASYNC_CANCEL BIT(5)
/* Interface between the GEM object and its backing storage.
* get_pages() is called once prior to the use of the associated set
@@ -59,6 +63,21 @@ struct drm_i915_gem_object_ops {
void (*release)(struct drm_i915_gem_object *obj);
};
+enum i915_mmap_type {
+ I915_MMAP_TYPE_GTT = 0,
+ I915_MMAP_TYPE_WC,
+ I915_MMAP_TYPE_WB,
+ I915_MMAP_TYPE_UC,
+};
+
+struct i915_mmap_offset {
+ struct drm_vma_offset_node vma_node;
+ struct drm_i915_gem_object *obj;
+ enum i915_mmap_type mmap_type;
+
+ struct rb_node offset;
+};
+
struct drm_i915_gem_object {
struct drm_gem_object base;
@@ -114,9 +133,19 @@ struct drm_i915_gem_object {
unsigned int userfault_count;
struct list_head userfault_link;
- struct list_head batch_pool_link;
+ struct {
+ spinlock_t lock; /* Protects access to mmo offsets */
+ struct rb_root offsets;
+ } mmo;
+
I915_SELFTEST_DECLARE(struct list_head st_link);
+ unsigned long flags;
+#define I915_BO_ALLOC_CONTIGUOUS BIT(0)
+#define I915_BO_ALLOC_VOLATILE BIT(1)
+#define I915_BO_ALLOC_FLAGS (I915_BO_ALLOC_CONTIGUOUS | I915_BO_ALLOC_VOLATILE)
+#define I915_BO_READONLY BIT(2)
+
/*
* Is the object to be mapped as read-only to the GPU
* Only honoured if hardware has relevant pte bit
@@ -142,9 +171,7 @@ struct drm_i915_gem_object {
*/
u16 write_domain;
- atomic_t frontbuffer_bits;
- unsigned int frontbuffer_ggtt_origin; /* write once */
- struct i915_active_request frontbuffer_write;
+ struct intel_frontbuffer __rcu *frontbuffer;
/** Current tiling stride for the object, if it's tiled. */
unsigned int tiling_and_stride;
@@ -154,18 +181,34 @@ struct drm_i915_gem_object {
/** Count of VMA actually bound by this object */
atomic_t bind_count;
- unsigned int active_count;
- /** Count of how many global VMA are currently pinned for use by HW */
- unsigned int pin_global;
struct {
- struct mutex lock; /* protects the pages and their use */
+ /*
+ * Protects the pages and their use. Do not use directly, but
+ * instead go through the pin/unpin interfaces.
+ */
+ struct mutex lock;
atomic_t pages_pin_count;
+ atomic_t shrink_pin;
+
+ /**
+ * Memory region for this object.
+ */
+ struct intel_memory_region *region;
+ /**
+ * List of memory region blocks allocated for this object.
+ */
+ struct list_head blocks;
+ /**
+ * Element within memory_region->objects.list, or
+ * memory_region->objects.purgeable if the object is marked as
+ * DONTNEED. Access is protected by memory_region->objects.lock.
+ */
+ struct list_head region_link;
struct sg_table *pages;
void *mapping;
- /* TODO: whack some of this into the error state */
struct i915_page_sizes {
/**
* The sg mask of the pages sg_table. i.e the mask of
@@ -226,9 +269,6 @@ struct drm_i915_gem_object {
bool quirked:1;
} mm;
- /** References from framebuffers, locks out tiling changes. */
- unsigned int framebuffer_references;
-
/** Record of address bit 17 of each page at last unbind. */
unsigned long *bit_17;
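For orientation, the new i915_mmap_offset node records which CPU caching mode a userspace mapping asked for. Roughly, each type is expected to translate into a page protection along these lines (a sketch only; the helper name is invented, and the GTT case really goes through the aperture fault path rather than a simple pgprot tweak):

static pgprot_t mmap_type_prot(enum i915_mmap_type type, pgprot_t prot)
{
	switch (type) {
	case I915_MMAP_TYPE_WC:
		return pgprot_writecombine(prot);
	case I915_MMAP_TYPE_UC:
		return pgprot_noncached(prot);
	case I915_MMAP_TYPE_WB:
	case I915_MMAP_TYPE_GTT:	/* mapped via the GGTT aperture */
	default:
		return prot;
	}
}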
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_pages.c b/drivers/gpu/drm/i915/gem/i915_gem_pages.c
index 65eb430cedba..54aca5c9101e 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_pages.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_pages.c
@@ -7,6 +7,8 @@
#include "i915_drv.h"
#include "i915_gem_object.h"
#include "i915_scatterlist.h"
+#include "i915_gem_lmem.h"
+#include "i915_gem_mman.h"
void __i915_gem_object_set_pages(struct drm_i915_gem_object *obj,
struct sg_table *pages,
@@ -18,6 +20,9 @@ void __i915_gem_object_set_pages(struct drm_i915_gem_object *obj,
lockdep_assert_held(&obj->mm.lock);
+ if (i915_gem_object_is_volatile(obj))
+ obj->mm.madv = I915_MADV_DONTNEED;
+
/* Make the pages coherent with the GPU (flushing any swapin). */
if (obj->cache_dirty) {
obj->write_domain = 0;
@@ -71,6 +76,7 @@ void __i915_gem_object_set_pages(struct drm_i915_gem_object *obj,
list = &i915->mm.shrink_list;
list_add_tail(&obj->mm.link, list);
+ atomic_set(&obj->mm.shrink_pin, 0);
spin_unlock_irqrestore(&i915->mm.obj_lock, flags);
}
}
@@ -101,7 +107,7 @@ int __i915_gem_object_get_pages(struct drm_i915_gem_object *obj)
{
int err;
- err = mutex_lock_interruptible(&obj->mm.lock);
+ err = mutex_lock_interruptible_nested(&obj->mm.lock, I915_MM_GET_PAGES);
if (err)
return err;
@@ -150,37 +156,30 @@ static void __i915_gem_object_reset_page_iter(struct drm_i915_gem_object *obj)
rcu_read_unlock();
}
+static void unmap_object(struct drm_i915_gem_object *obj, void *ptr)
+{
+ if (is_vmalloc_addr(ptr))
+ vunmap(ptr);
+ else
+ kunmap(kmap_to_page(ptr));
+}
+
struct sg_table *
__i915_gem_object_unset_pages(struct drm_i915_gem_object *obj)
{
- struct drm_i915_private *i915 = to_i915(obj->base.dev);
struct sg_table *pages;
pages = fetch_and_zero(&obj->mm.pages);
if (IS_ERR_OR_NULL(pages))
return pages;
- if (i915_gem_object_is_shrinkable(obj)) {
- unsigned long flags;
+ if (i915_gem_object_is_volatile(obj))
+ obj->mm.madv = I915_MADV_WILLNEED;
- spin_lock_irqsave(&i915->mm.obj_lock, flags);
-
- list_del(&obj->mm.link);
- i915->mm.shrink_count--;
- i915->mm.shrink_memory -= obj->base.size;
-
- spin_unlock_irqrestore(&i915->mm.obj_lock, flags);
- }
+ i915_gem_object_make_unshrinkable(obj);
if (obj->mm.mapping) {
- void *ptr;
-
- ptr = page_mask_bits(obj->mm.mapping);
- if (is_vmalloc_addr(ptr))
- vunmap(ptr);
- else
- kunmap(kmap_to_page(ptr));
-
+ unmap_object(obj, page_mask_bits(obj->mm.mapping));
obj->mm.mapping = NULL;
}
@@ -190,8 +189,7 @@ __i915_gem_object_unset_pages(struct drm_i915_gem_object *obj)
return pages;
}
-int __i915_gem_object_put_pages(struct drm_i915_gem_object *obj,
- enum i915_mm_subclass subclass)
+int __i915_gem_object_put_pages(struct drm_i915_gem_object *obj)
{
struct sg_table *pages;
int err;
@@ -202,12 +200,14 @@ int __i915_gem_object_put_pages(struct drm_i915_gem_object *obj,
GEM_BUG_ON(atomic_read(&obj->bind_count));
/* May be called by shrinker from within get_pages() (on another bo) */
- mutex_lock_nested(&obj->mm.lock, subclass);
+ mutex_lock(&obj->mm.lock);
if (unlikely(atomic_read(&obj->mm.pages_pin_count))) {
err = -EBUSY;
goto unlock;
}
+ i915_gem_object_release_mmap_offset(obj);
+
/*
* ->put_pages might need to allocate memory for the bit17 swizzle
* array, hence protect them from being reaped by removing them from gtt
@@ -234,36 +234,44 @@ unlock:
return err;
}
+static inline pte_t iomap_pte(resource_size_t base,
+ dma_addr_t offset,
+ pgprot_t prot)
+{
+ return pte_mkspecial(pfn_pte((base + offset) >> PAGE_SHIFT, prot));
+}
+
/* The 'mapping' part of i915_gem_object_pin_map() below */
-static void *i915_gem_object_map(const struct drm_i915_gem_object *obj,
+static void *i915_gem_object_map(struct drm_i915_gem_object *obj,
enum i915_map_type type)
{
- unsigned long n_pages = obj->base.size >> PAGE_SHIFT;
+ unsigned long n_pte = obj->base.size >> PAGE_SHIFT;
struct sg_table *sgt = obj->mm.pages;
- struct sgt_iter sgt_iter;
- struct page *page;
- struct page *stack_pages[32];
- struct page **pages = stack_pages;
- unsigned long i = 0;
+ pte_t *stack[32], **mem;
+ struct vm_struct *area;
pgprot_t pgprot;
- void *addr;
+
+ if (!i915_gem_object_has_struct_page(obj) && type != I915_MAP_WC)
+ return NULL;
/* A single page can always be kmapped */
- if (n_pages == 1 && type == I915_MAP_WB)
+ if (n_pte == 1 && type == I915_MAP_WB)
return kmap(sg_page(sgt->sgl));
- if (n_pages > ARRAY_SIZE(stack_pages)) {
+ mem = stack;
+ if (n_pte > ARRAY_SIZE(stack)) {
/* Too big for stack -- allocate temporary array instead */
- pages = kvmalloc_array(n_pages, sizeof(*pages), GFP_KERNEL);
- if (!pages)
+ mem = kvmalloc_array(n_pte, sizeof(*mem), GFP_KERNEL);
+ if (!mem)
return NULL;
}
- for_each_sgt_page(page, sgt_iter, sgt)
- pages[i++] = page;
-
- /* Check that we have the expected number of pages */
- GEM_BUG_ON(i != n_pages);
+ area = alloc_vm_area(obj->base.size, mem);
+ if (!area) {
+ if (mem != stack)
+ kvfree(mem);
+ return NULL;
+ }
switch (type) {
default:
@@ -276,12 +284,31 @@ static void *i915_gem_object_map(const struct drm_i915_gem_object *obj,
pgprot = pgprot_writecombine(PAGE_KERNEL_IO);
break;
}
- addr = vmap(pages, n_pages, 0, pgprot);
- if (pages != stack_pages)
- kvfree(pages);
+ if (i915_gem_object_has_struct_page(obj)) {
+ struct sgt_iter iter;
+ struct page *page;
+ pte_t **ptes = mem;
+
+ for_each_sgt_page(page, iter, sgt)
+ **ptes++ = mk_pte(page, pgprot);
+ } else {
+ resource_size_t iomap;
+ struct sgt_iter iter;
+ pte_t **ptes = mem;
+ dma_addr_t addr;
- return addr;
+ iomap = obj->mm.region->iomap.base;
+ iomap -= obj->mm.region->region.start;
+
+ for_each_sgt_daddr(addr, iter, sgt)
+ **ptes++ = iomap_pte(iomap, addr, pgprot);
+ }
+
+ if (mem != stack)
+ kvfree(mem);
+
+ return area->addr;
}
/* get, pin, and map the pages of the object into kernel space */
@@ -289,14 +316,16 @@ void *i915_gem_object_pin_map(struct drm_i915_gem_object *obj,
enum i915_map_type type)
{
enum i915_map_type has_type;
+ unsigned int flags;
bool pinned;
void *ptr;
int err;
- if (unlikely(!i915_gem_object_has_struct_page(obj)))
+ flags = I915_GEM_OBJECT_HAS_STRUCT_PAGE | I915_GEM_OBJECT_HAS_IOMEM;
+ if (!i915_gem_object_type_has(obj, flags))
return ERR_PTR(-ENXIO);
- err = mutex_lock_interruptible(&obj->mm.lock);
+ err = mutex_lock_interruptible_nested(&obj->mm.lock, I915_MM_GET_PAGES);
if (err)
return ERR_PTR(err);
@@ -325,10 +354,7 @@ void *i915_gem_object_pin_map(struct drm_i915_gem_object *obj,
goto err_unpin;
}
- if (is_vmalloc_addr(ptr))
- vunmap(ptr);
- else
- kunmap(kmap_to_page(ptr));
+ unmap_object(obj, ptr);
ptr = obj->mm.mapping = NULL;
}
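With the rework above, i915_gem_object_pin_map() serves both struct-page and iomem backed objects (iomem only via I915_MAP_WC). Typical use is unchanged; a small sketch with an invented function name:

static int clear_object_cpu(struct drm_i915_gem_object *obj)
{
	void *vaddr;

	/* pins the pages and vmaps them into the kernel address space */
	vaddr = i915_gem_object_pin_map(obj, I915_MAP_WC);
	if (IS_ERR(vaddr))
		return PTR_ERR(vaddr);

	memset(vaddr, 0, obj->base.size);

	i915_gem_object_unpin_map(obj);
	return 0;
}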
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_phys.c b/drivers/gpu/drm/i915/gem/i915_gem_phys.c
index 2deac933cf59..b1b7c1b3038a 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_phys.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_phys.c
@@ -13,8 +13,10 @@
#include <drm/drm_legacy.h> /* for drm_pci.h! */
#include <drm/drm_pci.h>
+#include "gt/intel_gt.h"
#include "i915_drv.h"
#include "i915_gem_object.h"
+#include "i915_gem_region.h"
#include "i915_scatterlist.h"
static int i915_gem_object_get_pages_phys(struct drm_i915_gem_object *obj)
@@ -60,7 +62,7 @@ static int i915_gem_object_get_pages_phys(struct drm_i915_gem_object *obj)
vaddr += PAGE_SIZE;
}
- i915_gem_chipset_flush(to_i915(obj->base.dev));
+ intel_gt_chipset_flush(&to_i915(obj->base.dev)->gt);
st = kmalloc(sizeof(*st), GFP_KERNEL);
if (!st) {
@@ -132,16 +134,16 @@ i915_gem_object_put_pages_phys(struct drm_i915_gem_object *obj,
drm_pci_free(obj->base.dev, obj->phys_handle);
}
-static void
-i915_gem_object_release_phys(struct drm_i915_gem_object *obj)
+static void phys_release(struct drm_i915_gem_object *obj)
{
- i915_gem_object_unpin_pages(obj);
+ fput(obj->base.filp);
}
static const struct drm_i915_gem_object_ops i915_gem_phys_ops = {
.get_pages = i915_gem_object_get_pages_phys,
.put_pages = i915_gem_object_put_pages_phys,
- .release = i915_gem_object_release_phys,
+
+ .release = phys_release,
};
int i915_gem_object_attach_phys(struct drm_i915_gem_object *obj, int align)
@@ -158,11 +160,11 @@ int i915_gem_object_attach_phys(struct drm_i915_gem_object *obj, int align)
if (obj->ops != &i915_gem_shmem_ops)
return -EINVAL;
- err = i915_gem_object_unbind(obj);
+ err = i915_gem_object_unbind(obj, I915_GEM_OBJECT_UNBIND_ACTIVE);
if (err)
return err;
- mutex_lock(&obj->mm.lock);
+ mutex_lock_nested(&obj->mm.lock, I915_MM_GET_PAGES);
if (obj->mm.madv != I915_MADV_WILLNEED) {
err = -EFAULT;
@@ -190,8 +192,10 @@ int i915_gem_object_attach_phys(struct drm_i915_gem_object *obj, int align)
/* Perma-pin (until release) the physical set of pages */
__i915_gem_object_pin_pages(obj);
- if (!IS_ERR_OR_NULL(pages))
+ if (!IS_ERR_OR_NULL(pages)) {
i915_gem_shmem_ops.put_pages(obj, pages);
+ i915_gem_object_release_memory_region(obj);
+ }
mutex_unlock(&obj->mm.lock);
return 0;
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_pm.c b/drivers/gpu/drm/i915/gem/i915_gem_pm.c
index 914b5d4112bb..c8264eb036bf 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_pm.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_pm.c
@@ -5,149 +5,19 @@
*/
#include "gem/i915_gem_pm.h"
+#include "gt/intel_gt.h"
#include "gt/intel_gt_pm.h"
+#include "gt/intel_gt_requests.h"
#include "i915_drv.h"
-#include "i915_globals.h"
-
-static void call_idle_barriers(struct intel_engine_cs *engine)
-{
- struct llist_node *node, *next;
-
- llist_for_each_safe(node, next, llist_del_all(&engine->barrier_tasks)) {
- struct i915_active_request *active =
- container_of((struct list_head *)node,
- typeof(*active), link);
-
- INIT_LIST_HEAD(&active->link);
- RCU_INIT_POINTER(active->request, NULL);
-
- active->retire(active, NULL);
- }
-}
-
-static void i915_gem_park(struct drm_i915_private *i915)
-{
- struct intel_engine_cs *engine;
- enum intel_engine_id id;
-
- lockdep_assert_held(&i915->drm.struct_mutex);
-
- for_each_engine(engine, i915, id) {
- call_idle_barriers(engine); /* cleanup after wedging */
- i915_gem_batch_pool_fini(&engine->batch_pool);
- }
-
- i915_timelines_park(i915);
- i915_vma_parked(i915);
-
- i915_globals_park();
-}
-
-static void idle_work_handler(struct work_struct *work)
-{
- struct drm_i915_private *i915 =
- container_of(work, typeof(*i915), gem.idle_work);
- bool park;
-
- cancel_delayed_work_sync(&i915->gem.retire_work);
- mutex_lock(&i915->drm.struct_mutex);
-
- intel_wakeref_lock(&i915->gt.wakeref);
- park = !intel_wakeref_active(&i915->gt.wakeref) && !work_pending(work);
- intel_wakeref_unlock(&i915->gt.wakeref);
- if (park)
- i915_gem_park(i915);
- else
- queue_delayed_work(i915->wq,
- &i915->gem.retire_work,
- round_jiffies_up_relative(HZ));
-
- mutex_unlock(&i915->drm.struct_mutex);
-}
-
-static void retire_work_handler(struct work_struct *work)
-{
- struct drm_i915_private *i915 =
- container_of(work, typeof(*i915), gem.retire_work.work);
-
- /* Come back later if the device is busy... */
- if (mutex_trylock(&i915->drm.struct_mutex)) {
- i915_retire_requests(i915);
- mutex_unlock(&i915->drm.struct_mutex);
- }
-
- queue_delayed_work(i915->wq,
- &i915->gem.retire_work,
- round_jiffies_up_relative(HZ));
-}
-
-static int pm_notifier(struct notifier_block *nb,
- unsigned long action,
- void *data)
-{
- struct drm_i915_private *i915 =
- container_of(nb, typeof(*i915), gem.pm_notifier);
-
- switch (action) {
- case INTEL_GT_UNPARK:
- i915_globals_unpark();
- queue_delayed_work(i915->wq,
- &i915->gem.retire_work,
- round_jiffies_up_relative(HZ));
- break;
-
- case INTEL_GT_PARK:
- queue_work(i915->wq, &i915->gem.idle_work);
- break;
- }
-
- return NOTIFY_OK;
-}
-
-static bool switch_to_kernel_context_sync(struct drm_i915_private *i915)
-{
- bool result = !i915_terminally_wedged(i915);
-
- do {
- if (i915_gem_wait_for_idle(i915,
- I915_WAIT_LOCKED |
- I915_WAIT_FOR_IDLE_BOOST,
- I915_GEM_IDLE_TIMEOUT) == -ETIME) {
- /* XXX hide warning from gem_eio */
- if (i915_modparams.reset) {
- dev_err(i915->drm.dev,
- "Failed to idle engines, declaring wedged!\n");
- GEM_TRACE_DUMP();
- }
-
- /*
- * Forcibly cancel outstanding work and leave
- * the gpu quiet.
- */
- i915_gem_set_wedged(i915);
- result = false;
- }
- } while (i915_retire_requests(i915) && result);
-
- GEM_BUG_ON(i915->gt.awake);
- return result;
-}
-
-bool i915_gem_load_power_context(struct drm_i915_private *i915)
-{
- return switch_to_kernel_context_sync(i915);
-}
void i915_gem_suspend(struct drm_i915_private *i915)
{
- GEM_TRACE("\n");
+ GEM_TRACE("%s\n", dev_name(i915->drm.dev));
intel_wakeref_auto(&i915->ggtt.userfault_wakeref, 0);
flush_workqueue(i915->wq);
- mutex_lock(&i915->drm.struct_mutex);
-
/*
* We have to flush all the executing contexts to main memory so
* that they can be saved in the hibernation image. To ensure the last
@@ -157,22 +27,9 @@ void i915_gem_suspend(struct drm_i915_private *i915)
* state. Fortunately, the kernel_context is disposable and we do
* not rely on its state.
*/
- switch_to_kernel_context_sync(i915);
-
- mutex_unlock(&i915->drm.struct_mutex);
-
- /*
- * Assert that we successfully flushed all the work and
- * reset the GPU back to its idle, low power state.
- */
- GEM_BUG_ON(i915->gt.awake);
- flush_work(&i915->gem.idle_work);
-
- cancel_delayed_work_sync(&i915->gpu_error.hangcheck_work);
+ intel_gt_suspend_prepare(&i915->gt);
i915_gem_drain_freed_objects(i915);
-
- intel_uc_suspend(i915);
}
static struct drm_i915_gem_object *first_mm_object(struct list_head *list)
@@ -212,6 +69,8 @@ void i915_gem_suspend_late(struct drm_i915_private *i915)
* machine in an unusable condition.
*/
+ intel_gt_suspend_late(&i915->gt);
+
spin_lock_irqsave(&i915->mm.obj_lock, flags);
for (phase = phases; *phase; phase++) {
LIST_HEAD(keep);
@@ -236,60 +95,16 @@ void i915_gem_suspend_late(struct drm_i915_private *i915)
list_splice_tail(&keep, *phase);
}
spin_unlock_irqrestore(&i915->mm.obj_lock, flags);
-
- intel_uc_sanitize(i915);
- i915_gem_sanitize(i915);
}
void i915_gem_resume(struct drm_i915_private *i915)
{
- GEM_TRACE("\n");
-
- WARN_ON(i915->gt.awake);
-
- mutex_lock(&i915->drm.struct_mutex);
- intel_uncore_forcewake_get(&i915->uncore, FORCEWAKE_ALL);
-
- i915_gem_restore_gtt_mappings(i915);
- i915_gem_restore_fences(i915);
-
- if (i915_gem_init_hw(i915))
- goto err_wedged;
+ GEM_TRACE("%s\n", dev_name(i915->drm.dev));
/*
* As we didn't flush the kernel context before suspend, we cannot
* guarantee that the context image is complete. So let's just reset
* it and start again.
*/
- if (intel_gt_resume(i915))
- goto err_wedged;
-
- intel_uc_resume(i915);
-
- /* Always reload a context for powersaving. */
- if (!i915_gem_load_power_context(i915))
- goto err_wedged;
-
-out_unlock:
- intel_uncore_forcewake_put(&i915->uncore, FORCEWAKE_ALL);
- mutex_unlock(&i915->drm.struct_mutex);
- return;
-
-err_wedged:
- if (!i915_reset_failed(i915)) {
- dev_err(i915->drm.dev,
- "Failed to re-initialize GPU, declaring it wedged!\n");
- i915_gem_set_wedged(i915);
- }
- goto out_unlock;
-}
-
-void i915_gem_init__pm(struct drm_i915_private *i915)
-{
- INIT_WORK(&i915->gem.idle_work, idle_work_handler);
- INIT_DELAYED_WORK(&i915->gem.retire_work, retire_work_handler);
-
- i915->gem.pm_notifier.notifier_call = pm_notifier;
- blocking_notifier_chain_register(&i915->gt.pm_notifications,
- &i915->gem.pm_notifier);
+ intel_gt_resume(&i915->gt);
}
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_pm.h b/drivers/gpu/drm/i915/gem/i915_gem_pm.h
index 6f7d5d11ac3b..26b78dbdc225 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_pm.h
+++ b/drivers/gpu/drm/i915/gem/i915_gem_pm.h
@@ -12,9 +12,6 @@
struct drm_i915_private;
struct work_struct;
-void i915_gem_init__pm(struct drm_i915_private *i915);
-
-bool i915_gem_load_power_context(struct drm_i915_private *i915);
void i915_gem_resume(struct drm_i915_private *i915);
void i915_gem_idle_work_handler(struct work_struct *work);
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_region.c b/drivers/gpu/drm/i915/gem/i915_gem_region.c
new file mode 100644
index 000000000000..1515384d7e0e
--- /dev/null
+++ b/drivers/gpu/drm/i915/gem/i915_gem_region.c
@@ -0,0 +1,177 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2019 Intel Corporation
+ */
+
+#include "intel_memory_region.h"
+#include "i915_gem_region.h"
+#include "i915_drv.h"
+#include "i915_trace.h"
+
+void
+i915_gem_object_put_pages_buddy(struct drm_i915_gem_object *obj,
+ struct sg_table *pages)
+{
+ __intel_memory_region_put_pages_buddy(obj->mm.region, &obj->mm.blocks);
+
+ obj->mm.dirty = false;
+ sg_free_table(pages);
+ kfree(pages);
+}
+
+int
+i915_gem_object_get_pages_buddy(struct drm_i915_gem_object *obj)
+{
+ struct intel_memory_region *mem = obj->mm.region;
+ struct list_head *blocks = &obj->mm.blocks;
+ resource_size_t size = obj->base.size;
+ resource_size_t prev_end;
+ struct i915_buddy_block *block;
+ unsigned int flags;
+ struct sg_table *st;
+ struct scatterlist *sg;
+ unsigned int sg_page_sizes;
+ int ret;
+
+ st = kmalloc(sizeof(*st), GFP_KERNEL);
+ if (!st)
+ return -ENOMEM;
+
+ if (sg_alloc_table(st, size >> ilog2(mem->mm.chunk_size), GFP_KERNEL)) {
+ kfree(st);
+ return -ENOMEM;
+ }
+
+ flags = I915_ALLOC_MIN_PAGE_SIZE;
+ if (obj->flags & I915_BO_ALLOC_CONTIGUOUS)
+ flags |= I915_ALLOC_CONTIGUOUS;
+
+ ret = __intel_memory_region_get_pages_buddy(mem, size, flags, blocks);
+ if (ret)
+ goto err_free_sg;
+
+ GEM_BUG_ON(list_empty(blocks));
+
+ sg = st->sgl;
+ st->nents = 0;
+ sg_page_sizes = 0;
+ prev_end = (resource_size_t)-1;
+
+ list_for_each_entry(block, blocks, link) {
+ u64 block_size, offset;
+
+ block_size = min_t(u64, size,
+ i915_buddy_block_size(&mem->mm, block));
+ offset = i915_buddy_block_offset(block);
+
+ GEM_BUG_ON(overflows_type(block_size, sg->length));
+
+ if (offset != prev_end ||
+ add_overflows_t(typeof(sg->length), sg->length, block_size)) {
+ if (st->nents) {
+ sg_page_sizes |= sg->length;
+ sg = __sg_next(sg);
+ }
+
+ sg_dma_address(sg) = mem->region.start + offset;
+ sg_dma_len(sg) = block_size;
+
+ sg->length = block_size;
+
+ st->nents++;
+ } else {
+ sg->length += block_size;
+ sg_dma_len(sg) += block_size;
+ }
+
+ prev_end = offset + block_size;
+ }
+
+ sg_page_sizes |= sg->length;
+ sg_mark_end(sg);
+ i915_sg_trim(st);
+
+ __i915_gem_object_set_pages(obj, st, sg_page_sizes);
+
+ return 0;
+
+err_free_sg:
+ sg_free_table(st);
+ kfree(st);
+ return ret;
+}
+
+void i915_gem_object_init_memory_region(struct drm_i915_gem_object *obj,
+ struct intel_memory_region *mem,
+ unsigned long flags)
+{
+ INIT_LIST_HEAD(&obj->mm.blocks);
+ obj->mm.region = intel_memory_region_get(mem);
+
+ obj->flags |= flags;
+ if (obj->base.size <= mem->min_page_size)
+ obj->flags |= I915_BO_ALLOC_CONTIGUOUS;
+
+ mutex_lock(&mem->objects.lock);
+
+ if (obj->flags & I915_BO_ALLOC_VOLATILE)
+ list_add(&obj->mm.region_link, &mem->objects.purgeable);
+ else
+ list_add(&obj->mm.region_link, &mem->objects.list);
+
+ mutex_unlock(&mem->objects.lock);
+}
+
+void i915_gem_object_release_memory_region(struct drm_i915_gem_object *obj)
+{
+ struct intel_memory_region *mem = obj->mm.region;
+
+ mutex_lock(&mem->objects.lock);
+ list_del(&obj->mm.region_link);
+ mutex_unlock(&mem->objects.lock);
+
+ intel_memory_region_put(mem);
+}
+
+struct drm_i915_gem_object *
+i915_gem_object_create_region(struct intel_memory_region *mem,
+ resource_size_t size,
+ unsigned int flags)
+{
+ struct drm_i915_gem_object *obj;
+
+ /*
+ * NB: Our use of resource_size_t for the size stems from using struct
+ * resource for the mem->region. We might need to revisit this in the
+ * future.
+ */
+
+ GEM_BUG_ON(flags & ~I915_BO_ALLOC_FLAGS);
+
+ if (!mem)
+ return ERR_PTR(-ENODEV);
+
+ size = round_up(size, mem->min_page_size);
+
+ GEM_BUG_ON(!size);
+ GEM_BUG_ON(!IS_ALIGNED(size, I915_GTT_MIN_ALIGNMENT));
+
+ /*
+ * XXX: There is a prevalence of the assumption that we fit the
+ * object's page count inside a 32bit _signed_ variable. Let's document
+ * this and catch if we ever need to fix it. In the meantime, if you do
+ * spot such a local variable, please consider fixing!
+ */
+
+ if (size >> PAGE_SHIFT > INT_MAX)
+ return ERR_PTR(-E2BIG);
+
+ if (overflows_type(size, obj->base.size))
+ return ERR_PTR(-E2BIG);
+
+ obj = mem->ops->create_object(mem, size, flags);
+ if (!IS_ERR(obj))
+ trace_i915_gem_object_create(obj);
+
+ return obj;
+}
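A short usage sketch for the new region interface (the function name is invented; a region pointer would come from somewhere like i915->mm.regions[INTEL_REGION_SMEM], as the shmem wrapper below uses):

static int alloc_contiguous_buffer(struct intel_memory_region *mem)
{
	struct drm_i915_gem_object *obj;

	obj = i915_gem_object_create_region(mem, SZ_2M,
					    I915_BO_ALLOC_CONTIGUOUS);
	if (IS_ERR(obj))
		return PTR_ERR(obj);

	/* ... use the object; regions backed by the buddy allocator get
	 * their pages via i915_gem_object_get_pages_buddy() ... */

	i915_gem_object_put(obj);
	return 0;
}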
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_region.h b/drivers/gpu/drm/i915/gem/i915_gem_region.h
new file mode 100644
index 000000000000..f2ff6f8bff74
--- /dev/null
+++ b/drivers/gpu/drm/i915/gem/i915_gem_region.h
@@ -0,0 +1,29 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2019 Intel Corporation
+ */
+
+#ifndef __I915_GEM_REGION_H__
+#define __I915_GEM_REGION_H__
+
+#include <linux/types.h>
+
+struct intel_memory_region;
+struct drm_i915_gem_object;
+struct sg_table;
+
+int i915_gem_object_get_pages_buddy(struct drm_i915_gem_object *obj);
+void i915_gem_object_put_pages_buddy(struct drm_i915_gem_object *obj,
+ struct sg_table *pages);
+
+void i915_gem_object_init_memory_region(struct drm_i915_gem_object *obj,
+ struct intel_memory_region *mem,
+ unsigned long flags);
+void i915_gem_object_release_memory_region(struct drm_i915_gem_object *obj);
+
+struct drm_i915_gem_object *
+i915_gem_object_create_region(struct intel_memory_region *mem,
+ resource_size_t size,
+ unsigned int flags);
+
+#endif
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_shmem.c b/drivers/gpu/drm/i915/gem/i915_gem_shmem.c
index 19d9ecdb2894..a2a980d9d241 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_shmem.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_shmem.c
@@ -7,9 +7,12 @@
#include <linux/pagevec.h>
#include <linux/swap.h>
+#include "gem/i915_gem_region.h"
#include "i915_drv.h"
+#include "i915_gemfs.h"
#include "i915_gem_object.h"
#include "i915_scatterlist.h"
+#include "i915_trace.h"
/*
* Move pages to appropriate lru and release the pagevec, decrementing the
@@ -25,6 +28,7 @@ static void check_release_pagevec(struct pagevec *pvec)
static int shmem_get_pages(struct drm_i915_gem_object *obj)
{
struct drm_i915_private *i915 = to_i915(obj->base.dev);
+ struct intel_memory_region *mem = obj->mm.region;
const unsigned long page_count = obj->base.size / PAGE_SIZE;
unsigned long i;
struct address_space *mapping;
@@ -51,7 +55,7 @@ static int shmem_get_pages(struct drm_i915_gem_object *obj)
* If there's no chance of allocating enough pages for the whole
* object, bail early.
*/
- if (page_count > totalram_pages())
+ if (obj->base.size > resource_size(&mem->region))
return -ENOMEM;
st = kmalloc(sizeof(*st), GFP_KERNEL);
@@ -414,6 +418,13 @@ shmem_pwrite(struct drm_i915_gem_object *obj,
return 0;
}
+static void shmem_release(struct drm_i915_gem_object *obj)
+{
+ i915_gem_object_release_memory_region(obj);
+
+ fput(obj->base.filp);
+}
+
const struct drm_i915_gem_object_ops i915_gem_shmem_ops = {
.flags = I915_GEM_OBJECT_HAS_STRUCT_PAGE |
I915_GEM_OBJECT_IS_SHRINKABLE,
@@ -424,11 +435,13 @@ const struct drm_i915_gem_object_ops i915_gem_shmem_ops = {
.writeback = shmem_writeback,
.pwrite = shmem_pwrite,
+
+ .release = shmem_release,
};
-static int create_shmem(struct drm_i915_private *i915,
- struct drm_gem_object *obj,
- size_t size)
+static int __create_shmem(struct drm_i915_private *i915,
+ struct drm_gem_object *obj,
+ resource_size_t size)
{
unsigned long flags = VM_NORESERVE;
struct file *filp;
@@ -447,31 +460,24 @@ static int create_shmem(struct drm_i915_private *i915,
return 0;
}
-struct drm_i915_gem_object *
-i915_gem_object_create_shmem(struct drm_i915_private *i915, u64 size)
+static struct drm_i915_gem_object *
+create_shmem(struct intel_memory_region *mem,
+ resource_size_t size,
+ unsigned int flags)
{
+ static struct lock_class_key lock_class;
+ struct drm_i915_private *i915 = mem->i915;
struct drm_i915_gem_object *obj;
struct address_space *mapping;
unsigned int cache_level;
gfp_t mask;
int ret;
- /* There is a prevalence of the assumption that we fit the object's
- * page count inside a 32bit _signed_ variable. Let's document this and
- * catch if we ever need to fix it. In the meantime, if you do spot
- * such a local variable, please consider fixing!
- */
- if (size >> PAGE_SHIFT > INT_MAX)
- return ERR_PTR(-E2BIG);
-
- if (overflows_type(size, obj->base.size))
- return ERR_PTR(-E2BIG);
-
obj = i915_gem_object_alloc();
if (!obj)
return ERR_PTR(-ENOMEM);
- ret = create_shmem(i915, &obj->base, size);
+ ret = __create_shmem(i915, &obj->base, size);
if (ret)
goto fail;
@@ -486,7 +492,7 @@ i915_gem_object_create_shmem(struct drm_i915_private *i915, u64 size)
mapping_set_gfp_mask(mapping, mask);
GEM_BUG_ON(!(mapping_gfp_mask(mapping) & __GFP_RECLAIM));
- i915_gem_object_init(obj, &i915_gem_shmem_ops);
+ i915_gem_object_init(obj, &i915_gem_shmem_ops, &lock_class);
obj->write_domain = I915_GEM_DOMAIN_CPU;
obj->read_domains = I915_GEM_DOMAIN_CPU;
@@ -510,7 +516,7 @@ i915_gem_object_create_shmem(struct drm_i915_private *i915, u64 size)
i915_gem_object_set_cache_coherency(obj, cache_level);
- trace_i915_gem_object_create(obj);
+ i915_gem_object_init_memory_region(obj, mem, 0);
return obj;
@@ -519,14 +525,22 @@ fail:
return ERR_PTR(ret);
}
+struct drm_i915_gem_object *
+i915_gem_object_create_shmem(struct drm_i915_private *i915,
+ resource_size_t size)
+{
+ return i915_gem_object_create_region(i915->mm.regions[INTEL_REGION_SMEM],
+ size, 0);
+}
+
/* Allocate a new GEM object and fill it with the supplied data */
struct drm_i915_gem_object *
i915_gem_object_create_shmem_from_data(struct drm_i915_private *dev_priv,
- const void *data, size_t size)
+ const void *data, resource_size_t size)
{
struct drm_i915_gem_object *obj;
struct file *file;
- size_t offset;
+ resource_size_t offset;
int err;
obj = i915_gem_object_create_shmem(dev_priv, round_up(size, PAGE_SIZE));
@@ -569,3 +583,37 @@ fail:
i915_gem_object_put(obj);
return ERR_PTR(err);
}
+
+static int init_shmem(struct intel_memory_region *mem)
+{
+ int err;
+
+ err = i915_gemfs_init(mem->i915);
+ if (err) {
+ DRM_NOTE("Unable to create a private tmpfs mount, hugepage support will be disabled(%d).\n",
+ err);
+ }
+
+ intel_memory_region_set_name(mem, "system");
+
+ return 0; /* Don't error, we can simply fall back to the kernel mnt */
+}
+
+static void release_shmem(struct intel_memory_region *mem)
+{
+ i915_gemfs_fini(mem->i915);
+}
+
+static const struct intel_memory_region_ops shmem_region_ops = {
+ .init = init_shmem,
+ .release = release_shmem,
+ .create_object = create_shmem,
+};
+
+struct intel_memory_region *i915_gem_shmem_setup(struct drm_i915_private *i915)
+{
+ return intel_memory_region_create(i915, 0,
+ totalram_pages() << PAGE_SHIFT,
+ PAGE_SIZE, 0,
+ &shmem_region_ops);
+}
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_shrinker.c b/drivers/gpu/drm/i915/gem/i915_gem_shrinker.c
index 3a926a8755c6..f7e4b39c734f 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_shrinker.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_shrinker.c
@@ -16,40 +16,6 @@
#include "i915_trace.h"
-static bool shrinker_lock(struct drm_i915_private *i915,
- unsigned int flags,
- bool *unlock)
-{
- struct mutex *m = &i915->drm.struct_mutex;
-
- switch (mutex_trylock_recursive(m)) {
- case MUTEX_TRYLOCK_RECURSIVE:
- *unlock = false;
- return true;
-
- case MUTEX_TRYLOCK_FAILED:
- *unlock = false;
- if (flags & I915_SHRINK_ACTIVE &&
- mutex_lock_killable_nested(m, I915_MM_SHRINKER) == 0)
- *unlock = true;
- return *unlock;
-
- case MUTEX_TRYLOCK_SUCCESS:
- *unlock = true;
- return true;
- }
-
- BUG();
-}
-
-static void shrinker_unlock(struct drm_i915_private *i915, bool unlock)
-{
- if (!unlock)
- return;
-
- mutex_unlock(&i915->drm.struct_mutex);
-}
-
static bool swap_available(void)
{
return get_nr_swap_pages() > 0;
@@ -61,7 +27,8 @@ static bool can_release_pages(struct drm_i915_gem_object *obj)
if (!i915_gem_object_is_shrinkable(obj))
return false;
- /* Only report true if by unbinding the object and putting its pages
+ /*
+ * Only report true if by unbinding the object and putting its pages
* we can actually make forward progress towards freeing physical
* pages.
*
@@ -72,26 +39,26 @@ static bool can_release_pages(struct drm_i915_gem_object *obj)
if (atomic_read(&obj->mm.pages_pin_count) > atomic_read(&obj->bind_count))
return false;
- /* If any vma are "permanently" pinned, it will prevent us from
- * reclaiming the obj->mm.pages. We only allow scanout objects to claim
- * a permanent pin, along with a few others like the context objects.
- * To simplify the scan, and to avoid walking the list of vma under the
- * object, we just check the count of its permanently pinned.
- */
- if (READ_ONCE(obj->pin_global))
- return false;
-
- /* We can only return physical pages to the system if we can either
+ /*
+ * We can only return physical pages to the system if we can either
* discard the contents (because the user has marked them as being
* purgeable) or if we can move their contents out to swap.
*/
return swap_available() || obj->mm.madv == I915_MADV_DONTNEED;
}
-static bool unsafe_drop_pages(struct drm_i915_gem_object *obj)
+static bool unsafe_drop_pages(struct drm_i915_gem_object *obj,
+ unsigned long shrink)
{
- if (i915_gem_object_unbind(obj) == 0)
- __i915_gem_object_put_pages(obj, I915_MM_SHRINKER);
+ unsigned long flags;
+
+ flags = 0;
+ if (shrink & I915_SHRINK_ACTIVE)
+ flags = I915_GEM_OBJECT_UNBIND_ACTIVE;
+
+ if (i915_gem_object_unbind(obj, flags) == 0)
+ __i915_gem_object_put_pages(obj);
+
return !i915_gem_object_has_pages(obj);
}
@@ -154,10 +121,6 @@ i915_gem_shrink(struct drm_i915_private *i915,
intel_wakeref_t wakeref = 0;
unsigned long count = 0;
unsigned long scanned = 0;
- bool unlock;
-
- if (!shrinker_lock(i915, shrink, &unlock))
- return 0;
/*
* When shrinking the active list, we should also consider active
@@ -169,7 +132,6 @@ i915_gem_shrink(struct drm_i915_private *i915,
*/
trace_i915_gem_shrink(i915, target, shrink);
- i915_retire_requests(i915);
/*
* Unbinding of objects will require HW access; Let us not wake the
@@ -230,8 +192,7 @@ i915_gem_shrink(struct drm_i915_private *i915,
continue;
if (!(shrink & I915_SHRINK_ACTIVE) &&
- (i915_gem_object_is_active(obj) ||
- i915_gem_object_is_framebuffer(obj)))
+ i915_gem_object_is_framebuffer(obj))
continue;
if (!(shrink & I915_SHRINK_BOUND) &&
@@ -246,10 +207,9 @@ i915_gem_shrink(struct drm_i915_private *i915,
spin_unlock_irqrestore(&i915->mm.obj_lock, flags);
- if (unsafe_drop_pages(obj)) {
+ if (unsafe_drop_pages(obj, shrink)) {
/* May arrive from get_pages on another bo */
- mutex_lock_nested(&obj->mm.lock,
- I915_MM_SHRINKER);
+ mutex_lock(&obj->mm.lock);
if (!i915_gem_object_has_pages(obj)) {
try_to_writeback(obj, shrink);
count += obj->base.size >> PAGE_SHIFT;
@@ -269,10 +229,6 @@ i915_gem_shrink(struct drm_i915_private *i915,
if (shrink & I915_SHRINK_BOUND)
intel_runtime_pm_put(&i915->runtime_pm, wakeref);
- i915_retire_requests(i915);
-
- shrinker_unlock(i915, unlock);
-
if (nr_scanned)
*nr_scanned += scanned;
return count;
@@ -342,19 +298,14 @@ i915_gem_shrinker_scan(struct shrinker *shrinker, struct shrink_control *sc)
struct drm_i915_private *i915 =
container_of(shrinker, struct drm_i915_private, mm.shrinker);
unsigned long freed;
- bool unlock;
sc->nr_scanned = 0;
- if (!shrinker_lock(i915, 0, &unlock))
- return SHRINK_STOP;
-
freed = i915_gem_shrink(i915,
sc->nr_to_scan,
&sc->nr_scanned,
I915_SHRINK_BOUND |
- I915_SHRINK_UNBOUND |
- I915_SHRINK_WRITEBACK);
+ I915_SHRINK_UNBOUND);
if (sc->nr_scanned < sc->nr_to_scan && current_is_kswapd()) {
intel_wakeref_t wakeref;
@@ -369,8 +320,6 @@ i915_gem_shrinker_scan(struct shrinker *shrinker, struct shrink_control *sc)
}
}
- shrinker_unlock(i915, unlock);
-
return sc->nr_scanned ? freed : SHRINK_STOP;
}
@@ -387,6 +336,7 @@ i915_gem_shrinker_oom(struct notifier_block *nb, unsigned long event, void *ptr)
freed_pages = 0;
with_intel_runtime_pm(&i915->runtime_pm, wakeref)
freed_pages += i915_gem_shrink(i915, -1UL, NULL,
+ I915_SHRINK_ACTIVE |
I915_SHRINK_BOUND |
I915_SHRINK_UNBOUND |
I915_SHRINK_WRITEBACK);
@@ -422,16 +372,6 @@ i915_gem_shrinker_vmap(struct notifier_block *nb, unsigned long event, void *ptr
struct i915_vma *vma, *next;
unsigned long freed_pages = 0;
intel_wakeref_t wakeref;
- bool unlock;
-
- if (!shrinker_lock(i915, 0, &unlock))
- return NOTIFY_DONE;
-
- /* Force everything onto the inactive lists */
- if (i915_gem_wait_for_idle(i915,
- I915_WAIT_LOCKED,
- MAX_SCHEDULE_TIMEOUT))
- goto out;
with_intel_runtime_pm(&i915->runtime_pm, wakeref)
freed_pages += i915_gem_shrink(i915, -1UL, NULL,
@@ -448,27 +388,16 @@ i915_gem_shrinker_vmap(struct notifier_block *nb, unsigned long event, void *ptr
if (!vma->iomap || i915_vma_is_active(vma))
continue;
- mutex_unlock(&i915->ggtt.vm.mutex);
- if (i915_vma_unbind(vma) == 0)
+ if (__i915_vma_unbind(vma) == 0)
freed_pages += count;
- mutex_lock(&i915->ggtt.vm.mutex);
}
mutex_unlock(&i915->ggtt.vm.mutex);
-out:
- shrinker_unlock(i915, unlock);
-
*(unsigned long *)ptr += freed_pages;
return NOTIFY_DONE;
}
-/**
- * i915_gem_shrinker_register - Register the i915 shrinker
- * @i915: i915 device
- *
- * This function registers and sets up the i915 shrinker and OOM handler.
- */
-void i915_gem_shrinker_register(struct drm_i915_private *i915)
+void i915_gem_driver_register__shrinker(struct drm_i915_private *i915)
{
i915->mm.shrinker.scan_objects = i915_gem_shrinker_scan;
i915->mm.shrinker.count_objects = i915_gem_shrinker_count;
@@ -483,13 +412,7 @@ void i915_gem_shrinker_register(struct drm_i915_private *i915)
WARN_ON(register_vmap_purge_notifier(&i915->mm.vmap_notifier));
}
-/**
- * i915_gem_shrinker_unregister - Unregisters the i915 shrinker
- * @i915: i915 device
- *
- * This function unregisters the i915 shrinker and OOM handler.
- */
-void i915_gem_shrinker_unregister(struct drm_i915_private *i915)
+void i915_gem_driver_unregister__shrinker(struct drm_i915_private *i915)
{
WARN_ON(unregister_vmap_purge_notifier(&i915->mm.vmap_notifier));
WARN_ON(unregister_oom_notifier(&i915->mm.oom_notifier));
@@ -512,24 +435,75 @@ void i915_gem_shrinker_taints_mutex(struct drm_i915_private *i915,
fs_reclaim_acquire(GFP_KERNEL);
+ mutex_acquire(&mutex->dep_map, 0, 0, _RET_IP_);
+ mutex_release(&mutex->dep_map, _RET_IP_);
+
+ fs_reclaim_release(GFP_KERNEL);
+
+ if (unlock)
+ mutex_release(&i915->drm.struct_mutex.dep_map, _RET_IP_);
+}
+
+#define obj_to_i915(obj__) to_i915((obj__)->base.dev)
+
+void i915_gem_object_make_unshrinkable(struct drm_i915_gem_object *obj)
+{
+ struct drm_i915_private *i915 = obj_to_i915(obj);
+ unsigned long flags;
+
/*
- * As we invariably rely on the struct_mutex within the shrinker,
- * but have a complicated recursion dance, taint all the mutexes used
- * within the shrinker with the struct_mutex. For completeness, we
- * taint with all subclass of struct_mutex, even though we should
- * only need tainting by I915_MM_NORMAL to catch possible ABBA
- * deadlocks from using struct_mutex inside @mutex.
+ * We can only be called while the pages are pinned or when
+ * the pages are released. If pinned, we should only be called
+ * from a single caller under controlled conditions; and on release
+ * only one caller may release us. Neither of the two may cross.
*/
- mutex_acquire(&i915->drm.struct_mutex.dep_map,
- I915_MM_SHRINKER, 0, _RET_IP_);
+ if (atomic_add_unless(&obj->mm.shrink_pin, 1, 0))
+ return;
- mutex_acquire(&mutex->dep_map, 0, 0, _RET_IP_);
- mutex_release(&mutex->dep_map, 0, _RET_IP_);
+ spin_lock_irqsave(&i915->mm.obj_lock, flags);
+ if (!atomic_fetch_inc(&obj->mm.shrink_pin) &&
+ !list_empty(&obj->mm.link)) {
+ list_del_init(&obj->mm.link);
+ i915->mm.shrink_count--;
+ i915->mm.shrink_memory -= obj->base.size;
+ }
+ spin_unlock_irqrestore(&i915->mm.obj_lock, flags);
+}
- mutex_release(&i915->drm.struct_mutex.dep_map, 0, _RET_IP_);
+static void __i915_gem_object_make_shrinkable(struct drm_i915_gem_object *obj,
+ struct list_head *head)
+{
+ struct drm_i915_private *i915 = obj_to_i915(obj);
+ unsigned long flags;
- fs_reclaim_release(GFP_KERNEL);
+ GEM_BUG_ON(!i915_gem_object_has_pages(obj));
+ if (!i915_gem_object_is_shrinkable(obj))
+ return;
- if (unlock)
- mutex_release(&i915->drm.struct_mutex.dep_map, 0, _RET_IP_);
+ if (atomic_add_unless(&obj->mm.shrink_pin, -1, 1))
+ return;
+
+ spin_lock_irqsave(&i915->mm.obj_lock, flags);
+ GEM_BUG_ON(!kref_read(&obj->base.refcount));
+ if (atomic_dec_and_test(&obj->mm.shrink_pin)) {
+ GEM_BUG_ON(!list_empty(&obj->mm.link));
+
+ list_add_tail(&obj->mm.link, head);
+ i915->mm.shrink_count++;
+ i915->mm.shrink_memory += obj->base.size;
+
+ }
+ spin_unlock_irqrestore(&i915->mm.obj_lock, flags);
+}
+
+void i915_gem_object_make_shrinkable(struct drm_i915_gem_object *obj)
+{
+ __i915_gem_object_make_shrinkable(obj,
+ &obj_to_i915(obj)->mm.shrink_list);
+}
+
+void i915_gem_object_make_purgeable(struct drm_i915_gem_object *obj)
+{
+ __i915_gem_object_make_shrinkable(obj,
+ &obj_to_i915(obj)->mm.purge_list);
}
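The intent behind the shrink_pin accounting above is a pin/unpin style pairing: take the object off the shrink lists while it must not be reaped, and return it to the appropriate list once idle. A rough sketch with hypothetical hook names:

static void example_pin_for_hw(struct drm_i915_gem_object *obj)
{
	/* must not be reaped while in use: drop off the shrink lists */
	i915_gem_object_make_unshrinkable(obj);
}

static void example_unpin_from_hw(struct drm_i915_gem_object *obj)
{
	/* idle again: hand back to the shrinker, honouring the madv hint */
	if (obj->mm.madv == I915_MADV_DONTNEED)
		i915_gem_object_make_purgeable(obj);
	else
		i915_gem_object_make_shrinkable(obj);
}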
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_shrinker.h b/drivers/gpu/drm/i915/gem/i915_gem_shrinker.h
new file mode 100644
index 000000000000..b397d7785789
--- /dev/null
+++ b/drivers/gpu/drm/i915/gem/i915_gem_shrinker.h
@@ -0,0 +1,31 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2019 Intel Corporation
+ */
+
+#ifndef __I915_GEM_SHRINKER_H__
+#define __I915_GEM_SHRINKER_H__
+
+#include <linux/bits.h>
+
+struct drm_i915_private;
+struct mutex;
+
+/* i915_gem_shrinker.c */
+unsigned long i915_gem_shrink(struct drm_i915_private *i915,
+ unsigned long target,
+ unsigned long *nr_scanned,
+ unsigned flags);
+#define I915_SHRINK_UNBOUND BIT(0)
+#define I915_SHRINK_BOUND BIT(1)
+#define I915_SHRINK_ACTIVE BIT(2)
+#define I915_SHRINK_VMAPS BIT(3)
+#define I915_SHRINK_WRITEBACK BIT(4)
+
+unsigned long i915_gem_shrink_all(struct drm_i915_private *i915);
+void i915_gem_driver_register__shrinker(struct drm_i915_private *i915);
+void i915_gem_driver_unregister__shrinker(struct drm_i915_private *i915);
+void i915_gem_shrinker_taints_mutex(struct drm_i915_private *i915,
+ struct mutex *mutex);
+
+#endif /* __I915_GEM_SHRINKER_H__ */
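The shrink flags combine freely; for instance the OOM notifier in i915_gem_shrinker.c asks for everything. A sketch of an equivalent call site (invented wrapper name; the OOM and vmap notifiers wrap such calls in with_intel_runtime_pm()):

static unsigned long drop_everything(struct drm_i915_private *i915)
{
	return i915_gem_shrink(i915, -1UL, NULL,
			       I915_SHRINK_ACTIVE |
			       I915_SHRINK_BOUND |
			       I915_SHRINK_UNBOUND |
			       I915_SHRINK_WRITEBACK);
}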
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_stolen.c b/drivers/gpu/drm/i915/gem/i915_gem_stolen.c
index de1fab2058ec..451f3078d60d 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_stolen.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_stolen.c
@@ -10,7 +10,9 @@
#include <drm/drm_mm.h>
#include <drm/i915_drm.h>
+#include "gem/i915_gem_region.h"
#include "i915_drv.h"
+#include "i915_gem_stolen.h"
/*
* The BIOS typically reserves some of the system's memory for the exclusive
@@ -24,48 +26,49 @@
* for is a boon.
*/
-int i915_gem_stolen_insert_node_in_range(struct drm_i915_private *dev_priv,
+int i915_gem_stolen_insert_node_in_range(struct drm_i915_private *i915,
struct drm_mm_node *node, u64 size,
unsigned alignment, u64 start, u64 end)
{
int ret;
- if (!drm_mm_initialized(&dev_priv->mm.stolen))
+ if (!drm_mm_initialized(&i915->mm.stolen))
return -ENODEV;
/* WaSkipStolenMemoryFirstPage:bdw+ */
- if (INTEL_GEN(dev_priv) >= 8 && start < 4096)
+ if (INTEL_GEN(i915) >= 8 && start < 4096)
start = 4096;
- mutex_lock(&dev_priv->mm.stolen_lock);
- ret = drm_mm_insert_node_in_range(&dev_priv->mm.stolen, node,
+ mutex_lock(&i915->mm.stolen_lock);
+ ret = drm_mm_insert_node_in_range(&i915->mm.stolen, node,
size, alignment, 0,
start, end, DRM_MM_INSERT_BEST);
- mutex_unlock(&dev_priv->mm.stolen_lock);
+ mutex_unlock(&i915->mm.stolen_lock);
return ret;
}
-int i915_gem_stolen_insert_node(struct drm_i915_private *dev_priv,
+int i915_gem_stolen_insert_node(struct drm_i915_private *i915,
struct drm_mm_node *node, u64 size,
unsigned alignment)
{
- return i915_gem_stolen_insert_node_in_range(dev_priv, node, size,
+ return i915_gem_stolen_insert_node_in_range(i915, node, size,
alignment, 0, U64_MAX);
}
-void i915_gem_stolen_remove_node(struct drm_i915_private *dev_priv,
+void i915_gem_stolen_remove_node(struct drm_i915_private *i915,
struct drm_mm_node *node)
{
- mutex_lock(&dev_priv->mm.stolen_lock);
+ mutex_lock(&i915->mm.stolen_lock);
drm_mm_remove_node(node);
- mutex_unlock(&dev_priv->mm.stolen_lock);
+ mutex_unlock(&i915->mm.stolen_lock);
}
-static int i915_adjust_stolen(struct drm_i915_private *dev_priv,
+static int i915_adjust_stolen(struct drm_i915_private *i915,
struct resource *dsm)
{
- struct i915_ggtt *ggtt = &dev_priv->ggtt;
+ struct i915_ggtt *ggtt = &i915->ggtt;
+ struct intel_uncore *uncore = ggtt->vm.gt->uncore;
struct resource *r;
if (dsm->start == 0 || dsm->end <= dsm->start)
@@ -77,14 +80,14 @@ static int i915_adjust_stolen(struct drm_i915_private *dev_priv,
*/
/* Make sure we don't clobber the GTT if it's within stolen memory */
- if (INTEL_GEN(dev_priv) <= 4 &&
- !IS_G33(dev_priv) && !IS_PINEVIEW(dev_priv) && !IS_G4X(dev_priv)) {
+ if (INTEL_GEN(i915) <= 4 &&
+ !IS_G33(i915) && !IS_PINEVIEW(i915) && !IS_G4X(i915)) {
struct resource stolen[2] = {*dsm, *dsm};
struct resource ggtt_res;
resource_size_t ggtt_start;
- ggtt_start = I915_READ(PGTBL_CTL);
- if (IS_GEN(dev_priv, 4))
+ ggtt_start = intel_uncore_read(uncore, PGTBL_CTL);
+ if (IS_GEN(i915, 4))
ggtt_start = (ggtt_start & PGTBL_ADDRESS_LO_MASK) |
(ggtt_start & PGTBL_ADDRESS_HI_MASK) << 28;
else
@@ -118,7 +121,7 @@ static int i915_adjust_stolen(struct drm_i915_private *dev_priv,
* kernel. So if the region is already marked as busy, something
* is seriously wrong.
*/
- r = devm_request_mem_region(dev_priv->drm.dev, dsm->start,
+ r = devm_request_mem_region(i915->drm.dev, dsm->start,
resource_size(dsm),
"Graphics Stolen Memory");
if (r == NULL) {
@@ -131,14 +134,14 @@ static int i915_adjust_stolen(struct drm_i915_private *dev_priv,
* reservation starting from 1 instead of 0.
* There's also BIOS with off-by-one on the other end.
*/
- r = devm_request_mem_region(dev_priv->drm.dev, dsm->start + 1,
+ r = devm_request_mem_region(i915->drm.dev, dsm->start + 1,
resource_size(dsm) - 2,
"Graphics Stolen Memory");
/*
* GEN3 firmware likes to smash pci bridges into the stolen
* range. Apparently this works.
*/
- if (r == NULL && !IS_GEN(dev_priv, 3)) {
+ if (!r && !IS_GEN(i915, 3)) {
DRM_ERROR("conflict detected with stolen region: %pR\n",
dsm);
@@ -149,25 +152,27 @@ static int i915_adjust_stolen(struct drm_i915_private *dev_priv,
return 0;
}
-void i915_gem_cleanup_stolen(struct drm_i915_private *dev_priv)
+static void i915_gem_cleanup_stolen(struct drm_i915_private *i915)
{
- if (!drm_mm_initialized(&dev_priv->mm.stolen))
+ if (!drm_mm_initialized(&i915->mm.stolen))
return;
- drm_mm_takedown(&dev_priv->mm.stolen);
+ drm_mm_takedown(&i915->mm.stolen);
}
-static void g4x_get_stolen_reserved(struct drm_i915_private *dev_priv,
+static void g4x_get_stolen_reserved(struct drm_i915_private *i915,
+ struct intel_uncore *uncore,
resource_size_t *base,
resource_size_t *size)
{
- u32 reg_val = I915_READ(IS_GM45(dev_priv) ?
- CTG_STOLEN_RESERVED :
- ELK_STOLEN_RESERVED);
- resource_size_t stolen_top = dev_priv->dsm.end + 1;
+ u32 reg_val = intel_uncore_read(uncore,
+ IS_GM45(i915) ?
+ CTG_STOLEN_RESERVED :
+ ELK_STOLEN_RESERVED);
+ resource_size_t stolen_top = i915->dsm.end + 1;
DRM_DEBUG_DRIVER("%s_STOLEN_RESERVED = %08x\n",
- IS_GM45(dev_priv) ? "CTG" : "ELK", reg_val);
+ IS_GM45(i915) ? "CTG" : "ELK", reg_val);
if ((reg_val & G4X_STOLEN_RESERVED_ENABLE) == 0)
return;
@@ -176,7 +181,7 @@ static void g4x_get_stolen_reserved(struct drm_i915_private *dev_priv,
* Whether ILK really reuses the ELK register for this is unclear.
* Let's see if we catch anyone with this supposedly enabled on ILK.
*/
- WARN(IS_GEN(dev_priv, 5), "ILK stolen reserved found? 0x%08x\n",
+ WARN(IS_GEN(i915, 5), "ILK stolen reserved found? 0x%08x\n",
reg_val);
if (!(reg_val & G4X_STOLEN_RESERVED_ADDR2_MASK))
@@ -188,11 +193,12 @@ static void g4x_get_stolen_reserved(struct drm_i915_private *dev_priv,
*size = stolen_top - *base;
}
-static void gen6_get_stolen_reserved(struct drm_i915_private *dev_priv,
+static void gen6_get_stolen_reserved(struct drm_i915_private *i915,
+ struct intel_uncore *uncore,
resource_size_t *base,
resource_size_t *size)
{
- u32 reg_val = I915_READ(GEN6_STOLEN_RESERVED);
+ u32 reg_val = intel_uncore_read(uncore, GEN6_STOLEN_RESERVED);
DRM_DEBUG_DRIVER("GEN6_STOLEN_RESERVED = %08x\n", reg_val);
@@ -220,12 +226,13 @@ static void gen6_get_stolen_reserved(struct drm_i915_private *dev_priv,
}
}
-static void vlv_get_stolen_reserved(struct drm_i915_private *dev_priv,
+static void vlv_get_stolen_reserved(struct drm_i915_private *i915,
+ struct intel_uncore *uncore,
resource_size_t *base,
resource_size_t *size)
{
- u32 reg_val = I915_READ(GEN6_STOLEN_RESERVED);
- resource_size_t stolen_top = dev_priv->dsm.end + 1;
+ u32 reg_val = intel_uncore_read(uncore, GEN6_STOLEN_RESERVED);
+ resource_size_t stolen_top = i915->dsm.end + 1;
DRM_DEBUG_DRIVER("GEN6_STOLEN_RESERVED = %08x\n", reg_val);
@@ -248,11 +255,12 @@ static void vlv_get_stolen_reserved(struct drm_i915_private *dev_priv,
*base = stolen_top - *size;
}
-static void gen7_get_stolen_reserved(struct drm_i915_private *dev_priv,
+static void gen7_get_stolen_reserved(struct drm_i915_private *i915,
+ struct intel_uncore *uncore,
resource_size_t *base,
resource_size_t *size)
{
- u32 reg_val = I915_READ(GEN6_STOLEN_RESERVED);
+ u32 reg_val = intel_uncore_read(uncore, GEN6_STOLEN_RESERVED);
DRM_DEBUG_DRIVER("GEN6_STOLEN_RESERVED = %08x\n", reg_val);
@@ -274,11 +282,12 @@ static void gen7_get_stolen_reserved(struct drm_i915_private *dev_priv,
}
}
-static void chv_get_stolen_reserved(struct drm_i915_private *dev_priv,
+static void chv_get_stolen_reserved(struct drm_i915_private *i915,
+ struct intel_uncore *uncore,
resource_size_t *base,
resource_size_t *size)
{
- u32 reg_val = I915_READ(GEN6_STOLEN_RESERVED);
+ u32 reg_val = intel_uncore_read(uncore, GEN6_STOLEN_RESERVED);
DRM_DEBUG_DRIVER("GEN6_STOLEN_RESERVED = %08x\n", reg_val);
@@ -306,12 +315,13 @@ static void chv_get_stolen_reserved(struct drm_i915_private *dev_priv,
}
}
-static void bdw_get_stolen_reserved(struct drm_i915_private *dev_priv,
+static void bdw_get_stolen_reserved(struct drm_i915_private *i915,
+ struct intel_uncore *uncore,
resource_size_t *base,
resource_size_t *size)
{
- u32 reg_val = I915_READ(GEN6_STOLEN_RESERVED);
- resource_size_t stolen_top = dev_priv->dsm.end + 1;
+ u32 reg_val = intel_uncore_read(uncore, GEN6_STOLEN_RESERVED);
+ resource_size_t stolen_top = i915->dsm.end + 1;
DRM_DEBUG_DRIVER("GEN6_STOLEN_RESERVED = %08x\n", reg_val);
@@ -326,10 +336,11 @@ static void bdw_get_stolen_reserved(struct drm_i915_private *dev_priv,
}
static void icl_get_stolen_reserved(struct drm_i915_private *i915,
+ struct intel_uncore *uncore,
resource_size_t *base,
resource_size_t *size)
{
- u64 reg_val = intel_uncore_read64(&i915->uncore, GEN6_STOLEN_RESERVED);
+ u64 reg_val = intel_uncore_read64(uncore, GEN6_STOLEN_RESERVED);
DRM_DEBUG_DRIVER("GEN6_STOLEN_RESERVED = 0x%016llx\n", reg_val);
@@ -354,75 +365,84 @@ static void icl_get_stolen_reserved(struct drm_i915_private *i915,
}
}
-int i915_gem_init_stolen(struct drm_i915_private *dev_priv)
+static int i915_gem_init_stolen(struct drm_i915_private *i915)
{
+ struct intel_uncore *uncore = &i915->uncore;
resource_size_t reserved_base, stolen_top;
resource_size_t reserved_total, reserved_size;
- mutex_init(&dev_priv->mm.stolen_lock);
+ mutex_init(&i915->mm.stolen_lock);
- if (intel_vgpu_active(dev_priv)) {
- DRM_INFO("iGVT-g active, disabling use of stolen memory\n");
+ if (intel_vgpu_active(i915)) {
+ dev_notice(i915->drm.dev,
+ "%s, disabling use of stolen memory\n",
+ "iGVT-g active");
return 0;
}
- if (intel_vtd_active() && INTEL_GEN(dev_priv) < 8) {
- DRM_INFO("DMAR active, disabling use of stolen memory\n");
+ if (intel_vtd_active() && INTEL_GEN(i915) < 8) {
+ dev_notice(i915->drm.dev,
+ "%s, disabling use of stolen memory\n",
+ "DMAR active");
return 0;
}
if (resource_size(&intel_graphics_stolen_res) == 0)
return 0;
- dev_priv->dsm = intel_graphics_stolen_res;
+ i915->dsm = intel_graphics_stolen_res;
- if (i915_adjust_stolen(dev_priv, &dev_priv->dsm))
+ if (i915_adjust_stolen(i915, &i915->dsm))
return 0;
- GEM_BUG_ON(dev_priv->dsm.start == 0);
- GEM_BUG_ON(dev_priv->dsm.end <= dev_priv->dsm.start);
+ GEM_BUG_ON(i915->dsm.start == 0);
+ GEM_BUG_ON(i915->dsm.end <= i915->dsm.start);
- stolen_top = dev_priv->dsm.end + 1;
+ stolen_top = i915->dsm.end + 1;
reserved_base = stolen_top;
reserved_size = 0;
- switch (INTEL_GEN(dev_priv)) {
+ switch (INTEL_GEN(i915)) {
case 2:
case 3:
break;
case 4:
- if (!IS_G4X(dev_priv))
+ if (!IS_G4X(i915))
break;
/* fall through */
case 5:
- g4x_get_stolen_reserved(dev_priv,
+ g4x_get_stolen_reserved(i915, uncore,
&reserved_base, &reserved_size);
break;
case 6:
- gen6_get_stolen_reserved(dev_priv,
+ gen6_get_stolen_reserved(i915, uncore,
&reserved_base, &reserved_size);
break;
case 7:
- if (IS_VALLEYVIEW(dev_priv))
- vlv_get_stolen_reserved(dev_priv,
+ if (IS_VALLEYVIEW(i915))
+ vlv_get_stolen_reserved(i915, uncore,
&reserved_base, &reserved_size);
else
- gen7_get_stolen_reserved(dev_priv,
+ gen7_get_stolen_reserved(i915, uncore,
&reserved_base, &reserved_size);
break;
case 8:
case 9:
case 10:
- if (IS_LP(dev_priv))
- chv_get_stolen_reserved(dev_priv,
+ if (IS_LP(i915))
+ chv_get_stolen_reserved(i915, uncore,
&reserved_base, &reserved_size);
else
- bdw_get_stolen_reserved(dev_priv,
+ bdw_get_stolen_reserved(i915, uncore,
&reserved_base, &reserved_size);
break;
- case 11:
default:
- icl_get_stolen_reserved(dev_priv, &reserved_base,
+ MISSING_CASE(INTEL_GEN(i915));
+ /* fall-through */
+ case 11:
+ case 12:
+ icl_get_stolen_reserved(i915, uncore,
+ &reserved_base,
&reserved_size);
break;
}
@@ -439,12 +459,12 @@ int i915_gem_init_stolen(struct drm_i915_private *dev_priv)
reserved_size = 0;
}
- dev_priv->dsm_reserved =
- (struct resource) DEFINE_RES_MEM(reserved_base, reserved_size);
+ i915->dsm_reserved =
+ (struct resource)DEFINE_RES_MEM(reserved_base, reserved_size);
- if (!resource_contains(&dev_priv->dsm, &dev_priv->dsm_reserved)) {
+ if (!resource_contains(&i915->dsm, &i915->dsm_reserved)) {
DRM_ERROR("Stolen reserved area %pR outside stolen memory %pR\n",
- &dev_priv->dsm_reserved, &dev_priv->dsm);
+ &i915->dsm_reserved, &i915->dsm);
return 0;
}
@@ -453,14 +473,14 @@ int i915_gem_init_stolen(struct drm_i915_private *dev_priv)
reserved_total = stolen_top - reserved_base;
DRM_DEBUG_DRIVER("Memory reserved for graphics device: %lluK, usable: %lluK\n",
- (u64)resource_size(&dev_priv->dsm) >> 10,
- ((u64)resource_size(&dev_priv->dsm) - reserved_total) >> 10);
+ (u64)resource_size(&i915->dsm) >> 10,
+ ((u64)resource_size(&i915->dsm) - reserved_total) >> 10);
- dev_priv->stolen_usable_size =
- resource_size(&dev_priv->dsm) - reserved_total;
+ i915->stolen_usable_size =
+ resource_size(&i915->dsm) - reserved_total;
/* Basic memrange allocator for stolen space. */
- drm_mm_init(&dev_priv->mm.stolen, 0, dev_priv->stolen_usable_size);
+ drm_mm_init(&i915->mm.stolen, 0, i915->stolen_usable_size);
return 0;
}
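The net effect of the hunk above is that everything from reserved_base up to the top of stolen is withheld, and only the remainder is handed to the drm_mm allocator as stolen_usable_size. A minimal standalone sketch of that arithmetic, using invented example numbers rather than real platform values:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	/* Hypothetical example values, not taken from any real platform. */
	uint64_t dsm_start = 0x80000000ull;                 /* stolen base  */
	uint64_t dsm_size = 64ull << 20;                    /* 64M stolen   */
	uint64_t reserved_base = dsm_start + (60ull << 20); /* 4M reserved  */
	uint64_t stolen_top = dsm_start + dsm_size;
	uint64_t reserved_total = stolen_top - reserved_base;
	uint64_t usable = dsm_size - reserved_total;

	printf("stolen: %lluK, usable: %lluK\n",
	       (unsigned long long)(dsm_size >> 10),
	       (unsigned long long)(usable >> 10));
	return 0;
}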
@@ -469,11 +489,11 @@ static struct sg_table *
i915_pages_create_for_stolen(struct drm_device *dev,
resource_size_t offset, resource_size_t size)
{
- struct drm_i915_private *dev_priv = to_i915(dev);
+ struct drm_i915_private *i915 = to_i915(dev);
struct sg_table *st;
struct scatterlist *sg;
- GEM_BUG_ON(range_overflows(offset, size, resource_size(&dev_priv->dsm)));
+ GEM_BUG_ON(range_overflows(offset, size, resource_size(&i915->dsm)));
/* We hide that we have no struct page backing our stolen object
* by wrapping the contiguous physical allocation with a fake
@@ -493,7 +513,7 @@ i915_pages_create_for_stolen(struct drm_device *dev,
sg->offset = 0;
sg->length = size;
- sg_dma_address(sg) = (dma_addr_t)dev_priv->dsm.start + offset;
+ sg_dma_address(sg) = (dma_addr_t)i915->dsm.start + offset;
sg_dma_len(sg) = size;
return st;
@@ -524,14 +544,14 @@ static void i915_gem_object_put_pages_stolen(struct drm_i915_gem_object *obj,
static void
i915_gem_object_release_stolen(struct drm_i915_gem_object *obj)
{
- struct drm_i915_private *dev_priv = to_i915(obj->base.dev);
+ struct drm_i915_private *i915 = to_i915(obj->base.dev);
struct drm_mm_node *stolen = fetch_and_zero(&obj->stolen);
GEM_BUG_ON(!stolen);
- __i915_gem_object_unpin_pages(obj);
+ i915_gem_object_release_memory_region(obj);
- i915_gem_stolen_remove_node(dev_priv, stolen);
+ i915_gem_stolen_remove_node(i915, stolen);
kfree(stolen);
}
@@ -542,83 +562,133 @@ static const struct drm_i915_gem_object_ops i915_gem_object_stolen_ops = {
};
static struct drm_i915_gem_object *
-_i915_gem_object_create_stolen(struct drm_i915_private *dev_priv,
- struct drm_mm_node *stolen)
+__i915_gem_object_create_stolen(struct intel_memory_region *mem,
+ struct drm_mm_node *stolen)
{
+ static struct lock_class_key lock_class;
struct drm_i915_gem_object *obj;
unsigned int cache_level;
+ int err = -ENOMEM;
obj = i915_gem_object_alloc();
- if (obj == NULL)
- return NULL;
+ if (!obj)
+ goto err;
- drm_gem_private_object_init(&dev_priv->drm, &obj->base, stolen->size);
- i915_gem_object_init(obj, &i915_gem_object_stolen_ops);
+ drm_gem_private_object_init(&mem->i915->drm, &obj->base, stolen->size);
+ i915_gem_object_init(obj, &i915_gem_object_stolen_ops, &lock_class);
obj->stolen = stolen;
obj->read_domains = I915_GEM_DOMAIN_CPU | I915_GEM_DOMAIN_GTT;
- cache_level = HAS_LLC(dev_priv) ? I915_CACHE_LLC : I915_CACHE_NONE;
+ cache_level = HAS_LLC(mem->i915) ? I915_CACHE_LLC : I915_CACHE_NONE;
i915_gem_object_set_cache_coherency(obj, cache_level);
- if (i915_gem_object_pin_pages(obj))
+ err = i915_gem_object_pin_pages(obj);
+ if (err)
goto cleanup;
+ i915_gem_object_init_memory_region(obj, mem, 0);
+
return obj;
cleanup:
i915_gem_object_free(obj);
- return NULL;
+err:
+ return ERR_PTR(err);
}
-struct drm_i915_gem_object *
-i915_gem_object_create_stolen(struct drm_i915_private *dev_priv,
- resource_size_t size)
+static struct drm_i915_gem_object *
+_i915_gem_object_create_stolen(struct intel_memory_region *mem,
+ resource_size_t size,
+ unsigned int flags)
{
+ struct drm_i915_private *i915 = mem->i915;
struct drm_i915_gem_object *obj;
struct drm_mm_node *stolen;
int ret;
- if (!drm_mm_initialized(&dev_priv->mm.stolen))
- return NULL;
+ if (!drm_mm_initialized(&i915->mm.stolen))
+ return ERR_PTR(-ENODEV);
if (size == 0)
- return NULL;
+ return ERR_PTR(-EINVAL);
stolen = kzalloc(sizeof(*stolen), GFP_KERNEL);
if (!stolen)
- return NULL;
+ return ERR_PTR(-ENOMEM);
- ret = i915_gem_stolen_insert_node(dev_priv, stolen, size, 4096);
+ ret = i915_gem_stolen_insert_node(i915, stolen, size, 4096);
if (ret) {
- kfree(stolen);
- return NULL;
+ obj = ERR_PTR(ret);
+ goto err_free;
}
- obj = _i915_gem_object_create_stolen(dev_priv, stolen);
- if (obj)
- return obj;
+ obj = __i915_gem_object_create_stolen(mem, stolen);
+ if (IS_ERR(obj))
+ goto err_remove;
+
+ return obj;
- i915_gem_stolen_remove_node(dev_priv, stolen);
+err_remove:
+ i915_gem_stolen_remove_node(i915, stolen);
+err_free:
kfree(stolen);
- return NULL;
+ return obj;
}
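With the change above, stolen object creation reports failures through ERR_PTR() instead of returning NULL, so callers can see why the allocation failed. A small user-space re-creation of the ERR_PTR/IS_ERR/PTR_ERR encoding (the real helpers live in <linux/err.h>; the versions below are simplified stand-ins for illustration only):

#include <errno.h>
#include <stdio.h>

#define MAX_ERRNO	4095

/* Toy stand-ins for the kernel helpers: an errno packed into the top
 * of the pointer range is treated as an error pointer. */
static inline void *ERR_PTR(long error) { return (void *)error; }
static inline long PTR_ERR(const void *ptr) { return (long)ptr; }
static inline int IS_ERR(const void *ptr)
{
	return (unsigned long)ptr >= (unsigned long)-MAX_ERRNO;
}

static void *create_object(int fail)
{
	static int object;

	return fail ? ERR_PTR(-ENODEV) : (void *)&object;
}

int main(void)
{
	void *obj = create_object(1);

	if (IS_ERR(obj))
		printf("creation failed: %ld\n", PTR_ERR(obj));
	return 0;
}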
struct drm_i915_gem_object *
-i915_gem_object_create_stolen_for_preallocated(struct drm_i915_private *dev_priv,
+i915_gem_object_create_stolen(struct drm_i915_private *i915,
+ resource_size_t size)
+{
+ return i915_gem_object_create_region(i915->mm.regions[INTEL_REGION_STOLEN],
+ size, I915_BO_ALLOC_CONTIGUOUS);
+}
+
+static int init_stolen(struct intel_memory_region *mem)
+{
+ intel_memory_region_set_name(mem, "stolen");
+
+ /*
+ * Initialise stolen early so that we may reserve preallocated
+ * objects for the BIOS to KMS transition.
+ */
+ return i915_gem_init_stolen(mem->i915);
+}
+
+static void release_stolen(struct intel_memory_region *mem)
+{
+ i915_gem_cleanup_stolen(mem->i915);
+}
+
+static const struct intel_memory_region_ops i915_region_stolen_ops = {
+ .init = init_stolen,
+ .release = release_stolen,
+ .create_object = _i915_gem_object_create_stolen,
+};
+
+struct intel_memory_region *i915_gem_stolen_setup(struct drm_i915_private *i915)
+{
+ return intel_memory_region_create(i915,
+ intel_graphics_stolen_res.start,
+ resource_size(&intel_graphics_stolen_res),
+ PAGE_SIZE, 0,
+ &i915_region_stolen_ops);
+}
+
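Stolen memory is now registered as an intel_memory_region whose behaviour is supplied through the i915_region_stolen_ops table above (init, release, create_object), with i915_gem_object_create_stolen reduced to a thin wrapper over i915_gem_object_create_region. A stripped-down, standalone sketch of that ops-table dispatch pattern; every name below is invented for illustration and none of them are driver symbols:

#include <stdio.h>

struct region;

struct region_ops {
	int (*init)(struct region *r);
	void (*release)(struct region *r);
};

struct region {
	const char *name;
	const struct region_ops *ops;
};

static int toy_init(struct region *r)
{
	printf("init %s\n", r->name);
	return 0;
}

static void toy_release(struct region *r)
{
	printf("release %s\n", r->name);
}

static const struct region_ops toy_ops = {
	.init = toy_init,
	.release = toy_release,
};

int main(void)
{
	/* Generic code only sees the ops table, as with the region above. */
	struct region stolen = { .name = "stolen", .ops = &toy_ops };

	if (stolen.ops->init(&stolen) == 0)
		stolen.ops->release(&stolen);
	return 0;
}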
+struct drm_i915_gem_object *
+i915_gem_object_create_stolen_for_preallocated(struct drm_i915_private *i915,
resource_size_t stolen_offset,
resource_size_t gtt_offset,
resource_size_t size)
{
- struct i915_ggtt *ggtt = &dev_priv->ggtt;
+ struct intel_memory_region *mem = i915->mm.regions[INTEL_REGION_STOLEN];
+ struct i915_ggtt *ggtt = &i915->ggtt;
struct drm_i915_gem_object *obj;
struct drm_mm_node *stolen;
struct i915_vma *vma;
int ret;
- if (!drm_mm_initialized(&dev_priv->mm.stolen))
- return NULL;
-
- lockdep_assert_held(&dev_priv->drm.struct_mutex);
+ if (!drm_mm_initialized(&i915->mm.stolen))
+ return ERR_PTR(-ENODEV);
DRM_DEBUG_DRIVER("creating preallocated stolen object: stolen_offset=%pa, gtt_offset=%pa, size=%pa\n",
&stolen_offset, &gtt_offset, &size);
@@ -627,29 +697,29 @@ i915_gem_object_create_stolen_for_preallocated(struct drm_i915_private *dev_priv
if (WARN_ON(size == 0) ||
WARN_ON(!IS_ALIGNED(size, I915_GTT_PAGE_SIZE)) ||
WARN_ON(!IS_ALIGNED(stolen_offset, I915_GTT_MIN_ALIGNMENT)))
- return NULL;
+ return ERR_PTR(-EINVAL);
stolen = kzalloc(sizeof(*stolen), GFP_KERNEL);
if (!stolen)
- return NULL;
+ return ERR_PTR(-ENOMEM);
stolen->start = stolen_offset;
stolen->size = size;
- mutex_lock(&dev_priv->mm.stolen_lock);
- ret = drm_mm_reserve_node(&dev_priv->mm.stolen, stolen);
- mutex_unlock(&dev_priv->mm.stolen_lock);
+ mutex_lock(&i915->mm.stolen_lock);
+ ret = drm_mm_reserve_node(&i915->mm.stolen, stolen);
+ mutex_unlock(&i915->mm.stolen_lock);
if (ret) {
DRM_DEBUG_DRIVER("failed to allocate stolen space\n");
kfree(stolen);
- return NULL;
+ return ERR_PTR(ret);
}
- obj = _i915_gem_object_create_stolen(dev_priv, stolen);
- if (obj == NULL) {
+ obj = __i915_gem_object_create_stolen(mem, stolen);
+ if (IS_ERR(obj)) {
DRM_DEBUG_DRIVER("failed to allocate stolen object\n");
- i915_gem_stolen_remove_node(dev_priv, stolen);
+ i915_gem_stolen_remove_node(i915, stolen);
kfree(stolen);
- return NULL;
+ return obj;
}
/* Some objects just need physical mem from stolen space */
@@ -671,22 +741,26 @@ i915_gem_object_create_stolen_for_preallocated(struct drm_i915_private *dev_priv
* setting up the GTT space. The actual reservation will occur
* later.
*/
+ mutex_lock(&ggtt->vm.mutex);
ret = i915_gem_gtt_reserve(&ggtt->vm, &vma->node,
size, gtt_offset, obj->cache_level,
0);
if (ret) {
DRM_DEBUG_DRIVER("failed to allocate stolen GTT space\n");
+ mutex_unlock(&ggtt->vm.mutex);
goto err_pages;
}
GEM_BUG_ON(!drm_mm_node_allocated(&vma->node));
+ GEM_BUG_ON(vma->pages);
vma->pages = obj->mm.pages;
- vma->flags |= I915_VMA_GLOBAL_BIND;
+ atomic_set(&vma->pages_count, I915_VMA_PAGES_ACTIVE);
+
+ set_bit(I915_VMA_GLOBAL_BIND_BIT, __i915_vma_flags(vma));
__i915_vma_set_map_and_fenceable(vma);
- mutex_lock(&ggtt->vm.mutex);
- list_move_tail(&vma->vm_link, &ggtt->vm.bound_list);
+ list_add_tail(&vma->vm_link, &ggtt->vm.bound_list);
mutex_unlock(&ggtt->vm.mutex);
GEM_BUG_ON(i915_gem_object_is_shrinkable(obj));
@@ -698,5 +772,5 @@ err_pages:
i915_gem_object_unpin_pages(obj);
err:
i915_gem_object_put(obj);
- return NULL;
+ return ERR_PTR(ret);
}
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_stolen.h b/drivers/gpu/drm/i915/gem/i915_gem_stolen.h
new file mode 100644
index 000000000000..c1040627fbf3
--- /dev/null
+++ b/drivers/gpu/drm/i915/gem/i915_gem_stolen.h
@@ -0,0 +1,34 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2019 Intel Corporation
+ */
+
+#ifndef __I915_GEM_STOLEN_H__
+#define __I915_GEM_STOLEN_H__
+
+#include <linux/types.h>
+
+struct drm_i915_private;
+struct drm_mm_node;
+struct drm_i915_gem_object;
+
+int i915_gem_stolen_insert_node(struct drm_i915_private *dev_priv,
+ struct drm_mm_node *node, u64 size,
+ unsigned alignment);
+int i915_gem_stolen_insert_node_in_range(struct drm_i915_private *dev_priv,
+ struct drm_mm_node *node, u64 size,
+ unsigned alignment, u64 start,
+ u64 end);
+void i915_gem_stolen_remove_node(struct drm_i915_private *dev_priv,
+ struct drm_mm_node *node);
+struct intel_memory_region *i915_gem_stolen_setup(struct drm_i915_private *i915);
+struct drm_i915_gem_object *
+i915_gem_object_create_stolen(struct drm_i915_private *dev_priv,
+ resource_size_t size);
+struct drm_i915_gem_object *
+i915_gem_object_create_stolen_for_preallocated(struct drm_i915_private *dev_priv,
+ resource_size_t stolen_offset,
+ resource_size_t gtt_offset,
+ resource_size_t size);
+
+#endif /* __I915_GEM_STOLEN_H__ */
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_throttle.c b/drivers/gpu/drm/i915/gem/i915_gem_throttle.c
index adb3074d9ce2..540ef0551789 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_throttle.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_throttle.c
@@ -41,7 +41,7 @@ i915_gem_throttle_ioctl(struct drm_device *dev, void *data,
long ret;
/* ABI: return -EIO if already wedged */
- ret = i915_terminally_wedged(to_i915(dev));
+ ret = intel_gt_terminally_wedged(&to_i915(dev)->gt);
if (ret)
return ret;
@@ -50,10 +50,8 @@ i915_gem_throttle_ioctl(struct drm_device *dev, void *data,
if (time_after_eq(request->emitted_jiffies, recent_enough))
break;
- if (target) {
+ if (target && xchg(&target->file_priv, NULL))
list_del(&target->client_link);
- target->file_priv = NULL;
- }
target = request;
}
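The throttle loop above now claims a request by swapping file_priv to NULL with xchg() and only performs the list_del() when it actually observed a non-NULL owner, so a concurrent release path cannot unlink the same entry twice. A small standalone model of that claim using C11 atomics in place of the kernel xchg(); the struct and values are illustrative only (build with -std=c11):

#include <stdatomic.h>
#include <stdio.h>

struct request {
	_Atomic(void *) file_priv;
};

static int claim(struct request *rq)
{
	/* Non-zero only for the caller that swapped out the old owner. */
	return atomic_exchange(&rq->file_priv, NULL) != NULL;
}

int main(void)
{
	static int owner;
	struct request rq = { .file_priv = &owner };

	printf("first claim: %d\n", claim(&rq));  /* 1: we won the exchange */
	printf("second claim: %d\n", claim(&rq)); /* 0: already claimed     */
	return 0;
}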
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_tiling.c b/drivers/gpu/drm/i915/gem/i915_gem_tiling.c
index ca0c2f451742..6c7825a2dc2a 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_tiling.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_tiling.c
@@ -11,6 +11,7 @@
#include "i915_drv.h"
#include "i915_gem.h"
#include "i915_gem_ioctls.h"
+#include "i915_gem_mman.h"
#include "i915_gem_object.h"
/**
@@ -181,22 +182,25 @@ static int
i915_gem_object_fence_prepare(struct drm_i915_gem_object *obj,
int tiling_mode, unsigned int stride)
{
+ struct i915_ggtt *ggtt = &to_i915(obj->base.dev)->ggtt;
struct i915_vma *vma;
- int ret;
+ int ret = 0;
if (tiling_mode == I915_TILING_NONE)
return 0;
+ mutex_lock(&ggtt->vm.mutex);
for_each_ggtt_vma(vma, obj) {
if (i915_vma_fence_prepare(vma, tiling_mode, stride))
continue;
- ret = i915_vma_unbind(vma);
+ ret = __i915_vma_unbind(vma);
if (ret)
- return ret;
+ break;
}
+ mutex_unlock(&ggtt->vm.mutex);
- return 0;
+ return ret;
}
int
@@ -212,7 +216,6 @@ i915_gem_object_set_tiling(struct drm_i915_gem_object *obj,
GEM_BUG_ON(!i915_tiling_ok(obj, tiling, stride));
GEM_BUG_ON(!stride ^ (tiling == I915_TILING_NONE));
- lockdep_assert_held(&i915->drm.struct_mutex);
if ((tiling | stride) == obj->tiling_and_stride)
return 0;
@@ -233,16 +236,18 @@ i915_gem_object_set_tiling(struct drm_i915_gem_object *obj,
* whilst executing a fenced command for an untiled object.
*/
- err = i915_gem_object_fence_prepare(obj, tiling, stride);
- if (err)
- return err;
-
i915_gem_object_lock(obj);
if (i915_gem_object_is_framebuffer(obj)) {
i915_gem_object_unlock(obj);
return -EBUSY;
}
+ err = i915_gem_object_fence_prepare(obj, tiling, stride);
+ if (err) {
+ i915_gem_object_unlock(obj);
+ return err;
+ }
+
/* If the memory has unknown (i.e. varying) swizzling, we pin the
* pages to prevent them being swapped out and causing corruption
* due to the change in swizzling.
@@ -313,10 +318,14 @@ int
i915_gem_set_tiling_ioctl(struct drm_device *dev, void *data,
struct drm_file *file)
{
+ struct drm_i915_private *dev_priv = to_i915(dev);
struct drm_i915_gem_set_tiling *args = data;
struct drm_i915_gem_object *obj;
int err;
+ if (!dev_priv->ggtt.num_fences)
+ return -EOPNOTSUPP;
+
obj = i915_gem_object_lookup(file, args->handle);
if (!obj)
return -ENOENT;
@@ -340,9 +349,9 @@ i915_gem_set_tiling_ioctl(struct drm_device *dev, void *data,
args->stride = 0;
} else {
if (args->tiling_mode == I915_TILING_X)
- args->swizzle_mode = to_i915(dev)->mm.bit_6_swizzle_x;
+ args->swizzle_mode = to_i915(dev)->ggtt.bit_6_swizzle_x;
else
- args->swizzle_mode = to_i915(dev)->mm.bit_6_swizzle_y;
+ args->swizzle_mode = to_i915(dev)->ggtt.bit_6_swizzle_y;
/* Hide bit 17 swizzling from the user. This prevents old Mesa
* from aborting the application on sw fallbacks to bit 17,
@@ -364,12 +373,7 @@ i915_gem_set_tiling_ioctl(struct drm_device *dev, void *data,
}
}
- err = mutex_lock_interruptible(&dev->struct_mutex);
- if (err)
- goto err;
-
err = i915_gem_object_set_tiling(obj, args->tiling_mode, args->stride);
- mutex_unlock(&dev->struct_mutex);
/* We have to maintain this existing ABI... */
args->stride = i915_gem_object_get_stride(obj);
@@ -402,6 +406,9 @@ i915_gem_get_tiling_ioctl(struct drm_device *dev, void *data,
struct drm_i915_gem_object *obj;
int err = -ENOENT;
+ if (!dev_priv->ggtt.num_fences)
+ return -EOPNOTSUPP;
+
rcu_read_lock();
obj = i915_gem_object_lookup_rcu(file, args->handle);
if (obj) {
@@ -415,10 +422,10 @@ i915_gem_get_tiling_ioctl(struct drm_device *dev, void *data,
switch (args->tiling_mode) {
case I915_TILING_X:
- args->swizzle_mode = dev_priv->mm.bit_6_swizzle_x;
+ args->swizzle_mode = dev_priv->ggtt.bit_6_swizzle_x;
break;
case I915_TILING_Y:
- args->swizzle_mode = dev_priv->mm.bit_6_swizzle_y;
+ args->swizzle_mode = dev_priv->ggtt.bit_6_swizzle_y;
break;
default:
case I915_TILING_NONE:
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_userptr.c b/drivers/gpu/drm/i915/gem/i915_gem_userptr.c
index 2caa594322bc..580319b7bf1a 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_userptr.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_userptr.c
@@ -12,11 +12,10 @@
#include <drm/i915_drm.h>
+#include "i915_drv.h"
#include "i915_gem_ioctls.h"
#include "i915_gem_object.h"
#include "i915_scatterlist.h"
-#include "i915_trace.h"
-#include "intel_drv.h"
struct i915_mm_struct {
struct mm_struct *mm;
@@ -93,7 +92,6 @@ userptr_mn_invalidate_range_start(struct mmu_notifier *_mn,
struct i915_mmu_notifier *mn =
container_of(_mn, struct i915_mmu_notifier, mn);
struct interval_tree_node *it;
- struct mutex *unlock = NULL;
unsigned long end;
int ret = 0;
@@ -130,32 +128,14 @@ userptr_mn_invalidate_range_start(struct mmu_notifier *_mn,
}
spin_unlock(&mn->lock);
- if (!unlock) {
- unlock = &mn->mm->i915->drm.struct_mutex;
-
- switch (mutex_trylock_recursive(unlock)) {
- default:
- case MUTEX_TRYLOCK_FAILED:
- if (mutex_lock_killable_nested(unlock, I915_MM_SHRINKER)) {
- i915_gem_object_put(obj);
- return -EINTR;
- }
- /* fall through */
- case MUTEX_TRYLOCK_SUCCESS:
- break;
-
- case MUTEX_TRYLOCK_RECURSIVE:
- unlock = ERR_PTR(-EEXIST);
- break;
- }
- }
-
- ret = i915_gem_object_unbind(obj);
+ ret = i915_gem_object_unbind(obj,
+ I915_GEM_OBJECT_UNBIND_ACTIVE |
+ I915_GEM_OBJECT_UNBIND_BARRIER);
if (ret == 0)
- ret = __i915_gem_object_put_pages(obj, I915_MM_SHRINKER);
+ ret = __i915_gem_object_put_pages(obj);
i915_gem_object_put(obj);
if (ret)
- goto unlock;
+ return ret;
spin_lock(&mn->lock);
@@ -168,10 +148,6 @@ userptr_mn_invalidate_range_start(struct mmu_notifier *_mn,
}
spin_unlock(&mn->lock);
-unlock:
- if (!IS_ERR_OR_NULL(unlock))
- mutex_unlock(unlock);
-
return ret;
}
@@ -427,7 +403,7 @@ struct get_pages_work {
static struct sg_table *
__i915_gem_userptr_alloc_pages(struct drm_i915_gem_object *obj,
- struct page **pvec, int num_pages)
+ struct page **pvec, unsigned long num_pages)
{
unsigned int max_segment = i915_sg_segment_size();
struct sg_table *st;
@@ -473,9 +449,10 @@ __i915_gem_userptr_get_pages_worker(struct work_struct *_work)
{
struct get_pages_work *work = container_of(_work, typeof(*work), work);
struct drm_i915_gem_object *obj = work->obj;
- const int npages = obj->base.size >> PAGE_SHIFT;
+ const unsigned long npages = obj->base.size >> PAGE_SHIFT;
+ unsigned long pinned;
struct page **pvec;
- int pinned, ret;
+ int ret;
ret = -ENOMEM;
pinned = 0;
@@ -484,31 +461,36 @@ __i915_gem_userptr_get_pages_worker(struct work_struct *_work)
if (pvec != NULL) {
struct mm_struct *mm = obj->userptr.mm->mm;
unsigned int flags = 0;
+ int locked = 0;
if (!i915_gem_object_is_readonly(obj))
flags |= FOLL_WRITE;
ret = -EFAULT;
if (mmget_not_zero(mm)) {
- down_read(&mm->mmap_sem);
while (pinned < npages) {
+ if (!locked) {
+ down_read(&mm->mmap_sem);
+ locked = 1;
+ }
ret = get_user_pages_remote
(work->task, mm,
obj->userptr.ptr + pinned * PAGE_SIZE,
npages - pinned,
flags,
- pvec + pinned, NULL, NULL);
+ pvec + pinned, NULL, &locked);
if (ret < 0)
break;
pinned += ret;
}
- up_read(&mm->mmap_sem);
+ if (locked)
+ up_read(&mm->mmap_sem);
mmput(mm);
}
}
- mutex_lock(&obj->mm.lock);
+ mutex_lock_nested(&obj->mm.lock, I915_MM_GET_PAGES);
if (obj->userptr.work == &work->work) {
struct sg_table *pages = ERR_PTR(ret);
@@ -578,7 +560,7 @@ __i915_gem_userptr_get_pages_schedule(struct drm_i915_gem_object *obj)
static int i915_gem_userptr_get_pages(struct drm_i915_gem_object *obj)
{
- const int num_pages = obj->base.size >> PAGE_SHIFT;
+ const unsigned long num_pages = obj->base.size >> PAGE_SHIFT;
struct mm_struct *mm = obj->userptr.mm->mm;
struct page **pvec;
struct sg_table *pages;
@@ -662,8 +644,16 @@ i915_gem_userptr_put_pages(struct drm_i915_gem_object *obj,
__i915_gem_object_release_shmem(obj, pages, true);
i915_gem_gtt_finish_pages(obj, pages);
+ /*
+ * We always mark objects as dirty when they are used by the GPU,
+ * just in case. However, if we set the vma as being read-only we know
+ * that the object will never have been written to.
+ */
+ if (i915_gem_object_is_readonly(obj))
+ obj->mm.dirty = false;
+
for_each_sgt_page(page, sgt_iter, pages) {
- if (obj->mm.dirty)
+ if (obj->mm.dirty && trylock_page(page)) {
/*
* As this may not be anonymous memory (e.g. shmem)
* but exist on a real mapping, we have to lock
@@ -671,8 +661,20 @@ i915_gem_userptr_put_pages(struct drm_i915_gem_object *obj,
* the page reference is not sufficient to
* prevent the inode from being truncated.
* Play safe and take the lock.
+ *
+ * However...!
+ *
+ * The mmu-notifier can be invalidated for a
+ * migrate_page, that is already holding the lock
+ * on the page. Such a try_to_unmap() will result
+ * in us calling put_pages() and so recursively trying
+ * to lock the page. We avoid that deadlock with
+ * a trylock_page() and in exchange we risk missing
+ * some page dirtying.
*/
- set_page_dirty_lock(page);
+ set_page_dirty(page);
+ unlock_page(page);
+ }
mark_page_accessed(page);
put_page(page);
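As the comment added above explains, the notifier can run while page migration already holds the page lock, so a blocking set_page_dirty_lock() could deadlock against ourselves; the trylock_page() form trades a possibly missed dirty bit for safety. A user-space analogue of that trade-off, with a plain pthread mutex standing in for the page lock (illustrative only; build with -pthread):

#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t page_lock = PTHREAD_MUTEX_INITIALIZER;

static void mark_dirty(void)
{
	if (pthread_mutex_trylock(&page_lock) == 0) {
		printf("page dirtied\n");
		pthread_mutex_unlock(&page_lock);
	} else {
		/* Lock already held further up the call chain (think page
		 * migration); skip the dirtying rather than deadlock. */
		printf("lock busy, dirtying skipped\n");
	}
}

int main(void)
{
	mark_dirty();                      /* lock free: dirtying happens */

	pthread_mutex_lock(&page_lock);    /* simulate migration holding it */
	mark_dirty();                      /* a blocking lock would hang here */
	pthread_mutex_unlock(&page_lock);
	return 0;
}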
@@ -702,6 +704,7 @@ i915_gem_userptr_dmabuf_export(struct drm_i915_gem_object *obj)
static const struct drm_i915_gem_object_ops i915_gem_userptr_ops = {
.flags = I915_GEM_OBJECT_HAS_STRUCT_PAGE |
I915_GEM_OBJECT_IS_SHRINKABLE |
+ I915_GEM_OBJECT_NO_GGTT |
I915_GEM_OBJECT_ASYNC_CANCEL,
.get_pages = i915_gem_userptr_get_pages,
.put_pages = i915_gem_userptr_put_pages,
@@ -749,6 +752,7 @@ i915_gem_userptr_ioctl(struct drm_device *dev,
void *data,
struct drm_file *file)
{
+ static struct lock_class_key lock_class;
struct drm_i915_private *dev_priv = to_i915(dev);
struct drm_i915_gem_userptr *args = data;
struct drm_i915_gem_object *obj;
@@ -776,14 +780,11 @@ i915_gem_userptr_ioctl(struct drm_device *dev,
return -EFAULT;
if (args->flags & I915_USERPTR_READ_ONLY) {
- struct i915_address_space *vm;
-
/*
* On almost all of the older hw, we cannot tell the GPU that
* a page is readonly.
*/
- vm = dev_priv->kernel_context->vm;
- if (!vm || !vm->has_read_only)
+ if (!dev_priv->gt.vm->has_read_only)
return -ENODEV;
}
@@ -792,7 +793,7 @@ i915_gem_userptr_ioctl(struct drm_device *dev,
return -ENOMEM;
drm_gem_private_object_init(dev, &obj->base, args->user_size);
- i915_gem_object_init(obj, &i915_gem_userptr_ops);
+ i915_gem_object_init(obj, &i915_gem_userptr_ops, &lock_class);
obj->read_domains = I915_GEM_DOMAIN_CPU;
obj->write_domain = I915_GEM_DOMAIN_CPU;
i915_gem_object_set_cache_coherency(obj, I915_CACHE_LLC);
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_wait.c b/drivers/gpu/drm/i915/gem/i915_gem_wait.c
index 26ec6579b7cd..8af55cd3e690 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_wait.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_wait.c
@@ -31,11 +31,10 @@ i915_gem_object_wait_fence(struct dma_fence *fence,
}
static long
-i915_gem_object_wait_reservation(struct reservation_object *resv,
+i915_gem_object_wait_reservation(struct dma_resv *resv,
unsigned int flags,
long timeout)
{
- unsigned int seq = __read_seqcount_begin(&resv->seq);
struct dma_fence *excl;
bool prune_fences = false;
@@ -44,7 +43,7 @@ i915_gem_object_wait_reservation(struct reservation_object *resv,
unsigned int count, i;
int ret;
- ret = reservation_object_get_fences_rcu(resv,
+ ret = dma_resv_get_fences_rcu(resv,
&excl, &count, &shared);
if (ret)
return ret;
@@ -73,7 +72,7 @@ i915_gem_object_wait_reservation(struct reservation_object *resv,
*/
prune_fences = count && timeout >= 0;
} else {
- excl = reservation_object_get_excl_rcu(resv);
+ excl = dma_resv_get_excl_rcu(resv);
}
if (excl && timeout >= 0)
@@ -83,15 +82,12 @@ i915_gem_object_wait_reservation(struct reservation_object *resv,
/*
* Opportunistically prune the fences iff we know they have *all* been
- * signaled and that the reservation object has not been changed (i.e.
- * no new fences have been added).
+ * signaled.
*/
- if (prune_fences && !__read_seqcount_retry(&resv->seq, seq)) {
- if (reservation_object_trylock(resv)) {
- if (!__read_seqcount_retry(&resv->seq, seq))
- reservation_object_add_excl_fence(resv, NULL);
- reservation_object_unlock(resv);
- }
+ if (prune_fences && dma_resv_trylock(resv)) {
+ if (dma_resv_test_signaled_rcu(resv, true))
+ dma_resv_add_excl_fence(resv, NULL);
+ dma_resv_unlock(resv);
}
return timeout;
@@ -144,7 +140,7 @@ i915_gem_object_wait_priority(struct drm_i915_gem_object *obj,
unsigned int count, i;
int ret;
- ret = reservation_object_get_fences_rcu(obj->base.resv,
+ ret = dma_resv_get_fences_rcu(obj->base.resv,
&excl, &count, &shared);
if (ret)
return ret;
@@ -156,7 +152,7 @@ i915_gem_object_wait_priority(struct drm_i915_gem_object *obj,
kfree(shared);
} else {
- excl = reservation_object_get_excl_rcu(obj->base.resv);
+ excl = dma_resv_get_excl_rcu(obj->base.resv);
}
if (excl) {
diff --git a/drivers/gpu/drm/i915/gem/i915_gemfs.c b/drivers/gpu/drm/i915/gem/i915_gemfs.c
index 099f3397aada..5e6e8c91ab38 100644
--- a/drivers/gpu/drm/i915/gem/i915_gemfs.c
+++ b/drivers/gpu/drm/i915/gem/i915_gemfs.c
@@ -20,31 +20,18 @@ int i915_gemfs_init(struct drm_i915_private *i915)
if (!type)
return -ENODEV;
- gemfs = kern_mount(type);
- if (IS_ERR(gemfs))
- return PTR_ERR(gemfs);
-
/*
- * Enable huge-pages for objects that are at least HPAGE_PMD_SIZE, most
- * likely 2M. Note that within_size may overallocate huge-pages, if say
- * we allocate an object of size 2M + 4K, we may get 2M + 2M, but under
- * memory pressure shmem should split any huge-pages which can be
- * shrunk.
+ * By creating our own shmemfs mountpoint, we can pass in
+ * mount flags that better match our use case.
+ *
+ * One example, although it is probably better with a per-file
+ * control, is selecting huge page allocations ("huge=within_size").
+ * Currently unused due to bandwidth issues (slow reads) on Broadwell+.
*/
- if (has_transparent_hugepage()) {
- struct super_block *sb = gemfs->mnt_sb;
- /* FIXME: Disabled until we get W/A for read BW issue. */
- char options[] = "huge=never";
- int flags = 0;
- int err;
-
- err = sb->s_op->remount_fs(sb, &flags, options);
- if (err) {
- kern_unmount(gemfs);
- return err;
- }
- }
+ gemfs = kern_mount(type);
+ if (IS_ERR(gemfs))
+ return PTR_ERR(gemfs);
i915->mm.gemfs = gemfs;
diff --git a/drivers/gpu/drm/i915/gem/selftests/huge_gem_object.c b/drivers/gpu/drm/i915/gem/selftests/huge_gem_object.c
index 3c5d17b2b670..fa16f2c3f3ac 100644
--- a/drivers/gpu/drm/i915/gem/selftests/huge_gem_object.c
+++ b/drivers/gpu/drm/i915/gem/selftests/huge_gem_object.c
@@ -12,10 +12,14 @@ static void huge_free_pages(struct drm_i915_gem_object *obj,
struct sg_table *pages)
{
unsigned long nreal = obj->scratch / PAGE_SIZE;
- struct scatterlist *sg;
+ struct sgt_iter sgt_iter;
+ struct page *page;
- for (sg = pages->sgl; sg && nreal--; sg = __sg_next(sg))
- __free_page(sg_page(sg));
+ for_each_sgt_page(page, sgt_iter, pages) {
+ __free_page(page);
+ if (!--nreal)
+ break;
+ }
sg_free_table(pages);
kfree(pages);
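The rewritten huge_free_pages() above stops after nreal pages because the fake object repeats the same small set of real pages to cover its whole size; releasing every scatterlist entry would drop the shared pages more than once. The same stop-after-the-real-ones rule in a standalone toy with an aliased array (all values invented):

#include <stdio.h>
#include <stdlib.h>

int main(void)
{
	unsigned long nreal = 2, i;
	void *pages[6];

	/* Two real allocations, the rest alias them (like the fake sg list). */
	for (i = 0; i < 6; i++)
		pages[i] = i < nreal ? malloc(64) : pages[i % nreal];

	/* Freeing every slot would double-free the aliased entries. */
	for (i = 0; i < 6; i++) {
		free(pages[i]);
		if (!--nreal)
			break;
	}
	printf("freed only the %lu real pages\n", i + 1);
	return 0;
}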
@@ -70,7 +74,6 @@ static int huge_get_pages(struct drm_i915_gem_object *obj)
err:
huge_free_pages(obj, pages);
-
return -ENOMEM;
#undef GFP
}
@@ -96,6 +99,7 @@ huge_gem_object(struct drm_i915_private *i915,
phys_addr_t phys_size,
dma_addr_t dma_size)
{
+ static struct lock_class_key lock_class;
struct drm_i915_gem_object *obj;
unsigned int cache_level;
@@ -111,7 +115,7 @@ huge_gem_object(struct drm_i915_private *i915,
return ERR_PTR(-ENOMEM);
drm_gem_private_object_init(&i915->drm, &obj->base, dma_size);
- i915_gem_object_init(obj, &huge_ops);
+ i915_gem_object_init(obj, &huge_ops, &lock_class);
obj->read_domains = I915_GEM_DOMAIN_CPU;
obj->write_domain = I915_GEM_DOMAIN_CPU;
diff --git a/drivers/gpu/drm/i915/gem/selftests/huge_gem_object.h b/drivers/gpu/drm/i915/gem/selftests/huge_gem_object.h
index 549c1394bcdc..b8cf31b7bf14 100644
--- a/drivers/gpu/drm/i915/gem/selftests/huge_gem_object.h
+++ b/drivers/gpu/drm/i915/gem/selftests/huge_gem_object.h
@@ -7,6 +7,12 @@
#ifndef __HUGE_GEM_OBJECT_H
#define __HUGE_GEM_OBJECT_H
+#include <linux/types.h>
+
+#include "gem/i915_gem_object_types.h"
+
+struct drm_i915_private;
+
struct drm_i915_gem_object *
huge_gem_object(struct drm_i915_private *i915,
phys_addr_t phys_size,
diff --git a/drivers/gpu/drm/i915/gem/selftests/huge_pages.c b/drivers/gpu/drm/i915/gem/selftests/huge_pages.c
index b74729b6f353..9311250d7d6f 100644
--- a/drivers/gpu/drm/i915/gem/selftests/huge_pages.c
+++ b/drivers/gpu/drm/i915/gem/selftests/huge_pages.c
@@ -8,13 +8,18 @@
#include "i915_selftest.h"
+#include "gem/i915_gem_region.h"
+#include "gem/i915_gem_lmem.h"
#include "gem/i915_gem_pm.h"
+#include "gt/intel_gt.h"
+
#include "igt_gem_utils.h"
#include "mock_context.h"
#include "selftests/mock_drm.h"
#include "selftests/mock_gem_device.h"
+#include "selftests/mock_region.h"
#include "selftests/i915_random.h"
static const unsigned int page_sizes[] = {
@@ -111,8 +116,6 @@ static int get_huge_pages(struct drm_i915_gem_object *obj)
if (i915_gem_gtt_prepare_pages(obj, st))
goto err;
- obj->mm.madv = I915_MADV_DONTNEED;
-
GEM_BUG_ON(sg_page_sizes != obj->mm.page_mask);
__i915_gem_object_set_pages(obj, st, sg_page_sizes);
@@ -133,7 +136,6 @@ static void put_huge_pages(struct drm_i915_gem_object *obj,
huge_pages_free_pages(pages);
obj->mm.dirty = false;
- obj->mm.madv = I915_MADV_WILLNEED;
}
static const struct drm_i915_gem_object_ops huge_page_ops = {
@@ -148,6 +150,7 @@ huge_pages_object(struct drm_i915_private *i915,
u64 size,
unsigned int page_mask)
{
+ static struct lock_class_key lock_class;
struct drm_i915_gem_object *obj;
GEM_BUG_ON(!size);
@@ -164,7 +167,9 @@ huge_pages_object(struct drm_i915_private *i915,
return ERR_PTR(-ENOMEM);
drm_gem_private_object_init(&i915->drm, &obj->base, size);
- i915_gem_object_init(obj, &huge_page_ops);
+ i915_gem_object_init(obj, &huge_page_ops, &lock_class);
+
+ i915_gem_object_set_volatile(obj);
obj->write_domain = I915_GEM_DOMAIN_CPU;
obj->read_domains = I915_GEM_DOMAIN_CPU;
@@ -225,8 +230,6 @@ static int fake_get_huge_pages(struct drm_i915_gem_object *obj)
i915_sg_trim(st);
- obj->mm.madv = I915_MADV_DONTNEED;
-
__i915_gem_object_set_pages(obj, st, sg_page_sizes);
return 0;
@@ -259,8 +262,6 @@ static int fake_get_huge_pages_single(struct drm_i915_gem_object *obj)
sg_dma_len(sg) = obj->base.size;
sg_dma_address(sg) = page_size;
- obj->mm.madv = I915_MADV_DONTNEED;
-
__i915_gem_object_set_pages(obj, st, sg->length);
return 0;
@@ -279,7 +280,6 @@ static void fake_put_huge_pages(struct drm_i915_gem_object *obj,
{
fake_free_huge_pages(obj, pages);
obj->mm.dirty = false;
- obj->mm.madv = I915_MADV_WILLNEED;
}
static const struct drm_i915_gem_object_ops fake_ops = {
@@ -297,6 +297,7 @@ static const struct drm_i915_gem_object_ops fake_ops_single = {
static struct drm_i915_gem_object *
fake_huge_pages_object(struct drm_i915_private *i915, u64 size, bool single)
{
+ static struct lock_class_key lock_class;
struct drm_i915_gem_object *obj;
GEM_BUG_ON(!size);
@@ -315,9 +316,11 @@ fake_huge_pages_object(struct drm_i915_private *i915, u64 size, bool single)
drm_gem_private_object_init(&i915->drm, &obj->base, size);
if (single)
- i915_gem_object_init(obj, &fake_ops_single);
+ i915_gem_object_init(obj, &fake_ops_single, &lock_class);
else
- i915_gem_object_init(obj, &fake_ops);
+ i915_gem_object_init(obj, &fake_ops, &lock_class);
+
+ i915_gem_object_set_volatile(obj);
obj->write_domain = I915_GEM_DOMAIN_CPU;
obj->read_domains = I915_GEM_DOMAIN_CPU;
@@ -331,7 +334,12 @@ static int igt_check_page_sizes(struct i915_vma *vma)
struct drm_i915_private *i915 = vma->vm->i915;
unsigned int supported = INTEL_INFO(i915)->page_sizes;
struct drm_i915_gem_object *obj = vma->obj;
- int err = 0;
+ int err;
+
+ /* We have to wait for the async bind to complete before our asserts */
+ err = i915_vma_sync(vma);
+ if (err)
+ return err;
if (!HAS_PAGE_SIZES(i915, vma->page_sizes.sg)) {
pr_err("unsupported page_sizes.sg=%u, supported=%u\n",
@@ -445,6 +453,88 @@ out_device:
return err;
}
+static int igt_mock_memory_region_huge_pages(void *arg)
+{
+ const unsigned int flags[] = { 0, I915_BO_ALLOC_CONTIGUOUS };
+ struct i915_ppgtt *ppgtt = arg;
+ struct drm_i915_private *i915 = ppgtt->vm.i915;
+ unsigned long supported = INTEL_INFO(i915)->page_sizes;
+ struct intel_memory_region *mem;
+ struct drm_i915_gem_object *obj;
+ struct i915_vma *vma;
+ int bit;
+ int err = 0;
+
+ mem = mock_region_create(i915, 0, SZ_2G, I915_GTT_PAGE_SIZE_4K, 0);
+ if (IS_ERR(mem)) {
+ pr_err("%s failed to create memory region\n", __func__);
+ return PTR_ERR(mem);
+ }
+
+ for_each_set_bit(bit, &supported, ilog2(I915_GTT_MAX_PAGE_SIZE) + 1) {
+ unsigned int page_size = BIT(bit);
+ resource_size_t phys;
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(flags); ++i) {
+ obj = i915_gem_object_create_region(mem, page_size,
+ flags[i]);
+ if (IS_ERR(obj)) {
+ err = PTR_ERR(obj);
+ goto out_region;
+ }
+
+ vma = i915_vma_instance(obj, &ppgtt->vm, NULL);
+ if (IS_ERR(vma)) {
+ err = PTR_ERR(vma);
+ goto out_put;
+ }
+
+ err = i915_vma_pin(vma, 0, 0, PIN_USER);
+ if (err)
+ goto out_close;
+
+ err = igt_check_page_sizes(vma);
+ if (err)
+ goto out_unpin;
+
+ phys = i915_gem_object_get_dma_address(obj, 0);
+ if (!IS_ALIGNED(phys, page_size)) {
+ pr_err("%s addr misaligned(%pa) page_size=%u\n",
+ __func__, &phys, page_size);
+ err = -EINVAL;
+ goto out_unpin;
+ }
+
+ if (vma->page_sizes.gtt != page_size) {
+ pr_err("%s page_sizes.gtt=%u, expected=%u\n",
+ __func__, vma->page_sizes.gtt,
+ page_size);
+ err = -EINVAL;
+ goto out_unpin;
+ }
+
+ i915_vma_unpin(vma);
+ i915_vma_close(vma);
+
+ __i915_gem_object_put_pages(obj);
+ i915_gem_object_put(obj);
+ }
+ }
+
+ goto out_region;
+
+out_unpin:
+ i915_vma_unpin(vma);
+out_close:
+ i915_vma_close(vma);
+out_put:
+ i915_gem_object_put(obj);
+out_region:
+ intel_memory_region_put(mem);
+ return err;
+}
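The mock-region test walks every bit set in the device's supported page-size mask, treating each bit as a candidate allocation size. The same walk in plain C over an assumed example mask of 4K, 64K and 2M pages (the mask is made up, not queried from hardware):

#include <stdio.h>

int main(void)
{
	/* Assumed example: 4K | 64K | 2M expressed as a page-size bitmask. */
	unsigned long supported = (1ul << 12) | (1ul << 16) | (1ul << 21);
	int bit;

	for (bit = 12; bit <= 21; bit++) {
		if (supported & (1ul << bit))
			printf("trying page size %luK\n", (1ul << bit) >> 10);
	}
	return 0;
}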
+
static int igt_mock_ppgtt_misaligned_dma(void *arg)
{
struct i915_ppgtt *ppgtt = arg;
@@ -560,7 +650,7 @@ static int igt_mock_ppgtt_misaligned_dma(void *arg)
i915_vma_close(vma);
i915_gem_object_unpin_pages(obj);
- __i915_gem_object_put_pages(obj, I915_MM_NORMAL);
+ __i915_gem_object_put_pages(obj);
i915_gem_object_put(obj);
}
@@ -588,7 +678,7 @@ static void close_object_list(struct list_head *objects,
list_del(&obj->st_link);
i915_gem_object_unpin_pages(obj);
- __i915_gem_object_put_pages(obj, I915_MM_NORMAL);
+ __i915_gem_object_put_pages(obj);
i915_gem_object_put(obj);
}
}
@@ -858,7 +948,7 @@ static int igt_mock_ppgtt_64K(void *arg)
i915_vma_close(vma);
i915_gem_object_unpin_pages(obj);
- __i915_gem_object_put_pages(obj, I915_MM_NORMAL);
+ __i915_gem_object_put_pages(obj);
i915_gem_object_put(obj);
}
}
@@ -877,129 +967,25 @@ out_object_put:
return err;
}
-static struct i915_vma *
-gpu_write_dw(struct i915_vma *vma, u64 offset, u32 val)
-{
- struct drm_i915_private *i915 = vma->vm->i915;
- const int gen = INTEL_GEN(i915);
- unsigned int count = vma->size >> PAGE_SHIFT;
- struct drm_i915_gem_object *obj;
- struct i915_vma *batch;
- unsigned int size;
- u32 *cmd;
- int n;
- int err;
-
- size = (1 + 4 * count) * sizeof(u32);
- size = round_up(size, PAGE_SIZE);
- obj = i915_gem_object_create_internal(i915, size);
- if (IS_ERR(obj))
- return ERR_CAST(obj);
-
- cmd = i915_gem_object_pin_map(obj, I915_MAP_WC);
- if (IS_ERR(cmd)) {
- err = PTR_ERR(cmd);
- goto err;
- }
-
- offset += vma->node.start;
-
- for (n = 0; n < count; n++) {
- if (gen >= 8) {
- *cmd++ = MI_STORE_DWORD_IMM_GEN4;
- *cmd++ = lower_32_bits(offset);
- *cmd++ = upper_32_bits(offset);
- *cmd++ = val;
- } else if (gen >= 4) {
- *cmd++ = MI_STORE_DWORD_IMM_GEN4 |
- (gen < 6 ? MI_USE_GGTT : 0);
- *cmd++ = 0;
- *cmd++ = offset;
- *cmd++ = val;
- } else {
- *cmd++ = MI_STORE_DWORD_IMM | MI_MEM_VIRTUAL;
- *cmd++ = offset;
- *cmd++ = val;
- }
-
- offset += PAGE_SIZE;
- }
-
- *cmd = MI_BATCH_BUFFER_END;
- i915_gem_chipset_flush(i915);
-
- i915_gem_object_unpin_map(obj);
-
- batch = i915_vma_instance(obj, vma->vm, NULL);
- if (IS_ERR(batch)) {
- err = PTR_ERR(batch);
- goto err;
- }
-
- err = i915_vma_pin(batch, 0, 0, PIN_USER);
- if (err)
- goto err;
-
- return batch;
-
-err:
- i915_gem_object_put(obj);
-
- return ERR_PTR(err);
-}
-
-static int gpu_write(struct i915_vma *vma,
- struct i915_gem_context *ctx,
- struct intel_engine_cs *engine,
- u32 dword,
- u32 value)
+static int gpu_write(struct intel_context *ce,
+ struct i915_vma *vma,
+ u32 dw,
+ u32 val)
{
- struct i915_request *rq;
- struct i915_vma *batch;
int err;
- GEM_BUG_ON(!intel_engine_can_store_dword(engine));
-
- batch = gpu_write_dw(vma, dword * sizeof(u32), value);
- if (IS_ERR(batch))
- return PTR_ERR(batch);
-
- rq = igt_request_alloc(ctx, engine);
- if (IS_ERR(rq)) {
- err = PTR_ERR(rq);
- goto err_batch;
- }
-
- i915_vma_lock(batch);
- err = i915_vma_move_to_active(batch, rq, 0);
- i915_vma_unlock(batch);
- if (err)
- goto err_request;
-
- i915_vma_lock(vma);
- err = i915_gem_object_set_to_gtt_domain(vma->obj, false);
- if (err == 0)
- err = i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE);
- i915_vma_unlock(vma);
- if (err)
- goto err_request;
-
- err = engine->emit_bb_start(rq,
- batch->node.start, batch->node.size,
- 0);
-err_request:
+ i915_gem_object_lock(vma->obj);
+ err = i915_gem_object_set_to_gtt_domain(vma->obj, true);
+ i915_gem_object_unlock(vma->obj);
if (err)
- i915_request_skip(rq, err);
- i915_request_add(rq);
-err_batch:
- i915_vma_unpin(batch);
- i915_vma_close(batch);
- i915_vma_put(batch);
+ return err;
- return err;
+ return igt_gpu_fill_dw(ce, vma, dw * sizeof(u32),
+ vma->size >> PAGE_SHIFT, val);
}
-static int cpu_check(struct drm_i915_gem_object *obj, u32 dword, u32 val)
+static int
+__cpu_check_shmem(struct drm_i915_gem_object *obj, u32 dword, u32 val)
{
unsigned int needs_flush;
unsigned long n;
@@ -1031,19 +1017,54 @@ static int cpu_check(struct drm_i915_gem_object *obj, u32 dword, u32 val)
return err;
}
-static int __igt_write_huge(struct i915_gem_context *ctx,
- struct intel_engine_cs *engine,
+static int __cpu_check_vmap(struct drm_i915_gem_object *obj, u32 dword, u32 val)
+{
+ unsigned long n = obj->base.size >> PAGE_SHIFT;
+ u32 *ptr;
+ int err;
+
+ err = i915_gem_object_wait(obj, 0, MAX_SCHEDULE_TIMEOUT);
+ if (err)
+ return err;
+
+ ptr = i915_gem_object_pin_map(obj, I915_MAP_WC);
+ if (IS_ERR(ptr))
+ return PTR_ERR(ptr);
+
+ ptr += dword;
+ while (n--) {
+ if (*ptr != val) {
+ pr_err("base[%u]=%08x, val=%08x\n",
+ dword, *ptr, val);
+ err = -EINVAL;
+ break;
+ }
+
+ ptr += PAGE_SIZE / sizeof(*ptr);
+ }
+
+ i915_gem_object_unpin_map(obj);
+ return err;
+}
+
+static int cpu_check(struct drm_i915_gem_object *obj, u32 dword, u32 val)
+{
+ if (i915_gem_object_has_struct_page(obj))
+ return __cpu_check_shmem(obj, dword, val);
+ else
+ return __cpu_check_vmap(obj, dword, val);
+}
+
+static int __igt_write_huge(struct intel_context *ce,
struct drm_i915_gem_object *obj,
u64 size, u64 offset,
u32 dword, u32 val)
{
- struct drm_i915_private *i915 = to_i915(obj->base.dev);
- struct i915_address_space *vm = ctx->vm ?: &i915->ggtt.vm;
unsigned int flags = PIN_USER | PIN_OFFSET_FIXED;
struct i915_vma *vma;
int err;
- vma = i915_vma_instance(obj, vm, NULL);
+ vma = i915_vma_instance(obj, ce->vm, NULL);
if (IS_ERR(vma))
return PTR_ERR(vma);
@@ -1057,7 +1078,7 @@ static int __igt_write_huge(struct i915_gem_context *ctx,
* The ggtt may have some pages reserved so
* refrain from erroring out.
*/
- if (err == -ENOSPC && i915_is_ggtt(vm))
+ if (err == -ENOSPC && i915_is_ggtt(ce->vm))
err = 0;
goto out_vma_close;
@@ -1067,7 +1088,7 @@ static int __igt_write_huge(struct i915_gem_context *ctx,
if (err)
goto out_vma_unpin;
- err = gpu_write(vma, ctx, engine, dword, val);
+ err = gpu_write(ce, vma, dword, val);
if (err) {
pr_err("gpu-write failed at offset=%llx\n", offset);
goto out_vma_unpin;
@@ -1082,22 +1103,20 @@ static int __igt_write_huge(struct i915_gem_context *ctx,
out_vma_unpin:
i915_vma_unpin(vma);
out_vma_close:
- i915_vma_destroy(vma);
-
+ __i915_vma_put(vma);
return err;
}
static int igt_write_huge(struct i915_gem_context *ctx,
struct drm_i915_gem_object *obj)
{
- struct drm_i915_private *i915 = to_i915(obj->base.dev);
- struct i915_address_space *vm = ctx->vm ?: &i915->ggtt.vm;
- static struct intel_engine_cs *engines[I915_NUM_ENGINES];
- struct intel_engine_cs *engine;
+ struct i915_gem_engines *engines;
+ struct i915_gem_engines_iter it;
+ struct intel_context *ce;
I915_RND_STATE(prng);
IGT_TIMEOUT(end_time);
unsigned int max_page_size;
- unsigned int id;
+ unsigned int count;
u64 max;
u64 num;
u64 size;
@@ -1111,19 +1130,18 @@ static int igt_write_huge(struct i915_gem_context *ctx,
if (obj->mm.page_sizes.sg & I915_GTT_PAGE_SIZE_64K)
size = round_up(size, I915_GTT_PAGE_SIZE_2M);
- max_page_size = rounddown_pow_of_two(obj->mm.page_sizes.sg);
- max = div_u64((vm->total - size), max_page_size);
-
n = 0;
- for_each_engine(engine, i915, id) {
- if (!intel_engine_can_store_dword(engine)) {
- pr_info("store-dword-imm not supported on engine=%u\n",
- id);
+ count = 0;
+ max = U64_MAX;
+ for_each_gem_engine(ce, i915_gem_context_lock_engines(ctx), it) {
+ count++;
+ if (!intel_engine_can_store_dword(ce->engine))
continue;
- }
- engines[n++] = engine;
- }
+ max = min(max, ce->vm->total);
+ n++;
+ }
+ i915_gem_context_unlock_engines(ctx);
if (!n)
return 0;
@@ -1132,23 +1150,30 @@ static int igt_write_huge(struct i915_gem_context *ctx,
* randomized order, let's also make feeding to the same engine a few
* times in succession a possibility by enlarging the permutation array.
*/
- order = i915_random_order(n * I915_NUM_ENGINES, &prng);
+ order = i915_random_order(count * count, &prng);
if (!order)
return -ENOMEM;
+ max_page_size = rounddown_pow_of_two(obj->mm.page_sizes.sg);
+ max = div_u64(max - size, max_page_size);
+
/*
* Try various offsets in an ascending/descending fashion until we
* timeout -- we want to avoid issues hidden by effectively always using
* offset = 0.
*/
i = 0;
+ engines = i915_gem_context_lock_engines(ctx);
for_each_prime_number_from(num, 0, max) {
u64 offset_low = num * max_page_size;
u64 offset_high = (max - num) * max_page_size;
u32 dword = offset_in_page(num) / 4;
+ struct intel_context *ce;
- engine = engines[order[i] % n];
- i = (i + 1) % (n * I915_NUM_ENGINES);
+ ce = engines->engines[order[i] % engines->num_engines];
+ i = (i + 1) % (count * count);
+ if (!ce || !intel_engine_can_store_dword(ce->engine))
+ continue;
/*
* In order to utilize 64K pages we need to both pad the vma
@@ -1160,22 +1185,23 @@ static int igt_write_huge(struct i915_gem_context *ctx,
offset_low = round_down(offset_low,
I915_GTT_PAGE_SIZE_2M);
- err = __igt_write_huge(ctx, engine, obj, size, offset_low,
+ err = __igt_write_huge(ce, obj, size, offset_low,
dword, num + 1);
if (err)
break;
- err = __igt_write_huge(ctx, engine, obj, size, offset_high,
+ err = __igt_write_huge(ce, obj, size, offset_high,
dword, num + 1);
if (err)
break;
if (igt_timeout(end_time,
- "%s timed out on engine=%u, offset_low=%llx offset_high=%llx, max_page_size=%x\n",
- __func__, engine->id, offset_low, offset_high,
+ "%s timed out on %s, offset_low=%llx offset_high=%llx, max_page_size=%x\n",
+ __func__, ce->engine->name, offset_low, offset_high,
max_page_size))
break;
}
+ i915_gem_context_unlock_engines(ctx);
kfree(order);
@@ -1267,7 +1293,7 @@ static int igt_ppgtt_exhaust_huge(void *arg)
}
i915_gem_object_unpin_pages(obj);
- __i915_gem_object_put_pages(obj, I915_MM_NORMAL);
+ __i915_gem_object_put_pages(obj);
i915_gem_object_put(obj);
}
}
@@ -1283,131 +1309,235 @@ out_device:
return err;
}
-static int igt_ppgtt_internal_huge(void *arg)
+typedef struct drm_i915_gem_object *
+(*igt_create_fn)(struct drm_i915_private *i915, u32 size, u32 flags);
+
+static inline bool igt_can_allocate_thp(struct drm_i915_private *i915)
+{
+ return i915->mm.gemfs && has_transparent_hugepage();
+}
+
+static struct drm_i915_gem_object *
+igt_create_shmem(struct drm_i915_private *i915, u32 size, u32 flags)
+{
+ if (!igt_can_allocate_thp(i915)) {
+ pr_info("%s missing THP support, skipping\n", __func__);
+ return ERR_PTR(-ENODEV);
+ }
+
+ return i915_gem_object_create_shmem(i915, size);
+}
+
+static struct drm_i915_gem_object *
+igt_create_internal(struct drm_i915_private *i915, u32 size, u32 flags)
+{
+ return i915_gem_object_create_internal(i915, size);
+}
+
+static struct drm_i915_gem_object *
+igt_create_system(struct drm_i915_private *i915, u32 size, u32 flags)
+{
+ return huge_pages_object(i915, size, size);
+}
+
+static struct drm_i915_gem_object *
+igt_create_local(struct drm_i915_private *i915, u32 size, u32 flags)
+{
+ return i915_gem_object_create_lmem(i915, size, flags);
+}
+
+static u32 igt_random_size(struct rnd_state *prng,
+ u32 min_page_size,
+ u32 max_page_size)
+{
+ u64 mask;
+ u32 size;
+
+ GEM_BUG_ON(!is_power_of_2(min_page_size));
+ GEM_BUG_ON(!is_power_of_2(max_page_size));
+ GEM_BUG_ON(min_page_size < PAGE_SIZE);
+ GEM_BUG_ON(min_page_size > max_page_size);
+
+ mask = ((max_page_size << 1ULL) - 1) & PAGE_MASK;
+ size = prandom_u32_state(prng) & mask;
+ if (size < min_page_size)
+ size |= min_page_size;
+
+ return size;
+}
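igt_random_size() above masks a random 32-bit value down to a page-aligned size of at most roughly twice max_page_size, then ORs in min_page_size when the draw comes out too small. A worked standalone example for min=64K and max=2M on a 4K-page system (the random inputs are made up):

#include <stdint.h>
#include <stdio.h>

#define TOY_PAGE_SIZE	4096u
#define TOY_PAGE_MASK	(~(TOY_PAGE_SIZE - 1))

static uint32_t toy_random_size(uint32_t prandom, uint32_t min, uint32_t max)
{
	uint64_t mask = (((uint64_t)max << 1) - 1) & TOY_PAGE_MASK;
	uint32_t size = prandom & mask;

	if (size < min)
		size |= min;
	return size;
}

int main(void)
{
	/* 0x123456 & 0x3ff000 = 0x123000, already >= 64K, kept as-is. */
	printf("%#x\n", toy_random_size(0x123456, 0x10000, 0x200000));
	/* 0xabc masks down to 0, below 64K, so 64K is OR-ed in. */
	printf("%#x\n", toy_random_size(0xabc, 0x10000, 0x200000));
	return 0;
}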
+
+static int igt_ppgtt_smoke_huge(void *arg)
{
struct i915_gem_context *ctx = arg;
struct drm_i915_private *i915 = ctx->i915;
struct drm_i915_gem_object *obj;
- static const unsigned int sizes[] = {
- SZ_64K,
- SZ_128K,
- SZ_256K,
- SZ_512K,
- SZ_1M,
- SZ_2M,
+ I915_RND_STATE(prng);
+ struct {
+ igt_create_fn fn;
+ u32 min;
+ u32 max;
+ } backends[] = {
+ { igt_create_internal, SZ_64K, SZ_2M, },
+ { igt_create_shmem, SZ_64K, SZ_32M, },
+ { igt_create_local, SZ_64K, SZ_1G, },
};
- int i;
int err;
+ int i;
/*
- * Sanity check that the HW uses huge pages correctly through internal
- * -- ensure that our writes land in the right place.
+ * Sanity check that the HW uses huge pages correctly through our
+ * various backends -- ensure that our writes land in the right place.
*/
- for (i = 0; i < ARRAY_SIZE(sizes); ++i) {
- unsigned int size = sizes[i];
+ for (i = 0; i < ARRAY_SIZE(backends); ++i) {
+ u32 min = backends[i].min;
+ u32 max = backends[i].max;
+ u32 size = max;
+try_again:
+ size = igt_random_size(&prng, min, rounddown_pow_of_two(size));
- obj = i915_gem_object_create_internal(i915, size);
- if (IS_ERR(obj))
- return PTR_ERR(obj);
+ obj = backends[i].fn(i915, size, 0);
+ if (IS_ERR(obj)) {
+ err = PTR_ERR(obj);
+ if (err == -E2BIG) {
+ size >>= 1;
+ goto try_again;
+ } else if (err == -ENODEV) {
+ err = 0;
+ continue;
+ }
+
+ return err;
+ }
err = i915_gem_object_pin_pages(obj);
- if (err)
+ if (err) {
+ if (err == -ENXIO || err == -E2BIG) {
+ i915_gem_object_put(obj);
+ size >>= 1;
+ goto try_again;
+ }
goto out_put;
+ }
- if (obj->mm.page_sizes.phys < I915_GTT_PAGE_SIZE_64K) {
- pr_info("internal unable to allocate huge-page(s) with size=%u\n",
- size);
+ if (obj->mm.page_sizes.phys < min) {
+ pr_info("%s unable to allocate huge-page(s) with size=%u, i=%d\n",
+ __func__, size, i);
+ err = -ENOMEM;
goto out_unpin;
}
err = igt_write_huge(ctx, obj);
if (err) {
- pr_err("internal write-huge failed with size=%u\n",
- size);
- goto out_unpin;
+ pr_err("%s write-huge failed with size=%u, i=%d\n",
+ __func__, size, i);
}
-
+out_unpin:
i915_gem_object_unpin_pages(obj);
- __i915_gem_object_put_pages(obj, I915_MM_NORMAL);
+ __i915_gem_object_put_pages(obj);
+out_put:
i915_gem_object_put(obj);
- }
- return 0;
+ if (err == -ENOMEM || err == -ENXIO)
+ err = 0;
-out_unpin:
- i915_gem_object_unpin_pages(obj);
-out_put:
- i915_gem_object_put(obj);
+ if (err)
+ break;
- return err;
-}
+ cond_resched();
+ }
-static inline bool igt_can_allocate_thp(struct drm_i915_private *i915)
-{
- return i915->mm.gemfs && has_transparent_hugepage();
+ return err;
}
-static int igt_ppgtt_gemfs_huge(void *arg)
+static int igt_ppgtt_sanity_check(void *arg)
{
struct i915_gem_context *ctx = arg;
struct drm_i915_private *i915 = ctx->i915;
- struct drm_i915_gem_object *obj;
- static const unsigned int sizes[] = {
- SZ_2M,
- SZ_4M,
- SZ_8M,
- SZ_16M,
- SZ_32M,
+ unsigned int supported = INTEL_INFO(i915)->page_sizes;
+ struct {
+ igt_create_fn fn;
+ unsigned int flags;
+ } backends[] = {
+ { igt_create_system, 0, },
+ { igt_create_local, I915_BO_ALLOC_CONTIGUOUS, },
};
- int i;
+ struct {
+ u32 size;
+ u32 pages;
+ } combos[] = {
+ { SZ_64K, SZ_64K },
+ { SZ_2M, SZ_2M },
+ { SZ_2M, SZ_64K },
+ { SZ_2M - SZ_64K, SZ_64K },
+ { SZ_2M - SZ_4K, SZ_64K | SZ_4K },
+ { SZ_2M + SZ_4K, SZ_64K | SZ_4K },
+ { SZ_2M + SZ_4K, SZ_2M | SZ_4K },
+ { SZ_2M + SZ_64K, SZ_2M | SZ_64K },
+ };
+ int i, j;
int err;
+ if (supported == I915_GTT_PAGE_SIZE_4K)
+ return 0;
+
/*
- * Sanity check that the HW uses huge pages correctly through gemfs --
- * ensure that our writes land in the right place.
+ * Sanity check that the HW behaves with a limited set of combinations.
+ * We already have a bunch of randomised testing, which should give us
+ * a decent amount of variation between runs; however, we should keep
+ * this to limit the chances of introducing a temporary regression by
+ * testing the most obvious cases that might make something blow up.
*/
- if (!igt_can_allocate_thp(i915)) {
- pr_info("missing THP support, skipping\n");
- return 0;
- }
+ for (i = 0; i < ARRAY_SIZE(backends); ++i) {
+ for (j = 0; j < ARRAY_SIZE(combos); ++j) {
+ struct drm_i915_gem_object *obj;
+ u32 size = combos[j].size;
+ u32 pages = combos[j].pages;
+
+ obj = backends[i].fn(i915, size, backends[i].flags);
+ if (IS_ERR(obj)) {
+ err = PTR_ERR(obj);
+ if (err == -ENODEV) {
+ pr_info("Device lacks local memory, skipping\n");
+ err = 0;
+ break;
+ }
- for (i = 0; i < ARRAY_SIZE(sizes); ++i) {
- unsigned int size = sizes[i];
+ return err;
+ }
- obj = i915_gem_object_create_shmem(i915, size);
- if (IS_ERR(obj))
- return PTR_ERR(obj);
+ err = i915_gem_object_pin_pages(obj);
+ if (err) {
+ i915_gem_object_put(obj);
+ goto out;
+ }
- err = i915_gem_object_pin_pages(obj);
- if (err)
- goto out_put;
+ GEM_BUG_ON(pages > obj->base.size);
+ pages = pages & supported;
- if (obj->mm.page_sizes.phys < I915_GTT_PAGE_SIZE_2M) {
- pr_info("finishing test early, gemfs unable to allocate huge-page(s) with size=%u\n",
- size);
- goto out_unpin;
- }
+ if (pages)
+ obj->mm.page_sizes.sg = pages;
- err = igt_write_huge(ctx, obj);
- if (err) {
- pr_err("gemfs write-huge failed with size=%u\n",
- size);
- goto out_unpin;
+ err = igt_write_huge(ctx, obj);
+
+ i915_gem_object_unpin_pages(obj);
+ __i915_gem_object_put_pages(obj);
+ i915_gem_object_put(obj);
+
+ if (err) {
+ pr_err("%s write-huge failed with size=%u pages=%u i=%d, j=%d\n",
+ __func__, size, pages, i, j);
+ goto out;
+ }
}
- i915_gem_object_unpin_pages(obj);
- __i915_gem_object_put_pages(obj, I915_MM_NORMAL);
- i915_gem_object_put(obj);
+ cond_resched();
}
- return 0;
-
-out_unpin:
- i915_gem_object_unpin_pages(obj);
-out_put:
- i915_gem_object_put(obj);
+out:
+ if (err == -ENOMEM)
+ err = 0;
return err;
}
@@ -1417,12 +1547,15 @@ static int igt_ppgtt_pin_update(void *arg)
struct i915_gem_context *ctx = arg;
struct drm_i915_private *dev_priv = ctx->i915;
unsigned long supported = INTEL_INFO(dev_priv)->page_sizes;
- struct i915_address_space *vm = ctx->vm;
struct drm_i915_gem_object *obj;
+ struct i915_gem_engines_iter it;
+ struct i915_address_space *vm;
+ struct intel_context *ce;
struct i915_vma *vma;
unsigned int flags = PIN_USER | PIN_OFFSET_FIXED;
+ unsigned int n;
int first, last;
- int err;
+ int err = 0;
/*
* Make sure there's no funny business when doing a PIN_UPDATE -- in the
@@ -1432,9 +1565,10 @@ static int igt_ppgtt_pin_update(void *arg)
* huge-gtt-pages.
*/
- if (!vm || !i915_vm_is_4lvl(vm)) {
+ vm = i915_gem_context_get_vm_rcu(ctx);
+ if (!i915_vm_is_4lvl(vm)) {
pr_info("48b PPGTT not supported, skipping\n");
- return 0;
+ goto out_vm;
}
first = ilog2(I915_GTT_PAGE_SIZE_64K);
@@ -1487,7 +1621,7 @@ static int igt_ppgtt_pin_update(void *arg)
goto out_unpin;
}
- err = i915_vma_bind(vma, I915_CACHE_NONE, PIN_UPDATE);
+ err = i915_vma_bind(vma, I915_CACHE_NONE, PIN_UPDATE, NULL);
if (err)
goto out_unpin;
@@ -1518,11 +1652,24 @@ static int igt_ppgtt_pin_update(void *arg)
* land in the now stale 2M page.
*/
- err = gpu_write(vma, ctx, dev_priv->engine[RCS0], 0, 0xdeadbeaf);
+ n = 0;
+ for_each_gem_engine(ce, i915_gem_context_lock_engines(ctx), it) {
+ if (!intel_engine_can_store_dword(ce->engine))
+ continue;
+
+ err = gpu_write(ce, vma, n++, 0xdeadbeaf);
+ if (err)
+ break;
+ }
+ i915_gem_context_unlock_engines(ctx);
if (err)
goto out_unpin;
- err = cpu_check(obj, 0, 0xdeadbeaf);
+ while (n--) {
+ err = cpu_check(obj, n, 0xdeadbeaf);
+ if (err)
+ goto out_unpin;
+ }
out_unpin:
i915_vma_unpin(vma);
@@ -1530,6 +1677,8 @@ out_close:
i915_vma_close(vma);
out_put:
i915_gem_object_put(obj);
+out_vm:
+ i915_vm_put(vm);
return err;
}
@@ -1539,7 +1688,7 @@ static int igt_tmpfs_fallback(void *arg)
struct i915_gem_context *ctx = arg;
struct drm_i915_private *i915 = ctx->i915;
struct vfsmount *gemfs = i915->mm.gemfs;
- struct i915_address_space *vm = ctx->vm ?: &i915->ggtt.vm;
+ struct i915_address_space *vm = i915_gem_context_get_vm_rcu(ctx);
struct drm_i915_gem_object *obj;
struct i915_vma *vma;
u32 *vaddr;
@@ -1589,6 +1738,7 @@ out_put:
out_restore:
i915->mm.gemfs = gemfs;
+ i915_vm_put(vm);
return err;
}
@@ -1596,11 +1746,14 @@ static int igt_shrink_thp(void *arg)
{
struct i915_gem_context *ctx = arg;
struct drm_i915_private *i915 = ctx->i915;
- struct i915_address_space *vm = ctx->vm ?: &i915->ggtt.vm;
+ struct i915_address_space *vm = i915_gem_context_get_vm_rcu(ctx);
struct drm_i915_gem_object *obj;
+ struct i915_gem_engines_iter it;
+ struct intel_context *ce;
struct i915_vma *vma;
unsigned int flags = PIN_USER;
- int err;
+ unsigned int n;
+ int err = 0;
/*
* Sanity check shrinking huge-paged object -- make sure nothing blows
@@ -1609,12 +1762,14 @@ static int igt_shrink_thp(void *arg)
if (!igt_can_allocate_thp(i915)) {
pr_info("missing THP support, skipping\n");
- return 0;
+ goto out_vm;
}
obj = i915_gem_object_create_shmem(i915, SZ_2M);
- if (IS_ERR(obj))
- return PTR_ERR(obj);
+ if (IS_ERR(obj)) {
+ err = PTR_ERR(obj);
+ goto out_vm;
+ }
vma = i915_vma_instance(obj, vm, NULL);
if (IS_ERR(vma)) {
@@ -1635,11 +1790,20 @@ static int igt_shrink_thp(void *arg)
if (err)
goto out_unpin;
- err = gpu_write(vma, ctx, i915->engine[RCS0], 0, 0xdeadbeaf);
- if (err)
- goto out_unpin;
+ n = 0;
+ for_each_gem_engine(ce, i915_gem_context_lock_engines(ctx), it) {
+ if (!intel_engine_can_store_dword(ce->engine))
+ continue;
+
+ err = gpu_write(ce, vma, n++, 0xdeadbeaf);
+ if (err)
+ break;
+ }
+ i915_gem_context_unlock_engines(ctx);
i915_vma_unpin(vma);
+ if (err)
+ goto out_close;
/*
* Now that the pages are *unpinned* shrink-all should invoke
@@ -1662,7 +1826,11 @@ static int igt_shrink_thp(void *arg)
if (err)
goto out_close;
- err = cpu_check(obj, 0, 0xdeadbeaf);
+ while (n--) {
+ err = cpu_check(obj, n, 0xdeadbeaf);
+ if (err)
+ break;
+ }
out_unpin:
i915_vma_unpin(vma);
@@ -1670,6 +1838,8 @@ out_close:
i915_vma_close(vma);
out_put:
i915_gem_object_put(obj);
+out_vm:
+ i915_vm_put(vm);
return err;
}
@@ -1678,6 +1848,7 @@ int i915_gem_huge_page_mock_selftests(void)
{
static const struct i915_subtest tests[] = {
SUBTEST(igt_mock_exhaust_device_supported_pages),
+ SUBTEST(igt_mock_memory_region_huge_pages),
SUBTEST(igt_mock_ppgtt_misaligned_dma),
SUBTEST(igt_mock_ppgtt_huge_fill),
SUBTEST(igt_mock_ppgtt_64K),
@@ -1694,8 +1865,7 @@ int i915_gem_huge_page_mock_selftests(void)
mkwrite_device_info(dev_priv)->ppgtt_type = INTEL_PPGTT_FULL;
mkwrite_device_info(dev_priv)->ppgtt_size = 48;
- mutex_lock(&dev_priv->drm.struct_mutex);
- ppgtt = i915_ppgtt_create(dev_priv);
+ ppgtt = i915_ppgtt_create(&dev_priv->gt);
if (IS_ERR(ppgtt)) {
err = PTR_ERR(ppgtt);
goto out_unlock;
@@ -1720,58 +1890,52 @@ out_close:
i915_vm_put(&ppgtt->vm);
out_unlock:
- mutex_unlock(&dev_priv->drm.struct_mutex);
drm_dev_put(&dev_priv->drm);
-
return err;
}
-int i915_gem_huge_page_live_selftests(struct drm_i915_private *dev_priv)
+int i915_gem_huge_page_live_selftests(struct drm_i915_private *i915)
{
static const struct i915_subtest tests[] = {
SUBTEST(igt_shrink_thp),
SUBTEST(igt_ppgtt_pin_update),
SUBTEST(igt_tmpfs_fallback),
SUBTEST(igt_ppgtt_exhaust_huge),
- SUBTEST(igt_ppgtt_gemfs_huge),
- SUBTEST(igt_ppgtt_internal_huge),
+ SUBTEST(igt_ppgtt_smoke_huge),
+ SUBTEST(igt_ppgtt_sanity_check),
};
- struct drm_file *file;
struct i915_gem_context *ctx;
- intel_wakeref_t wakeref;
+ struct i915_address_space *vm;
+ struct file *file;
int err;
- if (!HAS_PPGTT(dev_priv)) {
+ if (!HAS_PPGTT(i915)) {
pr_info("PPGTT not supported, skipping live-selftests\n");
return 0;
}
- if (i915_terminally_wedged(dev_priv))
+ if (intel_gt_is_wedged(&i915->gt))
return 0;
- file = mock_file(dev_priv);
+ file = mock_file(i915);
if (IS_ERR(file))
return PTR_ERR(file);
- mutex_lock(&dev_priv->drm.struct_mutex);
- wakeref = intel_runtime_pm_get(&dev_priv->runtime_pm);
-
- ctx = live_context(dev_priv, file);
+ ctx = live_context(i915, file);
if (IS_ERR(ctx)) {
err = PTR_ERR(ctx);
- goto out_unlock;
+ goto out_file;
}
- if (ctx->vm)
- ctx->vm->scrub_64K = true;
+ mutex_lock(&ctx->mutex);
+ vm = i915_gem_context_vm(ctx);
+ if (vm)
+ WRITE_ONCE(vm->scrub_64K, true);
+ mutex_unlock(&ctx->mutex);
err = i915_subtests(tests, ctx);
-out_unlock:
- intel_runtime_pm_put(&dev_priv->runtime_pm, wakeref);
- mutex_unlock(&dev_priv->drm.struct_mutex);
-
- mock_file_free(dev_priv, file);
-
+out_file:
+ fput(file);
return err;
}
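Note: the hunks above move the huge-page selftests away from peeking at ctx->vm and onto an explicitly reference-counted lookup. Below is a minimal sketch of that pattern, assuming only the helpers already visible in this diff (i915_gem_context_get_vm_rcu(), i915_vm_put()); the function name is illustrative and the snippet is not buildable outside the i915 tree.

/*
 * Sketch (illustrative, not a patch hunk): take a reference on the
 * context's address space up front and drop it on every exit path.
 */
static int example_huge_page_test(void *arg)
{
	struct i915_gem_context *ctx = arg;
	struct i915_address_space *vm = i915_gem_context_get_vm_rcu(ctx);
	struct drm_i915_gem_object *obj;
	int err = 0;

	obj = i915_gem_object_create_shmem(ctx->i915, SZ_2M);
	if (IS_ERR(obj)) {
		err = PTR_ERR(obj);
		goto out_vm; /* do not leak the vm reference */
	}

	/* ... bind obj into vm and exercise it here ... */

	i915_gem_object_put(obj);
out_vm:
	i915_vm_put(vm);
	return err;
}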
diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_client_blt.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_client_blt.c
index f3a5eb807c1c..b972be165e85 100644
--- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_client_blt.c
+++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_client_blt.c
@@ -5,14 +5,17 @@
#include "i915_selftest.h"
+#include "gt/intel_engine_user.h"
+#include "gt/intel_gt.h"
+
#include "selftests/igt_flush_test.h"
#include "selftests/mock_drm.h"
+#include "huge_gem_object.h"
#include "mock_context.h"
-static int igt_client_fill(void *arg)
+static int __igt_client_fill(struct intel_engine_cs *engine)
{
- struct intel_context *ce = arg;
- struct drm_i915_private *i915 = ce->gem_context->i915;
+ struct intel_context *ce = engine->kernel_context;
struct drm_i915_gem_object *obj;
struct rnd_state prng;
IGT_TIMEOUT(end);
@@ -21,16 +24,21 @@ static int igt_client_fill(void *arg)
prandom_seed_state(&prng, i915_selftest.random_seed);
+ intel_engine_pm_get(engine);
do {
- u32 sz = prandom_u32_state(&prng) % SZ_32M;
+ const u32 max_block_size = S16_MAX * PAGE_SIZE;
+ u32 sz = min_t(u64, ce->vm->total >> 4, prandom_u32_state(&prng));
+ u32 phys_sz = sz % (max_block_size + 1);
u32 val = prandom_u32_state(&prng);
u32 i;
sz = round_up(sz, PAGE_SIZE);
+ phys_sz = round_up(phys_sz, PAGE_SIZE);
- pr_debug("%s with sz=%x, val=%x\n", __func__, sz, val);
+ pr_debug("%s with phys_sz= %x, sz=%x, val=%x\n", __func__,
+ phys_sz, sz, val);
- obj = i915_gem_object_create_internal(i915, sz);
+ obj = huge_gem_object(engine->i915, phys_sz, sz);
if (IS_ERR(obj)) {
err = PTR_ERR(obj);
goto err_flush;
@@ -52,7 +60,8 @@ static int igt_client_fill(void *arg)
* values after we do the set_to_cpu_domain and pick it up as a
* test failure.
*/
- memset32(vaddr, val ^ 0xdeadbeaf, obj->base.size / sizeof(u32));
+ memset32(vaddr, val ^ 0xdeadbeaf,
+ huge_gem_object_phys_size(obj) / sizeof(u32));
if (!(obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_WRITE))
obj->cache_dirty = true;
@@ -63,24 +72,13 @@ static int igt_client_fill(void *arg)
if (err)
goto err_unpin;
- /*
- * XXX: For now do the wait without the object resv lock to
- * ensure we don't deadlock.
- */
- err = i915_gem_object_wait(obj,
- I915_WAIT_INTERRUPTIBLE |
- I915_WAIT_ALL,
- MAX_SCHEDULE_TIMEOUT);
- if (err)
- goto err_unpin;
-
i915_gem_object_lock(obj);
err = i915_gem_object_set_to_cpu_domain(obj, false);
i915_gem_object_unlock(obj);
if (err)
goto err_unpin;
- for (i = 0; i < obj->base.size / sizeof(u32); ++i) {
+ for (i = 0; i < huge_gem_object_phys_size(obj) / sizeof(u32); ++i) {
if (vaddr[i] != val) {
pr_err("vaddr[%u]=%x, expected=%x\n", i,
vaddr[i], val);
@@ -100,28 +98,46 @@ err_unpin:
err_put:
i915_gem_object_put(obj);
err_flush:
- mutex_lock(&i915->drm.struct_mutex);
- if (igt_flush_test(i915, I915_WAIT_LOCKED))
- err = -EIO;
- mutex_unlock(&i915->drm.struct_mutex);
-
if (err == -ENOMEM)
err = 0;
+ intel_engine_pm_put(engine);
return err;
}
+static int igt_client_fill(void *arg)
+{
+ int inst = 0;
+
+ do {
+ struct intel_engine_cs *engine;
+ int err;
+
+ engine = intel_engine_lookup_user(arg,
+ I915_ENGINE_CLASS_COPY,
+ inst++);
+ if (!engine)
+ return 0;
+
+ err = __igt_client_fill(engine);
+ if (err == -ENOMEM)
+ err = 0;
+ if (err)
+ return err;
+ } while (1);
+}
+
int i915_gem_client_blt_live_selftests(struct drm_i915_private *i915)
{
static const struct i915_subtest tests[] = {
SUBTEST(igt_client_fill),
};
- if (i915_terminally_wedged(i915))
+ if (intel_gt_is_wedged(&i915->gt))
return 0;
if (!HAS_ENGINE(i915, BCS0))
return 0;
- return i915_subtests(tests, i915->engine[BCS0]->kernel_context);
+ return i915_live_subtests(tests, i915);
}
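Note: igt_client_fill() becomes a thin wrapper that walks every copy engine exposed to userspace. A sketch of that lookup loop follows, assuming only intel_engine_lookup_user() as used above; the wrapper name and callback are illustrative.

/*
 * Sketch (illustrative): resolve copy engines by uABI class/instance
 * until the lookup fails, so every exposed BCS instance is covered.
 */
static int for_each_copy_engine(struct drm_i915_private *i915,
				int (*fn)(struct intel_engine_cs *engine))
{
	int inst = 0;

	do {
		struct intel_engine_cs *engine;
		int err;

		engine = intel_engine_lookup_user(i915,
						  I915_ENGINE_CLASS_COPY,
						  inst++);
		if (!engine)
			return 0; /* no more instances */

		err = fn(engine);
		if (err)
			return err;
	} while (1);
}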
diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_coherency.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_coherency.c
index 8f22d3f18422..3f6079e1dfb6 100644
--- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_coherency.c
+++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_coherency.c
@@ -6,12 +6,20 @@
#include <linux/prime_numbers.h>
+#include "gt/intel_engine_pm.h"
+#include "gt/intel_gt.h"
+#include "gt/intel_gt_pm.h"
+#include "gt/intel_ring.h"
+
#include "i915_selftest.h"
#include "selftests/i915_random.h"
-static int cpu_set(struct drm_i915_gem_object *obj,
- unsigned long offset,
- u32 v)
+struct context {
+ struct drm_i915_gem_object *obj;
+ struct intel_engine_cs *engine;
+};
+
+static int cpu_set(struct context *ctx, unsigned long offset, u32 v)
{
unsigned int needs_clflush;
struct page *page;
@@ -19,11 +27,11 @@ static int cpu_set(struct drm_i915_gem_object *obj,
u32 *cpu;
int err;
- err = i915_gem_object_prepare_write(obj, &needs_clflush);
+ err = i915_gem_object_prepare_write(ctx->obj, &needs_clflush);
if (err)
return err;
- page = i915_gem_object_get_page(obj, offset >> PAGE_SHIFT);
+ page = i915_gem_object_get_page(ctx->obj, offset >> PAGE_SHIFT);
map = kmap_atomic(page);
cpu = map + offset_in_page(offset);
@@ -36,14 +44,12 @@ static int cpu_set(struct drm_i915_gem_object *obj,
drm_clflush_virt_range(cpu, sizeof(*cpu));
kunmap_atomic(map);
- i915_gem_object_finish_access(obj);
+ i915_gem_object_finish_access(ctx->obj);
return 0;
}
-static int cpu_get(struct drm_i915_gem_object *obj,
- unsigned long offset,
- u32 *v)
+static int cpu_get(struct context *ctx, unsigned long offset, u32 *v)
{
unsigned int needs_clflush;
struct page *page;
@@ -51,11 +57,11 @@ static int cpu_get(struct drm_i915_gem_object *obj,
u32 *cpu;
int err;
- err = i915_gem_object_prepare_read(obj, &needs_clflush);
+ err = i915_gem_object_prepare_read(ctx->obj, &needs_clflush);
if (err)
return err;
- page = i915_gem_object_get_page(obj, offset >> PAGE_SHIFT);
+ page = i915_gem_object_get_page(ctx->obj, offset >> PAGE_SHIFT);
map = kmap_atomic(page);
cpu = map + offset_in_page(offset);
@@ -65,136 +71,137 @@ static int cpu_get(struct drm_i915_gem_object *obj,
*v = *cpu;
kunmap_atomic(map);
- i915_gem_object_finish_access(obj);
+ i915_gem_object_finish_access(ctx->obj);
return 0;
}
-static int gtt_set(struct drm_i915_gem_object *obj,
- unsigned long offset,
- u32 v)
+static int gtt_set(struct context *ctx, unsigned long offset, u32 v)
{
struct i915_vma *vma;
u32 __iomem *map;
- int err;
+ int err = 0;
- i915_gem_object_lock(obj);
- err = i915_gem_object_set_to_gtt_domain(obj, true);
- i915_gem_object_unlock(obj);
+ i915_gem_object_lock(ctx->obj);
+ err = i915_gem_object_set_to_gtt_domain(ctx->obj, true);
+ i915_gem_object_unlock(ctx->obj);
if (err)
return err;
- vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0, PIN_MAPPABLE);
+ vma = i915_gem_object_ggtt_pin(ctx->obj, NULL, 0, 0, PIN_MAPPABLE);
if (IS_ERR(vma))
return PTR_ERR(vma);
+ intel_gt_pm_get(vma->vm->gt);
+
map = i915_vma_pin_iomap(vma);
i915_vma_unpin(vma);
- if (IS_ERR(map))
- return PTR_ERR(map);
+ if (IS_ERR(map)) {
+ err = PTR_ERR(map);
+ goto out_rpm;
+ }
iowrite32(v, &map[offset / sizeof(*map)]);
i915_vma_unpin_iomap(vma);
- return 0;
+out_rpm:
+ intel_gt_pm_put(vma->vm->gt);
+ return err;
}
-static int gtt_get(struct drm_i915_gem_object *obj,
- unsigned long offset,
- u32 *v)
+static int gtt_get(struct context *ctx, unsigned long offset, u32 *v)
{
struct i915_vma *vma;
u32 __iomem *map;
- int err;
+ int err = 0;
- i915_gem_object_lock(obj);
- err = i915_gem_object_set_to_gtt_domain(obj, false);
- i915_gem_object_unlock(obj);
+ i915_gem_object_lock(ctx->obj);
+ err = i915_gem_object_set_to_gtt_domain(ctx->obj, false);
+ i915_gem_object_unlock(ctx->obj);
if (err)
return err;
- vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0, PIN_MAPPABLE);
+ vma = i915_gem_object_ggtt_pin(ctx->obj, NULL, 0, 0, PIN_MAPPABLE);
if (IS_ERR(vma))
return PTR_ERR(vma);
+ intel_gt_pm_get(vma->vm->gt);
+
map = i915_vma_pin_iomap(vma);
i915_vma_unpin(vma);
- if (IS_ERR(map))
- return PTR_ERR(map);
+ if (IS_ERR(map)) {
+ err = PTR_ERR(map);
+ goto out_rpm;
+ }
*v = ioread32(&map[offset / sizeof(*map)]);
i915_vma_unpin_iomap(vma);
- return 0;
+out_rpm:
+ intel_gt_pm_put(vma->vm->gt);
+ return err;
}
-static int wc_set(struct drm_i915_gem_object *obj,
- unsigned long offset,
- u32 v)
+static int wc_set(struct context *ctx, unsigned long offset, u32 v)
{
u32 *map;
int err;
- i915_gem_object_lock(obj);
- err = i915_gem_object_set_to_wc_domain(obj, true);
- i915_gem_object_unlock(obj);
+ i915_gem_object_lock(ctx->obj);
+ err = i915_gem_object_set_to_wc_domain(ctx->obj, true);
+ i915_gem_object_unlock(ctx->obj);
if (err)
return err;
- map = i915_gem_object_pin_map(obj, I915_MAP_WC);
+ map = i915_gem_object_pin_map(ctx->obj, I915_MAP_WC);
if (IS_ERR(map))
return PTR_ERR(map);
map[offset / sizeof(*map)] = v;
- i915_gem_object_unpin_map(obj);
+ i915_gem_object_unpin_map(ctx->obj);
return 0;
}
-static int wc_get(struct drm_i915_gem_object *obj,
- unsigned long offset,
- u32 *v)
+static int wc_get(struct context *ctx, unsigned long offset, u32 *v)
{
u32 *map;
int err;
- i915_gem_object_lock(obj);
- err = i915_gem_object_set_to_wc_domain(obj, false);
- i915_gem_object_unlock(obj);
+ i915_gem_object_lock(ctx->obj);
+ err = i915_gem_object_set_to_wc_domain(ctx->obj, false);
+ i915_gem_object_unlock(ctx->obj);
if (err)
return err;
- map = i915_gem_object_pin_map(obj, I915_MAP_WC);
+ map = i915_gem_object_pin_map(ctx->obj, I915_MAP_WC);
if (IS_ERR(map))
return PTR_ERR(map);
*v = map[offset / sizeof(*map)];
- i915_gem_object_unpin_map(obj);
+ i915_gem_object_unpin_map(ctx->obj);
return 0;
}
-static int gpu_set(struct drm_i915_gem_object *obj,
- unsigned long offset,
- u32 v)
+static int gpu_set(struct context *ctx, unsigned long offset, u32 v)
{
- struct drm_i915_private *i915 = to_i915(obj->base.dev);
struct i915_request *rq;
struct i915_vma *vma;
u32 *cs;
int err;
- i915_gem_object_lock(obj);
- err = i915_gem_object_set_to_gtt_domain(obj, true);
- i915_gem_object_unlock(obj);
+ i915_gem_object_lock(ctx->obj);
+ err = i915_gem_object_set_to_gtt_domain(ctx->obj, true);
+ i915_gem_object_unlock(ctx->obj);
if (err)
return err;
- vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0, 0);
+ vma = i915_gem_object_ggtt_pin(ctx->obj, NULL, 0, 0, 0);
if (IS_ERR(vma))
return PTR_ERR(vma);
- rq = i915_request_create(i915->engine[RCS0]->kernel_context);
+ rq = intel_engine_create_kernel_request(ctx->engine);
if (IS_ERR(rq)) {
i915_vma_unpin(vma);
return PTR_ERR(rq);
@@ -207,12 +214,12 @@ static int gpu_set(struct drm_i915_gem_object *obj,
return PTR_ERR(cs);
}
- if (INTEL_GEN(i915) >= 8) {
+ if (INTEL_GEN(ctx->engine->i915) >= 8) {
*cs++ = MI_STORE_DWORD_IMM_GEN4 | 1 << 22;
*cs++ = lower_32_bits(i915_ggtt_offset(vma) + offset);
*cs++ = upper_32_bits(i915_ggtt_offset(vma) + offset);
*cs++ = v;
- } else if (INTEL_GEN(i915) >= 4) {
+ } else if (INTEL_GEN(ctx->engine->i915) >= 4) {
*cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
*cs++ = 0;
*cs++ = i915_ggtt_offset(vma) + offset;
@@ -226,7 +233,9 @@ static int gpu_set(struct drm_i915_gem_object *obj,
intel_ring_advance(rq, cs);
i915_vma_lock(vma);
- err = i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE);
+ err = i915_request_await_object(rq, vma->obj, true);
+ if (err == 0)
+ err = i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE);
i915_vma_unlock(vma);
i915_vma_unpin(vma);
@@ -235,29 +244,34 @@ static int gpu_set(struct drm_i915_gem_object *obj,
return err;
}
-static bool always_valid(struct drm_i915_private *i915)
+static bool always_valid(struct context *ctx)
{
return true;
}
-static bool needs_fence_registers(struct drm_i915_private *i915)
+static bool needs_fence_registers(struct context *ctx)
{
- return !i915_terminally_wedged(i915);
+ struct intel_gt *gt = ctx->engine->gt;
+
+ if (intel_gt_is_wedged(gt))
+ return false;
+
+ return gt->ggtt->num_fences;
}
-static bool needs_mi_store_dword(struct drm_i915_private *i915)
+static bool needs_mi_store_dword(struct context *ctx)
{
- if (i915_terminally_wedged(i915))
+ if (intel_gt_is_wedged(ctx->engine->gt))
return false;
- return intel_engine_can_store_dword(i915->engine[RCS0]);
+ return intel_engine_can_store_dword(ctx->engine);
}
static const struct igt_coherency_mode {
const char *name;
- int (*set)(struct drm_i915_gem_object *, unsigned long offset, u32 v);
- int (*get)(struct drm_i915_gem_object *, unsigned long offset, u32 *v);
- bool (*valid)(struct drm_i915_private *i915);
+ int (*set)(struct context *ctx, unsigned long offset, u32 v);
+ int (*get)(struct context *ctx, unsigned long offset, u32 *v);
+ bool (*valid)(struct context *ctx);
} igt_coherency_mode[] = {
{ "cpu", cpu_set, cpu_get, always_valid },
{ "gtt", gtt_set, gtt_get, needs_fence_registers },
@@ -266,19 +280,37 @@ static const struct igt_coherency_mode {
{ },
};
+static struct intel_engine_cs *
+random_engine(struct drm_i915_private *i915, struct rnd_state *prng)
+{
+ struct intel_engine_cs *engine;
+ unsigned int count;
+
+ count = 0;
+ for_each_uabi_engine(engine, i915)
+ count++;
+
+ count = i915_prandom_u32_max_state(count, prng);
+ for_each_uabi_engine(engine, i915)
+ if (count-- == 0)
+ return engine;
+
+ return NULL;
+}
+
static int igt_gem_coherency(void *arg)
{
const unsigned int ncachelines = PAGE_SIZE/64;
- I915_RND_STATE(prng);
struct drm_i915_private *i915 = arg;
const struct igt_coherency_mode *read, *write, *over;
- struct drm_i915_gem_object *obj;
- intel_wakeref_t wakeref;
unsigned long count, n;
u32 *offsets, *values;
+ I915_RND_STATE(prng);
+ struct context ctx;
int err = 0;
- /* We repeatedly write, overwrite and read from a sequence of
+ /*
+ * We repeatedly write, overwrite and read from a sequence of
* cachelines in order to try and detect incoherency (unflushed writes
* from either the CPU or GPU). Each setter/getter uses our cache
* domain API which should prevent incoherency.
@@ -292,34 +324,40 @@ static int igt_gem_coherency(void *arg)
values = offsets + ncachelines;
- mutex_lock(&i915->drm.struct_mutex);
- wakeref = intel_runtime_pm_get(&i915->runtime_pm);
+ ctx.engine = random_engine(i915, &prng);
+ if (!ctx.engine) {
+ err = -ENODEV;
+ goto out_free;
+ }
+ pr_info("%s: using %s\n", __func__, ctx.engine->name);
+ intel_engine_pm_get(ctx.engine);
+
for (over = igt_coherency_mode; over->name; over++) {
if (!over->set)
continue;
- if (!over->valid(i915))
+ if (!over->valid(&ctx))
continue;
for (write = igt_coherency_mode; write->name; write++) {
if (!write->set)
continue;
- if (!write->valid(i915))
+ if (!write->valid(&ctx))
continue;
for (read = igt_coherency_mode; read->name; read++) {
if (!read->get)
continue;
- if (!read->valid(i915))
+ if (!read->valid(&ctx))
continue;
for_each_prime_number_from(count, 1, ncachelines) {
- obj = i915_gem_object_create_internal(i915, PAGE_SIZE);
- if (IS_ERR(obj)) {
- err = PTR_ERR(obj);
- goto unlock;
+ ctx.obj = i915_gem_object_create_internal(i915, PAGE_SIZE);
+ if (IS_ERR(ctx.obj)) {
+ err = PTR_ERR(ctx.obj);
+ goto out_pm;
}
i915_random_reorder(offsets, ncachelines, &prng);
@@ -327,7 +365,7 @@ static int igt_gem_coherency(void *arg)
values[n] = prandom_u32_state(&prng);
for (n = 0; n < count; n++) {
- err = over->set(obj, offsets[n], ~values[n]);
+ err = over->set(&ctx, offsets[n], ~values[n]);
if (err) {
pr_err("Failed to set stale value[%ld/%ld] in object using %s, err=%d\n",
n, count, over->name, err);
@@ -336,7 +374,7 @@ static int igt_gem_coherency(void *arg)
}
for (n = 0; n < count; n++) {
- err = write->set(obj, offsets[n], values[n]);
+ err = write->set(&ctx, offsets[n], values[n]);
if (err) {
pr_err("Failed to set value[%ld/%ld] in object using %s, err=%d\n",
n, count, write->name, err);
@@ -347,7 +385,7 @@ static int igt_gem_coherency(void *arg)
for (n = 0; n < count; n++) {
u32 found;
- err = read->get(obj, offsets[n], &found);
+ err = read->get(&ctx, offsets[n], &found);
if (err) {
pr_err("Failed to get value[%ld/%ld] in object using %s, err=%d\n",
n, count, read->name, err);
@@ -365,20 +403,20 @@ static int igt_gem_coherency(void *arg)
}
}
- i915_gem_object_put(obj);
+ i915_gem_object_put(ctx.obj);
}
}
}
}
-unlock:
- intel_runtime_pm_put(&i915->runtime_pm, wakeref);
- mutex_unlock(&i915->drm.struct_mutex);
+out_pm:
+ intel_engine_pm_put(ctx.engine);
+out_free:
kfree(offsets);
return err;
put_object:
- i915_gem_object_put(obj);
- goto unlock;
+ i915_gem_object_put(ctx.obj);
+ goto out_pm;
}
int i915_gem_coherency_live_selftests(struct drm_i915_private *i915)
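Note: the coherency test now routes every access through a struct context and the igt_coherency_mode table of set/get/valid callbacks. A sketch of how a single write/read pairing is exercised, under the assumption that the table entries behave as declared above; the helper name is illustrative.

/*
 * Sketch (illustrative): one write/read combination driven through the
 * igt_coherency_mode callbacks; the full test loops over all pairs.
 */
static int check_one_combination(struct context *ctx,
				 const struct igt_coherency_mode *write,
				 const struct igt_coherency_mode *read,
				 unsigned long offset, u32 v)
{
	u32 found;
	int err;

	if (!write->valid(ctx) || !read->valid(ctx))
		return 0; /* skip modes the engine cannot exercise */

	err = write->set(ctx, offset, v);
	if (err)
		return err;

	err = read->get(ctx, offset, &found);
	if (err)
		return err;

	return found == v ? 0 : -EINVAL;
}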
diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c
index eaa2b16574c7..7fc46861a54d 100644
--- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c
@@ -7,6 +7,9 @@
#include <linux/prime_numbers.h>
#include "gem/i915_gem_pm.h"
+#include "gt/intel_engine_pm.h"
+#include "gt/intel_gt.h"
+#include "gt/intel_gt_requests.h"
#include "gt/intel_reset.h"
#include "i915_selftest.h"
@@ -24,16 +27,20 @@
#define DW_PER_PAGE (PAGE_SIZE / sizeof(u32))
+static inline struct i915_address_space *ctx_vm(struct i915_gem_context *ctx)
+{
+ /* single threaded, private ctx */
+ return rcu_dereference_protected(ctx->vm, true);
+}
+
static int live_nop_switch(void *arg)
{
const unsigned int nctx = 1024;
struct drm_i915_private *i915 = arg;
struct intel_engine_cs *engine;
struct i915_gem_context **ctx;
- enum intel_engine_id id;
- intel_wakeref_t wakeref;
struct igt_live_test t;
- struct drm_file *file;
+ struct file *file;
unsigned long n;
int err = -ENODEV;
@@ -52,43 +59,49 @@ static int live_nop_switch(void *arg)
if (IS_ERR(file))
return PTR_ERR(file);
- mutex_lock(&i915->drm.struct_mutex);
- wakeref = intel_runtime_pm_get(&i915->runtime_pm);
-
ctx = kcalloc(nctx, sizeof(*ctx), GFP_KERNEL);
if (!ctx) {
err = -ENOMEM;
- goto out_unlock;
+ goto out_file;
}
for (n = 0; n < nctx; n++) {
ctx[n] = live_context(i915, file);
if (IS_ERR(ctx[n])) {
err = PTR_ERR(ctx[n]);
- goto out_unlock;
+ goto out_file;
}
}
- for_each_engine(engine, i915, id) {
- struct i915_request *rq;
+ for_each_uabi_engine(engine, i915) {
+ struct i915_request *rq = NULL;
unsigned long end_time, prime;
ktime_t times[2] = {};
times[0] = ktime_get_raw();
for (n = 0; n < nctx; n++) {
- rq = igt_request_alloc(ctx[n], engine);
- if (IS_ERR(rq)) {
- err = PTR_ERR(rq);
- goto out_unlock;
+ struct i915_request *this;
+
+ this = igt_request_alloc(ctx[n], engine);
+ if (IS_ERR(this)) {
+ err = PTR_ERR(this);
+ goto out_file;
}
- i915_request_add(rq);
+ if (rq) {
+ i915_request_await_dma_fence(this, &rq->fence);
+ i915_request_put(rq);
+ }
+ rq = i915_request_get(this);
+ i915_request_add(this);
}
if (i915_request_wait(rq, 0, HZ / 5) < 0) {
pr_err("Failed to populated %d contexts\n", nctx);
- i915_gem_set_wedged(i915);
+ intel_gt_set_wedged(&i915->gt);
+ i915_request_put(rq);
err = -EIO;
- goto out_unlock;
+ goto out_file;
}
+ i915_request_put(rq);
times[1] = ktime_get_raw();
@@ -97,17 +110,25 @@ static int live_nop_switch(void *arg)
err = igt_live_test_begin(&t, i915, __func__, engine->name);
if (err)
- goto out_unlock;
+ goto out_file;
end_time = jiffies + i915_selftest.timeout_jiffies;
for_each_prime_number_from(prime, 2, 8192) {
times[1] = ktime_get_raw();
+ rq = NULL;
for (n = 0; n < prime; n++) {
- rq = igt_request_alloc(ctx[n % nctx], engine);
- if (IS_ERR(rq)) {
- err = PTR_ERR(rq);
- goto out_unlock;
+ struct i915_request *this;
+
+ this = igt_request_alloc(ctx[n % nctx], engine);
+ if (IS_ERR(this)) {
+ err = PTR_ERR(this);
+ goto out_file;
+ }
+
+ if (rq) { /* Force submission order */
+ i915_request_await_dma_fence(this, &rq->fence);
+ i915_request_put(rq);
}
/*
@@ -124,14 +145,18 @@ static int live_nop_switch(void *arg)
* for latency.
*/
- i915_request_add(rq);
+ rq = i915_request_get(this);
+ i915_request_add(this);
}
+ GEM_BUG_ON(!rq);
if (i915_request_wait(rq, 0, HZ / 5) < 0) {
pr_err("Switching between %ld contexts timed out\n",
prime);
- i915_gem_set_wedged(i915);
+ intel_gt_set_wedged(&i915->gt);
+ i915_request_put(rq);
break;
}
+ i915_request_put(rq);
times[1] = ktime_sub(ktime_get_raw(), times[1]);
if (prime == 2)
@@ -143,7 +168,7 @@ static int live_nop_switch(void *arg)
err = igt_live_test_end(&t);
if (err)
- goto out_unlock;
+ goto out_file;
pr_info("Switch latencies on %s: 1 = %lluns, %lu = %lluns\n",
engine->name,
@@ -151,75 +176,237 @@ static int live_nop_switch(void *arg)
prime - 1, div64_u64(ktime_to_ns(times[1]), prime - 1));
}
-out_unlock:
- intel_runtime_pm_put(&i915->runtime_pm, wakeref);
- mutex_unlock(&i915->drm.struct_mutex);
- mock_file_free(i915, file);
+out_file:
+ fput(file);
return err;
}
-static struct i915_vma *
-gpu_fill_dw(struct i915_vma *vma, u64 offset, unsigned long count, u32 value)
+struct parallel_switch {
+ struct task_struct *tsk;
+ struct intel_context *ce[2];
+};
+
+static int __live_parallel_switch1(void *data)
{
- struct drm_i915_gem_object *obj;
- const int gen = INTEL_GEN(vma->vm->i915);
- unsigned long n, size;
- u32 *cmd;
- int err;
+ struct parallel_switch *arg = data;
+ IGT_TIMEOUT(end_time);
+ unsigned long count;
- size = (4 * count + 1) * sizeof(u32);
- size = round_up(size, PAGE_SIZE);
- obj = i915_gem_object_create_internal(vma->vm->i915, size);
- if (IS_ERR(obj))
- return ERR_CAST(obj);
+ count = 0;
+ do {
+ struct i915_request *rq = NULL;
+ int err, n;
- cmd = i915_gem_object_pin_map(obj, I915_MAP_WB);
- if (IS_ERR(cmd)) {
- err = PTR_ERR(cmd);
- goto err;
+ err = 0;
+ for (n = 0; !err && n < ARRAY_SIZE(arg->ce); n++) {
+ struct i915_request *prev = rq;
+
+ rq = i915_request_create(arg->ce[n]);
+ if (IS_ERR(rq)) {
+ i915_request_put(prev);
+ return PTR_ERR(rq);
+ }
+
+ i915_request_get(rq);
+ if (prev) {
+ err = i915_request_await_dma_fence(rq, &prev->fence);
+ i915_request_put(prev);
+ }
+
+ i915_request_add(rq);
+ }
+ if (i915_request_wait(rq, 0, HZ / 5) < 0)
+ err = -ETIME;
+ i915_request_put(rq);
+ if (err)
+ return err;
+
+ count++;
+ } while (!__igt_timeout(end_time, NULL));
+
+ pr_info("%s: %lu switches (sync)\n", arg->ce[0]->engine->name, count);
+ return 0;
+}
+
+static int __live_parallel_switchN(void *data)
+{
+ struct parallel_switch *arg = data;
+ struct i915_request *rq = NULL;
+ IGT_TIMEOUT(end_time);
+ unsigned long count;
+ int n;
+
+ count = 0;
+ do {
+ for (n = 0; n < ARRAY_SIZE(arg->ce); n++) {
+ struct i915_request *prev = rq;
+ int err = 0;
+
+ rq = i915_request_create(arg->ce[n]);
+ if (IS_ERR(rq)) {
+ i915_request_put(prev);
+ return PTR_ERR(rq);
+ }
+
+ i915_request_get(rq);
+ if (prev) {
+ err = i915_request_await_dma_fence(rq, &prev->fence);
+ i915_request_put(prev);
+ }
+
+ i915_request_add(rq);
+ if (err) {
+ i915_request_put(rq);
+ return err;
+ }
+ }
+
+ count++;
+ } while (!__igt_timeout(end_time, NULL));
+ i915_request_put(rq);
+
+ pr_info("%s: %lu switches (many)\n", arg->ce[0]->engine->name, count);
+ return 0;
+}
+
+static int live_parallel_switch(void *arg)
+{
+ struct drm_i915_private *i915 = arg;
+ static int (* const func[])(void *arg) = {
+ __live_parallel_switch1,
+ __live_parallel_switchN,
+ NULL,
+ };
+ struct parallel_switch *data = NULL;
+ struct i915_gem_engines *engines;
+ struct i915_gem_engines_iter it;
+ int (* const *fn)(void *arg);
+ struct i915_gem_context *ctx;
+ struct intel_context *ce;
+ struct file *file;
+ int n, m, count;
+ int err = 0;
+
+ /*
+ * Check we can process switches on all engines simultaneously.
+ */
+
+ if (!DRIVER_CAPS(i915)->has_logical_contexts)
+ return 0;
+
+ file = mock_file(i915);
+ if (IS_ERR(file))
+ return PTR_ERR(file);
+
+ ctx = live_context(i915, file);
+ if (IS_ERR(ctx)) {
+ err = PTR_ERR(ctx);
+ goto out_file;
}
- GEM_BUG_ON(offset + (count - 1) * PAGE_SIZE > vma->node.size);
- offset += vma->node.start;
+ engines = i915_gem_context_lock_engines(ctx);
+ count = engines->num_engines;
- for (n = 0; n < count; n++) {
- if (gen >= 8) {
- *cmd++ = MI_STORE_DWORD_IMM_GEN4;
- *cmd++ = lower_32_bits(offset);
- *cmd++ = upper_32_bits(offset);
- *cmd++ = value;
- } else if (gen >= 4) {
- *cmd++ = MI_STORE_DWORD_IMM_GEN4 |
- (gen < 6 ? MI_USE_GGTT : 0);
- *cmd++ = 0;
- *cmd++ = offset;
- *cmd++ = value;
- } else {
- *cmd++ = MI_STORE_DWORD_IMM | MI_MEM_VIRTUAL;
- *cmd++ = offset;
- *cmd++ = value;
+ data = kcalloc(count, sizeof(*data), GFP_KERNEL);
+ if (!data) {
+ i915_gem_context_unlock_engines(ctx);
+ err = -ENOMEM;
+ goto out_file;
+ }
+
+ m = 0; /* Use the first context as our template for the engines */
+ for_each_gem_engine(ce, engines, it) {
+ err = intel_context_pin(ce);
+ if (err) {
+ i915_gem_context_unlock_engines(ctx);
+ goto out;
}
- offset += PAGE_SIZE;
+ data[m++].ce[0] = intel_context_get(ce);
}
- *cmd = MI_BATCH_BUFFER_END;
- i915_gem_object_flush_map(obj);
- i915_gem_object_unpin_map(obj);
+ i915_gem_context_unlock_engines(ctx);
- vma = i915_vma_instance(obj, vma->vm, NULL);
- if (IS_ERR(vma)) {
- err = PTR_ERR(vma);
- goto err;
+ /* Clone the same set of engines into the other contexts */
+ for (n = 1; n < ARRAY_SIZE(data->ce); n++) {
+ ctx = live_context(i915, file);
+ if (IS_ERR(ctx)) {
+ err = PTR_ERR(ctx);
+ goto out;
+ }
+
+ for (m = 0; m < count; m++) {
+ if (!data[m].ce[0])
+ continue;
+
+ ce = intel_context_create(data[m].ce[0]->engine);
+ if (IS_ERR(ce))
+ goto out;
+
+ err = intel_context_pin(ce);
+ if (err) {
+ intel_context_put(ce);
+ goto out;
+ }
+
+ data[m].ce[n] = ce;
+ }
}
- err = i915_vma_pin(vma, 0, 0, PIN_USER);
- if (err)
- goto err;
+ for (fn = func; !err && *fn; fn++) {
+ struct igt_live_test t;
+ int n;
- return vma;
+ err = igt_live_test_begin(&t, i915, __func__, "");
+ if (err)
+ break;
-err:
- i915_gem_object_put(obj);
- return ERR_PTR(err);
+ for (n = 0; n < count; n++) {
+ if (!data[n].ce[0])
+ continue;
+
+ data[n].tsk = kthread_run(*fn, &data[n],
+ "igt/parallel:%s",
+ data[n].ce[0]->engine->name);
+ if (IS_ERR(data[n].tsk)) {
+ err = PTR_ERR(data[n].tsk);
+ break;
+ }
+ get_task_struct(data[n].tsk);
+ }
+
+ yield(); /* start all threads before we kthread_stop() */
+
+ for (n = 0; n < count; n++) {
+ int status;
+
+ if (IS_ERR_OR_NULL(data[n].tsk))
+ continue;
+
+ status = kthread_stop(data[n].tsk);
+ if (status && !err)
+ err = status;
+
+ put_task_struct(data[n].tsk);
+ data[n].tsk = NULL;
+ }
+
+ if (igt_live_test_end(&t))
+ err = -EIO;
+ }
+
+out:
+ for (n = 0; n < count; n++) {
+ for (m = 0; m < ARRAY_SIZE(data->ce); m++) {
+ if (!data[n].ce[m])
+ continue;
+
+ intel_context_unpin(data[n].ce[m]);
+ intel_context_put(data[n].ce[m]);
+ }
+ }
+ kfree(data);
+out_file:
+ fput(file);
+ return err;
}
static unsigned long real_page_count(struct drm_i915_gem_object *obj)
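Note: live_parallel_switch() above fans out one kthread per engine and relies on kthread_stop() both to join each thread and to collect its return value. A sketch of that fan-out, using only calls from the hunk above; the helper name is illustrative.

/*
 * Sketch (illustrative): start a kthread per engine before stopping
 * any of them, then use kthread_stop() to join and collect errors.
 */
static int run_parallel_switch_threads(struct parallel_switch *data,
				       int count, int (*fn)(void *arg))
{
	int n, err = 0;

	for (n = 0; n < count; n++) {
		data[n].tsk = kthread_run(fn, &data[n], "igt/parallel:%s",
					  data[n].ce[0]->engine->name);
		if (IS_ERR(data[n].tsk)) {
			err = PTR_ERR(data[n].tsk);
			break;
		}
		get_task_struct(data[n].tsk);
	}

	yield(); /* let every thread start before we kthread_stop() */

	for (n = 0; n < count; n++) {
		int status;

		if (IS_ERR_OR_NULL(data[n].tsk))
			continue;

		status = kthread_stop(data[n].tsk);
		if (status && !err)
			err = status;

		put_task_struct(data[n].tsk);
		data[n].tsk = NULL;
	}

	return err;
}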
@@ -232,100 +419,39 @@ static unsigned long fake_page_count(struct drm_i915_gem_object *obj)
return huge_gem_object_dma_size(obj) >> PAGE_SHIFT;
}
-static int gpu_fill(struct drm_i915_gem_object *obj,
- struct i915_gem_context *ctx,
- struct intel_engine_cs *engine,
+static int gpu_fill(struct intel_context *ce,
+ struct drm_i915_gem_object *obj,
unsigned int dw)
{
- struct drm_i915_private *i915 = to_i915(obj->base.dev);
- struct i915_address_space *vm = ctx->vm ?: &i915->ggtt.vm;
- struct i915_request *rq;
struct i915_vma *vma;
- struct i915_vma *batch;
- unsigned int flags;
int err;
- GEM_BUG_ON(obj->base.size > vm->total);
- GEM_BUG_ON(!intel_engine_can_store_dword(engine));
+ GEM_BUG_ON(obj->base.size > ce->vm->total);
+ GEM_BUG_ON(!intel_engine_can_store_dword(ce->engine));
- vma = i915_vma_instance(obj, vm, NULL);
+ vma = i915_vma_instance(obj, ce->vm, NULL);
if (IS_ERR(vma))
return PTR_ERR(vma);
- i915_gem_object_lock(obj);
- err = i915_gem_object_set_to_gtt_domain(obj, false);
- i915_gem_object_unlock(obj);
- if (err)
- return err;
-
err = i915_vma_pin(vma, 0, 0, PIN_HIGH | PIN_USER);
if (err)
return err;
- /* Within the GTT the huge objects maps every page onto
+ /*
+ * Within the GTT the huge object maps every page onto
* its 1024 real pages (using phys_pfn = dma_pfn % 1024).
* We set the nth dword within the page using the nth
* mapping via the GTT - this should exercise the GTT mapping
* whilst checking that each context provides a unique view
* into the object.
*/
- batch = gpu_fill_dw(vma,
- (dw * real_page_count(obj)) << PAGE_SHIFT |
- (dw * sizeof(u32)),
- real_page_count(obj),
- dw);
- if (IS_ERR(batch)) {
- err = PTR_ERR(batch);
- goto err_vma;
- }
-
- rq = igt_request_alloc(ctx, engine);
- if (IS_ERR(rq)) {
- err = PTR_ERR(rq);
- goto err_batch;
- }
-
- flags = 0;
- if (INTEL_GEN(vm->i915) <= 5)
- flags |= I915_DISPATCH_SECURE;
-
- err = engine->emit_bb_start(rq,
- batch->node.start, batch->node.size,
- flags);
- if (err)
- goto err_request;
-
- i915_vma_lock(batch);
- err = i915_vma_move_to_active(batch, rq, 0);
- i915_vma_unlock(batch);
- if (err)
- goto skip_request;
-
- i915_vma_lock(vma);
- err = i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE);
- i915_vma_unlock(vma);
- if (err)
- goto skip_request;
-
- i915_request_add(rq);
-
- i915_vma_unpin(batch);
- i915_vma_close(batch);
- i915_vma_put(batch);
-
+ err = igt_gpu_fill_dw(ce, vma,
+ (dw * real_page_count(obj)) << PAGE_SHIFT |
+ (dw * sizeof(u32)),
+ real_page_count(obj),
+ dw);
i915_vma_unpin(vma);
- return 0;
-
-skip_request:
- i915_request_skip(rq, err);
-err_request:
- i915_request_add(rq);
-err_batch:
- i915_vma_unpin(batch);
- i915_vma_put(batch);
-err_vma:
- i915_vma_unpin(vma);
return err;
}
@@ -404,17 +530,17 @@ out_unmap:
return err;
}
-static int file_add_object(struct drm_file *file,
- struct drm_i915_gem_object *obj)
+static int file_add_object(struct file *file, struct drm_i915_gem_object *obj)
{
int err;
GEM_BUG_ON(obj->base.handle_count);
/* tie the object to the drm_file for easy reaping */
- err = idr_alloc(&file->object_idr, &obj->base, 1, 0, GFP_KERNEL);
+ err = idr_alloc(&to_drm_file(file)->object_idr,
+ &obj->base, 1, 0, GFP_KERNEL);
if (err < 0)
- return err;
+ return err;
i915_gem_object_get(obj);
obj->base.handle_count++;
@@ -422,19 +548,21 @@ static int file_add_object(struct drm_file *file,
}
static struct drm_i915_gem_object *
-create_test_object(struct i915_gem_context *ctx,
- struct drm_file *file,
+create_test_object(struct i915_address_space *vm,
+ struct file *file,
struct list_head *objects)
{
struct drm_i915_gem_object *obj;
- struct i915_address_space *vm = ctx->vm ?: &ctx->i915->ggtt.vm;
u64 size;
int err;
+ /* Keep in GEM's good graces */
+ intel_gt_retire_requests(vm->gt);
+
size = min(vm->total / 2, 1024ull * DW_PER_PAGE * PAGE_SIZE);
size = round_down(size, DW_PER_PAGE * PAGE_SIZE);
- obj = huge_gem_object(ctx->i915, DW_PER_PAGE * PAGE_SIZE, size);
+ obj = huge_gem_object(vm->i915, DW_PER_PAGE * PAGE_SIZE, size);
if (IS_ERR(obj))
return obj;
@@ -462,11 +590,49 @@ static unsigned long max_dwords(struct drm_i915_gem_object *obj)
return npages / DW_PER_PAGE;
}
+static void throttle_release(struct i915_request **q, int count)
+{
+ int i;
+
+ for (i = 0; i < count; i++) {
+ if (IS_ERR_OR_NULL(q[i]))
+ continue;
+
+ i915_request_put(fetch_and_zero(&q[i]));
+ }
+}
+
+static int throttle(struct intel_context *ce,
+ struct i915_request **q, int count)
+{
+ int i;
+
+ if (!IS_ERR_OR_NULL(q[0])) {
+ if (i915_request_wait(q[0],
+ I915_WAIT_INTERRUPTIBLE,
+ MAX_SCHEDULE_TIMEOUT) < 0)
+ return -EINTR;
+
+ i915_request_put(q[0]);
+ }
+
+ for (i = 0; i < count - 1; i++)
+ q[i] = q[i + 1];
+
+ q[i] = intel_context_create_request(ce);
+ if (IS_ERR(q[i]))
+ return PTR_ERR(q[i]);
+
+ i915_request_get(q[i]);
+ i915_request_add(q[i]);
+
+ return 0;
+}
+
static int igt_ctx_exec(void *arg)
{
struct drm_i915_private *i915 = arg;
struct intel_engine_cs *engine;
- enum intel_engine_id id;
int err = -ENODEV;
/*
@@ -478,13 +644,14 @@ static int igt_ctx_exec(void *arg)
if (!DRIVER_CAPS(i915)->has_logical_contexts)
return 0;
- for_each_engine(engine, i915, id) {
+ for_each_uabi_engine(engine, i915) {
struct drm_i915_gem_object *obj = NULL;
unsigned long ncontexts, ndwords, dw;
+ struct i915_request *tq[5] = {};
struct igt_live_test t;
- struct drm_file *file;
IGT_TIMEOUT(end_time);
LIST_HEAD(objects);
+ struct file *file;
if (!intel_engine_can_store_dword(engine))
continue;
@@ -496,41 +663,53 @@ static int igt_ctx_exec(void *arg)
if (IS_ERR(file))
return PTR_ERR(file);
- mutex_lock(&i915->drm.struct_mutex);
-
err = igt_live_test_begin(&t, i915, __func__, engine->name);
if (err)
- goto out_unlock;
+ goto out_file;
ncontexts = 0;
ndwords = 0;
dw = 0;
while (!time_after(jiffies, end_time)) {
struct i915_gem_context *ctx;
- intel_wakeref_t wakeref;
+ struct intel_context *ce;
- ctx = live_context(i915, file);
+ ctx = kernel_context(i915);
if (IS_ERR(ctx)) {
err = PTR_ERR(ctx);
- goto out_unlock;
+ goto out_file;
}
+ ce = i915_gem_context_get_engine(ctx, engine->legacy_idx);
+ GEM_BUG_ON(IS_ERR(ce));
+
if (!obj) {
- obj = create_test_object(ctx, file, &objects);
+ obj = create_test_object(ce->vm, file, &objects);
if (IS_ERR(obj)) {
err = PTR_ERR(obj);
- goto out_unlock;
+ intel_context_put(ce);
+ kernel_context_close(ctx);
+ goto out_file;
}
}
- with_intel_runtime_pm(&i915->runtime_pm, wakeref)
- err = gpu_fill(obj, ctx, engine, dw);
+ err = gpu_fill(ce, obj, dw);
if (err) {
- pr_err("Failed to fill dword %lu [%lu/%lu] with gpu (%s) in ctx %u [full-ppgtt? %s], err=%d\n",
+ pr_err("Failed to fill dword %lu [%lu/%lu] with gpu (%s) [full-ppgtt? %s], err=%d\n",
ndwords, dw, max_dwords(obj),
- engine->name, ctx->hw_id,
- yesno(!!ctx->vm), err);
- goto out_unlock;
+ engine->name,
+ yesno(!!rcu_access_pointer(ctx->vm)),
+ err);
+ intel_context_put(ce);
+ kernel_context_close(ctx);
+ goto out_file;
+ }
+
+ err = throttle(ce, tq, ARRAY_SIZE(tq));
+ if (err) {
+ intel_context_put(ce);
+ kernel_context_close(ctx);
+ goto out_file;
}
if (++dw == max_dwords(obj)) {
@@ -540,6 +719,9 @@ static int igt_ctx_exec(void *arg)
ndwords++;
ncontexts++;
+
+ intel_context_put(ce);
+ kernel_context_close(ctx);
}
pr_info("Submitted %lu contexts to %s, filling %lu dwords\n",
@@ -557,14 +739,16 @@ static int igt_ctx_exec(void *arg)
dw += rem;
}
-out_unlock:
+out_file:
+ throttle_release(tq, ARRAY_SIZE(tq));
if (igt_live_test_end(&t))
err = -EIO;
- mutex_unlock(&i915->drm.struct_mutex);
- mock_file_free(i915, file);
+ fput(file);
if (err)
return err;
+
+ i915_gem_drain_freed_objects(i915);
}
return 0;
@@ -573,11 +757,11 @@ out_unlock:
static int igt_shared_ctx_exec(void *arg)
{
struct drm_i915_private *i915 = arg;
+ struct i915_request *tq[5] = {};
struct i915_gem_context *parent;
struct intel_engine_cs *engine;
- enum intel_engine_id id;
struct igt_live_test t;
- struct drm_file *file;
+ struct file *file;
int err = 0;
/*
@@ -592,24 +776,22 @@ static int igt_shared_ctx_exec(void *arg)
if (IS_ERR(file))
return PTR_ERR(file);
- mutex_lock(&i915->drm.struct_mutex);
-
parent = live_context(i915, file);
if (IS_ERR(parent)) {
err = PTR_ERR(parent);
- goto out_unlock;
+ goto out_file;
}
if (!parent->vm) { /* not full-ppgtt; nothing to share */
err = 0;
- goto out_unlock;
+ goto out_file;
}
err = igt_live_test_begin(&t, i915, __func__, "");
if (err)
- goto out_unlock;
+ goto out_file;
- for_each_engine(engine, i915, id) {
+ for_each_uabi_engine(engine, i915) {
unsigned long ncontexts, ndwords, dw;
struct drm_i915_gem_object *obj = NULL;
IGT_TIMEOUT(end_time);
@@ -623,7 +805,7 @@ static int igt_shared_ctx_exec(void *arg)
ncontexts = 0;
while (!time_after(jiffies, end_time)) {
struct i915_gem_context *ctx;
- intel_wakeref_t wakeref;
+ struct intel_context *ce;
ctx = kernel_context(i915);
if (IS_ERR(ctx)) {
@@ -631,25 +813,39 @@ static int igt_shared_ctx_exec(void *arg)
goto out_test;
}
- __assign_ppgtt(ctx, parent->vm);
+ mutex_lock(&ctx->mutex);
+ __assign_ppgtt(ctx, ctx_vm(parent));
+ mutex_unlock(&ctx->mutex);
+
+ ce = i915_gem_context_get_engine(ctx, engine->legacy_idx);
+ GEM_BUG_ON(IS_ERR(ce));
if (!obj) {
- obj = create_test_object(parent, file, &objects);
+ obj = create_test_object(ctx_vm(parent),
+ file, &objects);
if (IS_ERR(obj)) {
err = PTR_ERR(obj);
+ intel_context_put(ce);
kernel_context_close(ctx);
goto out_test;
}
}
- err = 0;
- with_intel_runtime_pm(&i915->runtime_pm, wakeref)
- err = gpu_fill(obj, ctx, engine, dw);
+ err = gpu_fill(ce, obj, dw);
if (err) {
- pr_err("Failed to fill dword %lu [%lu/%lu] with gpu (%s) in ctx %u [full-ppgtt? %s], err=%d\n",
+ pr_err("Failed to fill dword %lu [%lu/%lu] with gpu (%s) [full-ppgtt? %s], err=%d\n",
ndwords, dw, max_dwords(obj),
- engine->name, ctx->hw_id,
- yesno(!!ctx->vm), err);
+ engine->name,
+ yesno(!!rcu_access_pointer(ctx->vm)),
+ err);
+ intel_context_put(ce);
+ kernel_context_close(ctx);
+ goto out_test;
+ }
+
+ err = throttle(ce, tq, ARRAY_SIZE(tq));
+ if (err) {
+ intel_context_put(ce);
kernel_context_close(ctx);
goto out_test;
}
@@ -662,6 +858,7 @@ static int igt_shared_ctx_exec(void *arg)
ndwords++;
ncontexts++;
+ intel_context_put(ce);
kernel_context_close(ctx);
}
pr_info("Submitted %lu contexts to %s, filling %lu dwords\n",
@@ -678,14 +875,15 @@ static int igt_shared_ctx_exec(void *arg)
dw += rem;
}
+
+ i915_gem_drain_freed_objects(i915);
}
out_test:
+ throttle_release(tq, ARRAY_SIZE(tq));
if (igt_live_test_end(&t))
err = -EIO;
-out_unlock:
- mutex_unlock(&i915->drm.struct_mutex);
-
- mock_file_free(i915, file);
+out_file:
+ fput(file);
return err;
}
@@ -717,6 +915,8 @@ static struct i915_vma *rpcs_query_batch(struct i915_vma *vma)
__i915_gem_object_flush_map(obj, 0, 64);
i915_gem_object_unpin_map(obj);
+ intel_gt_chipset_flush(vma->vm->gt);
+
vma = i915_vma_instance(obj, vma->vm, NULL);
if (IS_ERR(vma)) {
err = PTR_ERR(vma);
@@ -746,7 +946,7 @@ emit_rpcs_query(struct drm_i915_gem_object *obj,
GEM_BUG_ON(!intel_engine_can_store_dword(ce->engine));
- vma = i915_vma_instance(obj, ce->gem_context->vm, NULL);
+ vma = i915_vma_instance(obj, ce->vm, NULL);
if (IS_ERR(vma))
return PTR_ERR(vma);
@@ -779,21 +979,22 @@ emit_rpcs_query(struct drm_i915_gem_object *obj,
goto err_request;
i915_vma_lock(batch);
- err = i915_vma_move_to_active(batch, rq, 0);
+ err = i915_request_await_object(rq, batch->obj, false);
+ if (err == 0)
+ err = i915_vma_move_to_active(batch, rq, 0);
i915_vma_unlock(batch);
if (err)
goto skip_request;
i915_vma_lock(vma);
- err = i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE);
+ err = i915_request_await_object(rq, vma->obj, true);
+ if (err == 0)
+ err = i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE);
i915_vma_unlock(vma);
if (err)
goto skip_request;
- i915_vma_unpin(batch);
- i915_vma_close(batch);
- i915_vma_put(batch);
-
+ i915_vma_unpin_and_release(&batch, 0);
i915_vma_unpin(vma);
*rq_out = i915_request_get(rq);
@@ -807,8 +1008,7 @@ skip_request:
err_request:
i915_request_add(rq);
err_batch:
- i915_vma_unpin(batch);
- i915_vma_put(batch);
+ i915_vma_unpin_and_release(&batch, 0);
err_vma:
i915_vma_unpin(vma);
@@ -820,8 +1020,7 @@ err_vma:
#define TEST_RESET BIT(2)
static int
-__sseu_prepare(struct drm_i915_private *i915,
- const char *name,
+__sseu_prepare(const char *name,
unsigned int flags,
struct intel_context *ce,
struct igt_spinner **spin)
@@ -837,14 +1036,11 @@ __sseu_prepare(struct drm_i915_private *i915,
if (!*spin)
return -ENOMEM;
- ret = igt_spinner_init(*spin, i915);
+ ret = igt_spinner_init(*spin, ce->engine->gt);
if (ret)
goto err_free;
- rq = igt_spinner_create_request(*spin,
- ce->gem_context,
- ce->engine,
- MI_NOOP);
+ rq = igt_spinner_create_request(*spin, ce, MI_NOOP);
if (IS_ERR(rq)) {
ret = PTR_ERR(rq);
goto err_fini;
@@ -870,8 +1066,7 @@ err_free:
}
static int
-__read_slice_count(struct drm_i915_private *i915,
- struct intel_context *ce,
+__read_slice_count(struct intel_context *ce,
struct drm_i915_gem_object *obj,
struct igt_spinner *spin,
u32 *rpcs)
@@ -900,7 +1095,7 @@ __read_slice_count(struct drm_i915_private *i915,
return ret;
}
- if (INTEL_GEN(i915) >= 11) {
+ if (INTEL_GEN(ce->engine->i915) >= 11) {
s_mask = GEN11_RPCS_S_CNT_MASK;
s_shift = GEN11_RPCS_S_CNT_SHIFT;
} else {
@@ -943,8 +1138,7 @@ __check_rpcs(const char *name, u32 rpcs, int slices, unsigned int expected,
}
static int
-__sseu_finish(struct drm_i915_private *i915,
- const char *name,
+__sseu_finish(const char *name,
unsigned int flags,
struct intel_context *ce,
struct drm_i915_gem_object *obj,
@@ -956,19 +1150,18 @@ __sseu_finish(struct drm_i915_private *i915,
int ret = 0;
if (flags & TEST_RESET) {
- ret = i915_reset_engine(ce->engine, "sseu");
+ ret = intel_engine_reset(ce->engine, "sseu");
if (ret)
goto out;
}
- ret = __read_slice_count(i915, ce, obj,
+ ret = __read_slice_count(ce, obj,
flags & TEST_RESET ? NULL : spin, &rpcs);
ret = __check_rpcs(name, rpcs, ret, expected, "Context", "!");
if (ret)
goto out;
- ret = __read_slice_count(i915, ce->engine->kernel_context, obj,
- NULL, &rpcs);
+ ret = __read_slice_count(ce->engine->kernel_context, obj, NULL, &rpcs);
ret = __check_rpcs(name, rpcs, ret, slices, "Kernel context", "!");
out:
@@ -976,11 +1169,11 @@ out:
igt_spinner_end(spin);
if ((flags & TEST_IDLE) && ret == 0) {
- ret = i915_gem_wait_for_idle(i915, 0, MAX_SCHEDULE_TIMEOUT);
+ ret = igt_flush_test(ce->engine->i915);
if (ret)
return ret;
- ret = __read_slice_count(i915, ce, obj, NULL, &rpcs);
+ ret = __read_slice_count(ce, obj, NULL, &rpcs);
ret = __check_rpcs(name, rpcs, ret, expected,
"Context", " after idle!");
}
@@ -989,8 +1182,7 @@ out:
}
static int
-__sseu_test(struct drm_i915_private *i915,
- const char *name,
+__sseu_test(const char *name,
unsigned int flags,
struct intel_context *ce,
struct drm_i915_gem_object *obj,
@@ -999,15 +1191,17 @@ __sseu_test(struct drm_i915_private *i915,
struct igt_spinner *spin = NULL;
int ret;
- ret = __sseu_prepare(i915, name, flags, ce, &spin);
+ intel_engine_pm_get(ce->engine);
+
+ ret = __sseu_prepare(name, flags, ce, &spin);
if (ret)
- return ret;
+ goto out_pm;
- ret = __intel_context_reconfigure_sseu(ce, sseu);
+ ret = intel_context_reconfigure_sseu(ce, sseu);
if (ret)
goto out_spin;
- ret = __sseu_finish(i915, name, flags, ce, obj,
+ ret = __sseu_finish(name, flags, ce, obj,
hweight32(sseu.slice_mask), spin);
out_spin:
@@ -1016,6 +1210,8 @@ out_spin:
igt_spinner_fini(spin);
kfree(spin);
}
+out_pm:
+ intel_engine_pm_put(ce->engine);
return ret;
}
@@ -1024,53 +1220,15 @@ __igt_ctx_sseu(struct drm_i915_private *i915,
const char *name,
unsigned int flags)
{
- struct intel_engine_cs *engine = i915->engine[RCS0];
- struct intel_sseu default_sseu = engine->sseu;
struct drm_i915_gem_object *obj;
- struct i915_gem_context *ctx;
- struct intel_context *ce;
- struct intel_sseu pg_sseu;
- intel_wakeref_t wakeref;
- struct drm_file *file;
- int ret;
-
- if (INTEL_GEN(i915) < 9)
- return 0;
-
- if (!RUNTIME_INFO(i915)->sseu.has_slice_pg)
- return 0;
+ int inst = 0;
+ int ret = 0;
- if (hweight32(default_sseu.slice_mask) < 2)
+ if (INTEL_GEN(i915) < 9 || !RUNTIME_INFO(i915)->sseu.has_slice_pg)
return 0;
- /*
- * Gen11 VME friendly power-gated configuration with half enabled
- * sub-slices.
- */
- pg_sseu = default_sseu;
- pg_sseu.slice_mask = 1;
- pg_sseu.subslice_mask =
- ~(~0 << (hweight32(default_sseu.subslice_mask) / 2));
-
- pr_info("SSEU subtest '%s', flags=%x, def_slices=%u, pg_slices=%u\n",
- name, flags, hweight32(default_sseu.slice_mask),
- hweight32(pg_sseu.slice_mask));
-
- file = mock_file(i915);
- if (IS_ERR(file))
- return PTR_ERR(file);
-
if (flags & TEST_RESET)
- igt_global_reset_lock(i915);
-
- mutex_lock(&i915->drm.struct_mutex);
-
- ctx = live_context(i915, file);
- if (IS_ERR(ctx)) {
- ret = PTR_ERR(ctx);
- goto out_unlock;
- }
- i915_gem_context_clear_bannable(ctx); /* to reset and beyond! */
+ igt_global_reset_lock(&i915->gt);
obj = i915_gem_object_create_internal(i915, PAGE_SIZE);
if (IS_ERR(obj)) {
@@ -1078,56 +1236,79 @@ __igt_ctx_sseu(struct drm_i915_private *i915,
goto out_unlock;
}
- wakeref = intel_runtime_pm_get(&i915->runtime_pm);
+ do {
+ struct intel_engine_cs *engine;
+ struct intel_context *ce;
+ struct intel_sseu pg_sseu;
- ce = i915_gem_context_get_engine(ctx, RCS0);
- if (IS_ERR(ce)) {
- ret = PTR_ERR(ce);
- goto out_rpm;
- }
+ engine = intel_engine_lookup_user(i915,
+ I915_ENGINE_CLASS_RENDER,
+ inst++);
+ if (!engine)
+ break;
- ret = intel_context_pin(ce);
- if (ret)
- goto out_context;
+ if (hweight32(engine->sseu.slice_mask) < 2)
+ continue;
- /* First set the default mask. */
- ret = __sseu_test(i915, name, flags, ce, obj, default_sseu);
- if (ret)
- goto out_fail;
+ /*
+ * Gen11 VME friendly power-gated configuration with
+ * half enabled sub-slices.
+ */
+ pg_sseu = engine->sseu;
+ pg_sseu.slice_mask = 1;
+ pg_sseu.subslice_mask =
+ ~(~0 << (hweight32(engine->sseu.subslice_mask) / 2));
+
+ pr_info("%s: SSEU subtest '%s', flags=%x, def_slices=%u, pg_slices=%u\n",
+ engine->name, name, flags,
+ hweight32(engine->sseu.slice_mask),
+ hweight32(pg_sseu.slice_mask));
+
+ ce = intel_context_create(engine);
+ if (IS_ERR(ce)) {
+ ret = PTR_ERR(ce);
+ goto out_put;
+ }
- /* Then set a power-gated configuration. */
- ret = __sseu_test(i915, name, flags, ce, obj, pg_sseu);
- if (ret)
- goto out_fail;
+ ret = intel_context_pin(ce);
+ if (ret)
+ goto out_ce;
- /* Back to defaults. */
- ret = __sseu_test(i915, name, flags, ce, obj, default_sseu);
- if (ret)
- goto out_fail;
+ /* First set the default mask. */
+ ret = __sseu_test(name, flags, ce, obj, engine->sseu);
+ if (ret)
+ goto out_unpin;
- /* One last power-gated configuration for the road. */
- ret = __sseu_test(i915, name, flags, ce, obj, pg_sseu);
- if (ret)
- goto out_fail;
+ /* Then set a power-gated configuration. */
+ ret = __sseu_test(name, flags, ce, obj, pg_sseu);
+ if (ret)
+ goto out_unpin;
+
+ /* Back to defaults. */
+ ret = __sseu_test(name, flags, ce, obj, engine->sseu);
+ if (ret)
+ goto out_unpin;
+
+ /* One last power-gated configuration for the road. */
+ ret = __sseu_test(name, flags, ce, obj, pg_sseu);
+ if (ret)
+ goto out_unpin;
-out_fail:
- if (igt_flush_test(i915, I915_WAIT_LOCKED))
+out_unpin:
+ intel_context_unpin(ce);
+out_ce:
+ intel_context_put(ce);
+ } while (!ret);
+
+ if (igt_flush_test(i915))
ret = -EIO;
- intel_context_unpin(ce);
-out_context:
- intel_context_put(ce);
-out_rpm:
- intel_runtime_pm_put(&i915->runtime_pm, wakeref);
+out_put:
i915_gem_object_put(obj);
out_unlock:
- mutex_unlock(&i915->drm.struct_mutex);
-
if (flags & TEST_RESET)
- igt_global_reset_unlock(i915);
-
- mock_file_free(i915, file);
+ igt_global_reset_unlock(&i915->gt);
if (ret)
pr_err("%s: Failed with %d!\n", name, ret);
@@ -1161,15 +1342,18 @@ static int igt_ctx_sseu(void *arg)
static int igt_ctx_readonly(void *arg)
{
struct drm_i915_private *i915 = arg;
+ unsigned long idx, ndwords, dw, num_engines;
struct drm_i915_gem_object *obj = NULL;
+ struct i915_request *tq[5] = {};
+ struct i915_gem_engines_iter it;
struct i915_address_space *vm;
struct i915_gem_context *ctx;
- unsigned long idx, ndwords, dw;
+ struct intel_context *ce;
struct igt_live_test t;
- struct drm_file *file;
I915_RND_STATE(prng);
IGT_TIMEOUT(end_time);
LIST_HEAD(objects);
+ struct file *file;
int err = -ENODEV;
/*
@@ -1182,56 +1366,63 @@ static int igt_ctx_readonly(void *arg)
if (IS_ERR(file))
return PTR_ERR(file);
- mutex_lock(&i915->drm.struct_mutex);
-
err = igt_live_test_begin(&t, i915, __func__, "");
if (err)
- goto out_unlock;
+ goto out_file;
ctx = live_context(i915, file);
if (IS_ERR(ctx)) {
err = PTR_ERR(ctx);
- goto out_unlock;
+ goto out_file;
}
- vm = ctx->vm ?: &i915->mm.aliasing_ppgtt->vm;
+ vm = ctx_vm(ctx) ?: &i915->ggtt.alias->vm;
if (!vm || !vm->has_read_only) {
err = 0;
- goto out_unlock;
+ goto out_file;
}
+ num_engines = 0;
+ for_each_gem_engine(ce, i915_gem_context_lock_engines(ctx), it)
+ if (intel_engine_can_store_dword(ce->engine))
+ num_engines++;
+ i915_gem_context_unlock_engines(ctx);
+
ndwords = 0;
dw = 0;
while (!time_after(jiffies, end_time)) {
- struct intel_engine_cs *engine;
- unsigned int id;
-
- for_each_engine(engine, i915, id) {
- intel_wakeref_t wakeref;
-
- if (!intel_engine_can_store_dword(engine))
+ for_each_gem_engine(ce,
+ i915_gem_context_lock_engines(ctx), it) {
+ if (!intel_engine_can_store_dword(ce->engine))
continue;
if (!obj) {
- obj = create_test_object(ctx, file, &objects);
+ obj = create_test_object(ce->vm, file, &objects);
if (IS_ERR(obj)) {
err = PTR_ERR(obj);
- goto out_unlock;
+ i915_gem_context_unlock_engines(ctx);
+ goto out_file;
}
if (prandom_u32_state(&prng) & 1)
i915_gem_object_set_readonly(obj);
}
- err = 0;
- with_intel_runtime_pm(&i915->runtime_pm, wakeref)
- err = gpu_fill(obj, ctx, engine, dw);
+ err = gpu_fill(ce, obj, dw);
if (err) {
- pr_err("Failed to fill dword %lu [%lu/%lu] with gpu (%s) in ctx %u [full-ppgtt? %s], err=%d\n",
+ pr_err("Failed to fill dword %lu [%lu/%lu] with gpu (%s) [full-ppgtt? %s], err=%d\n",
ndwords, dw, max_dwords(obj),
- engine->name, ctx->hw_id,
- yesno(!!ctx->vm), err);
- goto out_unlock;
+ ce->engine->name,
+ yesno(!!ctx_vm(ctx)),
+ err);
+ i915_gem_context_unlock_engines(ctx);
+ goto out_file;
+ }
+
+ err = throttle(ce, tq, ARRAY_SIZE(tq));
+ if (err) {
+ i915_gem_context_unlock_engines(ctx);
+ goto out_file;
}
if (++dw == max_dwords(obj)) {
@@ -1240,9 +1431,10 @@ static int igt_ctx_readonly(void *arg)
}
ndwords++;
}
+ i915_gem_context_unlock_engines(ctx);
}
- pr_info("Submitted %lu dwords (across %u engines)\n",
- ndwords, RUNTIME_INFO(i915)->num_engines);
+ pr_info("Submitted %lu dwords (across %lu engines)\n",
+ ndwords, num_engines);
dw = 0;
idx = 0;
@@ -1262,19 +1454,19 @@ static int igt_ctx_readonly(void *arg)
dw += rem;
}
-out_unlock:
+out_file:
+ throttle_release(tq, ARRAY_SIZE(tq));
if (igt_live_test_end(&t))
err = -EIO;
- mutex_unlock(&i915->drm.struct_mutex);
- mock_file_free(i915, file);
+ fput(file);
return err;
}
-static int check_scratch(struct i915_gem_context *ctx, u64 offset)
+static int check_scratch(struct i915_address_space *vm, u64 offset)
{
struct drm_mm_node *node =
- __drm_mm_interval_first(&ctx->vm->mm,
+ __drm_mm_interval_first(&vm->mm,
offset, offset + sizeof(u32) - 1);
if (!node || node->start > offset)
return 0;
@@ -1292,6 +1484,7 @@ static int write_to_scratch(struct i915_gem_context *ctx,
{
struct drm_i915_private *i915 = ctx->i915;
struct drm_i915_gem_object *obj;
+ struct i915_address_space *vm;
struct i915_request *rq;
struct i915_vma *vma;
u32 *cmd;
@@ -1306,7 +1499,7 @@ static int write_to_scratch(struct i915_gem_context *ctx,
cmd = i915_gem_object_pin_map(obj, I915_MAP_WB);
if (IS_ERR(cmd)) {
err = PTR_ERR(cmd);
- goto err;
+ goto out;
}
*cmd++ = MI_STORE_DWORD_IMM_GEN4;
@@ -1322,17 +1515,20 @@ static int write_to_scratch(struct i915_gem_context *ctx,
__i915_gem_object_flush_map(obj, 0, 64);
i915_gem_object_unpin_map(obj);
- vma = i915_vma_instance(obj, ctx->vm, NULL);
+ intel_gt_chipset_flush(engine->gt);
+
+ vm = i915_gem_context_get_vm_rcu(ctx);
+ vma = i915_vma_instance(obj, vm, NULL);
if (IS_ERR(vma)) {
err = PTR_ERR(vma);
- goto err;
+ goto out_vm;
}
err = i915_vma_pin(vma, 0, 0, PIN_USER | PIN_OFFSET_FIXED);
if (err)
- goto err;
+ goto out_vm;
- err = check_scratch(ctx, offset);
+ err = check_scratch(vm, offset);
if (err)
goto err_unpin;
@@ -1347,26 +1543,27 @@ static int write_to_scratch(struct i915_gem_context *ctx,
goto err_request;
i915_vma_lock(vma);
- err = i915_vma_move_to_active(vma, rq, 0);
+ err = i915_request_await_object(rq, vma->obj, false);
+ if (err == 0)
+ err = i915_vma_move_to_active(vma, rq, 0);
i915_vma_unlock(vma);
if (err)
goto skip_request;
i915_vma_unpin(vma);
- i915_vma_close(vma);
- i915_vma_put(vma);
i915_request_add(rq);
- return 0;
-
+ goto out_vm;
skip_request:
i915_request_skip(rq, err);
err_request:
i915_request_add(rq);
err_unpin:
i915_vma_unpin(vma);
-err:
+out_vm:
+ i915_vm_put(vm);
+out:
i915_gem_object_put(obj);
return err;
}
@@ -1377,6 +1574,7 @@ static int read_from_scratch(struct i915_gem_context *ctx,
{
struct drm_i915_private *i915 = ctx->i915;
struct drm_i915_gem_object *obj;
+ struct i915_address_space *vm;
const u32 RCS_GPR0 = 0x2600; /* not all engines have their own GPR! */
const u32 result = 0x100;
struct i915_request *rq;
@@ -1393,7 +1591,7 @@ static int read_from_scratch(struct i915_gem_context *ctx,
cmd = i915_gem_object_pin_map(obj, I915_MAP_WB);
if (IS_ERR(cmd)) {
err = PTR_ERR(cmd);
- goto err;
+ goto out;
}
memset(cmd, POISON_INUSE, PAGE_SIZE);
@@ -1419,17 +1617,20 @@ static int read_from_scratch(struct i915_gem_context *ctx,
i915_gem_object_flush_map(obj);
i915_gem_object_unpin_map(obj);
- vma = i915_vma_instance(obj, ctx->vm, NULL);
+ intel_gt_chipset_flush(engine->gt);
+
+ vm = i915_gem_context_get_vm_rcu(ctx);
+ vma = i915_vma_instance(obj, vm, NULL);
if (IS_ERR(vma)) {
err = PTR_ERR(vma);
- goto err;
+ goto out_vm;
}
err = i915_vma_pin(vma, 0, 0, PIN_USER | PIN_OFFSET_FIXED);
if (err)
- goto err;
+ goto out_vm;
- err = check_scratch(ctx, offset);
+ err = check_scratch(vm, offset);
if (err)
goto err_unpin;
@@ -1444,7 +1645,9 @@ static int read_from_scratch(struct i915_gem_context *ctx,
goto err_request;
i915_vma_lock(vma);
- err = i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE);
+ err = i915_request_await_object(rq, vma->obj, true);
+ if (err == 0)
+ err = i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE);
i915_vma_unlock(vma);
if (err)
goto skip_request;
@@ -1458,27 +1661,27 @@ static int read_from_scratch(struct i915_gem_context *ctx,
err = i915_gem_object_set_to_cpu_domain(obj, false);
i915_gem_object_unlock(obj);
if (err)
- goto err;
+ goto out_vm;
cmd = i915_gem_object_pin_map(obj, I915_MAP_WB);
if (IS_ERR(cmd)) {
err = PTR_ERR(cmd);
- goto err;
+ goto out_vm;
}
*value = cmd[result / sizeof(*cmd)];
i915_gem_object_unpin_map(obj);
- i915_gem_object_put(obj);
-
- return 0;
+ goto out_vm;
skip_request:
i915_request_skip(rq, err);
err_request:
i915_request_add(rq);
err_unpin:
i915_vma_unpin(vma);
-err:
+out_vm:
+ i915_vm_put(vm);
+out:
i915_gem_object_put(obj);
return err;
}
@@ -1487,13 +1690,11 @@ static int igt_vm_isolation(void *arg)
{
struct drm_i915_private *i915 = arg;
struct i915_gem_context *ctx_a, *ctx_b;
+ unsigned long num_engines, count;
struct intel_engine_cs *engine;
- intel_wakeref_t wakeref;
struct igt_live_test t;
- struct drm_file *file;
I915_RND_STATE(prng);
- unsigned long count;
- unsigned int id;
+ struct file *file;
u64 vm_total;
int err;
@@ -1509,36 +1710,33 @@ static int igt_vm_isolation(void *arg)
if (IS_ERR(file))
return PTR_ERR(file);
- mutex_lock(&i915->drm.struct_mutex);
-
err = igt_live_test_begin(&t, i915, __func__, "");
if (err)
- goto out_unlock;
+ goto out_file;
ctx_a = live_context(i915, file);
if (IS_ERR(ctx_a)) {
err = PTR_ERR(ctx_a);
- goto out_unlock;
+ goto out_file;
}
ctx_b = live_context(i915, file);
if (IS_ERR(ctx_b)) {
err = PTR_ERR(ctx_b);
- goto out_unlock;
+ goto out_file;
}
/* We can only test vm isolation if the vms are distinct */
- if (ctx_a->vm == ctx_b->vm)
- goto out_unlock;
+ if (ctx_vm(ctx_a) == ctx_vm(ctx_b))
+ goto out_file;
- vm_total = ctx_a->vm->total;
- GEM_BUG_ON(ctx_b->vm->total != vm_total);
+ vm_total = ctx_vm(ctx_a)->total;
+ GEM_BUG_ON(ctx_vm(ctx_b)->total != vm_total);
vm_total -= I915_GTT_PAGE_SIZE;
- wakeref = intel_runtime_pm_get(&i915->runtime_pm);
-
count = 0;
- for_each_engine(engine, i915, id) {
+ num_engines = 0;
+ for_each_uabi_engine(engine, i915) {
IGT_TIMEOUT(end_time);
unsigned long this = 0;
@@ -1551,7 +1749,7 @@ static int igt_vm_isolation(void *arg)
div64_u64_rem(i915_prandom_u64_state(&prng),
vm_total, &offset);
- offset &= -sizeof(u32);
+ offset = round_down(offset, alignof_dword);
offset += I915_GTT_PAGE_SIZE;
err = write_to_scratch(ctx_a, engine,
@@ -1560,7 +1758,7 @@ static int igt_vm_isolation(void *arg)
err = read_from_scratch(ctx_b, engine,
offset, &value);
if (err)
- goto out_rpm;
+ goto out_file;
if (value) {
pr_err("%s: Read %08x from scratch (offset 0x%08x_%08x), after %lu reads!\n",
@@ -1569,42 +1767,24 @@ static int igt_vm_isolation(void *arg)
lower_32_bits(offset),
this);
err = -EINVAL;
- goto out_rpm;
+ goto out_file;
}
this++;
}
count += this;
+ num_engines++;
}
- pr_info("Checked %lu scratch offsets across %d engines\n",
- count, RUNTIME_INFO(i915)->num_engines);
+ pr_info("Checked %lu scratch offsets across %lu engines\n",
+ count, num_engines);
-out_rpm:
- intel_runtime_pm_put(&i915->runtime_pm, wakeref);
-out_unlock:
+out_file:
if (igt_live_test_end(&t))
err = -EIO;
- mutex_unlock(&i915->drm.struct_mutex);
-
- mock_file_free(i915, file);
+ fput(file);
return err;
}
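For reference, the randomised offset selection in the loop above reduces to the sketch below: a dword-aligned offset somewhere in the vm, skipping the first GTT page (alignof_dword is assumed to be 4 here, and vm_total has already had one I915_GTT_PAGE_SIZE subtracted before the loop):

static u64 random_scratch_offset(struct rnd_state *prng, u64 vm_total)
{
	u64 offset;

	/* Uniform offset in [0, vm_total), dword aligned. */
	div64_u64_rem(i915_prandom_u64_state(prng), vm_total, &offset);
	offset = round_down(offset, 4 /* alignof_dword */);

	/* Never touch the first (scratch) GTT page. */
	return offset + I915_GTT_PAGE_SIZE;
}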
-static __maybe_unused const char *
-__engine_name(struct drm_i915_private *i915, intel_engine_mask_t engines)
-{
- struct intel_engine_cs *engine;
- intel_engine_mask_t tmp;
-
- if (engines == ALL_ENGINES)
- return "all";
-
- for_each_engine_masked(engine, i915, engines, tmp)
- return engine->name;
-
- return "none";
-}
-
static bool skip_unused_engines(struct intel_context *ce, void *data)
{
return !ce->state;
@@ -1632,13 +1812,9 @@ static int mock_context_barrier(void *arg)
* a request; useful for retiring old state after loading new.
*/
- mutex_lock(&i915->drm.struct_mutex);
-
ctx = mock_context(i915, "mock");
- if (!ctx) {
- err = -ENOMEM;
- goto unlock;
- }
+ if (!ctx)
+ return -ENOMEM;
counter = 0;
err = context_barrier_task(ctx, 0,
@@ -1711,8 +1887,6 @@ static int mock_context_barrier(void *arg)
out:
mock_context_close(ctx);
-unlock:
- mutex_unlock(&i915->drm.struct_mutex);
return err;
#undef pr_fmt
#define pr_fmt(x) x
@@ -1736,10 +1910,11 @@ int i915_gem_context_mock_selftests(void)
return err;
}
-int i915_gem_context_live_selftests(struct drm_i915_private *dev_priv)
+int i915_gem_context_live_selftests(struct drm_i915_private *i915)
{
static const struct i915_subtest tests[] = {
SUBTEST(live_nop_switch),
+ SUBTEST(live_parallel_switch),
SUBTEST(igt_ctx_exec),
SUBTEST(igt_ctx_readonly),
SUBTEST(igt_ctx_sseu),
@@ -1747,8 +1922,8 @@ int i915_gem_context_live_selftests(struct drm_i915_private *dev_priv)
SUBTEST(igt_vm_isolation),
};
- if (i915_terminally_wedged(dev_priv))
+ if (intel_gt_is_wedged(&i915->gt))
return 0;
- return i915_subtests(tests, dev_priv);
+ return i915_live_subtests(tests, i915);
}
diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_dmabuf.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_dmabuf.c
index e3a64edef918..2a52b92586b9 100644
--- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_dmabuf.c
+++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_dmabuf.c
@@ -20,7 +20,7 @@ static int igt_dmabuf_export(void *arg)
if (IS_ERR(obj))
return PTR_ERR(obj);
- dmabuf = i915_gem_prime_export(&i915->drm, &obj->base, 0);
+ dmabuf = i915_gem_prime_export(&obj->base, 0);
i915_gem_object_put(obj);
if (IS_ERR(dmabuf)) {
pr_err("i915_gem_prime_export failed with err=%d\n",
@@ -44,7 +44,7 @@ static int igt_dmabuf_import_self(void *arg)
if (IS_ERR(obj))
return PTR_ERR(obj);
- dmabuf = i915_gem_prime_export(&i915->drm, &obj->base, 0);
+ dmabuf = i915_gem_prime_export(&obj->base, 0);
if (IS_ERR(dmabuf)) {
pr_err("i915_gem_prime_export failed with err=%d\n",
(int)PTR_ERR(dmabuf));
@@ -219,7 +219,7 @@ static int igt_dmabuf_export_vmap(void *arg)
if (IS_ERR(obj))
return PTR_ERR(obj);
- dmabuf = i915_gem_prime_export(&i915->drm, &obj->base, 0);
+ dmabuf = i915_gem_prime_export(&obj->base, 0);
if (IS_ERR(dmabuf)) {
pr_err("i915_gem_prime_export failed with err=%d\n",
(int)PTR_ERR(dmabuf));
@@ -254,106 +254,6 @@ err_obj:
return err;
}
-static int igt_dmabuf_export_kmap(void *arg)
-{
- struct drm_i915_private *i915 = arg;
- struct drm_i915_gem_object *obj;
- struct dma_buf *dmabuf;
- void *ptr;
- int err;
-
- obj = i915_gem_object_create_shmem(i915, 2 * PAGE_SIZE);
- if (IS_ERR(obj))
- return PTR_ERR(obj);
-
- dmabuf = i915_gem_prime_export(&i915->drm, &obj->base, 0);
- i915_gem_object_put(obj);
- if (IS_ERR(dmabuf)) {
- err = PTR_ERR(dmabuf);
- pr_err("i915_gem_prime_export failed with err=%d\n", err);
- return err;
- }
-
- ptr = dma_buf_kmap(dmabuf, 0);
- if (!ptr) {
- pr_err("dma_buf_kmap failed\n");
- err = -ENOMEM;
- goto err;
- }
-
- if (memchr_inv(ptr, 0, PAGE_SIZE)) {
- dma_buf_kunmap(dmabuf, 0, ptr);
- pr_err("Exported page[0] not initialiased to zero!\n");
- err = -EINVAL;
- goto err;
- }
-
- memset(ptr, 0xc5, PAGE_SIZE);
- dma_buf_kunmap(dmabuf, 0, ptr);
-
- ptr = i915_gem_object_pin_map(obj, I915_MAP_WB);
- if (IS_ERR(ptr)) {
- err = PTR_ERR(ptr);
- pr_err("i915_gem_object_pin_map failed with err=%d\n", err);
- goto err;
- }
- memset(ptr + PAGE_SIZE, 0xaa, PAGE_SIZE);
- i915_gem_object_flush_map(obj);
- i915_gem_object_unpin_map(obj);
-
- ptr = dma_buf_kmap(dmabuf, 1);
- if (!ptr) {
- pr_err("dma_buf_kmap failed\n");
- err = -ENOMEM;
- goto err;
- }
-
- if (memchr_inv(ptr, 0xaa, PAGE_SIZE)) {
- dma_buf_kunmap(dmabuf, 1, ptr);
- pr_err("Exported page[1] not set to 0xaa!\n");
- err = -EINVAL;
- goto err;
- }
-
- memset(ptr, 0xc5, PAGE_SIZE);
- dma_buf_kunmap(dmabuf, 1, ptr);
-
- ptr = dma_buf_kmap(dmabuf, 0);
- if (!ptr) {
- pr_err("dma_buf_kmap failed\n");
- err = -ENOMEM;
- goto err;
- }
- if (memchr_inv(ptr, 0xc5, PAGE_SIZE)) {
- dma_buf_kunmap(dmabuf, 0, ptr);
- pr_err("Exported page[0] did not retain 0xc5!\n");
- err = -EINVAL;
- goto err;
- }
- dma_buf_kunmap(dmabuf, 0, ptr);
-
- ptr = dma_buf_kmap(dmabuf, 2);
- if (ptr) {
- pr_err("Erroneously kmapped beyond the end of the object!\n");
- dma_buf_kunmap(dmabuf, 2, ptr);
- err = -EINVAL;
- goto err;
- }
-
- ptr = dma_buf_kmap(dmabuf, -1);
- if (ptr) {
- pr_err("Erroneously kmapped before the start of the object!\n");
- dma_buf_kunmap(dmabuf, -1, ptr);
- err = -EINVAL;
- goto err;
- }
-
- err = 0;
-err:
- dma_buf_put(dmabuf);
- return err;
-}
-
int i915_gem_dmabuf_mock_selftests(void)
{
static const struct i915_subtest tests[] = {
@@ -362,7 +262,6 @@ int i915_gem_dmabuf_mock_selftests(void)
SUBTEST(igt_dmabuf_import),
SUBTEST(igt_dmabuf_import_ownership),
SUBTEST(igt_dmabuf_export_vmap),
- SUBTEST(igt_dmabuf_export_kmap),
};
struct drm_i915_private *i915;
int err;
diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c
index 5c81f4b4813a..ef7c74cff28a 100644
--- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c
+++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c
@@ -6,10 +6,15 @@
#include <linux/prime_numbers.h>
+#include "gt/intel_engine_pm.h"
+#include "gt/intel_gt.h"
#include "gt/intel_gt_pm.h"
+#include "gem/i915_gem_region.h"
#include "huge_gem_object.h"
#include "i915_selftest.h"
+#include "selftests/i915_random.h"
#include "selftests/igt_flush_test.h"
+#include "selftests/igt_mmap.h"
struct tile {
unsigned int width;
@@ -75,18 +80,103 @@ static u64 tiled_offset(const struct tile *tile, u64 v)
static int check_partial_mapping(struct drm_i915_gem_object *obj,
const struct tile *tile,
- unsigned long end_time)
+ struct rnd_state *prng)
{
- const unsigned int nreal = obj->scratch / PAGE_SIZE;
const unsigned long npages = obj->base.size / PAGE_SIZE;
+ struct i915_ggtt_view view;
struct i915_vma *vma;
unsigned long page;
+ u32 __iomem *io;
+ struct page *p;
+ unsigned int n;
+ u64 offset;
+ u32 *cpu;
int err;
- if (igt_timeout(end_time,
- "%s: timed out before tiling=%d stride=%d\n",
- __func__, tile->tiling, tile->stride))
- return -EINTR;
+ err = i915_gem_object_set_tiling(obj, tile->tiling, tile->stride);
+ if (err) {
+ pr_err("Failed to set tiling mode=%u, stride=%u, err=%d\n",
+ tile->tiling, tile->stride, err);
+ return err;
+ }
+
+ GEM_BUG_ON(i915_gem_object_get_tiling(obj) != tile->tiling);
+ GEM_BUG_ON(i915_gem_object_get_stride(obj) != tile->stride);
+
+ i915_gem_object_lock(obj);
+ err = i915_gem_object_set_to_gtt_domain(obj, true);
+ i915_gem_object_unlock(obj);
+ if (err) {
+ pr_err("Failed to flush to GTT write domain; err=%d\n", err);
+ return err;
+ }
+
+ page = i915_prandom_u32_max_state(npages, prng);
+ view = compute_partial_view(obj, page, MIN_CHUNK_PAGES);
+
+ vma = i915_gem_object_ggtt_pin(obj, &view, 0, 0, PIN_MAPPABLE);
+ if (IS_ERR(vma)) {
+ pr_err("Failed to pin partial view: offset=%lu; err=%d\n",
+ page, (int)PTR_ERR(vma));
+ return PTR_ERR(vma);
+ }
+
+ n = page - view.partial.offset;
+ GEM_BUG_ON(n >= view.partial.size);
+
+ io = i915_vma_pin_iomap(vma);
+ i915_vma_unpin(vma);
+ if (IS_ERR(io)) {
+ pr_err("Failed to iomap partial view: offset=%lu; err=%d\n",
+ page, (int)PTR_ERR(io));
+ err = PTR_ERR(io);
+ goto out;
+ }
+
+ iowrite32(page, io + n * PAGE_SIZE / sizeof(*io));
+ i915_vma_unpin_iomap(vma);
+
+ offset = tiled_offset(tile, page << PAGE_SHIFT);
+ if (offset >= obj->base.size)
+ goto out;
+
+ intel_gt_flush_ggtt_writes(&to_i915(obj->base.dev)->gt);
+
+ p = i915_gem_object_get_page(obj, offset >> PAGE_SHIFT);
+ cpu = kmap(p) + offset_in_page(offset);
+ drm_clflush_virt_range(cpu, sizeof(*cpu));
+ if (*cpu != (u32)page) {
+ pr_err("Partial view for %lu [%u] (offset=%llu, size=%u [%llu, row size %u], fence=%d, tiling=%d, stride=%d) misalignment, expected write to page (%llu + %u [0x%llx]) of 0x%x, found 0x%x\n",
+ page, n,
+ view.partial.offset,
+ view.partial.size,
+ vma->size >> PAGE_SHIFT,
+ tile->tiling ? tile_row_pages(obj) : 0,
+ vma->fence ? vma->fence->id : -1, tile->tiling, tile->stride,
+ offset >> PAGE_SHIFT,
+ (unsigned int)offset_in_page(offset),
+ offset,
+ (u32)page, *cpu);
+ err = -EINVAL;
+ }
+ *cpu = 0;
+ drm_clflush_virt_range(cpu, sizeof(*cpu));
+ kunmap(p);
+
+out:
+ __i915_vma_put(vma);
+ return err;
+}
+
+static int check_partial_mappings(struct drm_i915_gem_object *obj,
+ const struct tile *tile,
+ unsigned long end_time)
+{
+ const unsigned int nreal = obj->scratch / PAGE_SIZE;
+ const unsigned long npages = obj->base.size / PAGE_SIZE;
+ struct i915_vma *vma;
+ unsigned long page;
+ int err;
err = i915_gem_object_set_tiling(obj, tile->tiling, tile->stride);
if (err) {
@@ -143,7 +233,7 @@ static int check_partial_mapping(struct drm_i915_gem_object *obj,
if (offset >= obj->base.size)
continue;
- i915_gem_flush_ggtt_writes(to_i915(obj->base.dev));
+ intel_gt_flush_ggtt_writes(&to_i915(obj->base.dev)->gt);
p = i915_gem_object_get_page(obj, offset >> PAGE_SHIFT);
cpu = kmap(p) + offset_in_page(offset);
@@ -168,12 +258,43 @@ static int check_partial_mapping(struct drm_i915_gem_object *obj,
if (err)
return err;
- i915_vma_destroy(vma);
+ __i915_vma_put(vma);
+
+ if (igt_timeout(end_time,
+ "%s: timed out after tiling=%d stride=%d\n",
+ __func__, tile->tiling, tile->stride))
+ return -EINTR;
}
return 0;
}
+static unsigned int
+setup_tile_size(struct tile *tile, struct drm_i915_private *i915)
+{
+ if (INTEL_GEN(i915) <= 2) {
+ tile->height = 16;
+ tile->width = 128;
+ tile->size = 11;
+ } else if (tile->tiling == I915_TILING_Y &&
+ HAS_128_BYTE_Y_TILING(i915)) {
+ tile->height = 32;
+ tile->width = 128;
+ tile->size = 12;
+ } else {
+ tile->height = 8;
+ tile->width = 512;
+ tile->size = 12;
+ }
+
+ if (INTEL_GEN(i915) < 4)
+ return 8192 / tile->width;
+ else if (INTEL_GEN(i915) < 7)
+ return 128 * I965_FENCE_MAX_PITCH_VAL / tile->width;
+ else
+ return 128 * GEN7_FENCE_MAX_PITCH_VAL / tile->width;
+}
+
static int igt_partial_tiling(void *arg)
{
const unsigned int nreal = 1 << 12; /* largest tile row x2 */
@@ -183,6 +304,9 @@ static int igt_partial_tiling(void *arg)
int tiling;
int err;
+ if (!i915_ggtt_has_aperture(&i915->ggtt))
+ return 0;
+
/* We want to check the page mapping and fencing of a large object
* mmapped through the GTT. The object we create is larger than can
* possibly be mmaped as a whole, and so we must use partial GGTT vma.
@@ -204,7 +328,6 @@ static int igt_partial_tiling(void *arg)
goto out;
}
- mutex_lock(&i915->drm.struct_mutex);
wakeref = intel_runtime_pm_get(&i915->runtime_pm);
if (1) {
@@ -218,7 +341,7 @@ static int igt_partial_tiling(void *arg)
tile.swizzle = I915_BIT_6_SWIZZLE_NONE;
tile.tiling = I915_TILING_NONE;
- err = check_partial_mapping(obj, &tile, end);
+ err = check_partial_mappings(obj, &tile, end);
if (err && err != -EINTR)
goto out_unlock;
}
@@ -240,10 +363,10 @@ static int igt_partial_tiling(void *arg)
tile.tiling = tiling;
switch (tiling) {
case I915_TILING_X:
- tile.swizzle = i915->mm.bit_6_swizzle_x;
+ tile.swizzle = i915->ggtt.bit_6_swizzle_x;
break;
case I915_TILING_Y:
- tile.swizzle = i915->mm.bit_6_swizzle_y;
+ tile.swizzle = i915->ggtt.bit_6_swizzle_y;
break;
}
@@ -252,31 +375,11 @@ static int igt_partial_tiling(void *arg)
tile.swizzle == I915_BIT_6_SWIZZLE_9_10_17)
continue;
- if (INTEL_GEN(i915) <= 2) {
- tile.height = 16;
- tile.width = 128;
- tile.size = 11;
- } else if (tile.tiling == I915_TILING_Y &&
- HAS_128_BYTE_Y_TILING(i915)) {
- tile.height = 32;
- tile.width = 128;
- tile.size = 12;
- } else {
- tile.height = 8;
- tile.width = 512;
- tile.size = 12;
- }
-
- if (INTEL_GEN(i915) < 4)
- max_pitch = 8192 / tile.width;
- else if (INTEL_GEN(i915) < 7)
- max_pitch = 128 * I965_FENCE_MAX_PITCH_VAL / tile.width;
- else
- max_pitch = 128 * GEN7_FENCE_MAX_PITCH_VAL / tile.width;
+ max_pitch = setup_tile_size(&tile, i915);
for (pitch = max_pitch; pitch; pitch >>= 1) {
tile.stride = tile.width * pitch;
- err = check_partial_mapping(obj, &tile, end);
+ err = check_partial_mappings(obj, &tile, end);
if (err == -EINTR)
goto next_tiling;
if (err)
@@ -284,7 +387,7 @@ static int igt_partial_tiling(void *arg)
if (pitch > 2 && INTEL_GEN(i915) >= 4) {
tile.stride = tile.width * (pitch - 1);
- err = check_partial_mapping(obj, &tile, end);
+ err = check_partial_mappings(obj, &tile, end);
if (err == -EINTR)
goto next_tiling;
if (err)
@@ -293,7 +396,7 @@ static int igt_partial_tiling(void *arg)
if (pitch < max_pitch && INTEL_GEN(i915) >= 4) {
tile.stride = tile.width * (pitch + 1);
- err = check_partial_mapping(obj, &tile, end);
+ err = check_partial_mappings(obj, &tile, end);
if (err == -EINTR)
goto next_tiling;
if (err)
@@ -304,7 +407,7 @@ static int igt_partial_tiling(void *arg)
if (INTEL_GEN(i915) >= 4) {
for_each_prime_number(pitch, max_pitch) {
tile.stride = tile.width * pitch;
- err = check_partial_mapping(obj, &tile, end);
+ err = check_partial_mappings(obj, &tile, end);
if (err == -EINTR)
goto next_tiling;
if (err)
@@ -317,82 +420,188 @@ next_tiling: ;
out_unlock:
intel_runtime_pm_put(&i915->runtime_pm, wakeref);
- mutex_unlock(&i915->drm.struct_mutex);
i915_gem_object_unpin_pages(obj);
out:
i915_gem_object_put(obj);
return err;
}
-static int make_obj_busy(struct drm_i915_gem_object *obj)
+static int igt_smoke_tiling(void *arg)
{
- struct drm_i915_private *i915 = to_i915(obj->base.dev);
- struct i915_request *rq;
- struct i915_vma *vma;
+ const unsigned int nreal = 1 << 12; /* largest tile row x2 */
+ struct drm_i915_private *i915 = arg;
+ struct drm_i915_gem_object *obj;
+ intel_wakeref_t wakeref;
+ I915_RND_STATE(prng);
+ unsigned long count;
+ IGT_TIMEOUT(end);
int err;
- vma = i915_vma_instance(obj, &i915->ggtt.vm, NULL);
- if (IS_ERR(vma))
- return PTR_ERR(vma);
+ if (!i915_ggtt_has_aperture(&i915->ggtt))
+ return 0;
- err = i915_vma_pin(vma, 0, 0, PIN_USER);
- if (err)
- return err;
+ /*
+	 * igt_partial_tiling() does an exhaustive check of partial tiling
+	 * chunking, but will undoubtedly run out of time. Here, we do a
+ * randomised search and hope over many runs of 1s with different
+ * seeds we will do a thorough check.
+ *
+ * Remember to look at the st_seed if we see a flip-flop in BAT!
+ */
- rq = i915_request_create(i915->engine[RCS0]->kernel_context);
- if (IS_ERR(rq)) {
- i915_vma_unpin(vma);
- return PTR_ERR(rq);
+ if (i915->quirks & QUIRK_PIN_SWIZZLED_PAGES)
+ return 0;
+
+ obj = huge_gem_object(i915,
+ nreal << PAGE_SHIFT,
+ (1 + next_prime_number(i915->ggtt.vm.total >> PAGE_SHIFT)) << PAGE_SHIFT);
+ if (IS_ERR(obj))
+ return PTR_ERR(obj);
+
+ err = i915_gem_object_pin_pages(obj);
+ if (err) {
+ pr_err("Failed to allocate %u pages (%lu total), err=%d\n",
+ nreal, obj->base.size / PAGE_SIZE, err);
+ goto out;
}
- i915_vma_lock(vma);
- err = i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE);
- i915_vma_unlock(vma);
+ wakeref = intel_runtime_pm_get(&i915->runtime_pm);
- i915_request_add(rq);
+ count = 0;
+ do {
+ struct tile tile;
- i915_vma_unpin(vma);
- i915_gem_object_put(obj); /* leave it only alive via its active ref */
+ tile.tiling =
+ i915_prandom_u32_max_state(I915_TILING_Y + 1, &prng);
+ switch (tile.tiling) {
+ case I915_TILING_NONE:
+ tile.height = 1;
+ tile.width = 1;
+ tile.size = 0;
+ tile.stride = 0;
+ tile.swizzle = I915_BIT_6_SWIZZLE_NONE;
+ break;
+
+ case I915_TILING_X:
+ tile.swizzle = i915->ggtt.bit_6_swizzle_x;
+ break;
+ case I915_TILING_Y:
+ tile.swizzle = i915->ggtt.bit_6_swizzle_y;
+ break;
+ }
+
+ if (tile.swizzle == I915_BIT_6_SWIZZLE_9_17 ||
+ tile.swizzle == I915_BIT_6_SWIZZLE_9_10_17)
+ continue;
+
+ if (tile.tiling != I915_TILING_NONE) {
+ unsigned int max_pitch = setup_tile_size(&tile, i915);
+
+ tile.stride =
+ i915_prandom_u32_max_state(max_pitch, &prng);
+ tile.stride = (1 + tile.stride) * tile.width;
+ if (INTEL_GEN(i915) < 4)
+ tile.stride = rounddown_pow_of_two(tile.stride);
+ }
+
+ err = check_partial_mapping(obj, &tile, &prng);
+ if (err)
+ break;
+
+ count++;
+ } while (!__igt_timeout(end, NULL));
+ pr_info("%s: Completed %lu trials\n", __func__, count);
+
+ intel_runtime_pm_put(&i915->runtime_pm, wakeref);
+ i915_gem_object_unpin_pages(obj);
+out:
+ i915_gem_object_put(obj);
return err;
}
+static int make_obj_busy(struct drm_i915_gem_object *obj)
+{
+ struct drm_i915_private *i915 = to_i915(obj->base.dev);
+ struct intel_engine_cs *engine;
+
+ for_each_uabi_engine(engine, i915) {
+ struct i915_request *rq;
+ struct i915_vma *vma;
+ int err;
+
+ vma = i915_vma_instance(obj, &engine->gt->ggtt->vm, NULL);
+ if (IS_ERR(vma))
+ return PTR_ERR(vma);
+
+ err = i915_vma_pin(vma, 0, 0, PIN_USER);
+ if (err)
+ return err;
+
+ rq = intel_engine_create_kernel_request(engine);
+ if (IS_ERR(rq)) {
+ i915_vma_unpin(vma);
+ return PTR_ERR(rq);
+ }
+
+ i915_vma_lock(vma);
+ err = i915_request_await_object(rq, vma->obj, true);
+ if (err == 0)
+ err = i915_vma_move_to_active(vma, rq,
+ EXEC_OBJECT_WRITE);
+ i915_vma_unlock(vma);
+
+ i915_request_add(rq);
+ i915_vma_unpin(vma);
+ if (err)
+ return err;
+ }
+
+ i915_gem_object_put(obj); /* leave it only alive via its active ref */
+ return 0;
+}
+
static bool assert_mmap_offset(struct drm_i915_private *i915,
unsigned long size,
int expected)
{
struct drm_i915_gem_object *obj;
- int err;
+ struct i915_mmap_offset *mmo;
obj = i915_gem_object_create_internal(i915, size);
if (IS_ERR(obj))
return PTR_ERR(obj);
- err = create_mmap_offset(obj);
+ mmo = mmap_offset_attach(obj, I915_MMAP_OFFSET_GTT, NULL);
i915_gem_object_put(obj);
- return err == expected;
+ return PTR_ERR_OR_ZERO(mmo) == expected;
}
static void disable_retire_worker(struct drm_i915_private *i915)
{
- i915_gem_shrinker_unregister(i915);
-
- intel_gt_pm_get(i915);
-
- cancel_delayed_work_sync(&i915->gem.retire_work);
- flush_work(&i915->gem.idle_work);
+ i915_gem_driver_unregister__shrinker(i915);
+ intel_gt_pm_get(&i915->gt);
+ cancel_delayed_work_sync(&i915->gt.requests.retire_work);
}
static void restore_retire_worker(struct drm_i915_private *i915)
{
- intel_gt_pm_put(i915);
+ igt_flush_test(i915);
+ intel_gt_pm_put(&i915->gt);
+ i915_gem_driver_register__shrinker(i915);
+}
- mutex_lock(&i915->drm.struct_mutex);
- igt_flush_test(i915, I915_WAIT_LOCKED);
- mutex_unlock(&i915->drm.struct_mutex);
+static void mmap_offset_lock(struct drm_i915_private *i915)
+ __acquires(&i915->drm.vma_offset_manager->vm_lock)
+{
+ write_lock(&i915->drm.vma_offset_manager->vm_lock);
+}
- i915_gem_shrinker_register(i915);
+static void mmap_offset_unlock(struct drm_i915_private *i915)
+ __releases(&i915->drm.vma_offset_manager->vm_lock)
+{
+ write_unlock(&i915->drm.vma_offset_manager->vm_lock);
}
static int igt_mmap_offset_exhaustion(void *arg)
@@ -400,26 +609,50 @@ static int igt_mmap_offset_exhaustion(void *arg)
struct drm_i915_private *i915 = arg;
struct drm_mm *mm = &i915->drm.vma_offset_manager->vm_addr_space_mm;
struct drm_i915_gem_object *obj;
- struct drm_mm_node resv, *hole;
- u64 hole_start, hole_end;
- int loop, err;
+ struct drm_mm_node *hole, *next;
+ struct i915_mmap_offset *mmo;
+ int loop, err = 0;
/* Disable background reaper */
disable_retire_worker(i915);
GEM_BUG_ON(!i915->gt.awake);
+ intel_gt_retire_requests(&i915->gt);
+ i915_gem_drain_freed_objects(i915);
/* Trim the device mmap space to only a page */
- memset(&resv, 0, sizeof(resv));
- drm_mm_for_each_hole(hole, mm, hole_start, hole_end) {
- resv.start = hole_start;
- resv.size = hole_end - hole_start - 1; /* PAGE_SIZE units */
- err = drm_mm_reserve_node(mm, &resv);
+ mmap_offset_lock(i915);
+ loop = 1; /* PAGE_SIZE units */
+ list_for_each_entry_safe(hole, next, &mm->hole_stack, hole_stack) {
+ struct drm_mm_node *resv;
+
+ resv = kzalloc(sizeof(*resv), GFP_NOWAIT);
+ if (!resv) {
+ err = -ENOMEM;
+ goto out_park;
+ }
+
+ resv->start = drm_mm_hole_node_start(hole) + loop;
+ resv->size = hole->hole_size - loop;
+ resv->color = -1ul;
+ loop = 0;
+
+ if (!resv->size) {
+ kfree(resv);
+ continue;
+ }
+
+ pr_debug("Reserving hole [%llx + %llx]\n",
+ resv->start, resv->size);
+
+ err = drm_mm_reserve_node(mm, resv);
if (err) {
pr_err("Failed to trim VMA manager, err=%d\n", err);
+ kfree(resv);
goto out_park;
}
- break;
}
+ GEM_BUG_ON(!list_is_singular(&mm->hole_stack));
+ mmap_offset_unlock(i915);
/* Just fits! */
if (!assert_mmap_offset(i915, PAGE_SIZE, 0)) {
@@ -442,9 +675,10 @@ static int igt_mmap_offset_exhaustion(void *arg)
goto out;
}
- err = create_mmap_offset(obj);
- if (err) {
+ mmo = mmap_offset_attach(obj, I915_MMAP_OFFSET_GTT, NULL);
+ if (IS_ERR(mmo)) {
pr_err("Unable to insert object into reclaimed hole\n");
+ err = PTR_ERR(mmo);
goto err_obj;
}
@@ -458,7 +692,7 @@ static int igt_mmap_offset_exhaustion(void *arg)
/* Now fill with busy dead objects that we expect to reap */
for (loop = 0; loop < 3; loop++) {
- if (i915_terminally_wedged(i915))
+ if (intel_gt_is_wedged(&i915->gt))
break;
obj = i915_gem_object_create_internal(i915, PAGE_SIZE);
@@ -467,27 +701,24 @@ static int igt_mmap_offset_exhaustion(void *arg)
goto out;
}
- mutex_lock(&i915->drm.struct_mutex);
err = make_obj_busy(obj);
- mutex_unlock(&i915->drm.struct_mutex);
if (err) {
pr_err("[loop %d] Failed to busy the object\n", loop);
goto err_obj;
}
-
- /* NB we rely on the _active_ reference to access obj now */
- GEM_BUG_ON(!i915_gem_object_is_active(obj));
- err = create_mmap_offset(obj);
- if (err) {
- pr_err("[loop %d] create_mmap_offset failed with err=%d\n",
- loop, err);
- goto out;
- }
}
out:
- drm_mm_remove_node(&resv);
+ mmap_offset_lock(i915);
out_park:
+ drm_mm_for_each_node_safe(hole, next, mm) {
+ if (hole->color != -1ul)
+ continue;
+
+ drm_mm_remove_node(hole);
+ kfree(hole);
+ }
+ mmap_offset_unlock(i915);
restore_retire_worker(i915);
return err;
err_obj:
@@ -495,11 +726,515 @@ err_obj:
goto out;
}
+static int gtt_set(struct drm_i915_gem_object *obj)
+{
+ struct i915_vma *vma;
+ void __iomem *map;
+ int err = 0;
+
+ vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0, PIN_MAPPABLE);
+ if (IS_ERR(vma))
+ return PTR_ERR(vma);
+
+ intel_gt_pm_get(vma->vm->gt);
+ map = i915_vma_pin_iomap(vma);
+ i915_vma_unpin(vma);
+ if (IS_ERR(map)) {
+ err = PTR_ERR(map);
+ goto out;
+ }
+
+ memset_io(map, POISON_INUSE, obj->base.size);
+ i915_vma_unpin_iomap(vma);
+
+out:
+ intel_gt_pm_put(vma->vm->gt);
+ return err;
+}
+
+static int gtt_check(struct drm_i915_gem_object *obj)
+{
+ struct i915_vma *vma;
+ void __iomem *map;
+ int err = 0;
+
+ vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0, PIN_MAPPABLE);
+ if (IS_ERR(vma))
+ return PTR_ERR(vma);
+
+ intel_gt_pm_get(vma->vm->gt);
+ map = i915_vma_pin_iomap(vma);
+ i915_vma_unpin(vma);
+ if (IS_ERR(map)) {
+ err = PTR_ERR(map);
+ goto out;
+ }
+
+ if (memchr_inv((void __force *)map, POISON_FREE, obj->base.size)) {
+ pr_err("%s: Write via mmap did not land in backing store (GTT)\n",
+ obj->mm.region->name);
+ err = -EINVAL;
+ }
+ i915_vma_unpin_iomap(vma);
+
+out:
+ intel_gt_pm_put(vma->vm->gt);
+ return err;
+}
+
+static int wc_set(struct drm_i915_gem_object *obj)
+{
+ void *vaddr;
+
+ vaddr = i915_gem_object_pin_map(obj, I915_MAP_WC);
+ if (IS_ERR(vaddr))
+ return PTR_ERR(vaddr);
+
+ memset(vaddr, POISON_INUSE, obj->base.size);
+ i915_gem_object_flush_map(obj);
+ i915_gem_object_unpin_map(obj);
+
+ return 0;
+}
+
+static int wc_check(struct drm_i915_gem_object *obj)
+{
+ void *vaddr;
+ int err = 0;
+
+ vaddr = i915_gem_object_pin_map(obj, I915_MAP_WC);
+ if (IS_ERR(vaddr))
+ return PTR_ERR(vaddr);
+
+ if (memchr_inv(vaddr, POISON_FREE, obj->base.size)) {
+ pr_err("%s: Write via mmap did not land in backing store (WC)\n",
+ obj->mm.region->name);
+ err = -EINVAL;
+ }
+ i915_gem_object_unpin_map(obj);
+
+ return err;
+}
+
+static bool can_mmap(struct drm_i915_gem_object *obj, enum i915_mmap_type type)
+{
+ if (type == I915_MMAP_TYPE_GTT &&
+ !i915_ggtt_has_aperture(&to_i915(obj->base.dev)->ggtt))
+ return false;
+
+ if (type != I915_MMAP_TYPE_GTT &&
+ !i915_gem_object_type_has(obj,
+ I915_GEM_OBJECT_HAS_STRUCT_PAGE |
+ I915_GEM_OBJECT_HAS_IOMEM))
+ return false;
+
+ return true;
+}
+
+#define expand32(x) (((x) << 0) | ((x) << 8) | ((x) << 16) | ((x) << 24))
+static int __igt_mmap(struct drm_i915_private *i915,
+ struct drm_i915_gem_object *obj,
+ enum i915_mmap_type type)
+{
+ struct i915_mmap_offset *mmo;
+ struct vm_area_struct *area;
+ unsigned long addr;
+ int err, i;
+
+ if (!can_mmap(obj, type))
+ return 0;
+
+ err = wc_set(obj);
+ if (err == -ENXIO)
+ err = gtt_set(obj);
+ if (err)
+ return err;
+
+ mmo = mmap_offset_attach(obj, type, NULL);
+ if (IS_ERR(mmo))
+ return PTR_ERR(mmo);
+
+ addr = igt_mmap_node(i915, &mmo->vma_node, 0, PROT_WRITE, MAP_SHARED);
+ if (IS_ERR_VALUE(addr))
+ return addr;
+
+ pr_debug("igt_mmap(%s, %d) @ %lx\n", obj->mm.region->name, type, addr);
+
+ area = find_vma(current->mm, addr);
+ if (!area) {
+ pr_err("%s: Did not create a vm_area_struct for the mmap\n",
+ obj->mm.region->name);
+ err = -EINVAL;
+ goto out_unmap;
+ }
+
+ if (area->vm_private_data != mmo) {
+ pr_err("%s: vm_area_struct did not point back to our mmap_offset object!\n",
+ obj->mm.region->name);
+ err = -EINVAL;
+ goto out_unmap;
+ }
+
+ for (i = 0; i < obj->base.size / sizeof(u32); i++) {
+ u32 __user *ux = u64_to_user_ptr((u64)(addr + i * sizeof(*ux)));
+ u32 x;
+
+ if (get_user(x, ux)) {
+ pr_err("%s: Unable to read from mmap, offset:%zd\n",
+ obj->mm.region->name, i * sizeof(x));
+ err = -EFAULT;
+ goto out_unmap;
+ }
+
+ if (x != expand32(POISON_INUSE)) {
+ pr_err("%s: Read incorrect value from mmap, offset:%zd, found:%x, expected:%x\n",
+ obj->mm.region->name,
+ i * sizeof(x), x, expand32(POISON_INUSE));
+ err = -EINVAL;
+ goto out_unmap;
+ }
+
+ x = expand32(POISON_FREE);
+ if (put_user(x, ux)) {
+ pr_err("%s: Unable to write to mmap, offset:%zd\n",
+ obj->mm.region->name, i * sizeof(x));
+ err = -EFAULT;
+ goto out_unmap;
+ }
+ }
+
+ if (type == I915_MMAP_TYPE_GTT)
+ intel_gt_flush_ggtt_writes(&i915->gt);
+
+ err = wc_check(obj);
+ if (err == -ENXIO)
+ err = gtt_check(obj);
+out_unmap:
+ vm_munmap(addr, obj->base.size);
+ return err;
+}
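__igt_mmap() fills the object with POISON_INUSE through a kernel mapping, rewrites every dword with expand32(POISON_FREE) through the user mmap, and then verifies the backing store via wc_check()/gtt_check(). With the standard poison bytes from include/linux/poison.h the replication works out as:

/*
 * expand32(x) replicates one byte into all four lanes of a u32:
 *
 *   POISON_INUSE = 0x5a  ->  expand32(POISON_INUSE) = 0x5a5a5a5a
 *   POISON_FREE  = 0x6b  ->  expand32(POISON_FREE)  = 0x6b6b6b6b
 *
 * A passing run therefore reads 0x5a5a5a5a back through the mmap and
 * leaves 0x6b6b6b6b in the pages for wc_check()/gtt_check() to find.
 */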
+
+static int igt_mmap(void *arg)
+{
+ struct drm_i915_private *i915 = arg;
+ struct intel_memory_region *mr;
+ enum intel_region_id id;
+
+ for_each_memory_region(mr, i915, id) {
+ unsigned long sizes[] = {
+ PAGE_SIZE,
+ mr->min_page_size,
+ SZ_4M,
+ };
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(sizes); i++) {
+ struct drm_i915_gem_object *obj;
+ int err;
+
+ obj = i915_gem_object_create_region(mr, sizes[i], 0);
+ if (obj == ERR_PTR(-ENODEV))
+ continue;
+
+ if (IS_ERR(obj))
+ return PTR_ERR(obj);
+
+ err = __igt_mmap(i915, obj, I915_MMAP_TYPE_GTT);
+ if (err == 0)
+ err = __igt_mmap(i915, obj, I915_MMAP_TYPE_WC);
+
+ i915_gem_object_put(obj);
+ if (err)
+ return err;
+ }
+ }
+
+ return 0;
+}
+
+static int __igt_mmap_gpu(struct drm_i915_private *i915,
+ struct drm_i915_gem_object *obj,
+ enum i915_mmap_type type)
+{
+ struct intel_engine_cs *engine;
+ struct i915_mmap_offset *mmo;
+ unsigned long addr;
+ u32 __user *ux;
+ u32 bbe;
+ int err;
+
+ /*
+ * Verify that the mmap access into the backing store aligns with
+ * that of the GPU, i.e. that mmap is indeed writing into the same
+ * page as being read by the GPU.
+ */
+
+ if (!can_mmap(obj, type))
+ return 0;
+
+ err = wc_set(obj);
+ if (err == -ENXIO)
+ err = gtt_set(obj);
+ if (err)
+ return err;
+
+ mmo = mmap_offset_attach(obj, type, NULL);
+ if (IS_ERR(mmo))
+ return PTR_ERR(mmo);
+
+ addr = igt_mmap_node(i915, &mmo->vma_node, 0, PROT_WRITE, MAP_SHARED);
+ if (IS_ERR_VALUE(addr))
+ return addr;
+
+ ux = u64_to_user_ptr((u64)addr);
+ bbe = MI_BATCH_BUFFER_END;
+ if (put_user(bbe, ux)) {
+ pr_err("%s: Unable to write to mmap\n", obj->mm.region->name);
+ err = -EFAULT;
+ goto out_unmap;
+ }
+
+ if (type == I915_MMAP_TYPE_GTT)
+ intel_gt_flush_ggtt_writes(&i915->gt);
+
+ for_each_uabi_engine(engine, i915) {
+ struct i915_request *rq;
+ struct i915_vma *vma;
+
+ vma = i915_vma_instance(obj, engine->kernel_context->vm, NULL);
+ if (IS_ERR(vma)) {
+ err = PTR_ERR(vma);
+ goto out_unmap;
+ }
+
+ err = i915_vma_pin(vma, 0, 0, PIN_USER);
+ if (err)
+ goto out_unmap;
+
+ rq = i915_request_create(engine->kernel_context);
+ if (IS_ERR(rq)) {
+ err = PTR_ERR(rq);
+ goto out_unpin;
+ }
+
+ i915_vma_lock(vma);
+ err = i915_request_await_object(rq, vma->obj, false);
+ if (err == 0)
+ err = i915_vma_move_to_active(vma, rq, 0);
+ i915_vma_unlock(vma);
+
+ err = engine->emit_bb_start(rq, vma->node.start, 0, 0);
+ i915_request_get(rq);
+ i915_request_add(rq);
+
+ if (i915_request_wait(rq, 0, HZ / 5) < 0) {
+ struct drm_printer p =
+ drm_info_printer(engine->i915->drm.dev);
+
+ pr_err("%s(%s, %s): Failed to execute batch\n",
+ __func__, engine->name, obj->mm.region->name);
+ intel_engine_dump(engine, &p,
+ "%s\n", engine->name);
+
+ intel_gt_set_wedged(engine->gt);
+ err = -EIO;
+ }
+ i915_request_put(rq);
+
+out_unpin:
+ i915_vma_unpin(vma);
+ if (err)
+ goto out_unmap;
+ }
+
+out_unmap:
+ vm_munmap(addr, obj->base.size);
+ return err;
+}
+
+static int igt_mmap_gpu(void *arg)
+{
+ struct drm_i915_private *i915 = arg;
+ struct intel_memory_region *mr;
+ enum intel_region_id id;
+
+ for_each_memory_region(mr, i915, id) {
+ struct drm_i915_gem_object *obj;
+ int err;
+
+ obj = i915_gem_object_create_region(mr, PAGE_SIZE, 0);
+ if (obj == ERR_PTR(-ENODEV))
+ continue;
+
+ if (IS_ERR(obj))
+ return PTR_ERR(obj);
+
+ err = __igt_mmap_gpu(i915, obj, I915_MMAP_TYPE_GTT);
+ if (err == 0)
+ err = __igt_mmap_gpu(i915, obj, I915_MMAP_TYPE_WC);
+
+ i915_gem_object_put(obj);
+ if (err)
+ return err;
+ }
+
+ return 0;
+}
+
+static int check_present_pte(pte_t *pte, unsigned long addr, void *data)
+{
+ if (!pte_present(*pte) || pte_none(*pte)) {
+ pr_err("missing PTE:%lx\n",
+ (addr - (unsigned long)data) >> PAGE_SHIFT);
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+static int check_absent_pte(pte_t *pte, unsigned long addr, void *data)
+{
+ if (pte_present(*pte) && !pte_none(*pte)) {
+ pr_err("present PTE:%lx; expected to be revoked\n",
+ (addr - (unsigned long)data) >> PAGE_SHIFT);
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+static int check_present(unsigned long addr, unsigned long len)
+{
+ return apply_to_page_range(current->mm, addr, len,
+ check_present_pte, (void *)addr);
+}
+
+static int check_absent(unsigned long addr, unsigned long len)
+{
+ return apply_to_page_range(current->mm, addr, len,
+ check_absent_pte, (void *)addr);
+}
+
+static int prefault_range(u64 start, u64 len)
+{
+ const char __user *addr, *end;
+ char __maybe_unused c;
+ int err;
+
+ addr = u64_to_user_ptr(start);
+ end = addr + len;
+
+ for (; addr < end; addr += PAGE_SIZE) {
+ err = __get_user(c, addr);
+ if (err)
+ return err;
+ }
+
+ return __get_user(c, end - 1);
+}
+
+static int __igt_mmap_revoke(struct drm_i915_private *i915,
+ struct drm_i915_gem_object *obj,
+ enum i915_mmap_type type)
+{
+ struct i915_mmap_offset *mmo;
+ unsigned long addr;
+ int err;
+
+ if (!can_mmap(obj, type))
+ return 0;
+
+ mmo = mmap_offset_attach(obj, type, NULL);
+ if (IS_ERR(mmo))
+ return PTR_ERR(mmo);
+
+ addr = igt_mmap_node(i915, &mmo->vma_node, 0, PROT_WRITE, MAP_SHARED);
+ if (IS_ERR_VALUE(addr))
+ return addr;
+
+ err = prefault_range(addr, obj->base.size);
+ if (err)
+ goto out_unmap;
+
+ GEM_BUG_ON(mmo->mmap_type == I915_MMAP_TYPE_GTT &&
+ !atomic_read(&obj->bind_count));
+
+ err = check_present(addr, obj->base.size);
+ if (err) {
+ pr_err("%s: was not present\n", obj->mm.region->name);
+ goto out_unmap;
+ }
+
+ /*
+ * After unbinding the object from the GGTT, its address may be reused
+ * for other objects. Ergo we have to revoke the previous mmap PTE
+ * access as it no longer points to the same object.
+ */
+ err = i915_gem_object_unbind(obj, I915_GEM_OBJECT_UNBIND_ACTIVE);
+ if (err) {
+ pr_err("Failed to unbind object!\n");
+ goto out_unmap;
+ }
+ GEM_BUG_ON(atomic_read(&obj->bind_count));
+
+ if (type != I915_MMAP_TYPE_GTT) {
+ __i915_gem_object_put_pages(obj);
+ if (i915_gem_object_has_pages(obj)) {
+ pr_err("Failed to put-pages object!\n");
+ err = -EINVAL;
+ goto out_unmap;
+ }
+ }
+
+ err = check_absent(addr, obj->base.size);
+ if (err) {
+ pr_err("%s: was not absent\n", obj->mm.region->name);
+ goto out_unmap;
+ }
+
+out_unmap:
+ vm_munmap(addr, obj->base.size);
+ return err;
+}
+
+static int igt_mmap_revoke(void *arg)
+{
+ struct drm_i915_private *i915 = arg;
+ struct intel_memory_region *mr;
+ enum intel_region_id id;
+
+ for_each_memory_region(mr, i915, id) {
+ struct drm_i915_gem_object *obj;
+ int err;
+
+ obj = i915_gem_object_create_region(mr, PAGE_SIZE, 0);
+ if (obj == ERR_PTR(-ENODEV))
+ continue;
+
+ if (IS_ERR(obj))
+ return PTR_ERR(obj);
+
+ err = __igt_mmap_revoke(i915, obj, I915_MMAP_TYPE_GTT);
+ if (err == 0)
+ err = __igt_mmap_revoke(i915, obj, I915_MMAP_TYPE_WC);
+
+ i915_gem_object_put(obj);
+ if (err)
+ return err;
+ }
+
+ return 0;
+}
+
int i915_gem_mman_live_selftests(struct drm_i915_private *i915)
{
static const struct i915_subtest tests[] = {
SUBTEST(igt_partial_tiling),
+ SUBTEST(igt_smoke_tiling),
SUBTEST(igt_mmap_offset_exhaustion),
+ SUBTEST(igt_mmap),
+ SUBTEST(igt_mmap_revoke),
+ SUBTEST(igt_mmap_gpu),
};
return i915_subtests(tests, i915);
diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_object_blt.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_object_blt.c
index e23d8c9e9298..62077fe46715 100644
--- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_object_blt.c
+++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_object_blt.c
@@ -3,34 +3,254 @@
* Copyright © 2019 Intel Corporation
*/
+#include <linux/sort.h>
+
+#include "gt/intel_gt.h"
+#include "gt/intel_engine_user.h"
+
#include "i915_selftest.h"
+#include "gem/i915_gem_context.h"
#include "selftests/igt_flush_test.h"
+#include "selftests/i915_random.h"
#include "selftests/mock_drm.h"
+#include "huge_gem_object.h"
#include "mock_context.h"
-static int igt_fill_blt(void *arg)
+static int wrap_ktime_compare(const void *A, const void *B)
{
- struct intel_context *ce = arg;
- struct drm_i915_private *i915 = ce->gem_context->i915;
- struct drm_i915_gem_object *obj;
+ const ktime_t *a = A, *b = B;
+
+ return ktime_compare(*a, *b);
+}
+
+static int __perf_fill_blt(struct drm_i915_gem_object *obj)
+{
+ struct drm_i915_private *i915 = to_i915(obj->base.dev);
+ int inst = 0;
+
+ do {
+ struct intel_engine_cs *engine;
+ ktime_t t[5];
+ int pass;
+ int err;
+
+ engine = intel_engine_lookup_user(i915,
+ I915_ENGINE_CLASS_COPY,
+ inst++);
+ if (!engine)
+ return 0;
+
+ intel_engine_pm_get(engine);
+ for (pass = 0; pass < ARRAY_SIZE(t); pass++) {
+ struct intel_context *ce = engine->kernel_context;
+ ktime_t t0, t1;
+
+ t0 = ktime_get();
+
+ err = i915_gem_object_fill_blt(obj, ce, 0);
+ if (err)
+ break;
+
+ err = i915_gem_object_wait(obj,
+ I915_WAIT_ALL,
+ MAX_SCHEDULE_TIMEOUT);
+ if (err)
+ break;
+
+ t1 = ktime_get();
+ t[pass] = ktime_sub(t1, t0);
+ }
+ intel_engine_pm_put(engine);
+ if (err)
+ return err;
+
+ sort(t, ARRAY_SIZE(t), sizeof(*t), wrap_ktime_compare, NULL);
+ pr_info("%s: blt %zd KiB fill: %lld MiB/s\n",
+ engine->name,
+ obj->base.size >> 10,
+ div64_u64(mul_u32_u32(4 * obj->base.size,
+ 1000 * 1000 * 1000),
+ t[1] + 2 * t[2] + t[3]) >> 20);
+ } while (1);
+}
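The rate printed above drops the fastest and slowest of the five passes and weights the remaining three 1:2:1: bytes-per-second = 4 * size * 1e9 / (t[1] + 2*t[2] + t[3]) with the ktime samples in nanoseconds, and the final >> 20 converts bytes/s to MiB/s. A worked example with purely illustrative timings:

/*
 * Assume a 2 MiB fill whose five sorted pass times are
 * { 90us, 100us, 110us, 120us, 200us }:
 *
 *   t[1] + 2*t[2] + t[3] = 100us + 220us + 120us = 440us  (4 passes' worth)
 *   rate = 4 * 2 MiB * 1e9 / 440000 ns ~= 1.9e10 bytes/s
 *   1.9e10 >> 20 ~= 18182 MiB/s reported
 */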
+
+static int perf_fill_blt(void *arg)
+{
+ struct drm_i915_private *i915 = arg;
+ static const unsigned long sizes[] = {
+ SZ_4K,
+ SZ_64K,
+ SZ_2M,
+ SZ_64M
+ };
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(sizes); i++) {
+ struct drm_i915_gem_object *obj;
+ int err;
+
+ obj = i915_gem_object_create_internal(i915, sizes[i]);
+ if (IS_ERR(obj))
+ return PTR_ERR(obj);
+
+ err = __perf_fill_blt(obj);
+ i915_gem_object_put(obj);
+ if (err)
+ return err;
+ }
+
+ return 0;
+}
+
+static int __perf_copy_blt(struct drm_i915_gem_object *src,
+ struct drm_i915_gem_object *dst)
+{
+ struct drm_i915_private *i915 = to_i915(src->base.dev);
+ int inst = 0;
+
+ do {
+ struct intel_engine_cs *engine;
+ ktime_t t[5];
+ int pass;
+ int err = 0;
+
+ engine = intel_engine_lookup_user(i915,
+ I915_ENGINE_CLASS_COPY,
+ inst++);
+ if (!engine)
+ return 0;
+
+ intel_engine_pm_get(engine);
+ for (pass = 0; pass < ARRAY_SIZE(t); pass++) {
+ struct intel_context *ce = engine->kernel_context;
+ ktime_t t0, t1;
+
+ t0 = ktime_get();
+
+ err = i915_gem_object_copy_blt(src, dst, ce);
+ if (err)
+ break;
+
+ err = i915_gem_object_wait(dst,
+ I915_WAIT_ALL,
+ MAX_SCHEDULE_TIMEOUT);
+ if (err)
+ break;
+
+ t1 = ktime_get();
+ t[pass] = ktime_sub(t1, t0);
+ }
+ intel_engine_pm_put(engine);
+ if (err)
+ return err;
+
+ sort(t, ARRAY_SIZE(t), sizeof(*t), wrap_ktime_compare, NULL);
+ pr_info("%s: blt %zd KiB copy: %lld MiB/s\n",
+ engine->name,
+ src->base.size >> 10,
+ div64_u64(mul_u32_u32(4 * src->base.size,
+ 1000 * 1000 * 1000),
+ t[1] + 2 * t[2] + t[3]) >> 20);
+ } while (1);
+}
+
+static int perf_copy_blt(void *arg)
+{
+ struct drm_i915_private *i915 = arg;
+ static const unsigned long sizes[] = {
+ SZ_4K,
+ SZ_64K,
+ SZ_2M,
+ SZ_64M
+ };
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(sizes); i++) {
+ struct drm_i915_gem_object *src, *dst;
+ int err;
+
+ src = i915_gem_object_create_internal(i915, sizes[i]);
+ if (IS_ERR(src))
+ return PTR_ERR(src);
+
+ dst = i915_gem_object_create_internal(i915, sizes[i]);
+ if (IS_ERR(dst)) {
+ err = PTR_ERR(dst);
+ goto err_src;
+ }
+
+ err = __perf_copy_blt(src, dst);
+
+ i915_gem_object_put(dst);
+err_src:
+ i915_gem_object_put(src);
+ if (err)
+ return err;
+ }
+
+ return 0;
+}
+
+struct igt_thread_arg {
+ struct drm_i915_private *i915;
+ struct i915_gem_context *ctx;
+ struct file *file;
struct rnd_state prng;
+ unsigned int n_cpus;
+};
+
+static int igt_fill_blt_thread(void *arg)
+{
+ struct igt_thread_arg *thread = arg;
+ struct drm_i915_private *i915 = thread->i915;
+ struct rnd_state *prng = &thread->prng;
+ struct drm_i915_gem_object *obj;
+ struct i915_gem_context *ctx;
+ struct intel_context *ce;
+ unsigned int prio;
IGT_TIMEOUT(end);
- u32 *vaddr;
- int err = 0;
+ int err;
+
+ ctx = thread->ctx;
+ if (!ctx) {
+ ctx = live_context(i915, thread->file);
+ if (IS_ERR(ctx))
+ return PTR_ERR(ctx);
+
+ prio = i915_prandom_u32_max_state(I915_PRIORITY_MAX, prng);
+ ctx->sched.priority = I915_USER_PRIORITY(prio);
+ }
- prandom_seed_state(&prng, i915_selftest.random_seed);
+ ce = i915_gem_context_get_engine(ctx, BCS0);
+ GEM_BUG_ON(IS_ERR(ce));
do {
- u32 sz = prandom_u32_state(&prng) % SZ_32M;
- u32 val = prandom_u32_state(&prng);
+ const u32 max_block_size = S16_MAX * PAGE_SIZE;
+ u32 val = prandom_u32_state(prng);
+ u64 total = ce->vm->total;
+ u32 phys_sz;
+ u32 sz;
+ u32 *vaddr;
u32 i;
+ /*
+ * If we have a tiny shared address space, like for the GGTT
+ * then we can't be too greedy.
+ */
+ if (i915_is_ggtt(ce->vm))
+ total = div64_u64(total, thread->n_cpus);
+
+ sz = min_t(u64, total >> 4, prandom_u32_state(prng));
+ phys_sz = sz % (max_block_size + 1);
+
sz = round_up(sz, PAGE_SIZE);
+ phys_sz = round_up(phys_sz, PAGE_SIZE);
- pr_debug("%s with sz=%x, val=%x\n", __func__, sz, val);
+ pr_debug("%s with phys_sz= %x, sz=%x, val=%x\n", __func__,
+ phys_sz, sz, val);
- obj = i915_gem_object_create_internal(i915, sz);
+ obj = huge_gem_object(i915, phys_sz, sz);
if (IS_ERR(obj)) {
err = PTR_ERR(obj);
goto err_flush;
@@ -46,14 +266,13 @@ static int igt_fill_blt(void *arg)
* Make sure the potentially async clflush does its job, if
* required.
*/
- memset32(vaddr, val ^ 0xdeadbeaf, obj->base.size / sizeof(u32));
+ memset32(vaddr, val ^ 0xdeadbeaf,
+ huge_gem_object_phys_size(obj) / sizeof(u32));
if (!(obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_WRITE))
obj->cache_dirty = true;
- mutex_lock(&i915->drm.struct_mutex);
err = i915_gem_object_fill_blt(obj, ce, val);
- mutex_unlock(&i915->drm.struct_mutex);
if (err)
goto err_unpin;
@@ -63,7 +282,7 @@ static int igt_fill_blt(void *arg)
if (err)
goto err_unpin;
- for (i = 0; i < obj->base.size / sizeof(u32); ++i) {
+ for (i = 0; i < huge_gem_object_phys_size(obj) / sizeof(u32); ++i) {
if (vaddr[i] != val) {
pr_err("vaddr[%u]=%x, expected=%x\n", i,
vaddr[i], val);
@@ -83,28 +302,261 @@ err_unpin:
err_put:
i915_gem_object_put(obj);
err_flush:
- mutex_lock(&i915->drm.struct_mutex);
- if (igt_flush_test(i915, I915_WAIT_LOCKED))
- err = -EIO;
- mutex_unlock(&i915->drm.struct_mutex);
+ if (err == -ENOMEM)
+ err = 0;
+
+ intel_context_put(ce);
+ return err;
+}
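The size selection in the loop above caps the virtual object size at a sixteenth of the context's vm (further divided by the thread count when sharing the GGTT) and derives a physical backing size below max_block_size, presumably mirroring the S16_MAX-sized chunks used by i915_gem_object_fill_blt(); huge_gem_object() then reports the virtual size while only allocating the physical size. Rough numbers, for illustration only:

/*
 * max_block_size = S16_MAX * PAGE_SIZE = 32767 * 4096 ~= 128 MiB - 4 KiB
 *
 * With a 4 GiB ppGTT:
 *   sz      <= total >> 4                = 256 MiB  (virtual size)
 *   phys_sz  = sz % (max_block_size + 1) <  ~128 MiB (pages actually allocated)
 * both rounded up to PAGE_SIZE before huge_gem_object(i915, phys_sz, sz).
 */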
+
+static int igt_copy_blt_thread(void *arg)
+{
+ struct igt_thread_arg *thread = arg;
+ struct drm_i915_private *i915 = thread->i915;
+ struct rnd_state *prng = &thread->prng;
+ struct drm_i915_gem_object *src, *dst;
+ struct i915_gem_context *ctx;
+ struct intel_context *ce;
+ unsigned int prio;
+ IGT_TIMEOUT(end);
+ int err;
+
+ ctx = thread->ctx;
+ if (!ctx) {
+ ctx = live_context(i915, thread->file);
+ if (IS_ERR(ctx))
+ return PTR_ERR(ctx);
+
+ prio = i915_prandom_u32_max_state(I915_PRIORITY_MAX, prng);
+ ctx->sched.priority = I915_USER_PRIORITY(prio);
+ }
+
+ ce = i915_gem_context_get_engine(ctx, BCS0);
+ GEM_BUG_ON(IS_ERR(ce));
+
+ do {
+ const u32 max_block_size = S16_MAX * PAGE_SIZE;
+ u32 val = prandom_u32_state(prng);
+ u64 total = ce->vm->total;
+ u32 phys_sz;
+ u32 sz;
+ u32 *vaddr;
+ u32 i;
+
+ if (i915_is_ggtt(ce->vm))
+ total = div64_u64(total, thread->n_cpus);
+
+ sz = min_t(u64, total >> 4, prandom_u32_state(prng));
+ phys_sz = sz % (max_block_size + 1);
+
+ sz = round_up(sz, PAGE_SIZE);
+ phys_sz = round_up(phys_sz, PAGE_SIZE);
+
+ pr_debug("%s with phys_sz= %x, sz=%x, val=%x\n", __func__,
+ phys_sz, sz, val);
+
+ src = huge_gem_object(i915, phys_sz, sz);
+ if (IS_ERR(src)) {
+ err = PTR_ERR(src);
+ goto err_flush;
+ }
+
+ vaddr = i915_gem_object_pin_map(src, I915_MAP_WB);
+ if (IS_ERR(vaddr)) {
+ err = PTR_ERR(vaddr);
+ goto err_put_src;
+ }
+
+ memset32(vaddr, val,
+ huge_gem_object_phys_size(src) / sizeof(u32));
+
+ i915_gem_object_unpin_map(src);
+
+ if (!(src->cache_coherent & I915_BO_CACHE_COHERENT_FOR_READ))
+ src->cache_dirty = true;
+
+ dst = huge_gem_object(i915, phys_sz, sz);
+ if (IS_ERR(dst)) {
+ err = PTR_ERR(dst);
+ goto err_put_src;
+ }
+
+ vaddr = i915_gem_object_pin_map(dst, I915_MAP_WB);
+ if (IS_ERR(vaddr)) {
+ err = PTR_ERR(vaddr);
+ goto err_put_dst;
+ }
+
+ memset32(vaddr, val ^ 0xdeadbeaf,
+ huge_gem_object_phys_size(dst) / sizeof(u32));
+
+ if (!(dst->cache_coherent & I915_BO_CACHE_COHERENT_FOR_WRITE))
+ dst->cache_dirty = true;
+
+ err = i915_gem_object_copy_blt(src, dst, ce);
+ if (err)
+ goto err_unpin;
+
+ i915_gem_object_lock(dst);
+ err = i915_gem_object_set_to_cpu_domain(dst, false);
+ i915_gem_object_unlock(dst);
+ if (err)
+ goto err_unpin;
+ for (i = 0; i < huge_gem_object_phys_size(dst) / sizeof(u32); ++i) {
+ if (vaddr[i] != val) {
+ pr_err("vaddr[%u]=%x, expected=%x\n", i,
+ vaddr[i], val);
+ err = -EINVAL;
+ goto err_unpin;
+ }
+ }
+
+ i915_gem_object_unpin_map(dst);
+
+ i915_gem_object_put(src);
+ i915_gem_object_put(dst);
+ } while (!time_after(jiffies, end));
+
+ goto err_flush;
+
+err_unpin:
+ i915_gem_object_unpin_map(dst);
+err_put_dst:
+ i915_gem_object_put(dst);
+err_put_src:
+ i915_gem_object_put(src);
+err_flush:
if (err == -ENOMEM)
err = 0;
+ intel_context_put(ce);
+ return err;
+}
+
+static int igt_threaded_blt(struct drm_i915_private *i915,
+ int (*blt_fn)(void *arg),
+ unsigned int flags)
+#define SINGLE_CTX BIT(0)
+{
+ struct igt_thread_arg *thread;
+ struct task_struct **tsk;
+ unsigned int n_cpus, i;
+ I915_RND_STATE(prng);
+ int err = 0;
+
+ n_cpus = num_online_cpus() + 1;
+
+ tsk = kcalloc(n_cpus, sizeof(struct task_struct *), GFP_KERNEL);
+ if (!tsk)
+ return 0;
+
+ thread = kcalloc(n_cpus, sizeof(struct igt_thread_arg), GFP_KERNEL);
+ if (!thread)
+ goto out_tsk;
+
+ thread[0].file = mock_file(i915);
+ if (IS_ERR(thread[0].file)) {
+ err = PTR_ERR(thread[0].file);
+ goto out_thread;
+ }
+
+ if (flags & SINGLE_CTX) {
+ thread[0].ctx = live_context(i915, thread[0].file);
+ if (IS_ERR(thread[0].ctx)) {
+ err = PTR_ERR(thread[0].ctx);
+ goto out_file;
+ }
+ }
+
+ for (i = 0; i < n_cpus; ++i) {
+ thread[i].i915 = i915;
+ thread[i].file = thread[0].file;
+ thread[i].ctx = thread[0].ctx;
+ thread[i].n_cpus = n_cpus;
+ thread[i].prng =
+ I915_RND_STATE_INITIALIZER(prandom_u32_state(&prng));
+
+ tsk[i] = kthread_run(blt_fn, &thread[i], "igt/blt-%d", i);
+ if (IS_ERR(tsk[i])) {
+ err = PTR_ERR(tsk[i]);
+ break;
+ }
+
+ get_task_struct(tsk[i]);
+ }
+
+ yield(); /* start all threads before we kthread_stop() */
+
+ for (i = 0; i < n_cpus; ++i) {
+ int status;
+
+ if (IS_ERR_OR_NULL(tsk[i]))
+ continue;
+
+ status = kthread_stop(tsk[i]);
+ if (status && !err)
+ err = status;
+
+ put_task_struct(tsk[i]);
+ }
+
+out_file:
+ fput(thread[0].file);
+out_thread:
+ kfree(thread);
+out_tsk:
+ kfree(tsk);
return err;
}
+static int igt_fill_blt(void *arg)
+{
+ return igt_threaded_blt(arg, igt_fill_blt_thread, 0);
+}
+
+static int igt_fill_blt_ctx0(void *arg)
+{
+ return igt_threaded_blt(arg, igt_fill_blt_thread, SINGLE_CTX);
+}
+
+static int igt_copy_blt(void *arg)
+{
+ return igt_threaded_blt(arg, igt_copy_blt_thread, 0);
+}
+
+static int igt_copy_blt_ctx0(void *arg)
+{
+ return igt_threaded_blt(arg, igt_copy_blt_thread, SINGLE_CTX);
+}
+
int i915_gem_object_blt_live_selftests(struct drm_i915_private *i915)
{
static const struct i915_subtest tests[] = {
SUBTEST(igt_fill_blt),
+ SUBTEST(igt_fill_blt_ctx0),
+ SUBTEST(igt_copy_blt),
+ SUBTEST(igt_copy_blt_ctx0),
};
- if (i915_terminally_wedged(i915))
+ if (intel_gt_is_wedged(&i915->gt))
return 0;
if (!HAS_ENGINE(i915, BCS0))
return 0;
- return i915_subtests(tests, i915->engine[BCS0]->kernel_context);
+ return i915_live_subtests(tests, i915);
+}
+
+int i915_gem_object_blt_perf_selftests(struct drm_i915_private *i915)
+{
+ static const struct i915_subtest tests[] = {
+ SUBTEST(perf_fill_blt),
+ SUBTEST(perf_copy_blt),
+ };
+
+ if (intel_gt_is_wedged(&i915->gt))
+ return 0;
+
+ return i915_live_subtests(tests, i915);
}
diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_phys.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_phys.c
index 94a15e3f6db8..34932871b3a5 100644
--- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_phys.c
+++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_phys.c
@@ -25,9 +25,7 @@ static int mock_phys_object(void *arg)
goto out;
}
- mutex_lock(&i915->drm.struct_mutex);
err = i915_gem_object_attach_phys(obj, PAGE_SIZE);
- mutex_unlock(&i915->drm.struct_mutex);
if (err) {
pr_err("i915_gem_object_attach_phys failed, err=%d\n", err);
goto out_obj;
diff --git a/drivers/gpu/drm/i915/gem/selftests/igt_gem_utils.c b/drivers/gpu/drm/i915/gem/selftests/igt_gem_utils.c
index b232e6d2cd92..6718da20f35d 100644
--- a/drivers/gpu/drm/i915/gem/selftests/igt_gem_utils.c
+++ b/drivers/gpu/drm/i915/gem/selftests/igt_gem_utils.c
@@ -9,6 +9,9 @@
#include "gem/i915_gem_context.h"
#include "gem/i915_gem_pm.h"
#include "gt/intel_context.h"
+#include "gt/intel_gt.h"
+#include "i915_vma.h"
+#include "i915_drv.h"
#include "i915_request.h"
@@ -23,7 +26,7 @@ igt_request_alloc(struct i915_gem_context *ctx, struct intel_engine_cs *engine)
* GGTT space, so do this first before we reserve a seqno for
* ourselves.
*/
- ce = i915_gem_context_get_engine(ctx, engine->id);
+ ce = i915_gem_context_get_engine(ctx, engine->legacy_idx);
if (IS_ERR(ce))
return ERR_CAST(ce);
@@ -32,3 +35,134 @@ igt_request_alloc(struct i915_gem_context *ctx, struct intel_engine_cs *engine)
return rq;
}
+
+struct i915_vma *
+igt_emit_store_dw(struct i915_vma *vma,
+ u64 offset,
+ unsigned long count,
+ u32 val)
+{
+ struct drm_i915_gem_object *obj;
+ const int gen = INTEL_GEN(vma->vm->i915);
+ unsigned long n, size;
+ u32 *cmd;
+ int err;
+
+ size = (4 * count + 1) * sizeof(u32);
+ size = round_up(size, PAGE_SIZE);
+ obj = i915_gem_object_create_internal(vma->vm->i915, size);
+ if (IS_ERR(obj))
+ return ERR_CAST(obj);
+
+ cmd = i915_gem_object_pin_map(obj, I915_MAP_WC);
+ if (IS_ERR(cmd)) {
+ err = PTR_ERR(cmd);
+ goto err;
+ }
+
+ GEM_BUG_ON(offset + (count - 1) * PAGE_SIZE > vma->node.size);
+ offset += vma->node.start;
+
+ for (n = 0; n < count; n++) {
+ if (gen >= 8) {
+ *cmd++ = MI_STORE_DWORD_IMM_GEN4;
+ *cmd++ = lower_32_bits(offset);
+ *cmd++ = upper_32_bits(offset);
+ *cmd++ = val;
+ } else if (gen >= 4) {
+ *cmd++ = MI_STORE_DWORD_IMM_GEN4 |
+ (gen < 6 ? MI_USE_GGTT : 0);
+ *cmd++ = 0;
+ *cmd++ = offset;
+ *cmd++ = val;
+ } else {
+ *cmd++ = MI_STORE_DWORD_IMM | MI_MEM_VIRTUAL;
+ *cmd++ = offset;
+ *cmd++ = val;
+ }
+ offset += PAGE_SIZE;
+ }
+ *cmd = MI_BATCH_BUFFER_END;
+ i915_gem_object_unpin_map(obj);
+
+ intel_gt_chipset_flush(vma->vm->gt);
+
+ vma = i915_vma_instance(obj, vma->vm, NULL);
+ if (IS_ERR(vma)) {
+ err = PTR_ERR(vma);
+ goto err;
+ }
+
+ err = i915_vma_pin(vma, 0, 0, PIN_USER);
+ if (err)
+ goto err;
+
+ return vma;
+
+err:
+ i915_gem_object_put(obj);
+ return ERR_PTR(err);
+}
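igt_emit_store_dw() sizes the batch for the worst case of four dwords per store (the gen4+ MI_STORE_DWORD_IMM encodings) plus one terminating MI_BATCH_BUFFER_END dword, rounded up to a page. For example:

/*
 * count = 1024 stores:
 *   size = (4 * 1024 + 1) * sizeof(u32) = 16388 bytes
 *   round_up(16388, PAGE_SIZE)          = 20480 bytes (five 4 KiB pages)
 */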
+
+int igt_gpu_fill_dw(struct intel_context *ce,
+ struct i915_vma *vma, u64 offset,
+ unsigned long count, u32 val)
+{
+ struct i915_request *rq;
+ struct i915_vma *batch;
+ unsigned int flags;
+ int err;
+
+ GEM_BUG_ON(!intel_engine_can_store_dword(ce->engine));
+ GEM_BUG_ON(!i915_vma_is_pinned(vma));
+
+ batch = igt_emit_store_dw(vma, offset, count, val);
+ if (IS_ERR(batch))
+ return PTR_ERR(batch);
+
+ rq = intel_context_create_request(ce);
+ if (IS_ERR(rq)) {
+ err = PTR_ERR(rq);
+ goto err_batch;
+ }
+
+ flags = 0;
+ if (INTEL_GEN(ce->vm->i915) <= 5)
+ flags |= I915_DISPATCH_SECURE;
+
+ err = rq->engine->emit_bb_start(rq,
+ batch->node.start, batch->node.size,
+ flags);
+ if (err)
+ goto err_request;
+
+ i915_vma_lock(batch);
+ err = i915_request_await_object(rq, batch->obj, false);
+ if (err == 0)
+ err = i915_vma_move_to_active(batch, rq, 0);
+ i915_vma_unlock(batch);
+ if (err)
+ goto skip_request;
+
+ i915_vma_lock(vma);
+ err = i915_request_await_object(rq, vma->obj, true);
+ if (err == 0)
+ err = i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE);
+ i915_vma_unlock(vma);
+ if (err)
+ goto skip_request;
+
+ i915_request_add(rq);
+
+ i915_vma_unpin_and_release(&batch, 0);
+
+ return 0;
+
+skip_request:
+ i915_request_skip(rq, err);
+err_request:
+ i915_request_add(rq);
+err_batch:
+ i915_vma_unpin_and_release(&batch, 0);
+ return err;
+}
diff --git a/drivers/gpu/drm/i915/gem/selftests/igt_gem_utils.h b/drivers/gpu/drm/i915/gem/selftests/igt_gem_utils.h
index 0f17251cf75d..4221cf84d175 100644
--- a/drivers/gpu/drm/i915/gem/selftests/igt_gem_utils.h
+++ b/drivers/gpu/drm/i915/gem/selftests/igt_gem_utils.h
@@ -7,11 +7,26 @@
#ifndef __IGT_GEM_UTILS_H__
#define __IGT_GEM_UTILS_H__
+#include <linux/types.h>
+
struct i915_request;
struct i915_gem_context;
+struct i915_vma;
+
+struct intel_context;
struct intel_engine_cs;
struct i915_request *
igt_request_alloc(struct i915_gem_context *ctx, struct intel_engine_cs *engine);
+struct i915_vma *
+igt_emit_store_dw(struct i915_vma *vma,
+ u64 offset,
+ unsigned long count,
+ u32 val);
+
+int igt_gpu_fill_dw(struct intel_context *ce,
+ struct i915_vma *vma, u64 offset,
+ unsigned long count, u32 val);
+
#endif /* __IGT_GEM_UTILS_H__ */
diff --git a/drivers/gpu/drm/i915/gem/selftests/mock_context.c b/drivers/gpu/drm/i915/gem/selftests/mock_context.c
index be8974ccff24..384143aa7776 100644
--- a/drivers/gpu/drm/i915/gem/selftests/mock_context.c
+++ b/drivers/gpu/drm/i915/gem/selftests/mock_context.c
@@ -5,6 +5,7 @@
*/
#include "mock_context.h"
+#include "selftests/mock_drm.h"
#include "selftests/mock_gtt.h"
struct i915_gem_context *
@@ -13,7 +14,6 @@ mock_context(struct drm_i915_private *i915,
{
struct i915_gem_context *ctx;
struct i915_gem_engines *e;
- int ret;
ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
if (!ctx)
@@ -23,6 +23,8 @@ mock_context(struct drm_i915_private *i915,
INIT_LIST_HEAD(&ctx->link);
ctx->i915 = i915;
+ i915_gem_context_set_persistence(ctx);
+
mutex_init(&ctx->engines_mutex);
e = default_engines(ctx);
if (IS_ERR(e))
@@ -30,32 +32,26 @@ mock_context(struct drm_i915_private *i915,
RCU_INIT_POINTER(ctx->engines, e);
INIT_RADIX_TREE(&ctx->handles_vma, GFP_KERNEL);
- INIT_LIST_HEAD(&ctx->hw_id_link);
mutex_init(&ctx->mutex);
- ret = i915_gem_context_pin_hw_id(ctx);
- if (ret < 0)
- goto err_engines;
-
if (name) {
struct i915_ppgtt *ppgtt;
- ctx->name = kstrdup(name, GFP_KERNEL);
- if (!ctx->name)
- goto err_put;
+ strncpy(ctx->name, name, sizeof(ctx->name));
ppgtt = mock_ppgtt(i915, name);
if (!ppgtt)
goto err_put;
+ mutex_lock(&ctx->mutex);
__set_ppgtt(ctx, &ppgtt->vm);
+ mutex_unlock(&ctx->mutex);
+
i915_vm_put(&ppgtt->vm);
}
return ctx;
-err_engines:
- free_engines(rcu_access_pointer(ctx->engines));
err_free:
kfree(ctx);
return NULL;
@@ -73,22 +69,21 @@ void mock_context_close(struct i915_gem_context *ctx)
void mock_init_contexts(struct drm_i915_private *i915)
{
- init_contexts(i915);
+ init_contexts(&i915->gem.contexts);
}
struct i915_gem_context *
-live_context(struct drm_i915_private *i915, struct drm_file *file)
+live_context(struct drm_i915_private *i915, struct file *file)
{
struct i915_gem_context *ctx;
int err;
-
- lockdep_assert_held(&i915->drm.struct_mutex);
+ u32 id;
ctx = i915_gem_create_context(i915, 0);
if (IS_ERR(ctx))
return ctx;
- err = gem_context_register(ctx, file->driver_priv);
+ err = gem_context_register(ctx, to_drm_file(file)->driver_priv, &id);
if (err < 0)
goto err_ctx;
@@ -102,7 +97,16 @@ err_ctx:
struct i915_gem_context *
kernel_context(struct drm_i915_private *i915)
{
- return i915_gem_context_create_kernel(i915, I915_PRIORITY_NORMAL);
+ struct i915_gem_context *ctx;
+
+ ctx = i915_gem_create_context(i915, 0);
+ if (IS_ERR(ctx))
+ return ctx;
+
+ i915_gem_context_clear_bannable(ctx);
+ i915_gem_context_set_persistence(ctx);
+
+ return ctx;
}
void kernel_context_close(struct i915_gem_context *ctx)
diff --git a/drivers/gpu/drm/i915/gem/selftests/mock_context.h b/drivers/gpu/drm/i915/gem/selftests/mock_context.h
index 0b926653914f..fb83d2f09212 100644
--- a/drivers/gpu/drm/i915/gem/selftests/mock_context.h
+++ b/drivers/gpu/drm/i915/gem/selftests/mock_context.h
@@ -7,6 +7,9 @@
#ifndef __MOCK_CONTEXT_H
#define __MOCK_CONTEXT_H
+struct file;
+struct drm_i915_private;
+
void mock_init_contexts(struct drm_i915_private *i915);
struct i915_gem_context *
@@ -16,7 +19,7 @@ mock_context(struct drm_i915_private *i915,
void mock_context_close(struct i915_gem_context *ctx);
struct i915_gem_context *
-live_context(struct drm_i915_private *i915, struct drm_file *file);
+live_context(struct drm_i915_private *i915, struct file *file);
struct i915_gem_context *kernel_context(struct drm_i915_private *i915);
void kernel_context_close(struct i915_gem_context *ctx);
diff --git a/drivers/gpu/drm/i915/gem/selftests/mock_dmabuf.c b/drivers/gpu/drm/i915/gem/selftests/mock_dmabuf.c
index b9e059d4328a..9272bef57092 100644
--- a/drivers/gpu/drm/i915/gem/selftests/mock_dmabuf.c
+++ b/drivers/gpu/drm/i915/gem/selftests/mock_dmabuf.c
@@ -76,20 +76,6 @@ static void mock_dmabuf_vunmap(struct dma_buf *dma_buf, void *vaddr)
vm_unmap_ram(vaddr, mock->npages);
}
-static void *mock_dmabuf_kmap(struct dma_buf *dma_buf, unsigned long page_num)
-{
- struct mock_dmabuf *mock = to_mock(dma_buf);
-
- return kmap(mock->pages[page_num]);
-}
-
-static void mock_dmabuf_kunmap(struct dma_buf *dma_buf, unsigned long page_num, void *addr)
-{
- struct mock_dmabuf *mock = to_mock(dma_buf);
-
- return kunmap(mock->pages[page_num]);
-}
-
static int mock_dmabuf_mmap(struct dma_buf *dma_buf, struct vm_area_struct *vma)
{
return -ENODEV;
@@ -99,8 +85,6 @@ static const struct dma_buf_ops mock_dmabuf_ops = {
.map_dma_buf = mock_map_dma_buf,
.unmap_dma_buf = mock_unmap_dma_buf,
.release = mock_dmabuf_release,
- .map = mock_dmabuf_kmap,
- .unmap = mock_dmabuf_kunmap,
.mmap = mock_dmabuf_mmap,
.vmap = mock_dmabuf_vmap,
.vunmap = mock_dmabuf_vunmap,
diff --git a/drivers/gpu/drm/i915/gem/selftests/mock_dmabuf.h b/drivers/gpu/drm/i915/gem/selftests/mock_dmabuf.h
index f0f8bbd82dfc..22818bbb139d 100644
--- a/drivers/gpu/drm/i915/gem/selftests/mock_dmabuf.h
+++ b/drivers/gpu/drm/i915/gem/selftests/mock_dmabuf.h
@@ -14,7 +14,7 @@ struct mock_dmabuf {
struct page *pages[];
};
-static struct mock_dmabuf *to_mock(struct dma_buf *buf)
+static inline struct mock_dmabuf *to_mock(struct dma_buf *buf)
{
return buf->priv;
}
diff --git a/drivers/gpu/drm/i915/gem/selftests/mock_gem_object.h b/drivers/gpu/drm/i915/gem/selftests/mock_gem_object.h
index 370360b4a148..688511afa883 100644
--- a/drivers/gpu/drm/i915/gem/selftests/mock_gem_object.h
+++ b/drivers/gpu/drm/i915/gem/selftests/mock_gem_object.h
@@ -7,6 +7,8 @@
#ifndef __MOCK_GEM_OBJECT_H__
#define __MOCK_GEM_OBJECT_H__
+#include "gem/i915_gem_object_types.h"
+
struct mock_object {
struct drm_i915_gem_object base;
};