Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c')
-rw-r--r--	drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c	262
1 file changed, 176 insertions, 86 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
index df6965761046..f9b54236102d 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
@@ -27,6 +27,30 @@
#include "amdgpu.h"
#include "amdgpu_sched.h"
+#define to_amdgpu_ctx_entity(e) \
+ container_of((e), struct amdgpu_ctx_entity, entity)
+
+const unsigned int amdgpu_ctx_num_entities[AMDGPU_HW_IP_NUM] = {
+ [AMDGPU_HW_IP_GFX] = 1,
+ [AMDGPU_HW_IP_COMPUTE] = 4,
+ [AMDGPU_HW_IP_DMA] = 2,
+ [AMDGPU_HW_IP_UVD] = 1,
+ [AMDGPU_HW_IP_VCE] = 1,
+ [AMDGPU_HW_IP_UVD_ENC] = 1,
+ [AMDGPU_HW_IP_VCN_DEC] = 1,
+ [AMDGPU_HW_IP_VCN_ENC] = 1,
+};
+
+static int amdgput_ctx_total_num_entities(void)
+{
+ unsigned i, num_entities = 0;
+
+ for (i = 0; i < AMDGPU_HW_IP_NUM; ++i)
+ num_entities += amdgpu_ctx_num_entities[i];
+
+ return num_entities;
+}
+
static int amdgpu_ctx_priority_permit(struct drm_file *filp,
enum drm_sched_priority priority)
{
@@ -48,6 +72,7 @@ static int amdgpu_ctx_init(struct amdgpu_device *adev,
struct drm_file *filp,
struct amdgpu_ctx *ctx)
{
+ unsigned num_entities = amdgput_ctx_total_num_entities();
unsigned i, j;
int r;
@@ -60,19 +85,33 @@ static int amdgpu_ctx_init(struct amdgpu_device *adev,
memset(ctx, 0, sizeof(*ctx));
ctx->adev = adev;
- kref_init(&ctx->refcount);
- spin_lock_init(&ctx->ring_lock);
- ctx->fences = kcalloc(amdgpu_sched_jobs * AMDGPU_MAX_RINGS,
+
+ ctx->fences = kcalloc(amdgpu_sched_jobs * num_entities,
sizeof(struct dma_fence*), GFP_KERNEL);
if (!ctx->fences)
return -ENOMEM;
- mutex_init(&ctx->lock);
+ ctx->entities[0] = kcalloc(num_entities,
+ sizeof(struct amdgpu_ctx_entity),
+ GFP_KERNEL);
+ if (!ctx->entities[0]) {
+ r = -ENOMEM;
+ goto error_free_fences;
+ }
- for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
- ctx->rings[i].sequence = 1;
- ctx->rings[i].fences = &ctx->fences[amdgpu_sched_jobs * i];
+ for (i = 0; i < num_entities; ++i) {
+ struct amdgpu_ctx_entity *entity = &ctx->entities[0][i];
+
+ entity->sequence = 1;
+ entity->fences = &ctx->fences[amdgpu_sched_jobs * i];
}
+ for (i = 1; i < AMDGPU_HW_IP_NUM; ++i)
+ ctx->entities[i] = ctx->entities[i - 1] +
+ amdgpu_ctx_num_entities[i - 1];
+
+ kref_init(&ctx->refcount);
+ spin_lock_init(&ctx->ring_lock);
+ mutex_init(&ctx->lock);
ctx->reset_counter = atomic_read(&adev->gpu_reset_counter);
ctx->reset_counter_query = ctx->reset_counter;
@@ -80,31 +119,70 @@ static int amdgpu_ctx_init(struct amdgpu_device *adev,
ctx->init_priority = priority;
ctx->override_priority = DRM_SCHED_PRIORITY_UNSET;
- /* create context entity for each ring */
- for (i = 0; i < adev->num_rings; i++) {
- struct amdgpu_ring *ring = adev->rings[i];
- struct drm_sched_rq *rq;
+ for (i = 0; i < AMDGPU_HW_IP_NUM; ++i) {
+ struct amdgpu_ring *rings[AMDGPU_MAX_RINGS];
+ struct drm_sched_rq *rqs[AMDGPU_MAX_RINGS];
+ unsigned num_rings;
+
+ switch (i) {
+ case AMDGPU_HW_IP_GFX:
+ rings[0] = &adev->gfx.gfx_ring[0];
+ num_rings = 1;
+ break;
+ case AMDGPU_HW_IP_COMPUTE:
+ for (j = 0; j < adev->gfx.num_compute_rings; ++j)
+ rings[j] = &adev->gfx.compute_ring[j];
+ num_rings = adev->gfx.num_compute_rings;
+ break;
+ case AMDGPU_HW_IP_DMA:
+ for (j = 0; j < adev->sdma.num_instances; ++j)
+ rings[j] = &adev->sdma.instance[j].ring;
+ num_rings = adev->sdma.num_instances;
+ break;
+ case AMDGPU_HW_IP_UVD:
+ rings[0] = &adev->uvd.inst[0].ring;
+ num_rings = 1;
+ break;
+ case AMDGPU_HW_IP_VCE:
+ rings[0] = &adev->vce.ring[0];
+ num_rings = 1;
+ break;
+ case AMDGPU_HW_IP_UVD_ENC:
+ rings[0] = &adev->uvd.inst[0].ring_enc[0];
+ num_rings = 1;
+ break;
+ case AMDGPU_HW_IP_VCN_DEC:
+ rings[0] = &adev->vcn.ring_dec;
+ num_rings = 1;
+ break;
+ case AMDGPU_HW_IP_VCN_ENC:
+ rings[0] = &adev->vcn.ring_enc[0];
+ num_rings = 1;
+ break;
+ case AMDGPU_HW_IP_VCN_JPEG:
+ rings[0] = &adev->vcn.ring_jpeg;
+ num_rings = 1;
+ break;
+ }
- rq = &ring->sched.sched_rq[priority];
+ for (j = 0; j < num_rings; ++j)
+ rqs[j] = &rings[j]->sched.sched_rq[priority];
- if (ring == &adev->gfx.kiq.ring)
- continue;
-
- r = drm_sched_entity_init(&ctx->rings[i].entity,
- &rq, 1, &ctx->guilty);
+ for (j = 0; j < amdgpu_ctx_num_entities[i]; ++j)
+ r = drm_sched_entity_init(&ctx->entities[i][j].entity,
+ rqs, num_rings, &ctx->guilty);
if (r)
- goto failed;
+ goto error_cleanup_entities;
}
- r = amdgpu_queue_mgr_init(adev, &ctx->queue_mgr);
- if (r)
- goto failed;
-
return 0;
-failed:
- for (j = 0; j < i; j++)
- drm_sched_entity_destroy(&ctx->rings[j].entity);
+error_cleanup_entities:
+ for (i = 0; i < num_entities; ++i)
+ drm_sched_entity_destroy(&ctx->entities[0][i].entity);
+ kfree(ctx->entities[0]);
+
+error_free_fences:
kfree(ctx->fences);
ctx->fences = NULL;
return r;
@@ -113,25 +191,47 @@ failed:
static void amdgpu_ctx_fini(struct kref *ref)
{
struct amdgpu_ctx *ctx = container_of(ref, struct amdgpu_ctx, refcount);
+ unsigned num_entities = amdgput_ctx_total_num_entities();
struct amdgpu_device *adev = ctx->adev;
unsigned i, j;
if (!adev)
return;
- for (i = 0; i < AMDGPU_MAX_RINGS; ++i)
+ for (i = 0; i < num_entities; ++i)
for (j = 0; j < amdgpu_sched_jobs; ++j)
- dma_fence_put(ctx->rings[i].fences[j]);
+ dma_fence_put(ctx->entities[0][i].fences[j]);
kfree(ctx->fences);
- ctx->fences = NULL;
-
- amdgpu_queue_mgr_fini(adev, &ctx->queue_mgr);
+ kfree(ctx->entities[0]);
mutex_destroy(&ctx->lock);
kfree(ctx);
}
+int amdgpu_ctx_get_entity(struct amdgpu_ctx *ctx, u32 hw_ip, u32 instance,
+ u32 ring, struct drm_sched_entity **entity)
+{
+ if (hw_ip >= AMDGPU_HW_IP_NUM) {
+ DRM_ERROR("unknown HW IP type: %d\n", hw_ip);
+ return -EINVAL;
+ }
+
+ /* Right now all IPs have only one instance - multiple rings. */
+ if (instance != 0) {
+ DRM_DEBUG("invalid ip instance: %d\n", instance);
+ return -EINVAL;
+ }
+
+ if (ring >= amdgpu_ctx_num_entities[hw_ip]) {
+ DRM_DEBUG("invalid ring: %d %d\n", hw_ip, ring);
+ return -EINVAL;
+ }
+
+ *entity = &ctx->entities[hw_ip][ring].entity;
+ return 0;
+}
+
static int amdgpu_ctx_alloc(struct amdgpu_device *adev,
struct amdgpu_fpriv *fpriv,
struct drm_file *filp,
@@ -168,17 +268,17 @@ static int amdgpu_ctx_alloc(struct amdgpu_device *adev,
static void amdgpu_ctx_do_release(struct kref *ref)
{
struct amdgpu_ctx *ctx;
+ unsigned num_entities;
u32 i;
ctx = container_of(ref, struct amdgpu_ctx, refcount);
- for (i = 0; i < ctx->adev->num_rings; i++) {
+ num_entities = 0;
+ for (i = 0; i < AMDGPU_HW_IP_NUM; i++)
+ num_entities += amdgpu_ctx_num_entities[i];
- if (ctx->adev->rings[i] == &ctx->adev->gfx.kiq.ring)
- continue;
-
- drm_sched_entity_destroy(&ctx->rings[i].entity);
- }
+ for (i = 0; i < num_entities; i++)
+ drm_sched_entity_destroy(&ctx->entities[0][i].entity);
amdgpu_ctx_fini(ref);
}
@@ -334,56 +434,56 @@ int amdgpu_ctx_put(struct amdgpu_ctx *ctx)
return 0;
}
-int amdgpu_ctx_add_fence(struct amdgpu_ctx *ctx, struct amdgpu_ring *ring,
- struct dma_fence *fence, uint64_t* handler)
+void amdgpu_ctx_add_fence(struct amdgpu_ctx *ctx,
+ struct drm_sched_entity *entity,
+ struct dma_fence *fence, uint64_t* handle)
{
- struct amdgpu_ctx_ring *cring = & ctx->rings[ring->idx];
- uint64_t seq = cring->sequence;
- unsigned idx = 0;
+ struct amdgpu_ctx_entity *centity = to_amdgpu_ctx_entity(entity);
+ uint64_t seq = centity->sequence;
struct dma_fence *other = NULL;
+ unsigned idx = 0;
idx = seq & (amdgpu_sched_jobs - 1);
- other = cring->fences[idx];
+ other = centity->fences[idx];
if (other)
BUG_ON(!dma_fence_is_signaled(other));
dma_fence_get(fence);
spin_lock(&ctx->ring_lock);
- cring->fences[idx] = fence;
- cring->sequence++;
+ centity->fences[idx] = fence;
+ centity->sequence++;
spin_unlock(&ctx->ring_lock);
dma_fence_put(other);
- if (handler)
- *handler = seq;
-
- return 0;
+ if (handle)
+ *handle = seq;
}
struct dma_fence *amdgpu_ctx_get_fence(struct amdgpu_ctx *ctx,
- struct amdgpu_ring *ring, uint64_t seq)
+ struct drm_sched_entity *entity,
+ uint64_t seq)
{
- struct amdgpu_ctx_ring *cring = & ctx->rings[ring->idx];
+ struct amdgpu_ctx_entity *centity = to_amdgpu_ctx_entity(entity);
struct dma_fence *fence;
spin_lock(&ctx->ring_lock);
if (seq == ~0ull)
- seq = ctx->rings[ring->idx].sequence - 1;
+ seq = centity->sequence - 1;
- if (seq >= cring->sequence) {
+ if (seq >= centity->sequence) {
spin_unlock(&ctx->ring_lock);
return ERR_PTR(-EINVAL);
}
- if (seq + amdgpu_sched_jobs < cring->sequence) {
+ if (seq + amdgpu_sched_jobs < centity->sequence) {
spin_unlock(&ctx->ring_lock);
return NULL;
}
- fence = dma_fence_get(cring->fences[seq & (amdgpu_sched_jobs - 1)]);
+ fence = dma_fence_get(centity->fences[seq & (amdgpu_sched_jobs - 1)]);
spin_unlock(&ctx->ring_lock);
return fence;
@@ -392,35 +492,28 @@ struct dma_fence *amdgpu_ctx_get_fence(struct amdgpu_ctx *ctx,
void amdgpu_ctx_priority_override(struct amdgpu_ctx *ctx,
enum drm_sched_priority priority)
{
- int i;
- struct amdgpu_device *adev = ctx->adev;
- struct drm_sched_rq *rq;
- struct drm_sched_entity *entity;
- struct amdgpu_ring *ring;
+ unsigned num_entities = amdgput_ctx_total_num_entities();
enum drm_sched_priority ctx_prio;
+ unsigned i;
ctx->override_priority = priority;
ctx_prio = (ctx->override_priority == DRM_SCHED_PRIORITY_UNSET) ?
ctx->init_priority : ctx->override_priority;
- for (i = 0; i < adev->num_rings; i++) {
- ring = adev->rings[i];
- entity = &ctx->rings[i].entity;
- rq = &ring->sched.sched_rq[ctx_prio];
-
- if (ring->funcs->type == AMDGPU_RING_TYPE_KIQ)
- continue;
+ for (i = 0; i < num_entities; i++) {
+ struct drm_sched_entity *entity = &ctx->entities[0][i].entity;
- drm_sched_entity_set_rq(entity, rq);
+ drm_sched_entity_set_priority(entity, ctx_prio);
}
}
-int amdgpu_ctx_wait_prev_fence(struct amdgpu_ctx *ctx, unsigned ring_id)
+int amdgpu_ctx_wait_prev_fence(struct amdgpu_ctx *ctx,
+ struct drm_sched_entity *entity)
{
- struct amdgpu_ctx_ring *cring = &ctx->rings[ring_id];
- unsigned idx = cring->sequence & (amdgpu_sched_jobs - 1);
- struct dma_fence *other = cring->fences[idx];
+ struct amdgpu_ctx_entity *centity = to_amdgpu_ctx_entity(entity);
+ unsigned idx = centity->sequence & (amdgpu_sched_jobs - 1);
+ struct dma_fence *other = centity->fences[idx];
if (other) {
signed long r;
@@ -444,6 +537,7 @@ void amdgpu_ctx_mgr_init(struct amdgpu_ctx_mgr *mgr)
void amdgpu_ctx_mgr_entity_flush(struct amdgpu_ctx_mgr *mgr)
{
+ unsigned num_entities = amdgput_ctx_total_num_entities();
struct amdgpu_ctx *ctx;
struct idr *idp;
uint32_t id, i;
@@ -459,13 +553,11 @@ void amdgpu_ctx_mgr_entity_flush(struct amdgpu_ctx_mgr *mgr)
return;
}
- for (i = 0; i < ctx->adev->num_rings; i++) {
+ for (i = 0; i < num_entities; i++) {
+ struct drm_sched_entity *entity;
- if (ctx->adev->rings[i] == &ctx->adev->gfx.kiq.ring)
- continue;
-
- max_wait = drm_sched_entity_flush(&ctx->rings[i].entity,
- max_wait);
+ entity = &ctx->entities[0][i].entity;
+ max_wait = drm_sched_entity_flush(entity, max_wait);
}
}
mutex_unlock(&mgr->lock);
@@ -473,6 +565,7 @@ void amdgpu_ctx_mgr_entity_flush(struct amdgpu_ctx_mgr *mgr)
void amdgpu_ctx_mgr_entity_fini(struct amdgpu_ctx_mgr *mgr)
{
+ unsigned num_entities = amdgput_ctx_total_num_entities();
struct amdgpu_ctx *ctx;
struct idr *idp;
uint32_t id, i;
@@ -484,16 +577,13 @@ void amdgpu_ctx_mgr_entity_fini(struct amdgpu_ctx_mgr *mgr)
if (!ctx->adev)
return;
- for (i = 0; i < ctx->adev->num_rings; i++) {
-
- if (ctx->adev->rings[i] == &ctx->adev->gfx.kiq.ring)
- continue;
-
- if (kref_read(&ctx->refcount) == 1)
- drm_sched_entity_fini(&ctx->rings[i].entity);
- else
- DRM_ERROR("ctx %p is still alive\n", ctx);
+ if (kref_read(&ctx->refcount) != 1) {
+ DRM_ERROR("ctx %p is still alive\n", ctx);
+ continue;
}
+
+ for (i = 0; i < num_entities; i++)
+ drm_sched_entity_fini(&ctx->entities[0][i].entity);
}
}
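
For reference, a minimal caller sketch of the interface introduced above: user space addresses submissions by an (hw_ip, instance, ring) triple, amdgpu_ctx_get_entity() maps that triple onto one of the context's scheduler entities, and amdgpu_ctx_get_fence() looks up a fence previously stored on that entity under a sequence number. The surrounding variables (ctx, seq) and the specific compute ring index are assumed purely for illustration; only the two amdgpu_ctx_* calls and their return conventions come from the patch itself.

	struct drm_sched_entity *entity;
	struct dma_fence *fence;
	int r;

	/* Third user-visible compute queue; the patch reserves 4 compute
	 * entities per context, so ring indices 0-3 are valid here.
	 */
	r = amdgpu_ctx_get_entity(ctx, AMDGPU_HW_IP_COMPUTE, 0, 2, &entity);
	if (r)
		return r;

	fence = amdgpu_ctx_get_fence(ctx, entity, seq);
	if (IS_ERR(fence))
		return PTR_ERR(fence);	/* seq was never handed out */
	if (!fence)
		return 0;		/* too old, already signaled and recycled */

	/* ... use 'fence' as a dependency or wait on it, then drop the
	 * reference taken by amdgpu_ctx_get_fence().
	 */
	dma_fence_put(fence);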