diff options
Diffstat (limited to 'drivers/gpu/drm/vc4/vc4_gem.c')
-rw-r--r-- | drivers/gpu/drm/vc4/vc4_gem.c | 81 |
1 files changed, 70 insertions, 11 deletions
diff --git a/drivers/gpu/drm/vc4/vc4_gem.c b/drivers/gpu/drm/vc4/vc4_gem.c index 638540943c61..2107b0daf8ef 100644 --- a/drivers/gpu/drm/vc4/vc4_gem.c +++ b/drivers/gpu/drm/vc4/vc4_gem.c @@ -146,7 +146,7 @@ vc4_save_hang_state(struct drm_device *dev) struct vc4_exec_info *exec[2]; struct vc4_bo *bo; unsigned long irqflags; - unsigned int i, j, unref_list_count, prev_idx; + unsigned int i, j, k, unref_list_count; kernel_state = kcalloc(1, sizeof(*kernel_state), GFP_KERNEL); if (!kernel_state) @@ -182,7 +182,7 @@ vc4_save_hang_state(struct drm_device *dev) return; } - prev_idx = 0; + k = 0; for (i = 0; i < 2; i++) { if (!exec[i]) continue; @@ -197,7 +197,7 @@ vc4_save_hang_state(struct drm_device *dev) WARN_ON(!refcount_read(&bo->usecnt)); refcount_inc(&bo->usecnt); drm_gem_object_get(&exec[i]->bo[j]->base); - kernel_state->bo[j + prev_idx] = &exec[i]->bo[j]->base; + kernel_state->bo[k++] = &exec[i]->bo[j]->base; } list_for_each_entry(bo, &exec[i]->unref_list, unref_head) { @@ -205,12 +205,12 @@ vc4_save_hang_state(struct drm_device *dev) * because they are naturally unpurgeable. */ drm_gem_object_get(&bo->base.base); - kernel_state->bo[j + prev_idx] = &bo->base.base; - j++; + kernel_state->bo[k++] = &bo->base.base; } - prev_idx = j + 1; } + WARN_ON_ONCE(k != state->bo_count); + if (exec[0]) state->start_bin = exec[0]->ct0ca; if (exec[1]) @@ -436,6 +436,19 @@ vc4_flush_caches(struct drm_device *dev) VC4_SET_FIELD(0xf, V3D_SLCACTL_ICC)); } +static void +vc4_flush_texture_caches(struct drm_device *dev) +{ + struct vc4_dev *vc4 = to_vc4_dev(dev); + + V3D_WRITE(V3D_L2CACTL, + V3D_L2CACTL_L2CCLR); + + V3D_WRITE(V3D_SLCACTL, + VC4_SET_FIELD(0xf, V3D_SLCACTL_T1CC) | + VC4_SET_FIELD(0xf, V3D_SLCACTL_T0CC)); +} + /* Sets the registers for the next job to be actually be executed in * the hardware. * @@ -454,14 +467,30 @@ again: vc4_flush_caches(dev); + /* Only start the perfmon if it was not already started by a previous + * job. + */ + if (exec->perfmon && vc4->active_perfmon != exec->perfmon) + vc4_perfmon_start(vc4, exec->perfmon); + /* Either put the job in the binner if it uses the binner, or * immediately move it to the to-be-rendered queue. */ if (exec->ct0ca != exec->ct0ea) { submit_cl(dev, 0, exec->ct0ca, exec->ct0ea); } else { + struct vc4_exec_info *next; + vc4_move_job_to_render(dev, exec); - goto again; + next = vc4_first_bin_job(vc4); + + /* We can't start the next bin job if the previous job had a + * different perfmon instance attached to it. The same goes + * if one of them had a perfmon attached to it and the other + * one doesn't. + */ + if (next && next->perfmon == exec->perfmon) + goto again; } } @@ -474,6 +503,14 @@ vc4_submit_next_render_job(struct drm_device *dev) if (!exec) return; + /* A previous RCL may have written to one of our textures, and + * our full cache flush at bin time may have occurred before + * that RCL completed. Flush the texture cache now, but not + * the instructions or uniforms (since we don't write those + * from an RCL). + */ + vc4_flush_texture_caches(dev); + submit_cl(dev, 1, exec->ct1ca, exec->ct1ea); } @@ -621,6 +658,7 @@ vc4_queue_submit(struct drm_device *dev, struct vc4_exec_info *exec, struct ww_acquire_ctx *acquire_ctx) { struct vc4_dev *vc4 = to_vc4_dev(dev); + struct vc4_exec_info *renderjob; uint64_t seqno; unsigned long irqflags; struct vc4_fence *fence; @@ -646,11 +684,14 @@ vc4_queue_submit(struct drm_device *dev, struct vc4_exec_info *exec, list_add_tail(&exec->head, &vc4->bin_job_list); - /* If no job was executing, kick ours off. Otherwise, it'll - * get started when the previous job's flush done interrupt - * occurs. + /* If no bin job was executing and if the render job (if any) has the + * same perfmon as our job attached to it (or if both jobs don't have + * perfmon activated), then kick ours off. Otherwise, it'll get + * started when the previous job's flush/render done interrupt occurs. */ - if (vc4_first_bin_job(vc4) == exec) { + renderjob = vc4_first_render_job(vc4); + if (vc4_first_bin_job(vc4) == exec && + (!renderjob || renderjob->perfmon == exec->perfmon)) { vc4_submit_next_bin_job(dev); vc4_queue_hangcheck(dev); } @@ -915,6 +956,9 @@ vc4_complete_exec(struct drm_device *dev, struct vc4_exec_info *exec) vc4->bin_alloc_used &= ~exec->bin_slots; spin_unlock_irqrestore(&vc4->job_lock, irqflags); + /* Release the reference we had on the perf monitor. */ + vc4_perfmon_put(exec->perfmon); + mutex_lock(&vc4->power_lock); if (--vc4->power_refcount == 0) { pm_runtime_mark_last_busy(&vc4->v3d->pdev->dev); @@ -1067,6 +1111,7 @@ vc4_submit_cl_ioctl(struct drm_device *dev, void *data, struct drm_file *file_priv) { struct vc4_dev *vc4 = to_vc4_dev(dev); + struct vc4_file *vc4file = file_priv->driver_priv; struct drm_vc4_submit_cl *args = data; struct vc4_exec_info *exec; struct ww_acquire_ctx acquire_ctx; @@ -1080,6 +1125,11 @@ vc4_submit_cl_ioctl(struct drm_device *dev, void *data, return -EINVAL; } + if (args->pad2 != 0) { + DRM_DEBUG("->pad2 must be set to zero\n"); + return -EINVAL; + } + exec = kcalloc(1, sizeof(*exec), GFP_KERNEL); if (!exec) { DRM_ERROR("malloc failure on exec struct\n"); @@ -1105,6 +1155,15 @@ vc4_submit_cl_ioctl(struct drm_device *dev, void *data, if (ret) goto fail; + if (args->perfmonid) { + exec->perfmon = vc4_perfmon_find(vc4file, + args->perfmonid); + if (!exec->perfmon) { + ret = -ENOENT; + goto fail; + } + } + if (exec->args->bin_cl_size != 0) { ret = vc4_get_bcl(dev, exec); if (ret) |