diff options
Diffstat (limited to 'drivers/gpu/drm')
120 files changed, 16544 insertions, 7083 deletions
diff --git a/drivers/gpu/drm/i915/Kconfig b/drivers/gpu/drm/i915/Kconfig index 183f5dc1c3f2..1ae0bb91ee60 100644 --- a/drivers/gpu/drm/i915/Kconfig +++ b/drivers/gpu/drm/i915/Kconfig @@ -19,6 +19,7 @@ config DRM_I915 select INPUT if ACPI select ACPI_VIDEO if ACPI select ACPI_BUTTON if ACPI + select SYNC_FILE help Choose this option if you have a system that has "Intel Graphics Media Accelerator" or "HD Graphics" integrated graphics, diff --git a/drivers/gpu/drm/i915/Kconfig.debug b/drivers/gpu/drm/i915/Kconfig.debug index 597648c7a645..e091809a9a9e 100644 --- a/drivers/gpu/drm/i915/Kconfig.debug +++ b/drivers/gpu/drm/i915/Kconfig.debug @@ -24,7 +24,9 @@ config DRM_I915_DEBUG select X86_MSR # used by igt/pm_rpm select DRM_VGEM # used by igt/prime_vgem (dmabuf interop checks) select DRM_DEBUG_MM if DRM=y + select DRM_DEBUG_MM_SELFTEST select DRM_I915_SW_FENCE_DEBUG_OBJECTS + select DRM_I915_SELFTEST default n help Choose this option to turn on extra driver debugging that may affect @@ -58,3 +60,30 @@ config DRM_I915_SW_FENCE_DEBUG_OBJECTS Recommended for driver developers only. If in doubt, say "N". + +config DRM_I915_SELFTEST + bool "Enable selftests upon driver load" + depends on DRM_I915 + default n + select FAULT_INJECTION + select PRIME_NUMBERS + help + Choose this option to allow the driver to perform selftests upon + loading; also requires the i915.selftest=1 module parameter. To + exit the module after running the selftests (i.e. to prevent normal + module initialisation afterwards) use i915.selftest=-1. + + Recommended for driver developers only. + + If in doubt, say "N". + +config DRM_I915_LOW_LEVEL_TRACEPOINTS + bool "Enable low level request tracing events" + depends on DRM_I915 + default n + help + Choose this option to turn on low level request tracing events. + This provides the ability to precisely monitor engine utilisation + and also analyze the request dependency resolving timeline. + + If in doubt, say "N". diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile index 74ca2e8b2494..53e30fcb2751 100644 --- a/drivers/gpu/drm/i915/Makefile +++ b/drivers/gpu/drm/i915/Makefile @@ -29,6 +29,7 @@ i915-$(CONFIG_DEBUG_FS) += i915_debugfs.o intel_pipe_crc.o # GEM code i915-y += i915_cmd_parser.o \ i915_gem_batch_pool.o \ + i915_gem_clflush.o \ i915_gem_context.o \ i915_gem_dmabuf.o \ i915_gem_evict.o \ @@ -72,6 +73,7 @@ i915-y += intel_audio.o \ intel_atomic.o \ intel_atomic_plane.o \ intel_bios.o \ + intel_cdclk.o \ intel_color.o \ intel_display.o \ intel_dpio_phy.o \ @@ -116,6 +118,9 @@ i915-y += dvo_ch7017.o \ # Post-mortem debug and GPU hang state capture i915-$(CONFIG_DRM_I915_CAPTURE_ERROR) += i915_gpu_error.o +i915-$(CONFIG_DRM_I915_SELFTEST) += \ + selftests/i915_random.o \ + selftests/i915_selftest.o # virtual gpu code i915-y += i915_vgpu.o diff --git a/drivers/gpu/drm/i915/gvt/cmd_parser.c b/drivers/gpu/drm/i915/gvt/cmd_parser.c index b9c8e2407682..81076d8ec41a 100644 --- a/drivers/gpu/drm/i915/gvt/cmd_parser.c +++ b/drivers/gpu/drm/i915/gvt/cmd_parser.c @@ -1530,7 +1530,7 @@ static int copy_gma_to_hva(struct intel_vgpu *vgpu, struct intel_vgpu_mm *mm, len += copy_len; gma += copy_len; } - return 0; + return len; } @@ -1644,7 +1644,7 @@ static int perform_bb_shadow(struct parser_exec_state *s) ret = copy_gma_to_hva(s->vgpu, s->vgpu->gtt.ggtt_mm, gma, gma + bb_size, dst); - if (ret) { + if (ret < 0) { gvt_err("fail to copy guest ring buffer\n"); goto unmap_src; } @@ -2608,11 +2608,8 @@ out: static int shadow_workload_ring_buffer(struct intel_vgpu_workload *workload) { struct intel_vgpu *vgpu = workload->vgpu; - int ring_id = workload->ring_id; - struct i915_gem_context *shadow_ctx = vgpu->shadow_ctx; - struct intel_ring *ring = shadow_ctx->engine[ring_id].ring; unsigned long gma_head, gma_tail, gma_top, guest_rb_size; - unsigned int copy_len = 0; + u32 *cs; int ret; guest_rb_size = _RING_CTL_BUF_SIZE(workload->rb_ctl); @@ -2626,36 +2623,33 @@ static int shadow_workload_ring_buffer(struct intel_vgpu_workload *workload) gma_top = workload->rb_start + guest_rb_size; /* allocate shadow ring buffer */ - ret = intel_ring_begin(workload->req, workload->rb_len / 4); - if (ret) - return ret; + cs = intel_ring_begin(workload->req, workload->rb_len / sizeof(u32)); + if (IS_ERR(cs)) + return PTR_ERR(cs); /* get shadow ring buffer va */ - workload->shadow_ring_buffer_va = ring->vaddr + ring->tail; + workload->shadow_ring_buffer_va = cs; /* head > tail --> copy head <-> top */ if (gma_head > gma_tail) { ret = copy_gma_to_hva(vgpu, vgpu->gtt.ggtt_mm, - gma_head, gma_top, - workload->shadow_ring_buffer_va); - if (ret) { + gma_head, gma_top, cs); + if (ret < 0) { gvt_err("fail to copy guest ring buffer\n"); return ret; } - copy_len = gma_top - gma_head; + cs += ret / sizeof(u32); gma_head = workload->rb_start; } /* copy head or start <-> tail */ - ret = copy_gma_to_hva(vgpu, vgpu->gtt.ggtt_mm, - gma_head, gma_tail, - workload->shadow_ring_buffer_va + copy_len); - if (ret) { + ret = copy_gma_to_hva(vgpu, vgpu->gtt.ggtt_mm, gma_head, gma_tail, cs); + if (ret < 0) { gvt_err("fail to copy guest ring buffer\n"); return ret; } - ring->tail += workload->rb_len; - intel_ring_advance(ring); + cs += ret / sizeof(u32); + intel_ring_advance(workload->req, cs); return 0; } @@ -2709,7 +2703,7 @@ static int shadow_indirect_ctx(struct intel_shadow_wa_ctx *wa_ctx) wa_ctx->workload->vgpu->gtt.ggtt_mm, guest_gma, guest_gma + ctx_size, map); - if (ret) { + if (ret < 0) { gvt_err("fail to copy guest indirect ctx\n"); goto unmap_src; } diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index fa69d72fdcb9..1a28b5279bec 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -61,6 +61,23 @@ drm_add_fake_info_node(struct drm_minor *minor, return 0; } +static __always_inline void seq_print_param(struct seq_file *m, + const char *name, + const char *type, + const void *x) +{ + if (!__builtin_strcmp(type, "bool")) + seq_printf(m, "i915.%s=%s\n", name, yesno(*(const bool *)x)); + else if (!__builtin_strcmp(type, "int")) + seq_printf(m, "i915.%s=%d\n", name, *(const int *)x); + else if (!__builtin_strcmp(type, "unsigned int")) + seq_printf(m, "i915.%s=%u\n", name, *(const unsigned int *)x); + else if (!__builtin_strcmp(type, "char *")) + seq_printf(m, "i915.%s=%s\n", name, *(const char **)x); + else + BUILD_BUG(); +} + static int i915_capabilities(struct seq_file *m, void *data) { struct drm_i915_private *dev_priv = node_to_i915(m->private); @@ -69,10 +86,17 @@ static int i915_capabilities(struct seq_file *m, void *data) seq_printf(m, "gen: %d\n", INTEL_GEN(dev_priv)); seq_printf(m, "platform: %s\n", intel_platform_name(info->platform)); seq_printf(m, "pch: %d\n", INTEL_PCH_TYPE(dev_priv)); + #define PRINT_FLAG(x) seq_printf(m, #x ": %s\n", yesno(info->x)) DEV_INFO_FOR_EACH_FLAG(PRINT_FLAG); #undef PRINT_FLAG + kernel_param_lock(THIS_MODULE); +#define PRINT_PARAM(T, x) seq_print_param(m, #x, #T, &i915.x); + I915_PARAMS_FOR_EACH(PRINT_PARAM); +#undef PRINT_PARAM + kernel_param_unlock(THIS_MODULE); + return 0; } @@ -454,7 +478,7 @@ static int i915_gem_object_info(struct seq_file *m, void *data) dpy_count, dpy_size); seq_printf(m, "%llu [%llu] gtt total\n", - ggtt->base.total, ggtt->mappable_end - ggtt->base.start); + ggtt->base.total, ggtt->mappable_end); seq_putc(m, '\n'); print_batch_pool_stats(m, dev_priv); @@ -853,10 +877,22 @@ static int i915_interrupt_info(struct seq_file *m, void *data) I915_READ(VLV_IIR_RW)); seq_printf(m, "Display IMR:\t%08x\n", I915_READ(VLV_IMR)); - for_each_pipe(dev_priv, pipe) + for_each_pipe(dev_priv, pipe) { + enum intel_display_power_domain power_domain; + + power_domain = POWER_DOMAIN_PIPE(pipe); + if (!intel_display_power_get_if_enabled(dev_priv, + power_domain)) { + seq_printf(m, "Pipe %c power disabled\n", + pipe_name(pipe)); + continue; + } + seq_printf(m, "Pipe %c stat:\t%08x\n", pipe_name(pipe), I915_READ(PIPESTAT(pipe))); + intel_display_power_put(dev_priv, power_domain); + } seq_printf(m, "Master IER:\t%08x\n", I915_READ(VLV_MASTER_IER)); @@ -954,101 +990,96 @@ static int i915_gem_fence_regs_info(struct seq_file *m, void *data) } #if IS_ENABLED(CONFIG_DRM_I915_CAPTURE_ERROR) - -static ssize_t -i915_error_state_write(struct file *filp, - const char __user *ubuf, - size_t cnt, - loff_t *ppos) +static ssize_t gpu_state_read(struct file *file, char __user *ubuf, + size_t count, loff_t *pos) { - struct i915_error_state_file_priv *error_priv = filp->private_data; - - DRM_DEBUG_DRIVER("Resetting error state\n"); - i915_destroy_error_state(error_priv->i915); + struct i915_gpu_state *error = file->private_data; + struct drm_i915_error_state_buf str; + ssize_t ret; + loff_t tmp; - return cnt; -} - -static int i915_error_state_open(struct inode *inode, struct file *file) -{ - struct drm_i915_private *dev_priv = inode->i_private; - struct i915_error_state_file_priv *error_priv; + if (!error) + return 0; - error_priv = kzalloc(sizeof(*error_priv), GFP_KERNEL); - if (!error_priv) - return -ENOMEM; + ret = i915_error_state_buf_init(&str, error->i915, count, *pos); + if (ret) + return ret; - error_priv->i915 = dev_priv; + ret = i915_error_state_to_str(&str, error); + if (ret) + goto out; - i915_error_state_get(&dev_priv->drm, error_priv); + tmp = 0; + ret = simple_read_from_buffer(ubuf, count, &tmp, str.buf, str.bytes); + if (ret < 0) + goto out; - file->private_data = error_priv; + *pos = str.start + ret; +out: + i915_error_state_buf_release(&str); + return ret; +} +static int gpu_state_release(struct inode *inode, struct file *file) +{ + i915_gpu_state_put(file->private_data); return 0; } -static int i915_error_state_release(struct inode *inode, struct file *file) +static int i915_gpu_info_open(struct inode *inode, struct file *file) { - struct i915_error_state_file_priv *error_priv = file->private_data; + struct i915_gpu_state *gpu; - i915_error_state_put(error_priv); - kfree(error_priv); + gpu = i915_capture_gpu_state(inode->i_private); + if (!gpu) + return -ENOMEM; + file->private_data = gpu; return 0; } -static ssize_t i915_error_state_read(struct file *file, char __user *userbuf, - size_t count, loff_t *pos) +static const struct file_operations i915_gpu_info_fops = { + .owner = THIS_MODULE, + .open = i915_gpu_info_open, + .read = gpu_state_read, + .llseek = default_llseek, + .release = gpu_state_release, +}; + +static ssize_t +i915_error_state_write(struct file *filp, + const char __user *ubuf, + size_t cnt, + loff_t *ppos) { - struct i915_error_state_file_priv *error_priv = file->private_data; - struct drm_i915_error_state_buf error_str; - loff_t tmp_pos = 0; - ssize_t ret_count = 0; - int ret; + struct i915_gpu_state *error = filp->private_data; - ret = i915_error_state_buf_init(&error_str, error_priv->i915, - count, *pos); - if (ret) - return ret; + if (!error) + return 0; - ret = i915_error_state_to_str(&error_str, error_priv); - if (ret) - goto out; + DRM_DEBUG_DRIVER("Resetting error state\n"); + i915_reset_error_state(error->i915); - ret_count = simple_read_from_buffer(userbuf, count, &tmp_pos, - error_str.buf, - error_str.bytes); + return cnt; +} - if (ret_count < 0) - ret = ret_count; - else - *pos = error_str.start + ret_count; -out: - i915_error_state_buf_release(&error_str); - return ret ?: ret_count; +static int i915_error_state_open(struct inode *inode, struct file *file) +{ + file->private_data = i915_first_error_state(inode->i_private); + return 0; } static const struct file_operations i915_error_state_fops = { .owner = THIS_MODULE, .open = i915_error_state_open, - .read = i915_error_state_read, + .read = gpu_state_read, .write = i915_error_state_write, .llseek = default_llseek, - .release = i915_error_state_release, + .release = gpu_state_release, }; - #endif static int -i915_next_seqno_get(void *data, u64 *val) -{ - struct drm_i915_private *dev_priv = data; - - *val = 1 + atomic_read(&dev_priv->gt.global_timeline.seqno); - return 0; -} - -static int i915_next_seqno_set(void *data, u64 val) { struct drm_i915_private *dev_priv = data; @@ -1066,13 +1097,12 @@ i915_next_seqno_set(void *data, u64 val) } DEFINE_SIMPLE_ATTRIBUTE(i915_next_seqno_fops, - i915_next_seqno_get, i915_next_seqno_set, + NULL, i915_next_seqno_set, "0x%llx\n"); static int i915_frequency_info(struct seq_file *m, void *unused) { struct drm_i915_private *dev_priv = node_to_i915(m->private); - struct drm_device *dev = &dev_priv->drm; int ret = 0; intel_runtime_pm_get(dev_priv); @@ -1135,10 +1165,6 @@ static int i915_frequency_info(struct seq_file *m, void *unused) } /* RPSTAT1 is in the GT power well */ - ret = mutex_lock_interruptible(&dev->struct_mutex); - if (ret) - goto out; - intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL); reqf = I915_READ(GEN6_RPNSWREQ); @@ -1173,7 +1199,6 @@ static int i915_frequency_info(struct seq_file *m, void *unused) cagf = intel_gpu_freq(dev_priv, cagf); intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL); - mutex_unlock(&dev->struct_mutex); if (IS_GEN6(dev_priv) || IS_GEN7(dev_priv)) { pm_ier = I915_READ(GEN6_PMIER); @@ -1224,21 +1249,18 @@ static int i915_frequency_info(struct seq_file *m, void *unused) max_freq = (IS_GEN9_LP(dev_priv) ? rp_state_cap >> 0 : rp_state_cap >> 16) & 0xff; - max_freq *= (IS_SKYLAKE(dev_priv) || IS_KABYLAKE(dev_priv) ? - GEN9_FREQ_SCALER : 1); + max_freq *= (IS_GEN9_BC(dev_priv) ? GEN9_FREQ_SCALER : 1); seq_printf(m, "Lowest (RPN) frequency: %dMHz\n", intel_gpu_freq(dev_priv, max_freq)); max_freq = (rp_state_cap & 0xff00) >> 8; - max_freq *= (IS_SKYLAKE(dev_priv) || IS_KABYLAKE(dev_priv) ? - GEN9_FREQ_SCALER : 1); + max_freq *= (IS_GEN9_BC(dev_priv) ? GEN9_FREQ_SCALER : 1); seq_printf(m, "Nominal (RP1) frequency: %dMHz\n", intel_gpu_freq(dev_priv, max_freq)); max_freq = (IS_GEN9_LP(dev_priv) ? rp_state_cap >> 16 : rp_state_cap >> 0) & 0xff; - max_freq *= (IS_SKYLAKE(dev_priv) || IS_KABYLAKE(dev_priv) ? - GEN9_FREQ_SCALER : 1); + max_freq *= (IS_GEN9_BC(dev_priv) ? GEN9_FREQ_SCALER : 1); seq_printf(m, "Max non-overclocked (RP0) frequency: %dMHz\n", intel_gpu_freq(dev_priv, max_freq)); seq_printf(m, "Max overclocked frequency: %dMHz\n", @@ -1262,11 +1284,10 @@ static int i915_frequency_info(struct seq_file *m, void *unused) seq_puts(m, "no P-state info available\n"); } - seq_printf(m, "Current CD clock frequency: %d kHz\n", dev_priv->cdclk_freq); + seq_printf(m, "Current CD clock frequency: %d kHz\n", dev_priv->cdclk.hw.cdclk); seq_printf(m, "Max CD clock frequency: %d kHz\n", dev_priv->max_cdclk_freq); seq_printf(m, "Max pixel clock frequency: %d kHz\n", dev_priv->max_dotclk_freq); -out: intel_runtime_pm_put(dev_priv); return ret; } @@ -1814,7 +1835,7 @@ static int i915_ring_freq_table(struct seq_file *m, void *unused) if (ret) goto out; - if (IS_SKYLAKE(dev_priv) || IS_KABYLAKE(dev_priv)) { + if (IS_GEN9_BC(dev_priv)) { /* Convert GT frequency to 50 HZ units */ min_gpu_freq = dev_priv->rps.min_freq_softlimit / GEN9_FREQ_SCALER; @@ -1834,8 +1855,8 @@ static int i915_ring_freq_table(struct seq_file *m, void *unused) &ia_freq); seq_printf(m, "%d\t\t%d\t\t\t\t%d\n", intel_gpu_freq(dev_priv, (gpu_freq * - (IS_SKYLAKE(dev_priv) || IS_KABYLAKE(dev_priv) ? - GEN9_FREQ_SCALER : 1))), + (IS_GEN9_BC(dev_priv) ? + GEN9_FREQ_SCALER : 1))), ((ia_freq >> 0) & 0xff) * 100, ((ia_freq >> 8) & 0xff) * 100); } @@ -2328,10 +2349,10 @@ static int i915_rps_boost_info(struct seq_file *m, void *data) seq_printf(m, "\nRPS Autotuning (current \"%s\" window):\n", rps_power_to_str(dev_priv->rps.power)); seq_printf(m, " Avg. up: %d%% [above threshold? %d%%]\n", - 100 * rpup / rpupei, + rpup && rpupei ? 100 * rpup / rpupei : 0, dev_priv->rps.up_threshold); seq_printf(m, " Avg. down: %d%% [below threshold? %d%%]\n", - 100 * rpdown / rpdownei, + rpdown && rpdownei ? 100 * rpdown / rpdownei : 0, dev_priv->rps.down_threshold); } else { seq_puts(m, "\nRPS Autotuning inactive\n"); @@ -2377,7 +2398,9 @@ static int i915_huc_load_status_info(struct seq_file *m, void *data) seq_printf(m, "\tRSA: offset is %d; size = %d\n", huc_fw->rsa_offset, huc_fw->rsa_size); + intel_runtime_pm_get(dev_priv); seq_printf(m, "\nHuC status 0x%08x:\n", I915_READ(HUC_STATUS2)); + intel_runtime_pm_put(dev_priv); return 0; } @@ -2409,6 +2432,8 @@ static int i915_guc_load_status_info(struct seq_file *m, void *data) seq_printf(m, "\tRSA: offset is %d; size = %d\n", guc_fw->rsa_offset, guc_fw->rsa_size); + intel_runtime_pm_get(dev_priv); + tmp = I915_READ(GUC_STATUS); seq_printf(m, "\nGuC status 0x%08x:\n", tmp); @@ -2422,6 +2447,8 @@ static int i915_guc_load_status_info(struct seq_file *m, void *data) for (i = 0; i < 16; i++) seq_printf(m, "\t%2d: \t0x%x\n", i, I915_READ(SOFT_SCRATCH(i))); + intel_runtime_pm_put(dev_priv); + return 0; } @@ -2803,15 +2830,10 @@ static int i915_power_domain_info(struct seq_file *m, void *unused) seq_printf(m, "%-25s %d\n", power_well->name, power_well->count); - for (power_domain = 0; power_domain < POWER_DOMAIN_NUM; - power_domain++) { - if (!(BIT(power_domain) & power_well->domains)) - continue; - + for_each_power_domain(power_domain, power_well->domains) seq_printf(m, " %-23s %d\n", intel_display_power_domain_str(power_domain), power_domains->domain_use_count[power_domain]); - } } mutex_unlock(&power_domains->lock); @@ -3320,15 +3342,21 @@ static int i915_engine_info(struct seq_file *m, void *unused) rcu_read_lock(); rq = READ_ONCE(engine->execlist_port[0].request); - if (rq) - print_request(m, rq, "\t\tELSP[0] "); - else + if (rq) { + seq_printf(m, "\t\tELSP[0] count=%d, ", + engine->execlist_port[0].count); + print_request(m, rq, "rq: "); + } else { seq_printf(m, "\t\tELSP[0] idle\n"); + } rq = READ_ONCE(engine->execlist_port[1].request); - if (rq) - print_request(m, rq, "\t\tELSP[1] "); - else + if (rq) { + seq_printf(m, "\t\tELSP[1] count=%d, ", + engine->execlist_port[1].count); + print_request(m, rq, "rq: "); + } else { seq_printf(m, "\t\tELSP[1] idle\n"); + } rcu_read_unlock(); spin_lock_irq(&engine->timeline->lock); @@ -3772,7 +3800,19 @@ static int i915_displayport_test_data_show(struct seq_file *m, void *data) if (connector->status == connector_status_connected && connector->encoder != NULL) { intel_dp = enc_to_intel_dp(connector->encoder); - seq_printf(m, "%lx", intel_dp->compliance.test_data.edid); + if (intel_dp->compliance.test_type == + DP_TEST_LINK_EDID_READ) + seq_printf(m, "%lx", + intel_dp->compliance.test_data.edid); + else if (intel_dp->compliance.test_type == + DP_TEST_LINK_VIDEO_PATTERN) { + seq_printf(m, "hdisplay: %d\n", + intel_dp->compliance.test_data.hdisplay); + seq_printf(m, "vdisplay: %d\n", + intel_dp->compliance.test_data.vdisplay); + seq_printf(m, "bpc: %u\n", + intel_dp->compliance.test_data.bpc); + } } else seq_puts(m, "0"); } @@ -4263,7 +4303,8 @@ i915_max_freq_set(void *data, u64 val) dev_priv->rps.max_freq_softlimit = val; - intel_set_rps(dev_priv, val); + if (intel_set_rps(dev_priv, val)) + DRM_DEBUG_DRIVER("failed to update RPS to new softlimit\n"); mutex_unlock(&dev_priv->rps.hw_lock); @@ -4318,7 +4359,8 @@ i915_min_freq_set(void *data, u64 val) dev_priv->rps.min_freq_softlimit = val; - intel_set_rps(dev_priv, val); + if (intel_set_rps(dev_priv, val)) + DRM_DEBUG_DRIVER("failed to update RPS to new softlimit\n"); mutex_unlock(&dev_priv->rps.hw_lock); @@ -4444,7 +4486,7 @@ static void gen9_sseu_device_status(struct drm_i915_private *dev_priv, sseu->slice_mask |= BIT(s); - if (IS_SKYLAKE(dev_priv) || IS_KABYLAKE(dev_priv)) + if (IS_GEN9_BC(dev_priv)) sseu->subslice_mask = INTEL_INFO(dev_priv)->sseu.subslice_mask; @@ -4607,6 +4649,81 @@ static int i915_forcewake_create(struct dentry *root, struct drm_minor *minor) return drm_add_fake_info_node(minor, ent, &i915_forcewake_fops); } +static int i915_hpd_storm_ctl_show(struct seq_file *m, void *data) +{ + struct drm_i915_private *dev_priv = m->private; + struct i915_hotplug *hotplug = &dev_priv->hotplug; + + seq_printf(m, "Threshold: %d\n", hotplug->hpd_storm_threshold); + seq_printf(m, "Detected: %s\n", + yesno(delayed_work_pending(&hotplug->reenable_work))); + + return 0; +} + +static ssize_t i915_hpd_storm_ctl_write(struct file *file, + const char __user *ubuf, size_t len, + loff_t *offp) +{ + struct seq_file *m = file->private_data; + struct drm_i915_private *dev_priv = m->private; + struct i915_hotplug *hotplug = &dev_priv->hotplug; + unsigned int new_threshold; + int i; + char *newline; + char tmp[16]; + + if (len >= sizeof(tmp)) + return -EINVAL; + + if (copy_from_user(tmp, ubuf, len)) + return -EFAULT; + + tmp[len] = '\0'; + + /* Strip newline, if any */ + newline = strchr(tmp, '\n'); + if (newline) + *newline = '\0'; + + if (strcmp(tmp, "reset") == 0) + new_threshold = HPD_STORM_DEFAULT_THRESHOLD; + else if (kstrtouint(tmp, 10, &new_threshold) != 0) + return -EINVAL; + + if (new_threshold > 0) + DRM_DEBUG_KMS("Setting HPD storm detection threshold to %d\n", + new_threshold); + else + DRM_DEBUG_KMS("Disabling HPD storm detection\n"); + + spin_lock_irq(&dev_priv->irq_lock); + hotplug->hpd_storm_threshold = new_threshold; + /* Reset the HPD storm stats so we don't accidentally trigger a storm */ + for_each_hpd_pin(i) + hotplug->stats[i].count = 0; + spin_unlock_irq(&dev_priv->irq_lock); + + /* Re-enable hpd immediately if we were in an irq storm */ + flush_delayed_work(&dev_priv->hotplug.reenable_work); + + return len; +} + +static int i915_hpd_storm_ctl_open(struct inode *inode, struct file *file) +{ + return single_open(file, i915_hpd_storm_ctl_show, inode->i_private); +} + +static const struct file_operations i915_hpd_storm_ctl_fops = { + .owner = THIS_MODULE, + .open = i915_hpd_storm_ctl_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, + .write = i915_hpd_storm_ctl_write +}; + static int i915_debugfs_create(struct dentry *root, struct drm_minor *minor, const char *name, @@ -4690,6 +4807,7 @@ static const struct i915_debugfs_files { {"i915_gem_drop_caches", &i915_drop_caches_fops}, #if IS_ENABLED(CONFIG_DRM_I915_CAPTURE_ERROR) {"i915_error_state", &i915_error_state_fops}, + {"i915_gpu_info", &i915_gpu_info_fops}, #endif {"i915_next_seqno", &i915_next_seqno_fops}, {"i915_display_crc_ctl", &i915_display_crc_ctl_fops}, @@ -4700,7 +4818,8 @@ static const struct i915_debugfs_files { {"i915_dp_test_data", &i915_displayport_test_data_fops}, {"i915_dp_test_type", &i915_displayport_test_type_fops}, {"i915_dp_test_active", &i915_displayport_test_active_fops}, - {"i915_guc_log_control", &i915_guc_log_control_fops} + {"i915_guc_log_control", &i915_guc_log_control_fops}, + {"i915_hpd_storm_ctl", &i915_hpd_storm_ctl_fops} }; int i915_debugfs_register(struct drm_i915_private *dev_priv) diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c index f6017f2cfb86..9be9b9b7f9cb 100644 --- a/drivers/gpu/drm/i915/i915_drv.c +++ b/drivers/gpu/drm/i915/i915_drv.c @@ -43,6 +43,7 @@ #include <drm/drmP.h> #include <drm/drm_crtc_helper.h> +#include <drm/drm_atomic_helper.h> #include <drm/i915_drm.h> #include "i915_drv.h" @@ -248,6 +249,7 @@ static int i915_getparam(struct drm_device *dev, void *data, case I915_PARAM_IRQ_ACTIVE: case I915_PARAM_ALLOW_BATCHBUFFER: case I915_PARAM_LAST_DISPATCH: + case I915_PARAM_HAS_EXEC_CONSTANTS: /* Reject all old ums/dri params. */ return -ENODEV; case I915_PARAM_CHIPSET_ID: @@ -274,9 +276,6 @@ static int i915_getparam(struct drm_device *dev, void *data, case I915_PARAM_HAS_BSD2: value = !!dev_priv->engine[VCS2]; break; - case I915_PARAM_HAS_EXEC_CONSTANTS: - value = INTEL_GEN(dev_priv) >= 4; - break; case I915_PARAM_HAS_LLC: value = HAS_LLC(dev_priv); break; @@ -318,10 +317,9 @@ static int i915_getparam(struct drm_device *dev, void *data, value = INTEL_INFO(dev_priv)->sseu.min_eu_in_pool; break; case I915_PARAM_HUC_STATUS: - /* The register is already force-woken. We dont need - * any rpm here - */ + intel_runtime_pm_get(dev_priv); value = I915_READ(HUC_STATUS2) & HUC_FW_VERIFIED; + intel_runtime_pm_put(dev_priv); break; case I915_PARAM_MMAP_GTT_VERSION: /* Though we've started our numbering from 1, and so class all @@ -350,6 +348,8 @@ static int i915_getparam(struct drm_device *dev, void *data, case I915_PARAM_HAS_EXEC_HANDLE_LUT: case I915_PARAM_HAS_COHERENT_PHYS_GTT: case I915_PARAM_HAS_EXEC_SOFTPIN: + case I915_PARAM_HAS_EXEC_ASYNC: + case I915_PARAM_HAS_EXEC_FENCE: /* For the time being all of these are always true; * if some supported hardware does not have one of these * features this value needs to be provided from @@ -756,6 +756,15 @@ out_err: return -ENOMEM; } +static void i915_engines_cleanup(struct drm_i915_private *i915) +{ + struct intel_engine_cs *engine; + enum intel_engine_id id; + + for_each_engine(engine, i915, id) + kfree(engine); +} + static void i915_workqueues_cleanup(struct drm_i915_private *dev_priv) { destroy_workqueue(dev_priv->hotplug.dp_wq); @@ -769,10 +778,17 @@ static void i915_workqueues_cleanup(struct drm_i915_private *dev_priv) */ static void intel_detect_preproduction_hw(struct drm_i915_private *dev_priv) { - if (IS_HSW_EARLY_SDV(dev_priv) || - IS_SKL_REVID(dev_priv, 0, SKL_REVID_F0)) + bool pre = false; + + pre |= IS_HSW_EARLY_SDV(dev_priv); + pre |= IS_SKL_REVID(dev_priv, 0, SKL_REVID_F0); + pre |= IS_BXT_REVID(dev_priv, 0, BXT_REVID_B_LAST); + + if (pre) { DRM_ERROR("This is a pre-production stepping. " "It may not be fully functional.\n"); + add_taint(TAINT_MACHINE_CHECK, LOCKDEP_STILL_OK); + } } /** @@ -808,6 +824,7 @@ static int i915_driver_init_early(struct drm_i915_private *dev_priv, spin_lock_init(&dev_priv->gpu_error.lock); mutex_init(&dev_priv->backlight_lock); spin_lock_init(&dev_priv->uncore.lock); + spin_lock_init(&dev_priv->mm.object_stat_lock); spin_lock_init(&dev_priv->mmio_flip_lock); spin_lock_init(&dev_priv->wm.dsparb_lock); @@ -818,12 +835,15 @@ static int i915_driver_init_early(struct drm_i915_private *dev_priv, mutex_init(&dev_priv->pps_mutex); intel_uc_init_early(dev_priv); - i915_memcpy_init_early(dev_priv); + ret = intel_engines_init_early(dev_priv); + if (ret) + return ret; + ret = i915_workqueues_init(dev_priv); if (ret < 0) - return ret; + goto err_engines; /* This must be called before any calls to HAS_PCH_* */ intel_detect_pch(dev_priv); @@ -852,6 +872,8 @@ static int i915_driver_init_early(struct drm_i915_private *dev_priv, err_workqueues: i915_workqueues_cleanup(dev_priv); +err_engines: + i915_engines_cleanup(dev_priv); return ret; } @@ -864,6 +886,7 @@ static void i915_driver_cleanup_early(struct drm_i915_private *dev_priv) i915_perf_fini(dev_priv); i915_gem_load_cleanup(dev_priv); i915_workqueues_cleanup(dev_priv); + i915_engines_cleanup(dev_priv); } static int i915_mmio_setup(struct drm_i915_private *dev_priv) @@ -930,6 +953,7 @@ static int i915_driver_init_mmio(struct drm_i915_private *dev_priv) goto put_bridge; intel_uncore_init(dev_priv); + i915_gem_init_mmio(dev_priv); return 0; @@ -967,7 +991,7 @@ static void intel_sanitize_options(struct drm_i915_private *dev_priv) DRM_DEBUG_DRIVER("ppgtt mode: %i\n", i915.enable_ppgtt); i915.semaphores = intel_sanitize_semaphores(dev_priv, i915.semaphores); - DRM_DEBUG_DRIVER("use GPU sempahores? %s\n", yesno(i915.semaphores)); + DRM_DEBUG_DRIVER("use GPU semaphores? %s\n", yesno(i915.semaphores)); } /** @@ -1186,11 +1210,15 @@ static void i915_driver_unregister(struct drm_i915_private *dev_priv) */ int i915_driver_load(struct pci_dev *pdev, const struct pci_device_id *ent) { + const struct intel_device_info *match_info = + (struct intel_device_info *)ent->driver_data; struct drm_i915_private *dev_priv; int ret; - if (i915.nuclear_pageflip) - driver.driver_features |= DRIVER_ATOMIC; + /* Enable nuclear pageflip on ILK+, except vlv/chv */ + if (!i915.nuclear_pageflip && + (match_info->gen < 5 || match_info->has_gmch_display)) + driver.driver_features &= ~DRIVER_ATOMIC; ret = -ENOMEM; dev_priv = kzalloc(sizeof(*dev_priv), GFP_KERNEL); @@ -1198,8 +1226,7 @@ int i915_driver_load(struct pci_dev *pdev, const struct pci_device_id *ent) ret = drm_dev_init(&dev_priv->drm, &driver, &pdev->dev); if (ret) { DRM_DEV_ERROR(&pdev->dev, "allocation failed\n"); - kfree(dev_priv); - return ret; + goto out_free; } dev_priv->drm.pdev = pdev; @@ -1207,7 +1234,7 @@ int i915_driver_load(struct pci_dev *pdev, const struct pci_device_id *ent) ret = pci_enable_device(pdev); if (ret) - goto out_free_priv; + goto out_fini; pci_set_drvdata(pdev, &dev_priv->drm); @@ -1271,9 +1298,11 @@ out_runtime_pm_put: i915_driver_cleanup_early(dev_priv); out_pci_disable: pci_disable_device(pdev); -out_free_priv: +out_fini: i915_load_error(dev_priv, "Device initialization failed (%d)\n", ret); - drm_dev_unref(&dev_priv->drm); + drm_dev_fini(&dev_priv->drm); +out_free: + kfree(dev_priv); return ret; } @@ -1281,6 +1310,8 @@ void i915_driver_unload(struct drm_device *dev) { struct drm_i915_private *dev_priv = to_i915(dev); struct pci_dev *pdev = dev_priv->drm.pdev; + struct drm_modeset_acquire_ctx ctx; + int ret; intel_fbdev_fini(dev); @@ -1289,6 +1320,24 @@ void i915_driver_unload(struct drm_device *dev) intel_display_power_get(dev_priv, POWER_DOMAIN_INIT); + drm_modeset_acquire_init(&ctx, 0); + while (1) { + ret = drm_modeset_lock_all_ctx(dev, &ctx); + if (!ret) + ret = drm_atomic_helper_disable_all(dev, &ctx); + + if (ret != -EDEADLK) + break; + + drm_modeset_backoff(&ctx); + } + + if (ret) + DRM_ERROR("Disabling all crtc's during unload failed with %i\n", ret); + + drm_modeset_drop_locks(&ctx); + drm_modeset_acquire_fini(&ctx); + intel_gvt_cleanup(dev_priv); i915_driver_unregister(dev_priv); @@ -1318,7 +1367,7 @@ void i915_driver_unload(struct drm_device *dev) /* Free error state after interrupts are fully disabled. */ cancel_delayed_work_sync(&dev_priv->gpu_error.hangcheck_work); - i915_destroy_error_state(dev_priv); + i915_reset_error_state(dev_priv); /* Flush any outstanding unpin_work. */ drain_workqueue(dev_priv->wq); @@ -1334,8 +1383,16 @@ void i915_driver_unload(struct drm_device *dev) i915_driver_cleanup_mmio(dev_priv); intel_display_power_put(dev_priv, POWER_DOMAIN_INIT); +} + +static void i915_driver_release(struct drm_device *dev) +{ + struct drm_i915_private *dev_priv = to_i915(dev); i915_driver_cleanup_early(dev_priv); + drm_dev_fini(&dev_priv->drm); + + kfree(dev_priv); } static int i915_driver_open(struct drm_device *dev, struct drm_file *file) @@ -1717,6 +1774,8 @@ static int i915_drm_resume_early(struct drm_device *dev) !(dev_priv->suspended_to_idle && dev_priv->csr.dmc_payload)) intel_power_domains_init_hw(dev_priv, true); + i915_gem_sanitize(dev_priv); + enable_rpm_wakeref_asserts(dev_priv); out: @@ -1788,7 +1847,7 @@ void i915_reset(struct drm_i915_private *dev_priv) goto error; } - i915_gem_reset_finish(dev_priv); + i915_gem_reset(dev_priv); intel_overlay_reset(dev_priv); /* Ok, now get things going again... */ @@ -1814,6 +1873,7 @@ void i915_reset(struct drm_i915_private *dev_priv) i915_queue_hangcheck(dev_priv); wakeup: + i915_gem_reset_finish(dev_priv); enable_irq(dev_priv->drm.irq); wake_up_bit(&error->flags, I915_RESET_IN_PROGRESS); return; @@ -2533,7 +2593,7 @@ static const struct drm_ioctl_desc i915_ioctls[] = { DRM_IOCTL_DEF_DRV(I915_HWS_ADDR, drm_noop, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY), DRM_IOCTL_DEF_DRV(I915_GEM_INIT, drm_noop, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY), DRM_IOCTL_DEF_DRV(I915_GEM_EXECBUFFER, i915_gem_execbuffer, DRM_AUTH), - DRM_IOCTL_DEF_DRV(I915_GEM_EXECBUFFER2, i915_gem_execbuffer2, DRM_AUTH|DRM_RENDER_ALLOW), + DRM_IOCTL_DEF_DRV(I915_GEM_EXECBUFFER2_WR, i915_gem_execbuffer2, DRM_AUTH|DRM_RENDER_ALLOW), DRM_IOCTL_DEF_DRV(I915_GEM_PIN, i915_gem_reject_pin_ioctl, DRM_AUTH|DRM_ROOT_ONLY), DRM_IOCTL_DEF_DRV(I915_GEM_UNPIN, i915_gem_reject_pin_ioctl, DRM_AUTH|DRM_ROOT_ONLY), DRM_IOCTL_DEF_DRV(I915_GEM_BUSY, i915_gem_busy_ioctl, DRM_AUTH|DRM_RENDER_ALLOW), @@ -2575,7 +2635,8 @@ static struct drm_driver driver = { */ .driver_features = DRIVER_HAVE_IRQ | DRIVER_IRQ_SHARED | DRIVER_GEM | DRIVER_PRIME | - DRIVER_RENDER | DRIVER_MODESET, + DRIVER_RENDER | DRIVER_MODESET | DRIVER_ATOMIC, + .release = i915_driver_release, .open = i915_driver_open, .lastclose = i915_driver_lastclose, .preclose = i915_driver_preclose, @@ -2604,3 +2665,7 @@ static struct drm_driver driver = { .minor = DRIVER_MINOR, .patchlevel = DRIVER_PATCHLEVEL, }; + +#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) +#include "selftests/mock_drm.c" +#endif diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index e44c598ecb82..933874ddea75 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -79,8 +79,8 @@ #define DRIVER_NAME "i915" #define DRIVER_DESC "Intel Graphics" -#define DRIVER_DATE "20170123" -#define DRIVER_TIMESTAMP 1485156432 +#define DRIVER_DATE "20170206" +#define DRIVER_TIMESTAMP 1486372993 #undef WARN_ON /* Many gcc seem to no see through this and fall over :( */ @@ -293,6 +293,7 @@ enum plane_id { PLANE_PRIMARY, PLANE_SPRITE0, PLANE_SPRITE1, + PLANE_SPRITE2, PLANE_CURSOR, I915_MAX_PLANES, }; @@ -343,6 +344,11 @@ enum intel_display_power_domain { POWER_DOMAIN_PORT_DDI_C_LANES, POWER_DOMAIN_PORT_DDI_D_LANES, POWER_DOMAIN_PORT_DDI_E_LANES, + POWER_DOMAIN_PORT_DDI_A_IO, + POWER_DOMAIN_PORT_DDI_B_IO, + POWER_DOMAIN_PORT_DDI_C_IO, + POWER_DOMAIN_PORT_DDI_D_IO, + POWER_DOMAIN_PORT_DDI_E_IO, POWER_DOMAIN_PORT_DSI, POWER_DOMAIN_PORT_CRT, POWER_DOMAIN_PORT_OTHER, @@ -384,6 +390,8 @@ enum hpd_pin { #define for_each_hpd_pin(__pin) \ for ((__pin) = (HPD_NONE + 1); (__pin) < HPD_NUM_PINS; (__pin)++) +#define HPD_STORM_DEFAULT_THRESHOLD 5 + struct i915_hotplug { struct work_struct hotplug_work; @@ -407,6 +415,8 @@ struct i915_hotplug { struct work_struct poll_init_work; bool poll_enabled; + unsigned int hpd_storm_threshold; + /* * if we get a HPD irq from DP and a HPD irq from non-DP * the non-DP HPD could block the workqueue on a mode config @@ -494,7 +504,27 @@ struct i915_hotplug { #define for_each_power_domain(domain, mask) \ for ((domain) = 0; (domain) < POWER_DOMAIN_NUM; (domain)++) \ - for_each_if ((1 << (domain)) & (mask)) + for_each_if (BIT_ULL(domain) & (mask)) + +#define for_each_power_well(__dev_priv, __power_well) \ + for ((__power_well) = (__dev_priv)->power_domains.power_wells; \ + (__power_well) - (__dev_priv)->power_domains.power_wells < \ + (__dev_priv)->power_domains.power_well_count; \ + (__power_well)++) + +#define for_each_power_well_rev(__dev_priv, __power_well) \ + for ((__power_well) = (__dev_priv)->power_domains.power_wells + \ + (__dev_priv)->power_domains.power_well_count - 1; \ + (__power_well) - (__dev_priv)->power_domains.power_wells >= 0; \ + (__power_well)--) + +#define for_each_power_domain_well(__dev_priv, __power_well, __domain_mask) \ + for_each_power_well(__dev_priv, __power_well) \ + for_each_if ((__power_well)->domains & (__domain_mask)) + +#define for_each_power_domain_well_rev(__dev_priv, __power_well, __domain_mask) \ + for_each_power_well_rev(__dev_priv, __power_well) \ + for_each_if ((__power_well)->domains & (__domain_mask)) struct drm_i915_private; struct i915_mm_struct; @@ -600,9 +630,13 @@ struct intel_initial_plane_config; struct intel_crtc; struct intel_limit; struct dpll; +struct intel_cdclk_state; struct drm_i915_display_funcs { - int (*get_display_clock_speed)(struct drm_i915_private *dev_priv); + void (*get_cdclk)(struct drm_i915_private *dev_priv, + struct intel_cdclk_state *cdclk_state); + void (*set_cdclk)(struct drm_i915_private *dev_priv, + const struct intel_cdclk_state *cdclk_state); int (*get_fifo_size)(struct drm_i915_private *dev_priv, int plane); int (*compute_pipe_wm)(struct intel_crtc_state *cstate); int (*compute_intermediate_wm)(struct drm_device *dev, @@ -617,7 +651,6 @@ struct drm_i915_display_funcs { int (*compute_global_watermarks)(struct drm_atomic_state *state); void (*update_wm)(struct intel_crtc *crtc); int (*modeset_calc_cdclk)(struct drm_atomic_state *state); - void (*modeset_commit_cdclk)(struct drm_atomic_state *state); /* Returns the active state of the crtc, and if the crtc is active, * fills out the pipe-config with the hw state. */ bool (*get_pipe_config)(struct intel_crtc *, @@ -890,7 +923,7 @@ struct intel_device_info { struct intel_display_error_state; -struct drm_i915_error_state { +struct i915_gpu_state { struct kref ref; struct timeval time; struct timeval boottime; @@ -904,12 +937,13 @@ struct drm_i915_error_state { u32 reset_count; u32 suspend_count; struct intel_device_info device_info; + struct i915_params params; /* Generic register state */ u32 eir; u32 pgtbl_er; u32 ier; - u32 gtier[4]; + u32 gtier[4], ngtier; u32 ccid; u32 derrmr; u32 forcewake; @@ -923,6 +957,7 @@ struct drm_i915_error_state { u32 gab_ctl; u32 gfx_mode; + u32 nfence; u64 fence[I915_MAX_NUM_FENCES]; struct intel_overlay_error_state *overlay; struct intel_display_error_state *display; @@ -970,6 +1005,16 @@ struct drm_i915_error_state { u32 semaphore_mboxes[I915_NUM_ENGINES - 1]; struct intel_instdone instdone; + struct drm_i915_error_context { + char comm[TASK_COMM_LEN]; + pid_t pid; + u32 handle; + u32 hw_id; + int ban_score; + int active; + int guilty; + } context; + struct drm_i915_error_object { u64 gtt_offset; u64 gtt_size; @@ -1003,10 +1048,6 @@ struct drm_i915_error_state { u32 pp_dir_base; }; } vm_info; - - pid_t pid; - char comm[TASK_COMM_LEN]; - int context_bans; } engine[I915_NUM_ENGINES]; struct drm_i915_error_buffer { @@ -1395,7 +1436,7 @@ struct i915_power_well { int count; /* cached hw enabled state */ bool hw_enabled; - unsigned long domains; + u64 domains; /* unique identifier for this power well */ unsigned long id; /* @@ -1456,7 +1497,7 @@ struct i915_gem_mm { struct work_struct free_work; /** Usable portion of the GTT for GEM */ - phys_addr_t stolen_base; /* limited to low memory (32-bit) */ + dma_addr_t stolen_base; /* limited to low memory (32-bit) */ /** PPGTT used for aliasing the PPGTT with the GTT */ struct i915_hw_ppgtt *aliasing_ppgtt; @@ -1498,11 +1539,6 @@ struct drm_i915_error_state_buf { loff_t pos; }; -struct i915_error_state_file_priv { - struct drm_i915_private *i915; - struct drm_i915_error_state *error; -}; - #define I915_RESET_TIMEOUT (10 * HZ) /* 10s */ #define I915_FENCE_TIMEOUT (10 * HZ) /* 10s */ @@ -1519,7 +1555,7 @@ struct i915_gpu_error { /* For reset and error_state handling. */ spinlock_t lock; /* Protected by the above dev->gpu_error.lock. */ - struct drm_i915_error_state *first_error; + struct i915_gpu_state *first_error; unsigned long missed_irq_rings; @@ -2053,6 +2089,10 @@ struct i915_oa_ops { bool (*oa_buffer_is_empty)(struct drm_i915_private *dev_priv); }; +struct intel_cdclk_state { + unsigned int cdclk, vco, ref; +}; + struct drm_i915_private { struct drm_device drm; @@ -2063,8 +2103,6 @@ struct drm_i915_private { const struct intel_device_info info; - int relative_constants_mode; - void __iomem *regs; struct intel_uncore uncore; @@ -2157,13 +2195,7 @@ struct drm_i915_private { unsigned int fsb_freq, mem_freq, is_ddr3; unsigned int skl_preferred_vco_freq; - unsigned int cdclk_freq, max_cdclk_freq; - - /* - * For reading holding any crtc lock is sufficient, - * for writing must hold all of them. - */ - unsigned int atomic_cdclk_freq; + unsigned int max_cdclk_freq; unsigned int max_dotclk_freq; unsigned int rawclk_freq; @@ -2171,8 +2203,22 @@ struct drm_i915_private { unsigned int czclk_freq; struct { - unsigned int vco, ref; - } cdclk_pll; + /* + * The current logical cdclk state. + * See intel_atomic_state.cdclk.logical + * + * For reading holding any crtc lock is sufficient, + * for writing must hold all of them. + */ + struct intel_cdclk_state logical; + /* + * The current actual cdclk state. + * See intel_atomic_state.cdclk.actual + */ + struct intel_cdclk_state actual; + /* The current hardware cdclk state */ + struct intel_cdclk_state hw; + } cdclk; /** * wq - Driver workqueue for GEM. @@ -2746,6 +2792,12 @@ intel_info(const struct drm_i915_private *dev_priv) #define IS_KBL_REVID(dev_priv, since, until) \ (IS_KABYLAKE(dev_priv) && IS_REVID(dev_priv, since, until)) +#define GLK_REVID_A0 0x0 +#define GLK_REVID_A1 0x1 + +#define IS_GLK_REVID(dev_priv, since, until) \ + (IS_GEMINILAKE(dev_priv) && IS_REVID(dev_priv, since, until)) + /* * The genX designation typically refers to the render engine, so render * capability related checks should use IS_GEN, while display and other checks @@ -2761,8 +2813,9 @@ intel_info(const struct drm_i915_private *dev_priv) #define IS_GEN8(dev_priv) (!!((dev_priv)->info.gen_mask & BIT(7))) #define IS_GEN9(dev_priv) (!!((dev_priv)->info.gen_mask & BIT(8))) -#define IS_GEN9_LP(dev_priv) (IS_GEN9(dev_priv) && INTEL_INFO(dev_priv)->is_lp) #define IS_LP(dev_priv) (INTEL_INFO(dev_priv)->is_lp) +#define IS_GEN9_LP(dev_priv) (IS_GEN9(dev_priv) && IS_LP(dev_priv)) +#define IS_GEN9_BC(dev_priv) (IS_GEN9(dev_priv) && !IS_LP(dev_priv)) #define ENGINE_MASK(id) BIT(id) #define RENDER_RING ENGINE_MASK(RCS) @@ -2804,9 +2857,7 @@ intel_info(const struct drm_i915_private *dev_priv) /* WaRsDisableCoarsePowerGating:skl,bxt */ #define NEEDS_WaRsDisableCoarsePowerGating(dev_priv) \ - (IS_BXT_REVID(dev_priv, 0, BXT_REVID_A1) || \ - IS_SKL_GT3(dev_priv) || \ - IS_SKL_GT4(dev_priv)) + (IS_SKL_GT3(dev_priv) || IS_SKL_GT4(dev_priv)) /* * dp aux and gmbus irq on gen4 seems to be able to generate legacy interrupts @@ -2946,6 +2997,9 @@ extern unsigned long i915_gfx_val(struct drm_i915_private *dev_priv); extern void i915_update_gfx_val(struct drm_i915_private *dev_priv); int vlv_force_gfx_clock(struct drm_i915_private *dev_priv, bool on); +int intel_engines_init_early(struct drm_i915_private *dev_priv); +int intel_engines_init(struct drm_i915_private *dev_priv); + /* intel_hotplug.c */ void intel_hpd_irq_handler(struct drm_i915_private *dev_priv, u32 pin_mask, u32 long_mask); @@ -3123,6 +3177,7 @@ int i915_gem_get_aperture_ioctl(struct drm_device *dev, void *data, struct drm_file *file_priv); int i915_gem_wait_ioctl(struct drm_device *dev, void *data, struct drm_file *file_priv); +void i915_gem_sanitize(struct drm_i915_private *i915); int i915_gem_load_init(struct drm_i915_private *dev_priv); void i915_gem_load_cleanup(struct drm_i915_private *dev_priv); void i915_gem_load_init_fences(struct drm_i915_private *dev_priv); @@ -3335,15 +3390,17 @@ static inline u32 i915_reset_count(struct i915_gpu_error *error) } int i915_gem_reset_prepare(struct drm_i915_private *dev_priv); +void i915_gem_reset(struct drm_i915_private *dev_priv); void i915_gem_reset_finish(struct drm_i915_private *dev_priv); void i915_gem_set_wedged(struct drm_i915_private *dev_priv); -void i915_gem_clflush_object(struct drm_i915_gem_object *obj, bool force); + +void i915_gem_init_mmio(struct drm_i915_private *i915); int __must_check i915_gem_init(struct drm_i915_private *dev_priv); int __must_check i915_gem_init_hw(struct drm_i915_private *dev_priv); void i915_gem_init_swizzling(struct drm_i915_private *dev_priv); void i915_gem_cleanup_engines(struct drm_i915_private *dev_priv); -int __must_check i915_gem_wait_for_idle(struct drm_i915_private *dev_priv, - unsigned int flags); +int i915_gem_wait_for_idle(struct drm_i915_private *dev_priv, + unsigned int flags); int __must_check i915_gem_suspend(struct drm_i915_private *dev_priv); void i915_gem_resume(struct drm_i915_private *dev_priv); int i915_gem_fault(struct vm_area_struct *vma, struct vm_fault *vmf); @@ -3539,7 +3596,7 @@ static inline void intel_display_crc_init(struct drm_i915_private *dev_priv) {} __printf(2, 3) void i915_error_printf(struct drm_i915_error_state_buf *e, const char *f, ...); int i915_error_state_to_str(struct drm_i915_error_state_buf *estr, - const struct i915_error_state_file_priv *error); + const struct i915_gpu_state *gpu); int i915_error_state_buf_init(struct drm_i915_error_state_buf *eb, struct drm_i915_private *i915, size_t count, loff_t pos); @@ -3548,13 +3605,28 @@ static inline void i915_error_state_buf_release( { kfree(eb->buf); } + +struct i915_gpu_state *i915_capture_gpu_state(struct drm_i915_private *i915); void i915_capture_error_state(struct drm_i915_private *dev_priv, u32 engine_mask, const char *error_msg); -void i915_error_state_get(struct drm_device *dev, - struct i915_error_state_file_priv *error_priv); -void i915_error_state_put(struct i915_error_state_file_priv *error_priv); -void i915_destroy_error_state(struct drm_i915_private *dev_priv); + +static inline struct i915_gpu_state * +i915_gpu_state_get(struct i915_gpu_state *gpu) +{ + kref_get(&gpu->ref); + return gpu; +} + +void __i915_gpu_state_free(struct kref *kref); +static inline void i915_gpu_state_put(struct i915_gpu_state *gpu) +{ + if (gpu) + kref_put(&gpu->ref, __i915_gpu_state_free); +} + +struct i915_gpu_state *i915_first_error_state(struct drm_i915_private *i915); +void i915_reset_error_state(struct drm_i915_private *i915); #else @@ -3564,7 +3636,13 @@ static inline void i915_capture_error_state(struct drm_i915_private *dev_priv, { } -static inline void i915_destroy_error_state(struct drm_i915_private *dev_priv) +static inline struct i915_gpu_state * +i915_first_error_state(struct drm_i915_private *i915) +{ + return NULL; +} + +static inline void i915_reset_error_state(struct drm_i915_private *i915) { } @@ -3696,7 +3774,7 @@ extern void i915_redisable_vga(struct drm_i915_private *dev_priv); extern void i915_redisable_vga_power_on(struct drm_i915_private *dev_priv); extern bool ironlake_set_drps(struct drm_i915_private *dev_priv, u8 val); extern void intel_init_pch_refclk(struct drm_i915_private *dev_priv); -extern void intel_set_rps(struct drm_i915_private *dev_priv, u8 val); +extern int intel_set_rps(struct drm_i915_private *dev_priv, u8 val); extern bool intel_set_memory_cxsr(struct drm_i915_private *dev_priv, bool enable); @@ -3712,7 +3790,6 @@ extern void intel_overlay_print_error_state(struct drm_i915_error_state_buf *e, extern struct intel_display_error_state * intel_display_capture_error_state(struct drm_i915_private *dev_priv); extern void intel_display_print_error_state(struct drm_i915_error_state_buf *e, - struct drm_i915_private *dev_priv, struct intel_display_error_state *error); int sandybridge_pcode_read(struct drm_i915_private *dev_priv, u32 mbox, u32 *val); @@ -3722,7 +3799,7 @@ int skl_pcode_request(struct drm_i915_private *dev_priv, u32 mbox, u32 request, /* intel_sideband.c */ u32 vlv_punit_read(struct drm_i915_private *dev_priv, u32 addr); -void vlv_punit_write(struct drm_i915_private *dev_priv, u32 addr, u32 val); +int vlv_punit_write(struct drm_i915_private *dev_priv, u32 addr, u32 val); u32 vlv_nc_read(struct drm_i915_private *dev_priv, u8 addr); u32 vlv_iosf_sb_read(struct drm_i915_private *dev_priv, u8 port, u32 reg); void vlv_iosf_sb_write(struct drm_i915_private *dev_priv, u8 port, u32 reg, u32 val); @@ -3941,14 +4018,34 @@ wait_remaining_ms_from_jiffies(unsigned long timestamp_jiffies, int to_wait_ms) } static inline bool -__i915_request_irq_complete(struct drm_i915_gem_request *req) +__i915_request_irq_complete(const struct drm_i915_gem_request *req) { struct intel_engine_cs *engine = req->engine; + u32 seqno; + + /* Note that the engine may have wrapped around the seqno, and + * so our request->global_seqno will be ahead of the hardware, + * even though it completed the request before wrapping. We catch + * this by kicking all the waiters before resetting the seqno + * in hardware, and also signal the fence. + */ + if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &req->fence.flags)) + return true; + + /* The request was dequeued before we were awoken. We check after + * inspecting the hw to confirm that this was the same request + * that generated the HWS update. The memory barriers within + * the request execution are sufficient to ensure that a check + * after reading the value from hw matches this request. + */ + seqno = i915_gem_request_global_seqno(req); + if (!seqno) + return false; /* Before we do the heavier coherent read of the seqno, * check the value (hopefully) in the CPU cacheline. */ - if (__i915_gem_request_completed(req)) + if (__i915_gem_request_completed(req, seqno)) return true; /* Ensure our read of the seqno is coherent so that we @@ -3964,7 +4061,7 @@ __i915_request_irq_complete(struct drm_i915_gem_request *req) */ if (engine->irq_seqno_barrier && rcu_access_pointer(engine->breadcrumbs.irq_seqno_bh) == current && - cmpxchg_relaxed(&engine->breadcrumbs.irq_posted, 1, 0)) { + test_and_clear_bit(ENGINE_IRQ_BREADCRUMB, &engine->irq_posted)) { struct task_struct *tsk; /* The ordering of irq_posted versus applying the barrier @@ -3999,7 +4096,7 @@ __i915_request_irq_complete(struct drm_i915_gem_request *req) wake_up_process(tsk); rcu_read_unlock(); - if (__i915_gem_request_completed(req)) + if (__i915_gem_request_completed(req, seqno)) return true; } @@ -4030,4 +4127,10 @@ int remap_io_mapping(struct vm_area_struct *vma, unsigned long addr, unsigned long pfn, unsigned long size, struct io_mapping *iomap); +static inline bool i915_gem_object_is_coherent(struct drm_i915_gem_object *obj) +{ + return (obj->cache_level != I915_CACHE_NONE || + HAS_LLC(to_i915(obj->base.dev))); +} + #endif diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 88f3628b4e29..01dbba3813c7 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -29,12 +29,14 @@ #include <drm/drm_vma_manager.h> #include <drm/i915_drm.h> #include "i915_drv.h" +#include "i915_gem_clflush.h" #include "i915_vgpu.h" #include "i915_trace.h" #include "intel_drv.h" #include "intel_frontbuffer.h" #include "intel_mocs.h" #include <linux/dma-fence-array.h> +#include <linux/kthread.h> #include <linux/reservation.h> #include <linux/shmem_fs.h> #include <linux/slab.h> @@ -47,18 +49,12 @@ static void i915_gem_flush_free_objects(struct drm_i915_private *i915); static void i915_gem_object_flush_gtt_write_domain(struct drm_i915_gem_object *obj); static void i915_gem_object_flush_cpu_write_domain(struct drm_i915_gem_object *obj); -static bool cpu_cache_is_coherent(struct drm_device *dev, - enum i915_cache_level level) -{ - return HAS_LLC(to_i915(dev)) || level != I915_CACHE_NONE; -} - static bool cpu_write_needs_clflush(struct drm_i915_gem_object *obj) { if (obj->base.write_domain == I915_GEM_DOMAIN_CPU) return false; - if (!cpu_cache_is_coherent(obj->base.dev, obj->cache_level)) + if (!i915_gem_object_is_coherent(obj)) return true; return obj->pin_display; @@ -254,7 +250,7 @@ __i915_gem_object_release_shmem(struct drm_i915_gem_object *obj, if (needs_clflush && (obj->base.read_domains & I915_GEM_DOMAIN_CPU) == 0 && - !cpu_cache_is_coherent(obj->base.dev, obj->cache_level)) + !i915_gem_object_is_coherent(obj)) drm_clflush_sg(pages); obj->base.read_domains = I915_GEM_DOMAIN_CPU; @@ -312,6 +308,8 @@ static const struct drm_i915_gem_object_ops i915_gem_phys_ops = { .release = i915_gem_object_release_phys, }; +static const struct drm_i915_gem_object_ops i915_gem_object_ops; + int i915_gem_object_unbind(struct drm_i915_gem_object *obj) { struct i915_vma *vma; @@ -399,7 +397,7 @@ out: if (flags & I915_WAIT_LOCKED && i915_gem_request_completed(rq)) i915_gem_request_retire_upto(rq); - if (rps && rq->global_seqno == intel_engine_last_submit(rq->engine)) { + if (rps && i915_gem_request_global_seqno(rq) == intel_engine_last_submit(rq->engine)) { /* The GPU is now idle and this client has stalled. * Since no other client has submitted a request in the * meantime, assume that this client is the only one @@ -424,7 +422,9 @@ i915_gem_object_wait_reservation(struct reservation_object *resv, long timeout, struct intel_rps_client *rps) { + unsigned int seq = __read_seqcount_begin(&resv->seq); struct dma_fence *excl; + bool prune_fences = false; if (flags & I915_WAIT_ALL) { struct dma_fence **shared; @@ -449,15 +449,26 @@ i915_gem_object_wait_reservation(struct reservation_object *resv, for (; i < count; i++) dma_fence_put(shared[i]); kfree(shared); + + prune_fences = count && timeout >= 0; } else { excl = reservation_object_get_excl_rcu(resv); } - if (excl && timeout >= 0) + if (excl && timeout >= 0) { timeout = i915_gem_object_wait_fence(excl, flags, timeout, rps); + prune_fences = timeout >= 0; + } dma_fence_put(excl); + if (prune_fences && !__read_seqcount_retry(&resv->seq, seq)) { + reservation_object_lock(resv, NULL); + if (!__read_seqcount_retry(&resv->seq, seq)) + reservation_object_add_excl_fence(resv, NULL); + reservation_object_unlock(resv); + } + return timeout; } @@ -585,9 +596,18 @@ i915_gem_object_attach_phys(struct drm_i915_gem_object *obj, if (obj->mm.pages) return -EBUSY; + GEM_BUG_ON(obj->ops != &i915_gem_object_ops); obj->ops = &i915_gem_phys_ops; - return i915_gem_object_pin_pages(obj); + ret = i915_gem_object_pin_pages(obj); + if (ret) + goto err_xfer; + + return 0; + +err_xfer: + obj->ops = &i915_gem_object_ops; + return ret; } static int @@ -608,7 +628,7 @@ i915_gem_phys_pwrite(struct drm_i915_gem_object *obj, drm_clflush_virt_range(vaddr, args->size); i915_gem_chipset_flush(to_i915(obj->base.dev)); - intel_fb_obj_flush(obj, false, ORIGIN_CPU); + intel_fb_obj_flush(obj, ORIGIN_CPU); return 0; } @@ -771,8 +791,7 @@ int i915_gem_obj_prepare_shmem_read(struct drm_i915_gem_object *obj, * anyway again before the next pread happens. */ if (!(obj->base.read_domains & I915_GEM_DOMAIN_CPU)) - *needs_clflush = !cpu_cache_is_coherent(obj->base.dev, - obj->cache_level); + *needs_clflush = !i915_gem_object_is_coherent(obj); if (*needs_clflush && !static_cpu_has(X86_FEATURE_CLFLUSH)) { ret = i915_gem_object_set_to_cpu_domain(obj, false); @@ -828,8 +847,7 @@ int i915_gem_obj_prepare_shmem_write(struct drm_i915_gem_object *obj, * before writing. */ if (!(obj->base.read_domains & I915_GEM_DOMAIN_CPU)) - *needs_clflush |= !cpu_cache_is_coherent(obj->base.dev, - obj->cache_level); + *needs_clflush |= !i915_gem_object_is_coherent(obj); if (*needs_clflush && !static_cpu_has(X86_FEATURE_CLFLUSH)) { ret = i915_gem_object_set_to_cpu_domain(obj, true); @@ -1257,7 +1275,7 @@ i915_gem_gtt_pwrite_fast(struct drm_i915_gem_object *obj, user_data += page_length; offset += page_length; } - intel_fb_obj_flush(obj, false, ORIGIN_CPU); + intel_fb_obj_flush(obj, ORIGIN_CPU); mutex_lock(&i915->drm.struct_mutex); out_unpin: @@ -1393,7 +1411,7 @@ i915_gem_shmem_pwrite(struct drm_i915_gem_object *obj, offset = 0; } - intel_fb_obj_flush(obj, false, ORIGIN_CPU); + intel_fb_obj_flush(obj, ORIGIN_CPU); i915_gem_obj_finish_shmem_access(obj); return ret; } @@ -1596,23 +1614,16 @@ i915_gem_sw_finish_ioctl(struct drm_device *dev, void *data, { struct drm_i915_gem_sw_finish *args = data; struct drm_i915_gem_object *obj; - int err = 0; obj = i915_gem_object_lookup(file, args->handle); if (!obj) return -ENOENT; /* Pinned buffers may be scanout, so flush the cache */ - if (READ_ONCE(obj->pin_display)) { - err = i915_mutex_lock_interruptible(dev); - if (!err) { - i915_gem_object_flush_cpu_write_domain(obj); - mutex_unlock(&dev->struct_mutex); - } - } - + i915_gem_object_flush_if_display(obj); i915_gem_object_put(obj); - return err; + + return 0; } /** @@ -2223,17 +2234,17 @@ unlock: mutex_unlock(&obj->mm.lock); } -static void i915_sg_trim(struct sg_table *orig_st) +static bool i915_sg_trim(struct sg_table *orig_st) { struct sg_table new_st; struct scatterlist *sg, *new_sg; unsigned int i; if (orig_st->nents == orig_st->orig_nents) - return; + return false; if (sg_alloc_table(&new_st, orig_st->nents, GFP_KERNEL | __GFP_NOWARN)) - return; + return false; new_sg = new_st.sgl; for_each_sg(orig_st->sgl, sg, orig_st->nents, i) { @@ -2246,6 +2257,7 @@ static void i915_sg_trim(struct sg_table *orig_st) sg_free_table(orig_st); *orig_st = new_st; + return true; } static struct sg_table * @@ -2596,7 +2608,8 @@ static void i915_gem_context_mark_innocent(struct i915_gem_context *ctx) struct drm_i915_gem_request * i915_gem_find_active_request(struct intel_engine_cs *engine) { - struct drm_i915_gem_request *request; + struct drm_i915_gem_request *request, *active = NULL; + unsigned long flags; /* We are called by the error capture and reset at a random * point in time. In particular, note that neither is crucially @@ -2606,15 +2619,22 @@ i915_gem_find_active_request(struct intel_engine_cs *engine) * extra delay for a recent interrupt is pointless. Hence, we do * not need an engine->irq_seqno_barrier() before the seqno reads. */ + spin_lock_irqsave(&engine->timeline->lock, flags); list_for_each_entry(request, &engine->timeline->requests, link) { - if (__i915_gem_request_completed(request)) + if (__i915_gem_request_completed(request, + request->global_seqno)) continue; GEM_BUG_ON(request->engine != engine); - return request; + GEM_BUG_ON(test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, + &request->fence.flags)); + + active = request; + break; } + spin_unlock_irqrestore(&engine->timeline->lock, flags); - return NULL; + return active; } static bool engine_stalled(struct intel_engine_cs *engine) @@ -2641,7 +2661,30 @@ int i915_gem_reset_prepare(struct drm_i915_private *dev_priv) for_each_engine(engine, dev_priv, id) { struct drm_i915_gem_request *request; + /* Prevent the signaler thread from updating the request + * state (by calling dma_fence_signal) as we are processing + * the reset. The write from the GPU of the seqno is + * asynchronous and the signaler thread may see a different + * value to us and declare the request complete, even though + * the reset routine have picked that request as the active + * (incomplete) request. This conflict is not handled + * gracefully! + */ + kthread_park(engine->breadcrumbs.signaler); + + /* Prevent request submission to the hardware until we have + * completed the reset in i915_gem_reset_finish(). If a request + * is completed by one engine, it may then queue a request + * to a second via its engine->irq_tasklet *just* as we are + * calling engine->init_hw() and also writing the ELSP. + * Turning off the engine->irq_tasklet until the reset is over + * prevents the race. + */ tasklet_kill(&engine->irq_tasklet); + tasklet_disable(&engine->irq_tasklet); + + if (engine->irq_seqno_barrier) + engine->irq_seqno_barrier(engine); if (engine_stalled(engine)) { request = i915_gem_find_active_request(engine); @@ -2739,9 +2782,6 @@ static void i915_gem_reset_engine(struct intel_engine_cs *engine) { struct drm_i915_gem_request *request; - if (engine->irq_seqno_barrier) - engine->irq_seqno_barrier(engine); - request = i915_gem_find_active_request(engine); if (request && i915_gem_reset_request(request)) { DRM_DEBUG_DRIVER("resetting %s to restart from tail of request 0x%x\n", @@ -2756,7 +2796,7 @@ static void i915_gem_reset_engine(struct intel_engine_cs *engine) engine->reset_hw(engine, request); } -void i915_gem_reset_finish(struct drm_i915_private *dev_priv) +void i915_gem_reset(struct drm_i915_private *dev_priv) { struct intel_engine_cs *engine; enum intel_engine_id id; @@ -2765,8 +2805,14 @@ void i915_gem_reset_finish(struct drm_i915_private *dev_priv) i915_gem_retire_requests(dev_priv); - for_each_engine(engine, dev_priv, id) + for_each_engine(engine, dev_priv, id) { + struct i915_gem_context *ctx; + i915_gem_reset_engine(engine); + ctx = fetch_and_zero(&engine->last_retired_context); + if (ctx) + engine->context_unpin(engine, ctx); + } i915_gem_restore_fences(dev_priv); @@ -2778,6 +2824,19 @@ void i915_gem_reset_finish(struct drm_i915_private *dev_priv) } } +void i915_gem_reset_finish(struct drm_i915_private *dev_priv) +{ + struct intel_engine_cs *engine; + enum intel_engine_id id; + + lockdep_assert_held(&dev_priv->drm.struct_mutex); + + for_each_engine(engine, dev_priv, id) { + tasklet_enable(&engine->irq_tasklet); + kthread_unpark(engine->breadcrumbs.signaler); + } +} + static void nop_submit_request(struct drm_i915_gem_request *request) { dma_fence_set_error(&request->fence, -EIO); @@ -3029,6 +3088,16 @@ i915_gem_wait_ioctl(struct drm_device *dev, void *data, struct drm_file *file) args->timeout_ns -= ktime_to_ns(ktime_sub(ktime_get(), start)); if (args->timeout_ns < 0) args->timeout_ns = 0; + + /* + * Apparently ktime isn't accurate enough and occasionally has a + * bit of mismatch in the jiffies<->nsecs<->ktime loop. So patch + * things up to make the test happy. We allow up to 1 jiffy. + * + * This is a regression from the timespec->ktime conversion. + */ + if (ret == -ETIME && !nsecs_to_jiffies(args->timeout_ns)) + args->timeout_ns = 0; } i915_gem_object_put(obj); @@ -3071,41 +3140,6 @@ int i915_gem_wait_for_idle(struct drm_i915_private *i915, unsigned int flags) return 0; } -void i915_gem_clflush_object(struct drm_i915_gem_object *obj, - bool force) -{ - /* If we don't have a page list set up, then we're not pinned - * to GPU, and we can ignore the cache flush because it'll happen - * again at bind time. - */ - if (!obj->mm.pages) - return; - - /* - * Stolen memory is always coherent with the GPU as it is explicitly - * marked as wc by the system, or the system is cache-coherent. - */ - if (obj->stolen || obj->phys_handle) - return; - - /* If the GPU is snooping the contents of the CPU cache, - * we do not need to manually clear the CPU cache lines. However, - * the caches are only snooped when the render cache is - * flushed/invalidated. As we always have to emit invalidations - * and flushes when moving into and out of the RENDER domain, correct - * snooping behaviour occurs naturally as the result of our domain - * tracking. - */ - if (!force && cpu_cache_is_coherent(obj->base.dev, obj->cache_level)) { - obj->cache_dirty = true; - return; - } - - trace_i915_gem_object_clflush(obj); - drm_clflush_sg(obj->mm.pages); - obj->cache_dirty = false; -} - /** Flushes the GTT write domain for the object if it's dirty. */ static void i915_gem_object_flush_gtt_write_domain(struct drm_i915_gem_object *obj) @@ -3134,12 +3168,9 @@ i915_gem_object_flush_gtt_write_domain(struct drm_i915_gem_object *obj) if (INTEL_GEN(dev_priv) >= 6 && !HAS_LLC(dev_priv)) POSTING_READ(RING_ACTHD(dev_priv->engine[RCS]->mmio_base)); - intel_fb_obj_flush(obj, false, write_origin(obj, I915_GEM_DOMAIN_GTT)); + intel_fb_obj_flush(obj, write_origin(obj, I915_GEM_DOMAIN_GTT)); obj->base.write_domain = 0; - trace_i915_gem_object_change_domain(obj, - obj->base.read_domains, - I915_GEM_DOMAIN_GTT); } /** Flushes the CPU write domain for the object if it's dirty. */ @@ -3149,13 +3180,27 @@ i915_gem_object_flush_cpu_write_domain(struct drm_i915_gem_object *obj) if (obj->base.write_domain != I915_GEM_DOMAIN_CPU) return; - i915_gem_clflush_object(obj, obj->pin_display); - intel_fb_obj_flush(obj, false, ORIGIN_CPU); + i915_gem_clflush_object(obj, I915_CLFLUSH_SYNC); + obj->base.write_domain = 0; +} +static void __i915_gem_object_flush_for_display(struct drm_i915_gem_object *obj) +{ + if (obj->base.write_domain != I915_GEM_DOMAIN_CPU && !obj->cache_dirty) + return; + + i915_gem_clflush_object(obj, I915_CLFLUSH_FORCE); obj->base.write_domain = 0; - trace_i915_gem_object_change_domain(obj, - obj->base.read_domains, - I915_GEM_DOMAIN_CPU); +} + +void i915_gem_object_flush_if_display(struct drm_i915_gem_object *obj) +{ + if (!READ_ONCE(obj->pin_display)) + return; + + mutex_lock(&obj->base.dev->struct_mutex); + __i915_gem_object_flush_for_display(obj); + mutex_unlock(&obj->base.dev->struct_mutex); } /** @@ -3169,7 +3214,6 @@ i915_gem_object_flush_cpu_write_domain(struct drm_i915_gem_object *obj) int i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write) { - uint32_t old_write_domain, old_read_domains; int ret; lockdep_assert_held(&obj->base.dev->struct_mutex); @@ -3207,9 +3251,6 @@ i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write) if ((obj->base.read_domains & I915_GEM_DOMAIN_GTT) == 0) mb(); - old_write_domain = obj->base.write_domain; - old_read_domains = obj->base.read_domains; - /* It should now be out of any other write domains, and we can update * the domain values for our changes. */ @@ -3221,10 +3262,6 @@ i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write) obj->mm.dirty = true; } - trace_i915_gem_object_change_domain(obj, - old_read_domains, - old_write_domain); - i915_gem_object_unpin_pages(obj); return 0; } @@ -3349,7 +3386,7 @@ restart: } if (obj->base.write_domain == I915_GEM_DOMAIN_CPU && - cpu_cache_is_coherent(obj->base.dev, obj->cache_level)) + i915_gem_object_is_coherent(obj)) obj->cache_dirty = true; list_for_each_entry(vma, &obj->vma_list, obj_link) @@ -3461,7 +3498,6 @@ i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj, const struct i915_ggtt_view *view) { struct i915_vma *vma; - u32 old_read_domains, old_write_domain; int ret; lockdep_assert_held(&obj->base.dev->struct_mutex); @@ -3521,24 +3557,14 @@ i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj, vma->display_alignment = max_t(u64, vma->display_alignment, alignment); /* Treat this as an end-of-frame, like intel_user_framebuffer_dirty() */ - if (obj->cache_dirty || obj->base.write_domain == I915_GEM_DOMAIN_CPU) { - i915_gem_clflush_object(obj, true); - intel_fb_obj_flush(obj, false, ORIGIN_DIRTYFB); - } - - old_write_domain = obj->base.write_domain; - old_read_domains = obj->base.read_domains; + __i915_gem_object_flush_for_display(obj); + intel_fb_obj_flush(obj, ORIGIN_DIRTYFB); /* It should now be out of any other write domains, and we can update * the domain values for our changes. */ - obj->base.write_domain = 0; obj->base.read_domains |= I915_GEM_DOMAIN_GTT; - trace_i915_gem_object_change_domain(obj, - old_read_domains, - old_write_domain); - return vma; err_unpin_display: @@ -3574,7 +3600,6 @@ i915_gem_object_unpin_from_display_plane(struct i915_vma *vma) int i915_gem_object_set_to_cpu_domain(struct drm_i915_gem_object *obj, bool write) { - uint32_t old_write_domain, old_read_domains; int ret; lockdep_assert_held(&obj->base.dev->struct_mutex); @@ -3593,13 +3618,9 @@ i915_gem_object_set_to_cpu_domain(struct drm_i915_gem_object *obj, bool write) i915_gem_object_flush_gtt_write_domain(obj); - old_write_domain = obj->base.write_domain; - old_read_domains = obj->base.read_domains; - /* Flush the CPU cache if it's still invalid. */ if ((obj->base.read_domains & I915_GEM_DOMAIN_CPU) == 0) { - i915_gem_clflush_object(obj, false); - + i915_gem_clflush_object(obj, I915_CLFLUSH_SYNC); obj->base.read_domains |= I915_GEM_DOMAIN_CPU; } @@ -3616,10 +3637,6 @@ i915_gem_object_set_to_cpu_domain(struct drm_i915_gem_object *obj, bool write) obj->base.write_domain = I915_GEM_DOMAIN_CPU; } - trace_i915_gem_object_change_domain(obj, - old_read_domains, - old_write_domain); - return 0; } @@ -3942,7 +3959,7 @@ frontbuffer_retire(struct i915_gem_active *active, struct drm_i915_gem_object *obj = container_of(active, typeof(*obj), frontbuffer_write); - intel_fb_obj_flush(obj, true, ORIGIN_CS); + intel_fb_obj_flush(obj, ORIGIN_CS); } void i915_gem_object_init(struct drm_i915_gem_object *obj, @@ -4203,6 +4220,23 @@ static void assert_kernel_context_is_current(struct drm_i915_private *dev_priv) !i915_gem_context_is_kernel(engine->last_retired_context)); } +void i915_gem_sanitize(struct drm_i915_private *i915) +{ + /* + * If we inherit context state from the BIOS or earlier occupants + * of the GPU, the GPU may be in an inconsistent state when we + * try to take over. The only way to remove the earlier state + * is by resetting. However, resetting on earlier gen is tricky as + * it may impact the display and we are uncertain about the stability + * of the reset, so we only reset recent machines with logical + * context support (that must be reset to remove any stray contexts). + */ + if (HAS_HW_CONTEXTS(i915)) { + int reset = intel_gpu_reset(i915, ALL_ENGINES); + WARN_ON(reset && reset != -ENODEV); + } +} + int i915_gem_suspend(struct drm_i915_private *dev_priv) { struct drm_device *dev = &dev_priv->drm; @@ -4273,10 +4307,7 @@ int i915_gem_suspend(struct drm_i915_private *dev_priv) * machines is a good idea, we don't - just in case it leaves the * machine in an unusable condition. */ - if (HAS_HW_CONTEXTS(dev_priv)) { - int reset = intel_gpu_reset(dev_priv, ALL_ENGINES); - WARN_ON(reset && reset != -ENODEV); - } + i915_gem_sanitize(dev_priv); return 0; @@ -4351,11 +4382,24 @@ static void init_unused_rings(struct drm_i915_private *dev_priv) } } -int -i915_gem_init_hw(struct drm_i915_private *dev_priv) +static int __i915_gem_restart_engines(void *data) { + struct drm_i915_private *i915 = data; struct intel_engine_cs *engine; enum intel_engine_id id; + int err; + + for_each_engine(engine, i915, id) { + err = engine->init_hw(engine); + if (err) + return err; + } + + return 0; +} + +int i915_gem_init_hw(struct drm_i915_private *dev_priv) +{ int ret; dev_priv->gt.last_init_time = ktime_get(); @@ -4401,11 +4445,9 @@ i915_gem_init_hw(struct drm_i915_private *dev_priv) } /* Need to do basic initialisation of all rings first: */ - for_each_engine(engine, dev_priv, id) { - ret = engine->init_hw(engine); - if (ret) - goto out; - } + ret = __i915_gem_restart_engines(dev_priv); + if (ret) + goto out; intel_mocs_init_l3cc_table(dev_priv); @@ -4446,6 +4488,8 @@ int i915_gem_init(struct drm_i915_private *dev_priv) mutex_lock(&dev_priv->drm.struct_mutex); + i915_gem_clflush_init(dev_priv); + if (!i915.enable_execlists) { dev_priv->gt.resume = intel_legacy_submission_resume; dev_priv->gt.cleanup_engine = intel_engine_cleanup; @@ -4494,6 +4538,11 @@ out_unlock: return ret; } +void i915_gem_init_mmio(struct drm_i915_private *i915) +{ + i915_gem_sanitize(i915); +} + void i915_gem_cleanup_engines(struct drm_i915_private *dev_priv) { @@ -4583,8 +4632,6 @@ i915_gem_load_init(struct drm_i915_private *dev_priv) init_waitqueue_head(&dev_priv->gpu_error.wait_queue); init_waitqueue_head(&dev_priv->gpu_error.reset_queue); - dev_priv->relative_constants_mode = I915_EXEC_CONSTANTS_REL_GENERAL; - init_waitqueue_head(&dev_priv->pending_flip_queue); dev_priv->mm.interruptible = true; @@ -4609,7 +4656,9 @@ err_out: void i915_gem_load_cleanup(struct drm_i915_private *dev_priv) { + i915_gem_drain_freed_objects(dev_priv); WARN_ON(!llist_empty(&dev_priv->mm.free_list)); + WARN_ON(dev_priv->mm.object_count); mutex_lock(&dev_priv->drm.struct_mutex); i915_gem_timeline_fini(&dev_priv->gt.global_timeline); @@ -4627,14 +4676,10 @@ void i915_gem_load_cleanup(struct drm_i915_private *dev_priv) int i915_gem_freeze(struct drm_i915_private *dev_priv) { - intel_runtime_pm_get(dev_priv); - mutex_lock(&dev_priv->drm.struct_mutex); i915_gem_shrink_all(dev_priv); mutex_unlock(&dev_priv->drm.struct_mutex); - intel_runtime_pm_put(dev_priv); - return 0; } @@ -4949,3 +4994,11 @@ i915_gem_object_get_dma_address(struct drm_i915_gem_object *obj, sg = i915_gem_object_get_sg(obj, n, &offset); return sg_dma_address(sg) + (offset << PAGE_SHIFT); } + +#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) +#include "selftests/scatterlist.c" +#include "selftests/mock_gem_device.c" +#include "selftests/huge_gem_object.c" +#include "selftests/i915_gem_object.c" +#include "selftests/i915_gem_coherency.c" +#endif diff --git a/drivers/gpu/drm/i915/i915_gem.h b/drivers/gpu/drm/i915/i915_gem.h index a585d47c420a..5a49487368ca 100644 --- a/drivers/gpu/drm/i915/i915_gem.h +++ b/drivers/gpu/drm/i915/i915_gem.h @@ -28,9 +28,18 @@ #ifdef CONFIG_DRM_I915_DEBUG_GEM #define GEM_BUG_ON(expr) BUG_ON(expr) #define GEM_WARN_ON(expr) WARN_ON(expr) + +#define GEM_DEBUG_DECL(var) var +#define GEM_DEBUG_EXEC(expr) expr +#define GEM_DEBUG_BUG_ON(expr) GEM_BUG_ON(expr) + #else #define GEM_BUG_ON(expr) BUILD_BUG_ON_INVALID(expr) #define GEM_WARN_ON(expr) (BUILD_BUG_ON_INVALID(expr), 0) + +#define GEM_DEBUG_DECL(var) +#define GEM_DEBUG_EXEC(expr) do { } while (0) +#define GEM_DEBUG_BUG_ON(expr) #endif #define I915_NUM_ENGINES 5 diff --git a/drivers/gpu/drm/i915/i915_gem_batch_pool.c b/drivers/gpu/drm/i915/i915_gem_batch_pool.c index b3bc119ec1bb..99ceae7855f8 100644 --- a/drivers/gpu/drm/i915/i915_gem_batch_pool.c +++ b/drivers/gpu/drm/i915/i915_gem_batch_pool.c @@ -122,9 +122,9 @@ i915_gem_batch_pool_get(struct i915_gem_batch_pool *pool, if (tmp->base.size >= size) { /* Clear the set of shared fences early */ - ww_mutex_lock(&tmp->resv->lock, NULL); + reservation_object_lock(tmp->resv, NULL); reservation_object_add_excl_fence(tmp->resv, NULL); - ww_mutex_unlock(&tmp->resv->lock); + reservation_object_unlock(tmp->resv); obj = tmp; break; diff --git a/drivers/gpu/drm/i915/i915_gem_clflush.c b/drivers/gpu/drm/i915/i915_gem_clflush.c new file mode 100644 index 000000000000..d925fb582ba7 --- /dev/null +++ b/drivers/gpu/drm/i915/i915_gem_clflush.c @@ -0,0 +1,189 @@ +/* + * Copyright © 2016 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + */ + +#include "i915_drv.h" +#include "intel_frontbuffer.h" +#include "i915_gem_clflush.h" + +static DEFINE_SPINLOCK(clflush_lock); +static u64 clflush_context; + +struct clflush { + struct dma_fence dma; /* Must be first for dma_fence_free() */ + struct i915_sw_fence wait; + struct work_struct work; + struct drm_i915_gem_object *obj; +}; + +static const char *i915_clflush_get_driver_name(struct dma_fence *fence) +{ + return DRIVER_NAME; +} + +static const char *i915_clflush_get_timeline_name(struct dma_fence *fence) +{ + return "clflush"; +} + +static bool i915_clflush_enable_signaling(struct dma_fence *fence) +{ + return true; +} + +static void i915_clflush_release(struct dma_fence *fence) +{ + struct clflush *clflush = container_of(fence, typeof(*clflush), dma); + + i915_sw_fence_fini(&clflush->wait); + + BUILD_BUG_ON(offsetof(typeof(*clflush), dma)); + dma_fence_free(&clflush->dma); +} + +static const struct dma_fence_ops i915_clflush_ops = { + .get_driver_name = i915_clflush_get_driver_name, + .get_timeline_name = i915_clflush_get_timeline_name, + .enable_signaling = i915_clflush_enable_signaling, + .wait = dma_fence_default_wait, + .release = i915_clflush_release, +}; + +static void __i915_do_clflush(struct drm_i915_gem_object *obj) +{ + drm_clflush_sg(obj->mm.pages); + obj->cache_dirty = false; + + intel_fb_obj_flush(obj, ORIGIN_CPU); +} + +static void i915_clflush_work(struct work_struct *work) +{ + struct clflush *clflush = container_of(work, typeof(*clflush), work); + struct drm_i915_gem_object *obj = clflush->obj; + + if (!obj->cache_dirty) + goto out; + + if (i915_gem_object_pin_pages(obj)) { + DRM_ERROR("Failed to acquire obj->pages for clflushing\n"); + goto out; + } + + __i915_do_clflush(obj); + + i915_gem_object_unpin_pages(obj); + +out: + i915_gem_object_put(obj); + + dma_fence_signal(&clflush->dma); + dma_fence_put(&clflush->dma); +} + +static int __i915_sw_fence_call +i915_clflush_notify(struct i915_sw_fence *fence, + enum i915_sw_fence_notify state) +{ + struct clflush *clflush = container_of(fence, typeof(*clflush), wait); + + switch (state) { + case FENCE_COMPLETE: + schedule_work(&clflush->work); + break; + + case FENCE_FREE: + dma_fence_put(&clflush->dma); + break; + } + + return NOTIFY_DONE; +} + +void i915_gem_clflush_object(struct drm_i915_gem_object *obj, + unsigned int flags) +{ + struct clflush *clflush; + + /* + * Stolen memory is always coherent with the GPU as it is explicitly + * marked as wc by the system, or the system is cache-coherent. + * Similarly, we only access struct pages through the CPU cache, so + * anything not backed by physical memory we consider to be always + * coherent and not need clflushing. + */ + if (!i915_gem_object_has_struct_page(obj)) + return; + + obj->cache_dirty = true; + + /* If the GPU is snooping the contents of the CPU cache, + * we do not need to manually clear the CPU cache lines. However, + * the caches are only snooped when the render cache is + * flushed/invalidated. As we always have to emit invalidations + * and flushes when moving into and out of the RENDER domain, correct + * snooping behaviour occurs naturally as the result of our domain + * tracking. + */ + if (!(flags & I915_CLFLUSH_FORCE) && i915_gem_object_is_coherent(obj)) + return; + + trace_i915_gem_object_clflush(obj); + + clflush = NULL; + if (!(flags & I915_CLFLUSH_SYNC)) + clflush = kmalloc(sizeof(*clflush), GFP_KERNEL); + if (clflush) { + dma_fence_init(&clflush->dma, + &i915_clflush_ops, + &clflush_lock, + clflush_context, + 0); + i915_sw_fence_init(&clflush->wait, i915_clflush_notify); + + clflush->obj = i915_gem_object_get(obj); + INIT_WORK(&clflush->work, i915_clflush_work); + + dma_fence_get(&clflush->dma); + + i915_sw_fence_await_reservation(&clflush->wait, + obj->resv, NULL, + false, I915_FENCE_TIMEOUT, + GFP_KERNEL); + + reservation_object_lock(obj->resv, NULL); + reservation_object_add_excl_fence(obj->resv, &clflush->dma); + reservation_object_unlock(obj->resv); + + i915_sw_fence_commit(&clflush->wait); + } else if (obj->mm.pages) { + __i915_do_clflush(obj); + } else { + GEM_BUG_ON(obj->base.write_domain != I915_GEM_DOMAIN_CPU); + } +} + +void i915_gem_clflush_init(struct drm_i915_private *i915) +{ + clflush_context = dma_fence_context_alloc(1); +} diff --git a/drivers/gpu/drm/i915/i915_gem_clflush.h b/drivers/gpu/drm/i915/i915_gem_clflush.h new file mode 100644 index 000000000000..b62d61a2d15f --- /dev/null +++ b/drivers/gpu/drm/i915/i915_gem_clflush.h @@ -0,0 +1,37 @@ +/* + * Copyright © 2016 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + */ + +#ifndef __I915_GEM_CLFLUSH_H__ +#define __I915_GEM_CLFLUSH_H__ + +struct drm_i915_private; +struct drm_i915_gem_object; + +void i915_gem_clflush_init(struct drm_i915_private *i915); +void i915_gem_clflush_object(struct drm_i915_gem_object *obj, + unsigned int flags); +#define I915_CLFLUSH_FORCE BIT(0) +#define I915_CLFLUSH_SYNC BIT(1) + +#endif /* __I915_GEM_CLFLUSH_H__ */ diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c index 17f90c618208..99c46f4dbde6 100644 --- a/drivers/gpu/drm/i915/i915_gem_context.c +++ b/drivers/gpu/drm/i915/i915_gem_context.c @@ -236,6 +236,30 @@ static int assign_hw_id(struct drm_i915_private *dev_priv, unsigned *out) return 0; } +static u32 default_desc_template(const struct drm_i915_private *i915, + const struct i915_hw_ppgtt *ppgtt) +{ + u32 address_mode; + u32 desc; + + desc = GEN8_CTX_VALID | GEN8_CTX_PRIVILEGE; + + address_mode = INTEL_LEGACY_32B_CONTEXT; + if (ppgtt && i915_vm_is_48bit(&ppgtt->base)) + address_mode = INTEL_LEGACY_64B_CONTEXT; + desc |= address_mode << GEN8_CTX_ADDRESSING_MODE_SHIFT; + + if (IS_GEN8(i915)) + desc |= GEN8_CTX_L3LLC_COHERENT; + + /* TODO: WaDisableLiteRestore when we start using semaphore + * signalling between Command Streamers + * ring->ctx_desc_template |= GEN8_CTX_FORCE_RESTORE; + */ + + return desc; +} + static struct i915_gem_context * __create_hw_context(struct drm_i915_private *dev_priv, struct drm_i915_file_private *file_priv) @@ -309,8 +333,8 @@ __create_hw_context(struct drm_i915_private *dev_priv, i915_gem_context_set_bannable(ctx); ctx->ring_size = 4 * PAGE_SIZE; - ctx->desc_template = GEN8_CTX_ADDRESSING_MODE(dev_priv) << - GEN8_CTX_ADDRESSING_MODE_SHIFT; + ctx->desc_template = + default_desc_template(dev_priv, dev_priv->mm.aliasing_ppgtt); ATOMIC_INIT_NOTIFIER_HEAD(&ctx->status_notifier); /* GuC requires the ring to be placed above GUC_WOPCM_TOP. If GuC is not @@ -332,6 +356,13 @@ err_out: return ERR_PTR(ret); } +static void __destroy_hw_context(struct i915_gem_context *ctx, + struct drm_i915_file_private *file_priv) +{ + idr_remove(&file_priv->context_idr, ctx->user_handle); + context_close(ctx); +} + /** * The default context needs to exist per ring that uses contexts. It stores the * context state of the GPU for applications that don't utilize HW contexts, as @@ -356,12 +387,12 @@ i915_gem_create_context(struct drm_i915_private *dev_priv, if (IS_ERR(ppgtt)) { DRM_DEBUG_DRIVER("PPGTT setup failed (%ld)\n", PTR_ERR(ppgtt)); - idr_remove(&file_priv->context_idr, ctx->user_handle); - context_close(ctx); + __destroy_hw_context(ctx, file_priv); return ERR_CAST(ppgtt); } ctx->ppgtt = ppgtt; + ctx->desc_template = default_desc_template(dev_priv, ppgtt); } trace_i915_context_create(ctx); @@ -400,7 +431,8 @@ i915_gem_context_create_gvt(struct drm_device *dev) i915_gem_context_set_closed(ctx); /* not user accessible */ i915_gem_context_clear_bannable(ctx); i915_gem_context_set_force_single_submission(ctx); - ctx->ring_size = 512 * PAGE_SIZE; /* Max ring buffer size */ + if (!i915.enable_guc_submission) + ctx->ring_size = 512 * PAGE_SIZE; /* Max ring buffer size */ GEM_BUG_ON(i915_gem_context_is_kernel(ctx)); out: @@ -451,6 +483,11 @@ int i915_gem_context_init(struct drm_i915_private *dev_priv) return PTR_ERR(ctx); } + /* For easy recognisablity, we want the kernel context to be 0 and then + * all user contexts will have non-zero hw_id. + */ + GEM_BUG_ON(ctx->hw_id); + i915_gem_context_clear_bannable(ctx); ctx->priority = I915_PRIORITY_MIN; /* lowest priority; idle task */ dev_priv->kernel_context = ctx; @@ -560,16 +597,15 @@ static inline int mi_set_context(struct drm_i915_gem_request *req, u32 hw_flags) { struct drm_i915_private *dev_priv = req->i915; - struct intel_ring *ring = req->ring; struct intel_engine_cs *engine = req->engine; enum intel_engine_id id; - u32 flags = hw_flags | MI_MM_SPACE_GTT; + u32 *cs, flags = hw_flags | MI_MM_SPACE_GTT; const int num_rings = /* Use an extended w/a on ivb+ if signalling from other rings */ i915.semaphores ? INTEL_INFO(dev_priv)->num_rings - 1 : 0; - int len, ret; + int len; /* w/a: If Flush TLB Invalidation Mode is enabled, driver must do a TLB * invalidation prior to MI_SET_CONTEXT. On GEN6 we don't set the value @@ -577,7 +613,7 @@ mi_set_context(struct drm_i915_gem_request *req, u32 hw_flags) * itlb_before_ctx_switch. */ if (IS_GEN6(dev_priv)) { - ret = engine->emit_flush(req, EMIT_INVALIDATE); + int ret = engine->emit_flush(req, EMIT_INVALIDATE); if (ret) return ret; } @@ -593,99 +629,92 @@ mi_set_context(struct drm_i915_gem_request *req, u32 hw_flags) if (INTEL_GEN(dev_priv) >= 7) len += 2 + (num_rings ? 4*num_rings + 6 : 0); - ret = intel_ring_begin(req, len); - if (ret) - return ret; + cs = intel_ring_begin(req, len); + if (IS_ERR(cs)) + return PTR_ERR(cs); /* WaProgramMiArbOnOffAroundMiSetContext:ivb,vlv,hsw,bdw,chv */ if (INTEL_GEN(dev_priv) >= 7) { - intel_ring_emit(ring, MI_ARB_ON_OFF | MI_ARB_DISABLE); + *cs++ = MI_ARB_ON_OFF | MI_ARB_DISABLE; if (num_rings) { struct intel_engine_cs *signaller; - intel_ring_emit(ring, - MI_LOAD_REGISTER_IMM(num_rings)); + *cs++ = MI_LOAD_REGISTER_IMM(num_rings); for_each_engine(signaller, dev_priv, id) { if (signaller == engine) continue; - intel_ring_emit_reg(ring, - RING_PSMI_CTL(signaller->mmio_base)); - intel_ring_emit(ring, - _MASKED_BIT_ENABLE(GEN6_PSMI_SLEEP_MSG_DISABLE)); + *cs++ = i915_mmio_reg_offset( + RING_PSMI_CTL(signaller->mmio_base)); + *cs++ = _MASKED_BIT_ENABLE( + GEN6_PSMI_SLEEP_MSG_DISABLE); } } } - intel_ring_emit(ring, MI_NOOP); - intel_ring_emit(ring, MI_SET_CONTEXT); - intel_ring_emit(ring, - i915_ggtt_offset(req->ctx->engine[RCS].state) | flags); + *cs++ = MI_NOOP; + *cs++ = MI_SET_CONTEXT; + *cs++ = i915_ggtt_offset(req->ctx->engine[RCS].state) | flags; /* * w/a: MI_SET_CONTEXT must always be followed by MI_NOOP * WaMiSetContext_Hang:snb,ivb,vlv */ - intel_ring_emit(ring, MI_NOOP); + *cs++ = MI_NOOP; if (INTEL_GEN(dev_priv) >= 7) { if (num_rings) { struct intel_engine_cs *signaller; i915_reg_t last_reg = {}; /* keep gcc quiet */ - intel_ring_emit(ring, - MI_LOAD_REGISTER_IMM(num_rings)); + *cs++ = MI_LOAD_REGISTER_IMM(num_rings); for_each_engine(signaller, dev_priv, id) { if (signaller == engine) continue; last_reg = RING_PSMI_CTL(signaller->mmio_base); - intel_ring_emit_reg(ring, last_reg); - intel_ring_emit(ring, - _MASKED_BIT_DISABLE(GEN6_PSMI_SLEEP_MSG_DISABLE)); + *cs++ = i915_mmio_reg_offset(last_reg); + *cs++ = _MASKED_BIT_DISABLE( + GEN6_PSMI_SLEEP_MSG_DISABLE); } /* Insert a delay before the next switch! */ - intel_ring_emit(ring, - MI_STORE_REGISTER_MEM | - MI_SRM_LRM_GLOBAL_GTT); - intel_ring_emit_reg(ring, last_reg); - intel_ring_emit(ring, - i915_ggtt_offset(engine->scratch)); - intel_ring_emit(ring, MI_NOOP); + *cs++ = MI_STORE_REGISTER_MEM | MI_SRM_LRM_GLOBAL_GTT; + *cs++ = i915_mmio_reg_offset(last_reg); + *cs++ = i915_ggtt_offset(engine->scratch); + *cs++ = MI_NOOP; } - intel_ring_emit(ring, MI_ARB_ON_OFF | MI_ARB_ENABLE); + *cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE; } - intel_ring_advance(ring); + intel_ring_advance(req, cs); - return ret; + return 0; } static int remap_l3(struct drm_i915_gem_request *req, int slice) { - u32 *remap_info = req->i915->l3_parity.remap_info[slice]; - struct intel_ring *ring = req->ring; - int i, ret; + u32 *cs, *remap_info = req->i915->l3_parity.remap_info[slice]; + int i; if (!remap_info) return 0; - ret = intel_ring_begin(req, GEN7_L3LOG_SIZE/4 * 2 + 2); - if (ret) - return ret; + cs = intel_ring_begin(req, GEN7_L3LOG_SIZE/4 * 2 + 2); + if (IS_ERR(cs)) + return PTR_ERR(cs); /* * Note: We do not worry about the concurrent register cacheline hang * here because no other code should access these registers other than * at initialization time. */ - intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(GEN7_L3LOG_SIZE/4)); + *cs++ = MI_LOAD_REGISTER_IMM(GEN7_L3LOG_SIZE/4); for (i = 0; i < GEN7_L3LOG_SIZE/4; i++) { - intel_ring_emit_reg(ring, GEN7_L3LOG(slice, i)); - intel_ring_emit(ring, remap_info[i]); + *cs++ = i915_mmio_reg_offset(GEN7_L3LOG(slice, i)); + *cs++ = remap_info[i]; } - intel_ring_emit(ring, MI_NOOP); - intel_ring_advance(ring); + *cs++ = MI_NOOP; + intel_ring_advance(req, cs); return 0; } @@ -1014,8 +1043,7 @@ int i915_gem_context_destroy_ioctl(struct drm_device *dev, void *data, return PTR_ERR(ctx); } - idr_remove(&file_priv->context_idr, ctx->user_handle); - context_close(ctx); + __destroy_hw_context(ctx, file_priv); mutex_unlock(&dev->struct_mutex); DRM_DEBUG("HW context %d destroyed\n", args->ctx_id); @@ -1164,3 +1192,8 @@ int i915_gem_context_reset_stats_ioctl(struct drm_device *dev, return 0; } + +#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) +#include "selftests/mock_context.c" +#include "selftests/i915_gem_context.c" +#endif diff --git a/drivers/gpu/drm/i915/i915_gem_dmabuf.c b/drivers/gpu/drm/i915/i915_gem_dmabuf.c index d037adcda6f2..3e276eee0450 100644 --- a/drivers/gpu/drm/i915/i915_gem_dmabuf.c +++ b/drivers/gpu/drm/i915/i915_gem_dmabuf.c @@ -307,3 +307,8 @@ fail_detach: return ERR_PTR(ret); } + +#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) +#include "selftests/mock_dmabuf.c" +#include "selftests/i915_gem_dmabuf.c" +#endif diff --git a/drivers/gpu/drm/i915/i915_gem_evict.c b/drivers/gpu/drm/i915/i915_gem_evict.c index c181b1bb3d2c..a0de5734f7d0 100644 --- a/drivers/gpu/drm/i915/i915_gem_evict.c +++ b/drivers/gpu/drm/i915/i915_gem_evict.c @@ -258,6 +258,9 @@ int i915_gem_evict_for_node(struct i915_address_space *vm, int ret = 0; lockdep_assert_held(&vm->i915->drm.struct_mutex); + GEM_BUG_ON(!IS_ALIGNED(start, I915_GTT_PAGE_SIZE)); + GEM_BUG_ON(!IS_ALIGNED(end, I915_GTT_PAGE_SIZE)); + trace_i915_gem_evict_node(vm, target, flags); /* Retire before we search the active list. Although we have @@ -271,11 +274,13 @@ int i915_gem_evict_for_node(struct i915_address_space *vm, check_color = vm->mm.color_adjust; if (check_color) { /* Expand search to cover neighbouring guard pages (or lack!) */ - if (start > vm->start) + if (start) start -= I915_GTT_PAGE_SIZE; - if (end < vm->start + vm->total) - end += I915_GTT_PAGE_SIZE; + + /* Always look at the page afterwards to avoid the end-of-GTT */ + end += I915_GTT_PAGE_SIZE; } + GEM_BUG_ON(start >= end); drm_mm_for_each_node_in_range(node, &vm->mm, start, end) { /* If we find any non-objects (!vma), we cannot evict them */ @@ -284,6 +289,7 @@ int i915_gem_evict_for_node(struct i915_address_space *vm, break; } + GEM_BUG_ON(!node->allocated); vma = container_of(node, typeof(*vma), node); /* If we are using coloring to insert guard pages between @@ -387,3 +393,7 @@ int i915_gem_evict_vm(struct i915_address_space *vm, bool do_idle) return 0; } + +#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) +#include "selftests/i915_gem_evict.c" +#endif diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c index d02cfaefe1c8..aa75ea2d0578 100644 --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c @@ -28,12 +28,14 @@ #include <linux/dma_remapping.h> #include <linux/reservation.h> +#include <linux/sync_file.h> #include <linux/uaccess.h> #include <drm/drmP.h> #include <drm/i915_drm.h> #include "i915_drv.h" +#include "i915_gem_clflush.h" #include "i915_trace.h" #include "intel_drv.h" #include "intel_frontbuffer.h" @@ -1110,13 +1112,18 @@ i915_gem_execbuffer_move_to_gpu(struct drm_i915_gem_request *req, list_for_each_entry(vma, vmas, exec_list) { struct drm_i915_gem_object *obj = vma->obj; + if (vma->exec_entry->flags & EXEC_OBJECT_ASYNC) + continue; + + if (obj->base.write_domain & I915_GEM_DOMAIN_CPU) { + i915_gem_clflush_object(obj, 0); + obj->base.write_domain = 0; + } + ret = i915_gem_request_await_object (req, obj, obj->base.pending_write_domain); if (ret) return ret; - - if (obj->base.write_domain & I915_GEM_DOMAIN_CPU) - i915_gem_clflush_object(obj, false); } /* Unconditionally flush any chipset caches (for streaming writes). */ @@ -1297,12 +1304,12 @@ static void eb_export_fence(struct drm_i915_gem_object *obj, * handle an error right now. Worst case should be missed * synchronisation leading to rendering corruption. */ - ww_mutex_lock(&resv->lock, NULL); + reservation_object_lock(resv, NULL); if (flags & EXEC_OBJECT_WRITE) reservation_object_add_excl_fence(resv, &req->fence); else if (reservation_object_reserve_shared(resv) == 0) reservation_object_add_shared_fence(resv, &req->fence); - ww_mutex_unlock(&resv->lock); + reservation_object_unlock(resv); } static void @@ -1313,8 +1320,6 @@ i915_gem_execbuffer_move_to_active(struct list_head *vmas, list_for_each_entry(vma, vmas, exec_list) { struct drm_i915_gem_object *obj = vma->obj; - u32 old_read = obj->base.read_domains; - u32 old_write = obj->base.write_domain; obj->base.write_domain = obj->base.pending_write_domain; if (obj->base.write_domain) @@ -1325,32 +1330,31 @@ i915_gem_execbuffer_move_to_active(struct list_head *vmas, i915_vma_move_to_active(vma, req, vma->exec_entry->flags); eb_export_fence(obj, req, vma->exec_entry->flags); - trace_i915_gem_object_change_domain(obj, old_read, old_write); } } static int i915_reset_gen7_sol_offsets(struct drm_i915_gem_request *req) { - struct intel_ring *ring = req->ring; - int ret, i; + u32 *cs; + int i; if (!IS_GEN7(req->i915) || req->engine->id != RCS) { DRM_DEBUG("sol reset is gen7/rcs only\n"); return -EINVAL; } - ret = intel_ring_begin(req, 4 * 3); - if (ret) - return ret; + cs = intel_ring_begin(req, 4 * 3); + if (IS_ERR(cs)) + return PTR_ERR(cs); for (i = 0; i < 4; i++) { - intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1)); - intel_ring_emit_reg(ring, GEN7_SO_WRITE_OFFSET(i)); - intel_ring_emit(ring, 0); + *cs++ = MI_LOAD_REGISTER_IMM(1); + *cs++ = i915_mmio_reg_offset(GEN7_SO_WRITE_OFFSET(i)); + *cs++ = 0; } - intel_ring_advance(ring); + intel_ring_advance(req, cs); return 0; } @@ -1408,10 +1412,7 @@ execbuf_submit(struct i915_execbuffer_params *params, struct drm_i915_gem_execbuffer2 *args, struct list_head *vmas) { - struct drm_i915_private *dev_priv = params->request->i915; u64 exec_start, exec_len; - int instp_mode; - u32 instp_mask; int ret; ret = i915_gem_execbuffer_move_to_gpu(params->request, vmas); @@ -1422,56 +1423,11 @@ execbuf_submit(struct i915_execbuffer_params *params, if (ret) return ret; - instp_mode = args->flags & I915_EXEC_CONSTANTS_MASK; - instp_mask = I915_EXEC_CONSTANTS_MASK; - switch (instp_mode) { - case I915_EXEC_CONSTANTS_REL_GENERAL: - case I915_EXEC_CONSTANTS_ABSOLUTE: - case I915_EXEC_CONSTANTS_REL_SURFACE: - if (instp_mode != 0 && params->engine->id != RCS) { - DRM_DEBUG("non-0 rel constants mode on non-RCS\n"); - return -EINVAL; - } - - if (instp_mode != dev_priv->relative_constants_mode) { - if (INTEL_INFO(dev_priv)->gen < 4) { - DRM_DEBUG("no rel constants on pre-gen4\n"); - return -EINVAL; - } - - if (INTEL_INFO(dev_priv)->gen > 5 && - instp_mode == I915_EXEC_CONSTANTS_REL_SURFACE) { - DRM_DEBUG("rel surface constants mode invalid on gen5+\n"); - return -EINVAL; - } - - /* The HW changed the meaning on this bit on gen6 */ - if (INTEL_INFO(dev_priv)->gen >= 6) - instp_mask &= ~I915_EXEC_CONSTANTS_REL_SURFACE; - } - break; - default: - DRM_DEBUG("execbuf with unknown constants: %d\n", instp_mode); + if (args->flags & I915_EXEC_CONSTANTS_MASK) { + DRM_DEBUG("I915_EXEC_CONSTANTS_* unsupported\n"); return -EINVAL; } - if (params->engine->id == RCS && - instp_mode != dev_priv->relative_constants_mode) { - struct intel_ring *ring = params->request->ring; - - ret = intel_ring_begin(params->request, 4); - if (ret) - return ret; - - intel_ring_emit(ring, MI_NOOP); - intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1)); - intel_ring_emit_reg(ring, INSTPM); - intel_ring_emit(ring, instp_mask << 16 | instp_mode); - intel_ring_advance(ring); - - dev_priv->relative_constants_mode = instp_mode; - } - if (args->flags & I915_EXEC_GEN7_SOL_RESET) { ret = i915_reset_gen7_sol_offsets(params->request); if (ret) @@ -1491,8 +1447,6 @@ execbuf_submit(struct i915_execbuffer_params *params, if (ret) return ret; - trace_i915_gem_ring_dispatch(params->request, params->dispatch_flags); - i915_gem_execbuffer_move_to_active(vmas, params->request); return 0; @@ -1591,6 +1545,9 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data, struct i915_execbuffer_params *params = ¶ms_master; const u32 ctx_id = i915_execbuffer2_get_context_id(*args); u32 dispatch_flags; + struct dma_fence *in_fence = NULL; + struct sync_file *out_fence = NULL; + int out_fence_fd = -1; int ret; bool need_relocs; @@ -1634,6 +1591,20 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data, dispatch_flags |= I915_DISPATCH_RS; } + if (args->flags & I915_EXEC_FENCE_IN) { + in_fence = sync_file_get_fence(lower_32_bits(args->rsvd2)); + if (!in_fence) + return -EINVAL; + } + + if (args->flags & I915_EXEC_FENCE_OUT) { + out_fence_fd = get_unused_fd_flags(O_CLOEXEC); + if (out_fence_fd < 0) { + ret = out_fence_fd; + goto err_in_fence; + } + } + /* Take a local wakeref for preparing to dispatch the execbuf as * we expect to access the hardware fairly frequently in the * process. Upon first dispatch, we acquire another prolonged @@ -1778,6 +1749,21 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data, goto err_batch_unpin; } + if (in_fence) { + ret = i915_gem_request_await_dma_fence(params->request, + in_fence); + if (ret < 0) + goto err_request; + } + + if (out_fence_fd != -1) { + out_fence = sync_file_create(¶ms->request->fence); + if (!out_fence) { + ret = -ENOMEM; + goto err_request; + } + } + /* Whilst this request exists, batch_obj will be on the * active_list, and so will hold the active reference. Only when this * request is retired will the the batch_obj be moved onto the @@ -1802,9 +1788,21 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data, params->dispatch_flags = dispatch_flags; params->ctx = ctx; + trace_i915_gem_request_queue(params->request, dispatch_flags); + ret = execbuf_submit(params, args, &eb->vmas); err_request: __i915_add_request(params->request, ret == 0); + if (out_fence) { + if (ret == 0) { + fd_install(out_fence_fd, out_fence->file); + args->rsvd2 &= GENMASK_ULL(0, 31); /* keep in-fence */ + args->rsvd2 |= (u64)out_fence_fd << 32; + out_fence_fd = -1; + } else { + fput(out_fence->file); + } + } err_batch_unpin: /* @@ -1826,6 +1824,10 @@ pre_mutex_err: /* intel_gpu_busy should also get a ref, so it will free when the device * is really idle. */ intel_runtime_pm_put(dev_priv); + if (out_fence_fd != -1) + put_unused_fd(out_fence_fd); +err_in_fence: + dma_fence_put(in_fence); return ret; } @@ -1933,11 +1935,6 @@ i915_gem_execbuffer2(struct drm_device *dev, void *data, return -EINVAL; } - if (args->rsvd2 != 0) { - DRM_DEBUG("dirty rvsd2 field\n"); - return -EINVAL; - } - exec2_list = drm_malloc_gfp(args->buffer_count, sizeof(*exec2_list), GFP_TEMPORARY); diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index 2801a4d56324..6fdbd5ae4fcb 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -23,6 +23,9 @@ * */ +#include <linux/slab.h> /* fault-inject.h is not standalone! */ + +#include <linux/fault-inject.h> #include <linux/log2.h> #include <linux/random.h> #include <linux/seq_file.h> @@ -187,11 +190,17 @@ static int ppgtt_bind_vma(struct i915_vma *vma, enum i915_cache_level cache_level, u32 unused) { - u32 pte_flags = 0; + u32 pte_flags; + int ret; + + ret = vma->vm->allocate_va_range(vma->vm, vma->node.start, vma->size); + if (ret) + return ret; vma->pages = vma->obj->mm.pages; /* Currently applicable only to VLV */ + pte_flags = 0; if (vma->obj->gt_ro) pte_flags |= PTE_READ_ONLY; @@ -203,9 +212,7 @@ static int ppgtt_bind_vma(struct i915_vma *vma, static void ppgtt_unbind_vma(struct i915_vma *vma) { - vma->vm->clear_range(vma->vm, - vma->node.start, - vma->size); + vma->vm->clear_range(vma->vm, vma->node.start, vma->size); } static gen8_pte_t gen8_pte_encode(dma_addr_t addr, @@ -340,268 +347,224 @@ static gen6_pte_t iris_pte_encode(dma_addr_t addr, return pte; } -static int __setup_page_dma(struct drm_i915_private *dev_priv, - struct i915_page_dma *p, gfp_t flags) +static struct page *vm_alloc_page(struct i915_address_space *vm, gfp_t gfp) { - struct device *kdev = &dev_priv->drm.pdev->dev; + struct page *page; - p->page = alloc_page(flags); - if (!p->page) - return -ENOMEM; + if (I915_SELFTEST_ONLY(should_fail(&vm->fault_attr, 1))) + i915_gem_shrink_all(vm->i915); - p->daddr = dma_map_page(kdev, - p->page, 0, PAGE_SIZE, PCI_DMA_BIDIRECTIONAL); + if (vm->free_pages.nr) + return vm->free_pages.pages[--vm->free_pages.nr]; - if (dma_mapping_error(kdev, p->daddr)) { - __free_page(p->page); - return -EINVAL; - } + page = alloc_page(gfp); + if (!page) + return NULL; - return 0; + if (vm->pt_kmap_wc) + set_pages_array_wc(&page, 1); + + return page; } -static int setup_page_dma(struct drm_i915_private *dev_priv, - struct i915_page_dma *p) +static void vm_free_pages_release(struct i915_address_space *vm) { - return __setup_page_dma(dev_priv, p, I915_GFP_DMA); + GEM_BUG_ON(!pagevec_count(&vm->free_pages)); + + if (vm->pt_kmap_wc) + set_pages_array_wb(vm->free_pages.pages, + pagevec_count(&vm->free_pages)); + + __pagevec_release(&vm->free_pages); } -static void cleanup_page_dma(struct drm_i915_private *dev_priv, - struct i915_page_dma *p) +static void vm_free_page(struct i915_address_space *vm, struct page *page) { - struct pci_dev *pdev = dev_priv->drm.pdev; + if (!pagevec_add(&vm->free_pages, page)) + vm_free_pages_release(vm); +} - if (WARN_ON(!p->page)) - return; +static int __setup_page_dma(struct i915_address_space *vm, + struct i915_page_dma *p, + gfp_t gfp) +{ + p->page = vm_alloc_page(vm, gfp | __GFP_NOWARN | __GFP_NORETRY); + if (unlikely(!p->page)) + return -ENOMEM; - dma_unmap_page(&pdev->dev, p->daddr, PAGE_SIZE, PCI_DMA_BIDIRECTIONAL); - __free_page(p->page); - memset(p, 0, sizeof(*p)); + p->daddr = dma_map_page(vm->dma, p->page, 0, PAGE_SIZE, + PCI_DMA_BIDIRECTIONAL); + if (unlikely(dma_mapping_error(vm->dma, p->daddr))) { + vm_free_page(vm, p->page); + return -ENOMEM; + } + + return 0; } -static void *kmap_page_dma(struct i915_page_dma *p) +static int setup_page_dma(struct i915_address_space *vm, + struct i915_page_dma *p) { - return kmap_atomic(p->page); + return __setup_page_dma(vm, p, I915_GFP_DMA); } -/* We use the flushing unmap only with ppgtt structures: - * page directories, page tables and scratch pages. - */ -static void kunmap_page_dma(struct drm_i915_private *dev_priv, void *vaddr) +static void cleanup_page_dma(struct i915_address_space *vm, + struct i915_page_dma *p) { - /* There are only few exceptions for gen >=6. chv and bxt. - * And we are not sure about the latter so play safe for now. - */ - if (IS_CHERRYVIEW(dev_priv) || IS_GEN9_LP(dev_priv)) - drm_clflush_virt_range(vaddr, PAGE_SIZE); - - kunmap_atomic(vaddr); + dma_unmap_page(vm->dma, p->daddr, PAGE_SIZE, PCI_DMA_BIDIRECTIONAL); + vm_free_page(vm, p->page); } -#define kmap_px(px) kmap_page_dma(px_base(px)) -#define kunmap_px(ppgtt, vaddr) \ - kunmap_page_dma((ppgtt)->base.i915, (vaddr)) +#define kmap_atomic_px(px) kmap_atomic(px_base(px)->page) -#define setup_px(dev_priv, px) setup_page_dma((dev_priv), px_base(px)) -#define cleanup_px(dev_priv, px) cleanup_page_dma((dev_priv), px_base(px)) -#define fill_px(dev_priv, px, v) fill_page_dma((dev_priv), px_base(px), (v)) -#define fill32_px(dev_priv, px, v) \ - fill_page_dma_32((dev_priv), px_base(px), (v)) +#define setup_px(vm, px) setup_page_dma((vm), px_base(px)) +#define cleanup_px(vm, px) cleanup_page_dma((vm), px_base(px)) +#define fill_px(ppgtt, px, v) fill_page_dma((vm), px_base(px), (v)) +#define fill32_px(ppgtt, px, v) fill_page_dma_32((vm), px_base(px), (v)) -static void fill_page_dma(struct drm_i915_private *dev_priv, - struct i915_page_dma *p, const uint64_t val) +static void fill_page_dma(struct i915_address_space *vm, + struct i915_page_dma *p, + const u64 val) { + u64 * const vaddr = kmap_atomic(p->page); int i; - uint64_t * const vaddr = kmap_page_dma(p); for (i = 0; i < 512; i++) vaddr[i] = val; - kunmap_page_dma(dev_priv, vaddr); + kunmap_atomic(vaddr); } -static void fill_page_dma_32(struct drm_i915_private *dev_priv, - struct i915_page_dma *p, const uint32_t val32) +static void fill_page_dma_32(struct i915_address_space *vm, + struct i915_page_dma *p, + const u32 v) { - uint64_t v = val32; - - v = v << 32 | val32; - - fill_page_dma(dev_priv, p, v); + fill_page_dma(vm, p, (u64)v << 32 | v); } static int -setup_scratch_page(struct drm_i915_private *dev_priv, - struct i915_page_dma *scratch, - gfp_t gfp) +setup_scratch_page(struct i915_address_space *vm, gfp_t gfp) { - return __setup_page_dma(dev_priv, scratch, gfp | __GFP_ZERO); + return __setup_page_dma(vm, &vm->scratch_page, gfp | __GFP_ZERO); } -static void cleanup_scratch_page(struct drm_i915_private *dev_priv, - struct i915_page_dma *scratch) +static void cleanup_scratch_page(struct i915_address_space *vm) { - cleanup_page_dma(dev_priv, scratch); + cleanup_page_dma(vm, &vm->scratch_page); } -static struct i915_page_table *alloc_pt(struct drm_i915_private *dev_priv) +static struct i915_page_table *alloc_pt(struct i915_address_space *vm) { struct i915_page_table *pt; - const size_t count = INTEL_GEN(dev_priv) >= 8 ? GEN8_PTES : GEN6_PTES; - int ret = -ENOMEM; - pt = kzalloc(sizeof(*pt), GFP_KERNEL); - if (!pt) + pt = kmalloc(sizeof(*pt), GFP_KERNEL | __GFP_NOWARN); + if (unlikely(!pt)) return ERR_PTR(-ENOMEM); - pt->used_ptes = kcalloc(BITS_TO_LONGS(count), sizeof(*pt->used_ptes), - GFP_KERNEL); - - if (!pt->used_ptes) - goto fail_bitmap; - - ret = setup_px(dev_priv, pt); - if (ret) - goto fail_page_m; + if (unlikely(setup_px(vm, pt))) { + kfree(pt); + return ERR_PTR(-ENOMEM); + } + pt->used_ptes = 0; return pt; - -fail_page_m: - kfree(pt->used_ptes); -fail_bitmap: - kfree(pt); - - return ERR_PTR(ret); } -static void free_pt(struct drm_i915_private *dev_priv, - struct i915_page_table *pt) +static void free_pt(struct i915_address_space *vm, struct i915_page_table *pt) { - cleanup_px(dev_priv, pt); - kfree(pt->used_ptes); + cleanup_px(vm, pt); kfree(pt); } static void gen8_initialize_pt(struct i915_address_space *vm, struct i915_page_table *pt) { - gen8_pte_t scratch_pte; - - scratch_pte = gen8_pte_encode(vm->scratch_page.daddr, - I915_CACHE_LLC); - - fill_px(vm->i915, pt, scratch_pte); + fill_px(vm, pt, + gen8_pte_encode(vm->scratch_page.daddr, I915_CACHE_LLC)); } static void gen6_initialize_pt(struct i915_address_space *vm, struct i915_page_table *pt) { - gen6_pte_t scratch_pte; - - WARN_ON(vm->scratch_page.daddr == 0); - - scratch_pte = vm->pte_encode(vm->scratch_page.daddr, - I915_CACHE_LLC, 0); - - fill32_px(vm->i915, pt, scratch_pte); + fill32_px(vm, pt, + vm->pte_encode(vm->scratch_page.daddr, I915_CACHE_LLC, 0)); } -static struct i915_page_directory *alloc_pd(struct drm_i915_private *dev_priv) +static struct i915_page_directory *alloc_pd(struct i915_address_space *vm) { struct i915_page_directory *pd; - int ret = -ENOMEM; - pd = kzalloc(sizeof(*pd), GFP_KERNEL); - if (!pd) + pd = kzalloc(sizeof(*pd), GFP_KERNEL | __GFP_NOWARN); + if (unlikely(!pd)) return ERR_PTR(-ENOMEM); - pd->used_pdes = kcalloc(BITS_TO_LONGS(I915_PDES), - sizeof(*pd->used_pdes), GFP_KERNEL); - if (!pd->used_pdes) - goto fail_bitmap; - - ret = setup_px(dev_priv, pd); - if (ret) - goto fail_page_m; + if (unlikely(setup_px(vm, pd))) { + kfree(pd); + return ERR_PTR(-ENOMEM); + } + pd->used_pdes = 0; return pd; - -fail_page_m: - kfree(pd->used_pdes); -fail_bitmap: - kfree(pd); - - return ERR_PTR(ret); } -static void free_pd(struct drm_i915_private *dev_priv, +static void free_pd(struct i915_address_space *vm, struct i915_page_directory *pd) { - if (px_page(pd)) { - cleanup_px(dev_priv, pd); - kfree(pd->used_pdes); - kfree(pd); - } + cleanup_px(vm, pd); + kfree(pd); } static void gen8_initialize_pd(struct i915_address_space *vm, struct i915_page_directory *pd) { - gen8_pde_t scratch_pde; + unsigned int i; - scratch_pde = gen8_pde_encode(px_dma(vm->scratch_pt), I915_CACHE_LLC); - - fill_px(vm->i915, pd, scratch_pde); + fill_px(vm, pd, + gen8_pde_encode(px_dma(vm->scratch_pt), I915_CACHE_LLC)); + for (i = 0; i < I915_PDES; i++) + pd->page_table[i] = vm->scratch_pt; } -static int __pdp_init(struct drm_i915_private *dev_priv, +static int __pdp_init(struct i915_address_space *vm, struct i915_page_directory_pointer *pdp) { - size_t pdpes = I915_PDPES_PER_PDP(dev_priv); + const unsigned int pdpes = I915_PDPES_PER_PDP(vm->i915); + unsigned int i; - pdp->used_pdpes = kcalloc(BITS_TO_LONGS(pdpes), - sizeof(unsigned long), - GFP_KERNEL); - if (!pdp->used_pdpes) + pdp->page_directory = kmalloc_array(pdpes, sizeof(*pdp->page_directory), + GFP_KERNEL | __GFP_NOWARN); + if (unlikely(!pdp->page_directory)) return -ENOMEM; - pdp->page_directory = kcalloc(pdpes, sizeof(*pdp->page_directory), - GFP_KERNEL); - if (!pdp->page_directory) { - kfree(pdp->used_pdpes); - /* the PDP might be the statically allocated top level. Keep it - * as clean as possible */ - pdp->used_pdpes = NULL; - return -ENOMEM; - } + for (i = 0; i < pdpes; i++) + pdp->page_directory[i] = vm->scratch_pd; return 0; } static void __pdp_fini(struct i915_page_directory_pointer *pdp) { - kfree(pdp->used_pdpes); kfree(pdp->page_directory); pdp->page_directory = NULL; } -static struct -i915_page_directory_pointer *alloc_pdp(struct drm_i915_private *dev_priv) +static struct i915_page_directory_pointer * +alloc_pdp(struct i915_address_space *vm) { struct i915_page_directory_pointer *pdp; int ret = -ENOMEM; - WARN_ON(!USES_FULL_48BIT_PPGTT(dev_priv)); + WARN_ON(!USES_FULL_48BIT_PPGTT(vm->i915)); pdp = kzalloc(sizeof(*pdp), GFP_KERNEL); if (!pdp) return ERR_PTR(-ENOMEM); - ret = __pdp_init(dev_priv, pdp); + ret = __pdp_init(vm, pdp); if (ret) goto fail_bitmap; - ret = setup_px(dev_priv, pdp); + ret = setup_px(vm, pdp); if (ret) goto fail_page_m; @@ -615,12 +578,12 @@ fail_bitmap: return ERR_PTR(ret); } -static void free_pdp(struct drm_i915_private *dev_priv, +static void free_pdp(struct i915_address_space *vm, struct i915_page_directory_pointer *pdp) { __pdp_fini(pdp); - if (USES_FULL_48BIT_PPGTT(dev_priv)) { - cleanup_px(dev_priv, pdp); + if (USES_FULL_48BIT_PPGTT(vm->i915)) { + cleanup_px(vm, pdp); kfree(pdp); } } @@ -632,47 +595,18 @@ static void gen8_initialize_pdp(struct i915_address_space *vm, scratch_pdpe = gen8_pdpe_encode(px_dma(vm->scratch_pd), I915_CACHE_LLC); - fill_px(vm->i915, pdp, scratch_pdpe); + fill_px(vm, pdp, scratch_pdpe); } static void gen8_initialize_pml4(struct i915_address_space *vm, struct i915_pml4 *pml4) { - gen8_ppgtt_pml4e_t scratch_pml4e; - - scratch_pml4e = gen8_pml4e_encode(px_dma(vm->scratch_pdp), - I915_CACHE_LLC); - - fill_px(vm->i915, pml4, scratch_pml4e); -} - -static void -gen8_setup_pdpe(struct i915_hw_ppgtt *ppgtt, - struct i915_page_directory_pointer *pdp, - struct i915_page_directory *pd, - int index) -{ - gen8_ppgtt_pdpe_t *page_directorypo; - - if (!USES_FULL_48BIT_PPGTT(to_i915(ppgtt->base.dev))) - return; + unsigned int i; - page_directorypo = kmap_px(pdp); - page_directorypo[index] = gen8_pdpe_encode(px_dma(pd), I915_CACHE_LLC); - kunmap_px(ppgtt, page_directorypo); -} - -static void -gen8_setup_pml4e(struct i915_hw_ppgtt *ppgtt, - struct i915_pml4 *pml4, - struct i915_page_directory_pointer *pdp, - int index) -{ - gen8_ppgtt_pml4e_t *pagemap = kmap_px(pml4); - - WARN_ON(!USES_FULL_48BIT_PPGTT(to_i915(ppgtt->base.dev))); - pagemap[index] = gen8_pml4e_encode(px_dma(pdp), I915_CACHE_LLC); - kunmap_px(ppgtt, pagemap); + fill_px(vm, pml4, + gen8_pml4e_encode(px_dma(vm->scratch_pdp), I915_CACHE_LLC)); + for (i = 0; i < GEN8_PML4ES_PER_PML4; i++) + pml4->pdps[i] = vm->scratch_pdp; } /* Broadwell Page Directory Pointer Descriptors */ @@ -680,23 +614,22 @@ static int gen8_write_pdp(struct drm_i915_gem_request *req, unsigned entry, dma_addr_t addr) { - struct intel_ring *ring = req->ring; struct intel_engine_cs *engine = req->engine; - int ret; + u32 *cs; BUG_ON(entry >= 4); - ret = intel_ring_begin(req, 6); - if (ret) - return ret; + cs = intel_ring_begin(req, 6); + if (IS_ERR(cs)) + return PTR_ERR(cs); - intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1)); - intel_ring_emit_reg(ring, GEN8_RING_PDP_UDW(engine, entry)); - intel_ring_emit(ring, upper_32_bits(addr)); - intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1)); - intel_ring_emit_reg(ring, GEN8_RING_PDP_LDW(engine, entry)); - intel_ring_emit(ring, lower_32_bits(addr)); - intel_ring_advance(ring); + *cs++ = MI_LOAD_REGISTER_IMM(1); + *cs++ = i915_mmio_reg_offset(GEN8_RING_PDP_UDW(engine, entry)); + *cs++ = upper_32_bits(addr); + *cs++ = MI_LOAD_REGISTER_IMM(1); + *cs++ = i915_mmio_reg_offset(GEN8_RING_PDP_LDW(engine, entry)); + *cs++ = lower_32_bits(addr); + intel_ring_advance(req, cs); return 0; } @@ -738,70 +671,77 @@ static void mark_tlbs_dirty(struct i915_hw_ppgtt *ppgtt) */ static bool gen8_ppgtt_clear_pt(struct i915_address_space *vm, struct i915_page_table *pt, - uint64_t start, - uint64_t length) + u64 start, u64 length) { - struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm); unsigned int num_entries = gen8_pte_count(start, length); unsigned int pte = gen8_pte_index(start); unsigned int pte_end = pte + num_entries; - gen8_pte_t *pt_vaddr; - gen8_pte_t scratch_pte = gen8_pte_encode(vm->scratch_page.daddr, - I915_CACHE_LLC); + const gen8_pte_t scratch_pte = + gen8_pte_encode(vm->scratch_page.daddr, I915_CACHE_LLC); + gen8_pte_t *vaddr; - if (WARN_ON(!px_page(pt))) - return false; + GEM_BUG_ON(num_entries > pt->used_ptes); - GEM_BUG_ON(pte_end > GEN8_PTES); + pt->used_ptes -= num_entries; + if (!pt->used_ptes) + return true; - bitmap_clear(pt->used_ptes, pte, num_entries); - if (USES_FULL_PPGTT(vm->i915)) { - if (bitmap_empty(pt->used_ptes, GEN8_PTES)) - return true; - } + vaddr = kmap_atomic_px(pt); + while (pte < pte_end) + vaddr[pte++] = scratch_pte; + kunmap_atomic(vaddr); - pt_vaddr = kmap_px(pt); + return false; +} - while (pte < pte_end) - pt_vaddr[pte++] = scratch_pte; +static void gen8_ppgtt_set_pde(struct i915_address_space *vm, + struct i915_page_directory *pd, + struct i915_page_table *pt, + unsigned int pde) +{ + gen8_pde_t *vaddr; - kunmap_px(ppgtt, pt_vaddr); + pd->page_table[pde] = pt; - return false; + vaddr = kmap_atomic_px(pd); + vaddr[pde] = gen8_pde_encode(px_dma(pt), I915_CACHE_LLC); + kunmap_atomic(vaddr); } -/* Removes entries from a single page dir, releasing it if it's empty. - * Caller can use the return value to update higher-level entries - */ static bool gen8_ppgtt_clear_pd(struct i915_address_space *vm, struct i915_page_directory *pd, - uint64_t start, - uint64_t length) + u64 start, u64 length) { - struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm); struct i915_page_table *pt; - uint64_t pde; - gen8_pde_t *pde_vaddr; - gen8_pde_t scratch_pde = gen8_pde_encode(px_dma(vm->scratch_pt), - I915_CACHE_LLC); + u32 pde; gen8_for_each_pde(pt, pd, start, length, pde) { - if (WARN_ON(!pd->page_table[pde])) - break; - - if (gen8_ppgtt_clear_pt(vm, pt, start, length)) { - __clear_bit(pde, pd->used_pdes); - pde_vaddr = kmap_px(pd); - pde_vaddr[pde] = scratch_pde; - kunmap_px(ppgtt, pde_vaddr); - free_pt(vm->i915, pt); - } + if (!gen8_ppgtt_clear_pt(vm, pt, start, length)) + continue; + + gen8_ppgtt_set_pde(vm, pd, vm->scratch_pt, pde); + pd->used_pdes--; + + free_pt(vm, pt); } - if (bitmap_empty(pd->used_pdes, I915_PDES)) - return true; + return !pd->used_pdes; +} - return false; +static void gen8_ppgtt_set_pdpe(struct i915_address_space *vm, + struct i915_page_directory_pointer *pdp, + struct i915_page_directory *pd, + unsigned int pdpe) +{ + gen8_ppgtt_pdpe_t *vaddr; + + pdp->page_directory[pdpe] = pd; + if (!USES_FULL_48BIT_PPGTT(vm->i915)) + return; + + vaddr = kmap_atomic_px(pdp); + vaddr[pdpe] = gen8_pdpe_encode(px_dma(pd), I915_CACHE_LLC); + kunmap_atomic(vaddr); } /* Removes entries from a single page dir pointer, releasing it if it's empty. @@ -809,138 +749,184 @@ static bool gen8_ppgtt_clear_pd(struct i915_address_space *vm, */ static bool gen8_ppgtt_clear_pdp(struct i915_address_space *vm, struct i915_page_directory_pointer *pdp, - uint64_t start, - uint64_t length) + u64 start, u64 length) { - struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm); struct i915_page_directory *pd; - uint64_t pdpe; + unsigned int pdpe; gen8_for_each_pdpe(pd, pdp, start, length, pdpe) { - if (WARN_ON(!pdp->page_directory[pdpe])) - break; + if (!gen8_ppgtt_clear_pd(vm, pd, start, length)) + continue; - if (gen8_ppgtt_clear_pd(vm, pd, start, length)) { - __clear_bit(pdpe, pdp->used_pdpes); - gen8_setup_pdpe(ppgtt, pdp, vm->scratch_pd, pdpe); - free_pd(vm->i915, pd); - } + gen8_ppgtt_set_pdpe(vm, pdp, vm->scratch_pd, pdpe); + pdp->used_pdpes--; + + free_pd(vm, pd); } - mark_tlbs_dirty(ppgtt); + return !pdp->used_pdpes; +} - if (bitmap_empty(pdp->used_pdpes, I915_PDPES_PER_PDP(dev_priv))) - return true; +static void gen8_ppgtt_clear_3lvl(struct i915_address_space *vm, + u64 start, u64 length) +{ + gen8_ppgtt_clear_pdp(vm, &i915_vm_to_ppgtt(vm)->pdp, start, length); +} - return false; +static void gen8_ppgtt_set_pml4e(struct i915_pml4 *pml4, + struct i915_page_directory_pointer *pdp, + unsigned int pml4e) +{ + gen8_ppgtt_pml4e_t *vaddr; + + pml4->pdps[pml4e] = pdp; + + vaddr = kmap_atomic_px(pml4); + vaddr[pml4e] = gen8_pml4e_encode(px_dma(pdp), I915_CACHE_LLC); + kunmap_atomic(vaddr); } /* Removes entries from a single pml4. * This is the top-level structure in 4-level page tables used on gen8+. * Empty entries are always scratch pml4e. */ -static void gen8_ppgtt_clear_pml4(struct i915_address_space *vm, - struct i915_pml4 *pml4, - uint64_t start, - uint64_t length) +static void gen8_ppgtt_clear_4lvl(struct i915_address_space *vm, + u64 start, u64 length) { struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm); + struct i915_pml4 *pml4 = &ppgtt->pml4; struct i915_page_directory_pointer *pdp; - uint64_t pml4e; + unsigned int pml4e; GEM_BUG_ON(!USES_FULL_48BIT_PPGTT(vm->i915)); gen8_for_each_pml4e(pdp, pml4, start, length, pml4e) { - if (WARN_ON(!pml4->pdps[pml4e])) - break; + if (!gen8_ppgtt_clear_pdp(vm, pdp, start, length)) + continue; - if (gen8_ppgtt_clear_pdp(vm, pdp, start, length)) { - __clear_bit(pml4e, pml4->used_pml4es); - gen8_setup_pml4e(ppgtt, pml4, vm->scratch_pdp, pml4e); - free_pdp(vm->i915, pdp); - } + gen8_ppgtt_set_pml4e(pml4, vm->scratch_pdp, pml4e); + + free_pdp(vm, pdp); } } -static void gen8_ppgtt_clear_range(struct i915_address_space *vm, - uint64_t start, uint64_t length) -{ - struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm); +struct sgt_dma { + struct scatterlist *sg; + dma_addr_t dma, max; +}; - if (USES_FULL_48BIT_PPGTT(vm->i915)) - gen8_ppgtt_clear_pml4(vm, &ppgtt->pml4, start, length); - else - gen8_ppgtt_clear_pdp(vm, &ppgtt->pdp, start, length); +struct gen8_insert_pte { + u16 pml4e; + u16 pdpe; + u16 pde; + u16 pte; +}; + +static __always_inline struct gen8_insert_pte gen8_insert_pte(u64 start) +{ + return (struct gen8_insert_pte) { + gen8_pml4e_index(start), + gen8_pdpe_index(start), + gen8_pde_index(start), + gen8_pte_index(start), + }; } -static void -gen8_ppgtt_insert_pte_entries(struct i915_address_space *vm, +static __always_inline bool +gen8_ppgtt_insert_pte_entries(struct i915_hw_ppgtt *ppgtt, struct i915_page_directory_pointer *pdp, - struct sg_page_iter *sg_iter, - uint64_t start, + struct sgt_dma *iter, + struct gen8_insert_pte *idx, enum i915_cache_level cache_level) { - struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm); - gen8_pte_t *pt_vaddr; - unsigned pdpe = gen8_pdpe_index(start); - unsigned pde = gen8_pde_index(start); - unsigned pte = gen8_pte_index(start); - - pt_vaddr = NULL; - - while (__sg_page_iter_next(sg_iter)) { - if (pt_vaddr == NULL) { - struct i915_page_directory *pd = pdp->page_directory[pdpe]; - struct i915_page_table *pt = pd->page_table[pde]; - pt_vaddr = kmap_px(pt); + struct i915_page_directory *pd; + const gen8_pte_t pte_encode = gen8_pte_encode(0, cache_level); + gen8_pte_t *vaddr; + bool ret; + + GEM_BUG_ON(idx->pdpe >= I915_PDPES_PER_PDP(vm)); + pd = pdp->page_directory[idx->pdpe]; + vaddr = kmap_atomic_px(pd->page_table[idx->pde]); + do { + vaddr[idx->pte] = pte_encode | iter->dma; + + iter->dma += PAGE_SIZE; + if (iter->dma >= iter->max) { + iter->sg = __sg_next(iter->sg); + if (!iter->sg) { + ret = false; + break; + } + + iter->dma = sg_dma_address(iter->sg); + iter->max = iter->dma + iter->sg->length; } - pt_vaddr[pte] = - gen8_pte_encode(sg_page_iter_dma_address(sg_iter), - cache_level); - if (++pte == GEN8_PTES) { - kunmap_px(ppgtt, pt_vaddr); - pt_vaddr = NULL; - if (++pde == I915_PDES) { - if (++pdpe == I915_PDPES_PER_PDP(vm->i915)) + if (++idx->pte == GEN8_PTES) { + idx->pte = 0; + + if (++idx->pde == I915_PDES) { + idx->pde = 0; + + /* Limited by sg length for 3lvl */ + if (++idx->pdpe == GEN8_PML4ES_PER_PML4) { + idx->pdpe = 0; + ret = true; break; - pde = 0; + } + + GEM_BUG_ON(idx->pdpe >= I915_PDPES_PER_PDP(vm)); + pd = pdp->page_directory[idx->pdpe]; } - pte = 0; + + kunmap_atomic(vaddr); + vaddr = kmap_atomic_px(pd->page_table[idx->pde]); } - } + } while (1); + kunmap_atomic(vaddr); - if (pt_vaddr) - kunmap_px(ppgtt, pt_vaddr); + return ret; } -static void gen8_ppgtt_insert_entries(struct i915_address_space *vm, - struct sg_table *pages, - uint64_t start, - enum i915_cache_level cache_level, - u32 unused) +static void gen8_ppgtt_insert_3lvl(struct i915_address_space *vm, + struct sg_table *pages, + u64 start, + enum i915_cache_level cache_level, + u32 unused) { struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm); - struct sg_page_iter sg_iter; + struct sgt_dma iter = { + .sg = pages->sgl, + .dma = sg_dma_address(iter.sg), + .max = iter.dma + iter.sg->length, + }; + struct gen8_insert_pte idx = gen8_insert_pte(start); - __sg_page_iter_start(&sg_iter, pages->sgl, sg_nents(pages->sgl), 0); + gen8_ppgtt_insert_pte_entries(ppgtt, &ppgtt->pdp, &iter, &idx, + cache_level); +} - if (!USES_FULL_48BIT_PPGTT(vm->i915)) { - gen8_ppgtt_insert_pte_entries(vm, &ppgtt->pdp, &sg_iter, start, - cache_level); - } else { - struct i915_page_directory_pointer *pdp; - uint64_t pml4e; - uint64_t length = (uint64_t)pages->orig_nents << PAGE_SHIFT; +static void gen8_ppgtt_insert_4lvl(struct i915_address_space *vm, + struct sg_table *pages, + u64 start, + enum i915_cache_level cache_level, + u32 unused) +{ + struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm); + struct sgt_dma iter = { + .sg = pages->sgl, + .dma = sg_dma_address(iter.sg), + .max = iter.dma + iter.sg->length, + }; + struct i915_page_directory_pointer **pdps = ppgtt->pml4.pdps; + struct gen8_insert_pte idx = gen8_insert_pte(start); - gen8_for_each_pml4e(pdp, &ppgtt->pml4, start, length, pml4e) { - gen8_ppgtt_insert_pte_entries(vm, pdp, &sg_iter, - start, cache_level); - } - } + while (gen8_ppgtt_insert_pte_entries(ppgtt, pdps[idx.pml4e++], &iter, + &idx, cache_level)) + GEM_BUG_ON(idx.pml4e >= GEN8_PML4ES_PER_PML4); } -static void gen8_free_page_tables(struct drm_i915_private *dev_priv, +static void gen8_free_page_tables(struct i915_address_space *vm, struct i915_page_directory *pd) { int i; @@ -948,38 +934,34 @@ static void gen8_free_page_tables(struct drm_i915_private *dev_priv, if (!px_page(pd)) return; - for_each_set_bit(i, pd->used_pdes, I915_PDES) { - if (WARN_ON(!pd->page_table[i])) - continue; - - free_pt(dev_priv, pd->page_table[i]); - pd->page_table[i] = NULL; + for (i = 0; i < I915_PDES; i++) { + if (pd->page_table[i] != vm->scratch_pt) + free_pt(vm, pd->page_table[i]); } } static int gen8_init_scratch(struct i915_address_space *vm) { - struct drm_i915_private *dev_priv = vm->i915; int ret; - ret = setup_scratch_page(dev_priv, &vm->scratch_page, I915_GFP_DMA); + ret = setup_scratch_page(vm, I915_GFP_DMA); if (ret) return ret; - vm->scratch_pt = alloc_pt(dev_priv); + vm->scratch_pt = alloc_pt(vm); if (IS_ERR(vm->scratch_pt)) { ret = PTR_ERR(vm->scratch_pt); goto free_scratch_page; } - vm->scratch_pd = alloc_pd(dev_priv); + vm->scratch_pd = alloc_pd(vm); if (IS_ERR(vm->scratch_pd)) { ret = PTR_ERR(vm->scratch_pd); goto free_pt; } - if (USES_FULL_48BIT_PPGTT(dev_priv)) { - vm->scratch_pdp = alloc_pdp(dev_priv); + if (USES_FULL_48BIT_PPGTT(dev)) { + vm->scratch_pdp = alloc_pdp(vm); if (IS_ERR(vm->scratch_pdp)) { ret = PTR_ERR(vm->scratch_pdp); goto free_pd; @@ -994,11 +976,11 @@ static int gen8_init_scratch(struct i915_address_space *vm) return 0; free_pd: - free_pd(dev_priv, vm->scratch_pd); + free_pd(vm, vm->scratch_pd); free_pt: - free_pt(dev_priv, vm->scratch_pt); + free_pt(vm, vm->scratch_pt); free_scratch_page: - cleanup_scratch_page(dev_priv, &vm->scratch_page); + cleanup_scratch_page(vm); return ret; } @@ -1036,44 +1018,41 @@ static int gen8_ppgtt_notify_vgt(struct i915_hw_ppgtt *ppgtt, bool create) static void gen8_free_scratch(struct i915_address_space *vm) { - struct drm_i915_private *dev_priv = vm->i915; - - if (USES_FULL_48BIT_PPGTT(dev_priv)) - free_pdp(dev_priv, vm->scratch_pdp); - free_pd(dev_priv, vm->scratch_pd); - free_pt(dev_priv, vm->scratch_pt); - cleanup_scratch_page(dev_priv, &vm->scratch_page); + if (USES_FULL_48BIT_PPGTT(vm->i915)) + free_pdp(vm, vm->scratch_pdp); + free_pd(vm, vm->scratch_pd); + free_pt(vm, vm->scratch_pt); + cleanup_scratch_page(vm); } -static void gen8_ppgtt_cleanup_3lvl(struct drm_i915_private *dev_priv, +static void gen8_ppgtt_cleanup_3lvl(struct i915_address_space *vm, struct i915_page_directory_pointer *pdp) { int i; - for_each_set_bit(i, pdp->used_pdpes, I915_PDPES_PER_PDP(dev_priv)) { - if (WARN_ON(!pdp->page_directory[i])) + for (i = 0; i < I915_PDPES_PER_PDP(vm->i915); i++) { + if (pdp->page_directory[i] == vm->scratch_pd) continue; - gen8_free_page_tables(dev_priv, pdp->page_directory[i]); - free_pd(dev_priv, pdp->page_directory[i]); + gen8_free_page_tables(vm, pdp->page_directory[i]); + free_pd(vm, pdp->page_directory[i]); } - free_pdp(dev_priv, pdp); + free_pdp(vm, pdp); } static void gen8_ppgtt_cleanup_4lvl(struct i915_hw_ppgtt *ppgtt) { - struct drm_i915_private *dev_priv = ppgtt->base.i915; int i; - for_each_set_bit(i, ppgtt->pml4.used_pml4es, GEN8_PML4ES_PER_PML4) { - if (WARN_ON(!ppgtt->pml4.pdps[i])) + for (i = 0; i < GEN8_PML4ES_PER_PML4; i++) { + if (ppgtt->pml4.pdps[i] == ppgtt->base.scratch_pdp) continue; - gen8_ppgtt_cleanup_3lvl(dev_priv, ppgtt->pml4.pdps[i]); + gen8_ppgtt_cleanup_3lvl(&ppgtt->base, ppgtt->pml4.pdps[i]); } - cleanup_px(dev_priv, &ppgtt->pml4); + cleanup_px(&ppgtt->base, &ppgtt->pml4); } static void gen8_ppgtt_cleanup(struct i915_address_space *vm) @@ -1084,414 +1063,154 @@ static void gen8_ppgtt_cleanup(struct i915_address_space *vm) if (intel_vgpu_active(dev_priv)) gen8_ppgtt_notify_vgt(ppgtt, false); - if (!USES_FULL_48BIT_PPGTT(dev_priv)) - gen8_ppgtt_cleanup_3lvl(dev_priv, &ppgtt->pdp); + if (!USES_FULL_48BIT_PPGTT(vm->i915)) + gen8_ppgtt_cleanup_3lvl(&ppgtt->base, &ppgtt->pdp); else gen8_ppgtt_cleanup_4lvl(ppgtt); gen8_free_scratch(vm); } -/** - * gen8_ppgtt_alloc_pagetabs() - Allocate page tables for VA range. - * @vm: Master vm structure. - * @pd: Page directory for this address range. - * @start: Starting virtual address to begin allocations. - * @length: Size of the allocations. - * @new_pts: Bitmap set by function with new allocations. Likely used by the - * caller to free on error. - * - * Allocate the required number of page tables. Extremely similar to - * gen8_ppgtt_alloc_page_directories(). The main difference is here we are limited by - * the page directory boundary (instead of the page directory pointer). That - * boundary is 1GB virtual. Therefore, unlike gen8_ppgtt_alloc_page_directories(), it is - * possible, and likely that the caller will need to use multiple calls of this - * function to achieve the appropriate allocation. - * - * Return: 0 if success; negative error code otherwise. - */ -static int gen8_ppgtt_alloc_pagetabs(struct i915_address_space *vm, - struct i915_page_directory *pd, - uint64_t start, - uint64_t length, - unsigned long *new_pts) +static int gen8_ppgtt_alloc_pd(struct i915_address_space *vm, + struct i915_page_directory *pd, + u64 start, u64 length) { - struct drm_i915_private *dev_priv = vm->i915; struct i915_page_table *pt; - uint32_t pde; + u64 from = start; + unsigned int pde; gen8_for_each_pde(pt, pd, start, length, pde) { - /* Don't reallocate page tables */ - if (test_bit(pde, pd->used_pdes)) { - /* Scratch is never allocated this way */ - WARN_ON(pt == vm->scratch_pt); - continue; - } + if (pt == vm->scratch_pt) { + pt = alloc_pt(vm); + if (IS_ERR(pt)) + goto unwind; - pt = alloc_pt(dev_priv); - if (IS_ERR(pt)) - goto unwind_out; + gen8_initialize_pt(vm, pt); - gen8_initialize_pt(vm, pt); - pd->page_table[pde] = pt; - __set_bit(pde, new_pts); - trace_i915_page_table_entry_alloc(vm, pde, start, GEN8_PDE_SHIFT); - } + gen8_ppgtt_set_pde(vm, pd, pt, pde); + pd->used_pdes++; + } + pt->used_ptes += gen8_pte_count(start, length); + } return 0; -unwind_out: - for_each_set_bit(pde, new_pts, I915_PDES) - free_pt(dev_priv, pd->page_table[pde]); - +unwind: + gen8_ppgtt_clear_pd(vm, pd, from, start - from); return -ENOMEM; } -/** - * gen8_ppgtt_alloc_page_directories() - Allocate page directories for VA range. - * @vm: Master vm structure. - * @pdp: Page directory pointer for this address range. - * @start: Starting virtual address to begin allocations. - * @length: Size of the allocations. - * @new_pds: Bitmap set by function with new allocations. Likely used by the - * caller to free on error. - * - * Allocate the required number of page directories starting at the pde index of - * @start, and ending at the pde index @start + @length. This function will skip - * over already allocated page directories within the range, and only allocate - * new ones, setting the appropriate pointer within the pdp as well as the - * correct position in the bitmap @new_pds. - * - * The function will only allocate the pages within the range for a give page - * directory pointer. In other words, if @start + @length straddles a virtually - * addressed PDP boundary (512GB for 4k pages), there will be more allocations - * required by the caller, This is not currently possible, and the BUG in the - * code will prevent it. - * - * Return: 0 if success; negative error code otherwise. - */ -static int -gen8_ppgtt_alloc_page_directories(struct i915_address_space *vm, - struct i915_page_directory_pointer *pdp, - uint64_t start, - uint64_t length, - unsigned long *new_pds) +static int gen8_ppgtt_alloc_pdp(struct i915_address_space *vm, + struct i915_page_directory_pointer *pdp, + u64 start, u64 length) { - struct drm_i915_private *dev_priv = vm->i915; struct i915_page_directory *pd; - uint32_t pdpe; - uint32_t pdpes = I915_PDPES_PER_PDP(dev_priv); - - WARN_ON(!bitmap_empty(new_pds, pdpes)); + u64 from = start; + unsigned int pdpe; + int ret; gen8_for_each_pdpe(pd, pdp, start, length, pdpe) { - if (test_bit(pdpe, pdp->used_pdpes)) - continue; - - pd = alloc_pd(dev_priv); - if (IS_ERR(pd)) - goto unwind_out; - - gen8_initialize_pd(vm, pd); - pdp->page_directory[pdpe] = pd; - __set_bit(pdpe, new_pds); - trace_i915_page_directory_entry_alloc(vm, pdpe, start, GEN8_PDPE_SHIFT); - } - - return 0; - -unwind_out: - for_each_set_bit(pdpe, new_pds, pdpes) - free_pd(dev_priv, pdp->page_directory[pdpe]); - - return -ENOMEM; -} - -/** - * gen8_ppgtt_alloc_page_dirpointers() - Allocate pdps for VA range. - * @vm: Master vm structure. - * @pml4: Page map level 4 for this address range. - * @start: Starting virtual address to begin allocations. - * @length: Size of the allocations. - * @new_pdps: Bitmap set by function with new allocations. Likely used by the - * caller to free on error. - * - * Allocate the required number of page directory pointers. Extremely similar to - * gen8_ppgtt_alloc_page_directories() and gen8_ppgtt_alloc_pagetabs(). - * The main difference is here we are limited by the pml4 boundary (instead of - * the page directory pointer). - * - * Return: 0 if success; negative error code otherwise. - */ -static int -gen8_ppgtt_alloc_page_dirpointers(struct i915_address_space *vm, - struct i915_pml4 *pml4, - uint64_t start, - uint64_t length, - unsigned long *new_pdps) -{ - struct drm_i915_private *dev_priv = vm->i915; - struct i915_page_directory_pointer *pdp; - uint32_t pml4e; + if (pd == vm->scratch_pd) { + pd = alloc_pd(vm); + if (IS_ERR(pd)) + goto unwind; - WARN_ON(!bitmap_empty(new_pdps, GEN8_PML4ES_PER_PML4)); + gen8_initialize_pd(vm, pd); + gen8_ppgtt_set_pdpe(vm, pdp, pd, pdpe); + pdp->used_pdpes++; - gen8_for_each_pml4e(pdp, pml4, start, length, pml4e) { - if (!test_bit(pml4e, pml4->used_pml4es)) { - pdp = alloc_pdp(dev_priv); - if (IS_ERR(pdp)) - goto unwind_out; + mark_tlbs_dirty(i915_vm_to_ppgtt(vm)); + } - gen8_initialize_pdp(vm, pdp); - pml4->pdps[pml4e] = pdp; - __set_bit(pml4e, new_pdps); - trace_i915_page_directory_pointer_entry_alloc(vm, - pml4e, - start, - GEN8_PML4E_SHIFT); + ret = gen8_ppgtt_alloc_pd(vm, pd, start, length); + if (unlikely(ret)) { + gen8_ppgtt_set_pdpe(vm, pdp, vm->scratch_pd, pdpe); + pdp->used_pdpes--; + free_pd(vm, pd); + goto unwind; } } return 0; -unwind_out: - for_each_set_bit(pml4e, new_pdps, GEN8_PML4ES_PER_PML4) - free_pdp(dev_priv, pml4->pdps[pml4e]); - - return -ENOMEM; -} - -static void -free_gen8_temp_bitmaps(unsigned long *new_pds, unsigned long *new_pts) -{ - kfree(new_pts); - kfree(new_pds); -} - -/* Fills in the page directory bitmap, and the array of page tables bitmap. Both - * of these are based on the number of PDPEs in the system. - */ -static -int __must_check alloc_gen8_temp_bitmaps(unsigned long **new_pds, - unsigned long **new_pts, - uint32_t pdpes) -{ - unsigned long *pds; - unsigned long *pts; - - pds = kcalloc(BITS_TO_LONGS(pdpes), sizeof(unsigned long), GFP_TEMPORARY); - if (!pds) - return -ENOMEM; - - pts = kcalloc(pdpes, BITS_TO_LONGS(I915_PDES) * sizeof(unsigned long), - GFP_TEMPORARY); - if (!pts) - goto err_out; - - *new_pds = pds; - *new_pts = pts; - - return 0; - -err_out: - free_gen8_temp_bitmaps(pds, pts); +unwind: + gen8_ppgtt_clear_pdp(vm, pdp, from, start - from); return -ENOMEM; } -static int gen8_alloc_va_range_3lvl(struct i915_address_space *vm, - struct i915_page_directory_pointer *pdp, - uint64_t start, - uint64_t length) +static int gen8_ppgtt_alloc_3lvl(struct i915_address_space *vm, + u64 start, u64 length) { - struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm); - unsigned long *new_page_dirs, *new_page_tables; - struct drm_i915_private *dev_priv = vm->i915; - struct i915_page_directory *pd; - const uint64_t orig_start = start; - const uint64_t orig_length = length; - uint32_t pdpe; - uint32_t pdpes = I915_PDPES_PER_PDP(dev_priv); - int ret; - - ret = alloc_gen8_temp_bitmaps(&new_page_dirs, &new_page_tables, pdpes); - if (ret) - return ret; - - /* Do the allocations first so we can easily bail out */ - ret = gen8_ppgtt_alloc_page_directories(vm, pdp, start, length, - new_page_dirs); - if (ret) { - free_gen8_temp_bitmaps(new_page_dirs, new_page_tables); - return ret; - } - - /* For every page directory referenced, allocate page tables */ - gen8_for_each_pdpe(pd, pdp, start, length, pdpe) { - ret = gen8_ppgtt_alloc_pagetabs(vm, pd, start, length, - new_page_tables + pdpe * BITS_TO_LONGS(I915_PDES)); - if (ret) - goto err_out; - } - - start = orig_start; - length = orig_length; - - /* Allocations have completed successfully, so set the bitmaps, and do - * the mappings. */ - gen8_for_each_pdpe(pd, pdp, start, length, pdpe) { - gen8_pde_t *const page_directory = kmap_px(pd); - struct i915_page_table *pt; - uint64_t pd_len = length; - uint64_t pd_start = start; - uint32_t pde; - - /* Every pd should be allocated, we just did that above. */ - WARN_ON(!pd); - - gen8_for_each_pde(pt, pd, pd_start, pd_len, pde) { - /* Same reasoning as pd */ - WARN_ON(!pt); - WARN_ON(!pd_len); - WARN_ON(!gen8_pte_count(pd_start, pd_len)); - - /* Set our used ptes within the page table */ - bitmap_set(pt->used_ptes, - gen8_pte_index(pd_start), - gen8_pte_count(pd_start, pd_len)); - - /* Our pde is now pointing to the pagetable, pt */ - __set_bit(pde, pd->used_pdes); - - /* Map the PDE to the page table */ - page_directory[pde] = gen8_pde_encode(px_dma(pt), - I915_CACHE_LLC); - trace_i915_page_table_entry_map(&ppgtt->base, pde, pt, - gen8_pte_index(start), - gen8_pte_count(start, length), - GEN8_PTES); - - /* NB: We haven't yet mapped ptes to pages. At this - * point we're still relying on insert_entries() */ - } - - kunmap_px(ppgtt, page_directory); - __set_bit(pdpe, pdp->used_pdpes); - gen8_setup_pdpe(ppgtt, pdp, pd, pdpe); - } - - free_gen8_temp_bitmaps(new_page_dirs, new_page_tables); - mark_tlbs_dirty(ppgtt); - return 0; - -err_out: - while (pdpe--) { - unsigned long temp; - - for_each_set_bit(temp, new_page_tables + pdpe * - BITS_TO_LONGS(I915_PDES), I915_PDES) - free_pt(dev_priv, - pdp->page_directory[pdpe]->page_table[temp]); - } - - for_each_set_bit(pdpe, new_page_dirs, pdpes) - free_pd(dev_priv, pdp->page_directory[pdpe]); - - free_gen8_temp_bitmaps(new_page_dirs, new_page_tables); - mark_tlbs_dirty(ppgtt); - return ret; + return gen8_ppgtt_alloc_pdp(vm, + &i915_vm_to_ppgtt(vm)->pdp, start, length); } -static int gen8_alloc_va_range_4lvl(struct i915_address_space *vm, - struct i915_pml4 *pml4, - uint64_t start, - uint64_t length) +static int gen8_ppgtt_alloc_4lvl(struct i915_address_space *vm, + u64 start, u64 length) { - DECLARE_BITMAP(new_pdps, GEN8_PML4ES_PER_PML4); struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm); + struct i915_pml4 *pml4 = &ppgtt->pml4; struct i915_page_directory_pointer *pdp; - uint64_t pml4e; - int ret = 0; - - /* Do the pml4 allocations first, so we don't need to track the newly - * allocated tables below the pdp */ - bitmap_zero(new_pdps, GEN8_PML4ES_PER_PML4); - - /* The pagedirectory and pagetable allocations are done in the shared 3 - * and 4 level code. Just allocate the pdps. - */ - ret = gen8_ppgtt_alloc_page_dirpointers(vm, pml4, start, length, - new_pdps); - if (ret) - return ret; - - WARN(bitmap_weight(new_pdps, GEN8_PML4ES_PER_PML4) > 2, - "The allocation has spanned more than 512GB. " - "It is highly likely this is incorrect."); + u64 from = start; + u32 pml4e; + int ret; gen8_for_each_pml4e(pdp, pml4, start, length, pml4e) { - WARN_ON(!pdp); + if (pml4->pdps[pml4e] == vm->scratch_pdp) { + pdp = alloc_pdp(vm); + if (IS_ERR(pdp)) + goto unwind; - ret = gen8_alloc_va_range_3lvl(vm, pdp, start, length); - if (ret) - goto err_out; + gen8_initialize_pdp(vm, pdp); + gen8_ppgtt_set_pml4e(pml4, pdp, pml4e); + } - gen8_setup_pml4e(ppgtt, pml4, pdp, pml4e); + ret = gen8_ppgtt_alloc_pdp(vm, pdp, start, length); + if (unlikely(ret)) { + gen8_ppgtt_set_pml4e(pml4, vm->scratch_pdp, pml4e); + free_pdp(vm, pdp); + goto unwind; + } } - bitmap_or(pml4->used_pml4es, new_pdps, pml4->used_pml4es, - GEN8_PML4ES_PER_PML4); - return 0; -err_out: - for_each_set_bit(pml4e, new_pdps, GEN8_PML4ES_PER_PML4) - gen8_ppgtt_cleanup_3lvl(vm->i915, pml4->pdps[pml4e]); - - return ret; -} - -static int gen8_alloc_va_range(struct i915_address_space *vm, - uint64_t start, uint64_t length) -{ - struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm); - - if (USES_FULL_48BIT_PPGTT(vm->i915)) - return gen8_alloc_va_range_4lvl(vm, &ppgtt->pml4, start, length); - else - return gen8_alloc_va_range_3lvl(vm, &ppgtt->pdp, start, length); +unwind: + gen8_ppgtt_clear_4lvl(vm, from, start - from); + return -ENOMEM; } -static void gen8_dump_pdp(struct i915_page_directory_pointer *pdp, - uint64_t start, uint64_t length, +static void gen8_dump_pdp(struct i915_hw_ppgtt *ppgtt, + struct i915_page_directory_pointer *pdp, + u64 start, u64 length, gen8_pte_t scratch_pte, struct seq_file *m) { struct i915_page_directory *pd; - uint32_t pdpe; + u32 pdpe; gen8_for_each_pdpe(pd, pdp, start, length, pdpe) { struct i915_page_table *pt; - uint64_t pd_len = length; - uint64_t pd_start = start; - uint32_t pde; + u64 pd_len = length; + u64 pd_start = start; + u32 pde; - if (!test_bit(pdpe, pdp->used_pdpes)) + if (pdp->page_directory[pdpe] == ppgtt->base.scratch_pd) continue; seq_printf(m, "\tPDPE #%d\n", pdpe); gen8_for_each_pde(pt, pd, pd_start, pd_len, pde) { - uint32_t pte; + u32 pte; gen8_pte_t *pt_vaddr; - if (!test_bit(pde, pd->used_pdes)) + if (pd->page_table[pde] == ppgtt->base.scratch_pt) continue; - pt_vaddr = kmap_px(pt); + pt_vaddr = kmap_atomic_px(pt); for (pte = 0; pte < GEN8_PTES; pte += 4) { - uint64_t va = - (pdpe << GEN8_PDPE_SHIFT) | - (pde << GEN8_PDE_SHIFT) | - (pte << GEN8_PTE_SHIFT); + u64 va = (pdpe << GEN8_PDPE_SHIFT | + pde << GEN8_PDE_SHIFT | + pte << GEN8_PTE_SHIFT); int i; bool found = false; @@ -1510,9 +1229,6 @@ static void gen8_dump_pdp(struct i915_page_directory_pointer *pdp, } seq_puts(m, "\n"); } - /* don't use kunmap_px, it could trigger - * an unnecessary flush. - */ kunmap_atomic(pt_vaddr); } } @@ -1521,53 +1237,57 @@ static void gen8_dump_pdp(struct i915_page_directory_pointer *pdp, static void gen8_dump_ppgtt(struct i915_hw_ppgtt *ppgtt, struct seq_file *m) { struct i915_address_space *vm = &ppgtt->base; - uint64_t start = ppgtt->base.start; - uint64_t length = ppgtt->base.total; - gen8_pte_t scratch_pte = gen8_pte_encode(vm->scratch_page.daddr, - I915_CACHE_LLC); + const gen8_pte_t scratch_pte = + gen8_pte_encode(vm->scratch_page.daddr, I915_CACHE_LLC); + u64 start = 0, length = ppgtt->base.total; if (!USES_FULL_48BIT_PPGTT(vm->i915)) { - gen8_dump_pdp(&ppgtt->pdp, start, length, scratch_pte, m); + gen8_dump_pdp(ppgtt, &ppgtt->pdp, start, length, scratch_pte, m); } else { - uint64_t pml4e; + u64 pml4e; struct i915_pml4 *pml4 = &ppgtt->pml4; struct i915_page_directory_pointer *pdp; gen8_for_each_pml4e(pdp, pml4, start, length, pml4e) { - if (!test_bit(pml4e, pml4->used_pml4es)) + if (pml4->pdps[pml4e] == ppgtt->base.scratch_pdp) continue; seq_printf(m, " PML4E #%llu\n", pml4e); - gen8_dump_pdp(pdp, start, length, scratch_pte, m); + gen8_dump_pdp(ppgtt, pdp, start, length, scratch_pte, m); } } } -static int gen8_preallocate_top_level_pdps(struct i915_hw_ppgtt *ppgtt) +static int gen8_preallocate_top_level_pdp(struct i915_hw_ppgtt *ppgtt) { - unsigned long *new_page_dirs, *new_page_tables; - uint32_t pdpes = I915_PDPES_PER_PDP(to_i915(ppgtt->base.dev)); - int ret; + struct i915_address_space *vm = &ppgtt->base; + struct i915_page_directory_pointer *pdp = &ppgtt->pdp; + struct i915_page_directory *pd; + u64 start = 0, length = ppgtt->base.total; + u64 from = start; + unsigned int pdpe; - /* We allocate temp bitmap for page tables for no gain - * but as this is for init only, lets keep the things simple - */ - ret = alloc_gen8_temp_bitmaps(&new_page_dirs, &new_page_tables, pdpes); - if (ret) - return ret; + gen8_for_each_pdpe(pd, pdp, start, length, pdpe) { + pd = alloc_pd(vm); + if (IS_ERR(pd)) + goto unwind; - /* Allocate for all pdps regardless of how the ppgtt - * was defined. - */ - ret = gen8_ppgtt_alloc_page_directories(&ppgtt->base, &ppgtt->pdp, - 0, 1ULL << 32, - new_page_dirs); - if (!ret) - *ppgtt->pdp.used_pdpes = *new_page_dirs; + gen8_initialize_pd(vm, pd); + gen8_ppgtt_set_pdpe(vm, pdp, pd, pdpe); + pdp->used_pdpes++; + } - free_gen8_temp_bitmaps(new_page_dirs, new_page_tables); + pdp->used_pdpes++; /* never remove */ + return 0; - return ret; +unwind: + start -= from; + gen8_for_each_pdpe(pd, pdp, from, start, pdpe) { + gen8_ppgtt_set_pdpe(vm, pdp, vm->scratch_pd, pdpe); + free_pd(vm, pd); + } + pdp->used_pdpes = 0; + return -ENOMEM; } /* @@ -1586,17 +1306,19 @@ static int gen8_ppgtt_init(struct i915_hw_ppgtt *ppgtt) if (ret) return ret; - ppgtt->base.start = 0; ppgtt->base.cleanup = gen8_ppgtt_cleanup; - ppgtt->base.allocate_va_range = gen8_alloc_va_range; - ppgtt->base.insert_entries = gen8_ppgtt_insert_entries; - ppgtt->base.clear_range = gen8_ppgtt_clear_range; ppgtt->base.unbind_vma = ppgtt_unbind_vma; ppgtt->base.bind_vma = ppgtt_bind_vma; ppgtt->debug_dump = gen8_dump_ppgtt; + /* There are only few exceptions for gen >=6. chv and bxt. + * And we are not sure about the latter so play safe for now. + */ + if (IS_CHERRYVIEW(dev_priv) || IS_BROXTON(dev_priv)) + ppgtt->base.pt_kmap_wc = true; + if (USES_FULL_48BIT_PPGTT(dev_priv)) { - ret = setup_px(dev_priv, &ppgtt->pml4); + ret = setup_px(&ppgtt->base, &ppgtt->pml4); if (ret) goto free_scratch; @@ -1604,22 +1326,29 @@ static int gen8_ppgtt_init(struct i915_hw_ppgtt *ppgtt) ppgtt->base.total = 1ULL << 48; ppgtt->switch_mm = gen8_48b_mm_switch; + + ppgtt->base.allocate_va_range = gen8_ppgtt_alloc_4lvl; + ppgtt->base.insert_entries = gen8_ppgtt_insert_4lvl; + ppgtt->base.clear_range = gen8_ppgtt_clear_4lvl; } else { - ret = __pdp_init(dev_priv, &ppgtt->pdp); + ret = __pdp_init(&ppgtt->base, &ppgtt->pdp); if (ret) goto free_scratch; ppgtt->base.total = 1ULL << 32; ppgtt->switch_mm = gen8_legacy_mm_switch; - trace_i915_page_directory_pointer_entry_alloc(&ppgtt->base, - 0, 0, - GEN8_PML4E_SHIFT); if (intel_vgpu_active(dev_priv)) { - ret = gen8_preallocate_top_level_pdps(ppgtt); - if (ret) + ret = gen8_preallocate_top_level_pdp(ppgtt); + if (ret) { + __pdp_fini(&ppgtt->pdp); goto free_scratch; + } } + + ppgtt->base.allocate_va_range = gen8_ppgtt_alloc_3lvl; + ppgtt->base.insert_entries = gen8_ppgtt_insert_3lvl; + ppgtt->base.clear_range = gen8_ppgtt_clear_3lvl; } if (intel_vgpu_active(dev_priv)) @@ -1637,9 +1366,8 @@ static void gen6_dump_ppgtt(struct i915_hw_ppgtt *ppgtt, struct seq_file *m) struct i915_address_space *vm = &ppgtt->base; struct i915_page_table *unused; gen6_pte_t scratch_pte; - uint32_t pd_entry; - uint32_t pte, pde; - uint32_t start = ppgtt->base.start, length = ppgtt->base.total; + u32 pd_entry, pte, pde; + u32 start = 0, length = ppgtt->base.total; scratch_pte = vm->pte_encode(vm->scratch_page.daddr, I915_CACHE_LLC, 0); @@ -1658,7 +1386,7 @@ static void gen6_dump_ppgtt(struct i915_hw_ppgtt *ppgtt, struct seq_file *m) expected); seq_printf(m, "\tPDE: %x\n", pd_entry); - pt_vaddr = kmap_px(ppgtt->pd.page_table[pde]); + pt_vaddr = kmap_atomic_px(ppgtt->pd.page_table[pde]); for (pte = 0; pte < GEN6_PTES; pte+=4) { unsigned long va = @@ -1681,55 +1409,46 @@ static void gen6_dump_ppgtt(struct i915_hw_ppgtt *ppgtt, struct seq_file *m) } seq_puts(m, "\n"); } - kunmap_px(ppgtt, pt_vaddr); + kunmap_atomic(pt_vaddr); } } /* Write pde (index) from the page directory @pd to the page table @pt */ -static void gen6_write_pde(struct i915_page_directory *pd, - const int pde, struct i915_page_table *pt) +static inline void gen6_write_pde(const struct i915_hw_ppgtt *ppgtt, + const unsigned int pde, + const struct i915_page_table *pt) { /* Caller needs to make sure the write completes if necessary */ - struct i915_hw_ppgtt *ppgtt = - container_of(pd, struct i915_hw_ppgtt, pd); - u32 pd_entry; - - pd_entry = GEN6_PDE_ADDR_ENCODE(px_dma(pt)); - pd_entry |= GEN6_PDE_VALID; - - writel(pd_entry, ppgtt->pd_addr + pde); + writel_relaxed(GEN6_PDE_ADDR_ENCODE(px_dma(pt)) | GEN6_PDE_VALID, + ppgtt->pd_addr + pde); } /* Write all the page tables found in the ppgtt structure to incrementing page * directories. */ -static void gen6_write_page_range(struct drm_i915_private *dev_priv, - struct i915_page_directory *pd, - uint32_t start, uint32_t length) +static void gen6_write_page_range(struct i915_hw_ppgtt *ppgtt, + u32 start, u32 length) { - struct i915_ggtt *ggtt = &dev_priv->ggtt; struct i915_page_table *pt; - uint32_t pde; + unsigned int pde; - gen6_for_each_pde(pt, pd, start, length, pde) - gen6_write_pde(pd, pde, pt); + gen6_for_each_pde(pt, &ppgtt->pd, start, length, pde) + gen6_write_pde(ppgtt, pde, pt); - /* Make sure write is complete before other code can use this page - * table. Also require for WC mapped PTEs */ - readl(ggtt->gsm); + mark_tlbs_dirty(ppgtt); + wmb(); } -static uint32_t get_pd_offset(struct i915_hw_ppgtt *ppgtt) +static inline u32 get_pd_offset(struct i915_hw_ppgtt *ppgtt) { - BUG_ON(ppgtt->pd.base.ggtt_offset & 0x3f); - - return (ppgtt->pd.base.ggtt_offset / 64) << 16; + GEM_BUG_ON(ppgtt->pd.base.ggtt_offset & 0x3f); + return ppgtt->pd.base.ggtt_offset << 10; } static int hsw_mm_switch(struct i915_hw_ppgtt *ppgtt, struct drm_i915_gem_request *req) { - struct intel_ring *ring = req->ring; struct intel_engine_cs *engine = req->engine; + u32 *cs; int ret; /* NB: TLBs must be flushed and invalidated before a switch */ @@ -1737,17 +1456,17 @@ static int hsw_mm_switch(struct i915_hw_ppgtt *ppgtt, if (ret) return ret; - ret = intel_ring_begin(req, 6); - if (ret) - return ret; + cs = intel_ring_begin(req, 6); + if (IS_ERR(cs)) + return PTR_ERR(cs); - intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(2)); - intel_ring_emit_reg(ring, RING_PP_DIR_DCLV(engine)); - intel_ring_emit(ring, PP_DIR_DCLV_2G); - intel_ring_emit_reg(ring, RING_PP_DIR_BASE(engine)); - intel_ring_emit(ring, get_pd_offset(ppgtt)); - intel_ring_emit(ring, MI_NOOP); - intel_ring_advance(ring); + *cs++ = MI_LOAD_REGISTER_IMM(2); + *cs++ = i915_mmio_reg_offset(RING_PP_DIR_DCLV(engine)); + *cs++ = PP_DIR_DCLV_2G; + *cs++ = i915_mmio_reg_offset(RING_PP_DIR_BASE(engine)); + *cs++ = get_pd_offset(ppgtt); + *cs++ = MI_NOOP; + intel_ring_advance(req, cs); return 0; } @@ -1755,8 +1474,8 @@ static int hsw_mm_switch(struct i915_hw_ppgtt *ppgtt, static int gen7_mm_switch(struct i915_hw_ppgtt *ppgtt, struct drm_i915_gem_request *req) { - struct intel_ring *ring = req->ring; struct intel_engine_cs *engine = req->engine; + u32 *cs; int ret; /* NB: TLBs must be flushed and invalidated before a switch */ @@ -1764,17 +1483,17 @@ static int gen7_mm_switch(struct i915_hw_ppgtt *ppgtt, if (ret) return ret; - ret = intel_ring_begin(req, 6); - if (ret) - return ret; + cs = intel_ring_begin(req, 6); + if (IS_ERR(cs)) + return PTR_ERR(cs); - intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(2)); - intel_ring_emit_reg(ring, RING_PP_DIR_DCLV(engine)); - intel_ring_emit(ring, PP_DIR_DCLV_2G); - intel_ring_emit_reg(ring, RING_PP_DIR_BASE(engine)); - intel_ring_emit(ring, get_pd_offset(ppgtt)); - intel_ring_emit(ring, MI_NOOP); - intel_ring_advance(ring); + *cs++ = MI_LOAD_REGISTER_IMM(2); + *cs++ = i915_mmio_reg_offset(RING_PP_DIR_DCLV(engine)); + *cs++ = PP_DIR_DCLV_2G; + *cs++ = i915_mmio_reg_offset(RING_PP_DIR_BASE(engine)); + *cs++ = get_pd_offset(ppgtt); + *cs++ = MI_NOOP; + intel_ring_advance(req, cs); /* XXX: RCS is the only one to auto invalidate the TLBs? */ if (engine->id != RCS) { @@ -1813,7 +1532,7 @@ static void gen8_ppgtt_enable(struct drm_i915_private *dev_priv) static void gen7_ppgtt_enable(struct drm_i915_private *dev_priv) { struct intel_engine_cs *engine; - uint32_t ecochk, ecobits; + u32 ecochk, ecobits; enum intel_engine_id id; ecobits = I915_READ(GAC_ECO_BITS); @@ -1837,7 +1556,7 @@ static void gen7_ppgtt_enable(struct drm_i915_private *dev_priv) static void gen6_ppgtt_enable(struct drm_i915_private *dev_priv) { - uint32_t ecochk, gab_ctl, ecobits; + u32 ecochk, gab_ctl, ecobits; ecobits = I915_READ(GAC_ECO_BITS); I915_WRITE(GAC_ECO_BITS, ecobits | ECOBITS_SNB_BIT | @@ -1854,168 +1573,124 @@ static void gen6_ppgtt_enable(struct drm_i915_private *dev_priv) /* PPGTT support for Sandybdrige/Gen6 and later */ static void gen6_ppgtt_clear_range(struct i915_address_space *vm, - uint64_t start, - uint64_t length) + u64 start, u64 length) { struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm); - gen6_pte_t *pt_vaddr, scratch_pte; - unsigned first_entry = start >> PAGE_SHIFT; - unsigned num_entries = length >> PAGE_SHIFT; - unsigned act_pt = first_entry / GEN6_PTES; - unsigned first_pte = first_entry % GEN6_PTES; - unsigned last_pte, i; - - scratch_pte = vm->pte_encode(vm->scratch_page.daddr, - I915_CACHE_LLC, 0); + unsigned int first_entry = start >> PAGE_SHIFT; + unsigned int pde = first_entry / GEN6_PTES; + unsigned int pte = first_entry % GEN6_PTES; + unsigned int num_entries = length >> PAGE_SHIFT; + gen6_pte_t scratch_pte = + vm->pte_encode(vm->scratch_page.daddr, I915_CACHE_LLC, 0); while (num_entries) { - last_pte = first_pte + num_entries; - if (last_pte > GEN6_PTES) - last_pte = GEN6_PTES; + struct i915_page_table *pt = ppgtt->pd.page_table[pde++]; + unsigned int end = min(pte + num_entries, GEN6_PTES); + gen6_pte_t *vaddr; - pt_vaddr = kmap_px(ppgtt->pd.page_table[act_pt]); + num_entries -= end - pte; - for (i = first_pte; i < last_pte; i++) - pt_vaddr[i] = scratch_pte; + /* Note that the hw doesn't support removing PDE on the fly + * (they are cached inside the context with no means to + * invalidate the cache), so we can only reset the PTE + * entries back to scratch. + */ - kunmap_px(ppgtt, pt_vaddr); + vaddr = kmap_atomic_px(pt); + do { + vaddr[pte++] = scratch_pte; + } while (pte < end); + kunmap_atomic(vaddr); - num_entries -= last_pte - first_pte; - first_pte = 0; - act_pt++; + pte = 0; } } static void gen6_ppgtt_insert_entries(struct i915_address_space *vm, struct sg_table *pages, - uint64_t start, - enum i915_cache_level cache_level, u32 flags) + u64 start, + enum i915_cache_level cache_level, + u32 flags) { struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm); unsigned first_entry = start >> PAGE_SHIFT; unsigned act_pt = first_entry / GEN6_PTES; unsigned act_pte = first_entry % GEN6_PTES; - gen6_pte_t *pt_vaddr = NULL; - struct sgt_iter sgt_iter; - dma_addr_t addr; + const u32 pte_encode = vm->pte_encode(0, cache_level, flags); + struct sgt_dma iter; + gen6_pte_t *vaddr; + + vaddr = kmap_atomic_px(ppgtt->pd.page_table[act_pt]); + iter.sg = pages->sgl; + iter.dma = sg_dma_address(iter.sg); + iter.max = iter.dma + iter.sg->length; + do { + vaddr[act_pte] = pte_encode | GEN6_PTE_ADDR_ENCODE(iter.dma); - for_each_sgt_dma(addr, sgt_iter, pages) { - if (pt_vaddr == NULL) - pt_vaddr = kmap_px(ppgtt->pd.page_table[act_pt]); + iter.dma += PAGE_SIZE; + if (iter.dma == iter.max) { + iter.sg = __sg_next(iter.sg); + if (!iter.sg) + break; - pt_vaddr[act_pte] = - vm->pte_encode(addr, cache_level, flags); + iter.dma = sg_dma_address(iter.sg); + iter.max = iter.dma + iter.sg->length; + } if (++act_pte == GEN6_PTES) { - kunmap_px(ppgtt, pt_vaddr); - pt_vaddr = NULL; - act_pt++; + kunmap_atomic(vaddr); + vaddr = kmap_atomic_px(ppgtt->pd.page_table[++act_pt]); act_pte = 0; } - } - - if (pt_vaddr) - kunmap_px(ppgtt, pt_vaddr); + } while (1); + kunmap_atomic(vaddr); } static int gen6_alloc_va_range(struct i915_address_space *vm, - uint64_t start_in, uint64_t length_in) + u64 start, u64 length) { - DECLARE_BITMAP(new_page_tables, I915_PDES); - struct drm_i915_private *dev_priv = vm->i915; - struct i915_ggtt *ggtt = &dev_priv->ggtt; struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm); struct i915_page_table *pt; - uint32_t start, length, start_save, length_save; - uint32_t pde; - int ret; - - start = start_save = start_in; - length = length_save = length_in; + u64 from = start; + unsigned int pde; + bool flush = false; - bitmap_zero(new_page_tables, I915_PDES); - - /* The allocation is done in two stages so that we can bail out with - * minimal amount of pain. The first stage finds new page tables that - * need allocation. The second stage marks use ptes within the page - * tables. - */ gen6_for_each_pde(pt, &ppgtt->pd, start, length, pde) { - if (pt != vm->scratch_pt) { - WARN_ON(bitmap_empty(pt->used_ptes, GEN6_PTES)); - continue; - } - - /* We've already allocated a page table */ - WARN_ON(!bitmap_empty(pt->used_ptes, GEN6_PTES)); + if (pt == vm->scratch_pt) { + pt = alloc_pt(vm); + if (IS_ERR(pt)) + goto unwind_out; - pt = alloc_pt(dev_priv); - if (IS_ERR(pt)) { - ret = PTR_ERR(pt); - goto unwind_out; + gen6_initialize_pt(vm, pt); + ppgtt->pd.page_table[pde] = pt; + gen6_write_pde(ppgtt, pde, pt); + flush = true; } - - gen6_initialize_pt(vm, pt); - - ppgtt->pd.page_table[pde] = pt; - __set_bit(pde, new_page_tables); - trace_i915_page_table_entry_alloc(vm, pde, start, GEN6_PDE_SHIFT); } - start = start_save; - length = length_save; - - gen6_for_each_pde(pt, &ppgtt->pd, start, length, pde) { - DECLARE_BITMAP(tmp_bitmap, GEN6_PTES); - - bitmap_zero(tmp_bitmap, GEN6_PTES); - bitmap_set(tmp_bitmap, gen6_pte_index(start), - gen6_pte_count(start, length)); - - if (__test_and_clear_bit(pde, new_page_tables)) - gen6_write_pde(&ppgtt->pd, pde, pt); - - trace_i915_page_table_entry_map(vm, pde, pt, - gen6_pte_index(start), - gen6_pte_count(start, length), - GEN6_PTES); - bitmap_or(pt->used_ptes, tmp_bitmap, pt->used_ptes, - GEN6_PTES); + if (flush) { + mark_tlbs_dirty(ppgtt); + wmb(); } - WARN_ON(!bitmap_empty(new_page_tables, I915_PDES)); - - /* Make sure write is complete before other code can use this page - * table. Also require for WC mapped PTEs */ - readl(ggtt->gsm); - - mark_tlbs_dirty(ppgtt); return 0; unwind_out: - for_each_set_bit(pde, new_page_tables, I915_PDES) { - struct i915_page_table *pt = ppgtt->pd.page_table[pde]; - - ppgtt->pd.page_table[pde] = vm->scratch_pt; - free_pt(dev_priv, pt); - } - - mark_tlbs_dirty(ppgtt); - return ret; + gen6_ppgtt_clear_range(vm, from, start); + return -ENOMEM; } static int gen6_init_scratch(struct i915_address_space *vm) { - struct drm_i915_private *dev_priv = vm->i915; int ret; - ret = setup_scratch_page(dev_priv, &vm->scratch_page, I915_GFP_DMA); + ret = setup_scratch_page(vm, I915_GFP_DMA); if (ret) return ret; - vm->scratch_pt = alloc_pt(dev_priv); + vm->scratch_pt = alloc_pt(vm); if (IS_ERR(vm->scratch_pt)) { - cleanup_scratch_page(dev_priv, &vm->scratch_page); + cleanup_scratch_page(vm); return PTR_ERR(vm->scratch_pt); } @@ -2026,25 +1701,22 @@ static int gen6_init_scratch(struct i915_address_space *vm) static void gen6_free_scratch(struct i915_address_space *vm) { - struct drm_i915_private *dev_priv = vm->i915; - - free_pt(dev_priv, vm->scratch_pt); - cleanup_scratch_page(dev_priv, &vm->scratch_page); + free_pt(vm, vm->scratch_pt); + cleanup_scratch_page(vm); } static void gen6_ppgtt_cleanup(struct i915_address_space *vm) { struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm); struct i915_page_directory *pd = &ppgtt->pd; - struct drm_i915_private *dev_priv = vm->i915; struct i915_page_table *pt; - uint32_t pde; + u32 pde; drm_mm_remove_node(&ppgtt->node); gen6_for_all_pdes(pt, pd, pde) if (pt != vm->scratch_pt) - free_pt(dev_priv, pt); + free_pt(vm, pt); gen6_free_scratch(vm); } @@ -2077,6 +1749,12 @@ static int gen6_ppgtt_allocate_page_directories(struct i915_hw_ppgtt *ppgtt) if (ppgtt->node.start < ggtt->mappable_end) DRM_DEBUG("Forced to use aperture for PDEs\n"); + ppgtt->pd.base.ggtt_offset = + ppgtt->node.start / PAGE_SIZE * sizeof(gen6_pte_t); + + ppgtt->pd_addr = (gen6_pte_t __iomem *)ggtt->gsm + + ppgtt->pd.base.ggtt_offset / sizeof(gen6_pte_t); + return 0; err_out: @@ -2090,10 +1768,10 @@ static int gen6_ppgtt_alloc(struct i915_hw_ppgtt *ppgtt) } static void gen6_scratch_va_range(struct i915_hw_ppgtt *ppgtt, - uint64_t start, uint64_t length) + u64 start, u64 length) { struct i915_page_table *unused; - uint32_t pde; + u32 pde; gen6_for_each_pde(unused, &ppgtt->pd, start, length, pde) ppgtt->pd.page_table[pde] = ppgtt->base.scratch_pt; @@ -2119,32 +1797,29 @@ static int gen6_ppgtt_init(struct i915_hw_ppgtt *ppgtt) if (ret) return ret; - ppgtt->base.allocate_va_range = gen6_alloc_va_range; ppgtt->base.clear_range = gen6_ppgtt_clear_range; ppgtt->base.insert_entries = gen6_ppgtt_insert_entries; ppgtt->base.unbind_vma = ppgtt_unbind_vma; ppgtt->base.bind_vma = ppgtt_bind_vma; ppgtt->base.cleanup = gen6_ppgtt_cleanup; - ppgtt->base.start = 0; ppgtt->base.total = I915_PDES * GEN6_PTES * PAGE_SIZE; ppgtt->debug_dump = gen6_dump_ppgtt; - ppgtt->pd.base.ggtt_offset = - ppgtt->node.start / PAGE_SIZE * sizeof(gen6_pte_t); - - ppgtt->pd_addr = (gen6_pte_t __iomem *)ggtt->gsm + - ppgtt->pd.base.ggtt_offset / sizeof(gen6_pte_t); - gen6_scratch_va_range(ppgtt, 0, ppgtt->base.total); + gen6_write_page_range(ppgtt, 0, ppgtt->base.total); - gen6_write_page_range(dev_priv, &ppgtt->pd, 0, ppgtt->base.total); + ret = gen6_alloc_va_range(&ppgtt->base, 0, ppgtt->base.total); + if (ret) { + gen6_ppgtt_cleanup(&ppgtt->base); + return ret; + } DRM_DEBUG_DRIVER("Allocated pde space (%lldM) at GTT entry: %llx\n", ppgtt->node.size >> 20, ppgtt->node.start / PAGE_SIZE); - DRM_DEBUG("Adding PPGTT at offset %x\n", - ppgtt->pd.base.ggtt_offset << 10); + DRM_DEBUG_DRIVER("Adding PPGTT at offset %x\n", + ppgtt->pd.base.ggtt_offset << 10); return 0; } @@ -2153,6 +1828,7 @@ static int __hw_ppgtt_init(struct i915_hw_ppgtt *ppgtt, struct drm_i915_private *dev_priv) { ppgtt->base.i915 = dev_priv; + ppgtt->base.dma = &dev_priv->drm.pdev->dev; if (INTEL_INFO(dev_priv)->gen < 8) return gen6_ppgtt_init(ppgtt); @@ -2165,15 +1841,23 @@ static void i915_address_space_init(struct i915_address_space *vm, const char *name) { i915_gem_timeline_init(dev_priv, &vm->timeline, name); - drm_mm_init(&vm->mm, vm->start, vm->total); + + drm_mm_init(&vm->mm, 0, vm->total); + vm->mm.head_node.color = I915_COLOR_UNEVICTABLE; + INIT_LIST_HEAD(&vm->active_list); INIT_LIST_HEAD(&vm->inactive_list); INIT_LIST_HEAD(&vm->unbound_list); + list_add_tail(&vm->global_link, &dev_priv->vm_list); + pagevec_init(&vm->free_pages, false); } static void i915_address_space_fini(struct i915_address_space *vm) { + if (pagevec_count(&vm->free_pages)) + vm_free_pages_release(vm); + i915_gem_timeline_fini(&vm->timeline); drm_mm_takedown(&vm->mm); list_del(&vm->global_link); @@ -2185,34 +1869,17 @@ static void gtt_write_workarounds(struct drm_i915_private *dev_priv) * called on driver load and after a GPU reset, so you can place * workarounds here even if they get overwritten by GPU reset. */ - /* WaIncreaseDefaultTLBEntries:chv,bdw,skl,bxt */ + /* WaIncreaseDefaultTLBEntries:chv,bdw,skl,bxt,kbl,glk */ if (IS_BROADWELL(dev_priv)) I915_WRITE(GEN8_L3_LRA_1_GPGPU, GEN8_L3_LRA_1_GPGPU_DEFAULT_VALUE_BDW); else if (IS_CHERRYVIEW(dev_priv)) I915_WRITE(GEN8_L3_LRA_1_GPGPU, GEN8_L3_LRA_1_GPGPU_DEFAULT_VALUE_CHV); - else if (IS_SKYLAKE(dev_priv)) + else if (IS_GEN9_BC(dev_priv)) I915_WRITE(GEN8_L3_LRA_1_GPGPU, GEN9_L3_LRA_1_GPGPU_DEFAULT_VALUE_SKL); - else if (IS_BROXTON(dev_priv)) + else if (IS_GEN9_LP(dev_priv)) I915_WRITE(GEN8_L3_LRA_1_GPGPU, GEN9_L3_LRA_1_GPGPU_DEFAULT_VALUE_BXT); } -static int i915_ppgtt_init(struct i915_hw_ppgtt *ppgtt, - struct drm_i915_private *dev_priv, - struct drm_i915_file_private *file_priv, - const char *name) -{ - int ret; - - ret = __hw_ppgtt_init(ppgtt, dev_priv); - if (ret == 0) { - kref_init(&ppgtt->ref); - i915_address_space_init(&ppgtt->base, dev_priv, name); - ppgtt->base.file = file_priv; - } - - return ret; -} - int i915_ppgtt_init_hw(struct drm_i915_private *dev_priv) { gtt_write_workarounds(dev_priv); @@ -2250,12 +1917,16 @@ i915_ppgtt_create(struct drm_i915_private *dev_priv, if (!ppgtt) return ERR_PTR(-ENOMEM); - ret = i915_ppgtt_init(ppgtt, dev_priv, fpriv, name); + ret = __hw_ppgtt_init(ppgtt, dev_priv); if (ret) { kfree(ppgtt); return ERR_PTR(ret); } + kref_init(&ppgtt->ref); + i915_address_space_init(&ppgtt->base, dev_priv, name); + ppgtt->base.file = fpriv; + trace_i915_ppgtt_create(&ppgtt->base); return ppgtt; @@ -2294,9 +1965,8 @@ void i915_ppgtt_release(struct kref *kref) WARN_ON(!list_empty(&ppgtt->base.inactive_list)); WARN_ON(!list_empty(&ppgtt->base.unbound_list)); - i915_address_space_fini(&ppgtt->base); - ppgtt->base.cleanup(&ppgtt->base); + i915_address_space_fini(&ppgtt->base); kfree(ppgtt); } @@ -2358,7 +2028,7 @@ void i915_gem_suspend_gtt_mappings(struct drm_i915_private *dev_priv) i915_check_and_clear_faults(dev_priv); - ggtt->base.clear_range(&ggtt->base, ggtt->base.start, ggtt->base.total); + ggtt->base.clear_range(&ggtt->base, 0, ggtt->base.total); i915_ggtt_invalidate(dev_priv); } @@ -2395,7 +2065,7 @@ static void gen8_set_pte(void __iomem *addr, gen8_pte_t pte) static void gen8_ggtt_insert_page(struct i915_address_space *vm, dma_addr_t addr, - uint64_t offset, + u64 offset, enum i915_cache_level level, u32 unused) { @@ -2410,32 +2080,22 @@ static void gen8_ggtt_insert_page(struct i915_address_space *vm, static void gen8_ggtt_insert_entries(struct i915_address_space *vm, struct sg_table *st, - uint64_t start, - enum i915_cache_level level, u32 unused) + u64 start, + enum i915_cache_level level, + u32 unused) { struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm); struct sgt_iter sgt_iter; gen8_pte_t __iomem *gtt_entries; - gen8_pte_t gtt_entry; + const gen8_pte_t pte_encode = gen8_pte_encode(0, level); dma_addr_t addr; - int i = 0; - gtt_entries = (gen8_pte_t __iomem *)ggtt->gsm + (start >> PAGE_SHIFT); + gtt_entries = (gen8_pte_t __iomem *)ggtt->gsm; + gtt_entries += start >> PAGE_SHIFT; + for_each_sgt_dma(addr, sgt_iter, st) + gen8_set_pte(gtt_entries++, pte_encode | addr); - for_each_sgt_dma(addr, sgt_iter, st) { - gtt_entry = gen8_pte_encode(addr, level); - gen8_set_pte(>t_entries[i++], gtt_entry); - } - - /* - * XXX: This serves as a posting read to make sure that the PTE has - * actually been updated. There is some concern that even though - * registers and PTEs are within the same BAR that they are potentially - * of NUMA access patterns. Therefore, even with the way we assume - * hardware should work, we must keep this posting read for paranoia. - */ - if (i != 0) - WARN_ON(readq(>t_entries[i-1]) != gtt_entry); + wmb(); /* This next bit makes the above posting read even more important. We * want to flush the TLBs only after we're certain all the PTE updates @@ -2444,35 +2104,9 @@ static void gen8_ggtt_insert_entries(struct i915_address_space *vm, ggtt->invalidate(vm->i915); } -struct insert_entries { - struct i915_address_space *vm; - struct sg_table *st; - uint64_t start; - enum i915_cache_level level; - u32 flags; -}; - -static int gen8_ggtt_insert_entries__cb(void *_arg) -{ - struct insert_entries *arg = _arg; - gen8_ggtt_insert_entries(arg->vm, arg->st, - arg->start, arg->level, arg->flags); - return 0; -} - -static void gen8_ggtt_insert_entries__BKL(struct i915_address_space *vm, - struct sg_table *st, - uint64_t start, - enum i915_cache_level level, - u32 flags) -{ - struct insert_entries arg = { vm, st, start, level, flags }; - stop_machine(gen8_ggtt_insert_entries__cb, &arg, NULL); -} - static void gen6_ggtt_insert_page(struct i915_address_space *vm, dma_addr_t addr, - uint64_t offset, + u64 offset, enum i915_cache_level level, u32 flags) { @@ -2493,31 +2127,18 @@ static void gen6_ggtt_insert_page(struct i915_address_space *vm, */ static void gen6_ggtt_insert_entries(struct i915_address_space *vm, struct sg_table *st, - uint64_t start, - enum i915_cache_level level, u32 flags) + u64 start, + enum i915_cache_level level, + u32 flags) { struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm); - struct sgt_iter sgt_iter; - gen6_pte_t __iomem *gtt_entries; - gen6_pte_t gtt_entry; + gen6_pte_t __iomem *entries = (gen6_pte_t __iomem *)ggtt->gsm; + unsigned int i = start >> PAGE_SHIFT; + struct sgt_iter iter; dma_addr_t addr; - int i = 0; - - gtt_entries = (gen6_pte_t __iomem *)ggtt->gsm + (start >> PAGE_SHIFT); - - for_each_sgt_dma(addr, sgt_iter, st) { - gtt_entry = vm->pte_encode(addr, level, flags); - iowrite32(gtt_entry, >t_entries[i++]); - } - - /* XXX: This serves as a posting read to make sure that the PTE has - * actually been updated. There is some concern that even though - * registers and PTEs are within the same BAR that they are potentially - * of NUMA access patterns. Therefore, even with the way we assume - * hardware should work, we must keep this posting read for paranoia. - */ - if (i != 0) - WARN_ON(readl(>t_entries[i-1]) != gtt_entry); + for_each_sgt_dma(addr, iter, st) + iowrite32(vm->pte_encode(addr, level, flags), &entries[i++]); + wmb(); /* This next bit makes the above posting read even more important. We * want to flush the TLBs only after we're certain all the PTE updates @@ -2527,17 +2148,19 @@ static void gen6_ggtt_insert_entries(struct i915_address_space *vm, } static void nop_clear_range(struct i915_address_space *vm, - uint64_t start, uint64_t length) + u64 start, u64 length) { } static void gen8_ggtt_clear_range(struct i915_address_space *vm, - uint64_t start, uint64_t length) + u64 start, u64 length) { struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm); unsigned first_entry = start >> PAGE_SHIFT; unsigned num_entries = length >> PAGE_SHIFT; - gen8_pte_t scratch_pte, __iomem *gtt_base = + const gen8_pte_t scratch_pte = + gen8_pte_encode(vm->scratch_page.daddr, I915_CACHE_LLC); + gen8_pte_t __iomem *gtt_base = (gen8_pte_t __iomem *)ggtt->gsm + first_entry; const int max_entries = ggtt_total_entries(ggtt) - first_entry; int i; @@ -2547,16 +2170,12 @@ static void gen8_ggtt_clear_range(struct i915_address_space *vm, first_entry, num_entries, max_entries)) num_entries = max_entries; - scratch_pte = gen8_pte_encode(vm->scratch_page.daddr, - I915_CACHE_LLC); for (i = 0; i < num_entries; i++) gen8_set_pte(>t_base[i], scratch_pte); - readl(gtt_base); } static void gen6_ggtt_clear_range(struct i915_address_space *vm, - uint64_t start, - uint64_t length) + u64 start, u64 length) { struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm); unsigned first_entry = start >> PAGE_SHIFT; @@ -2576,12 +2195,11 @@ static void gen6_ggtt_clear_range(struct i915_address_space *vm, for (i = 0; i < num_entries; i++) iowrite32(scratch_pte, >t_base[i]); - readl(gtt_base); } static void i915_ggtt_insert_page(struct i915_address_space *vm, dma_addr_t addr, - uint64_t offset, + u64 offset, enum i915_cache_level cache_level, u32 unused) { @@ -2593,19 +2211,18 @@ static void i915_ggtt_insert_page(struct i915_address_space *vm, static void i915_ggtt_insert_entries(struct i915_address_space *vm, struct sg_table *pages, - uint64_t start, - enum i915_cache_level cache_level, u32 unused) + u64 start, + enum i915_cache_level cache_level, + u32 unused) { unsigned int flags = (cache_level == I915_CACHE_NONE) ? AGP_USER_MEMORY : AGP_USER_CACHED_MEMORY; intel_gtt_insert_sg_entries(pages, start >> PAGE_SHIFT, flags); - } static void i915_ggtt_clear_range(struct i915_address_space *vm, - uint64_t start, - uint64_t length) + u64 start, u64 length) { intel_gtt_clear_range(start >> PAGE_SHIFT, length >> PAGE_SHIFT); } @@ -2616,14 +2233,16 @@ static int ggtt_bind_vma(struct i915_vma *vma, { struct drm_i915_private *i915 = vma->vm->i915; struct drm_i915_gem_object *obj = vma->obj; - u32 pte_flags = 0; - int ret; + u32 pte_flags; - ret = i915_get_ggtt_vma_pages(vma); - if (ret) - return ret; + if (unlikely(!vma->pages)) { + int ret = i915_get_ggtt_vma_pages(vma); + if (ret) + return ret; + } /* Currently applicable only to VLV */ + pte_flags = 0; if (obj->gt_ro) pte_flags |= PTE_READ_ONLY; @@ -2642,6 +2261,15 @@ static int ggtt_bind_vma(struct i915_vma *vma, return 0; } +static void ggtt_unbind_vma(struct i915_vma *vma) +{ + struct drm_i915_private *i915 = vma->vm->i915; + + intel_runtime_pm_get(i915); + vma->vm->clear_range(vma->vm, vma->node.start, vma->size); + intel_runtime_pm_put(i915); +} + static int aliasing_gtt_bind_vma(struct i915_vma *vma, enum i915_cache_level cache_level, u32 flags) @@ -2650,15 +2278,32 @@ static int aliasing_gtt_bind_vma(struct i915_vma *vma, u32 pte_flags; int ret; - ret = i915_get_ggtt_vma_pages(vma); - if (ret) - return ret; + if (unlikely(!vma->pages)) { + ret = i915_get_ggtt_vma_pages(vma); + if (ret) + return ret; + } /* Currently applicable only to VLV */ pte_flags = 0; if (vma->obj->gt_ro) pte_flags |= PTE_READ_ONLY; + if (flags & I915_VMA_LOCAL_BIND) { + struct i915_hw_ppgtt *appgtt = i915->mm.aliasing_ppgtt; + + if (appgtt->base.allocate_va_range) { + ret = appgtt->base.allocate_va_range(&appgtt->base, + vma->node.start, + vma->node.size); + if (ret) + return ret; + } + + appgtt->base.insert_entries(&appgtt->base, + vma->pages, vma->node.start, + cache_level, pte_flags); + } if (flags & I915_VMA_GLOBAL_BIND) { intel_runtime_pm_get(i915); @@ -2668,32 +2313,24 @@ static int aliasing_gtt_bind_vma(struct i915_vma *vma, intel_runtime_pm_put(i915); } - if (flags & I915_VMA_LOCAL_BIND) { - struct i915_hw_ppgtt *appgtt = i915->mm.aliasing_ppgtt; - appgtt->base.insert_entries(&appgtt->base, - vma->pages, vma->node.start, - cache_level, pte_flags); - } - return 0; } -static void ggtt_unbind_vma(struct i915_vma *vma) +static void aliasing_gtt_unbind_vma(struct i915_vma *vma) { struct drm_i915_private *i915 = vma->vm->i915; - struct i915_hw_ppgtt *appgtt = i915->mm.aliasing_ppgtt; - const u64 size = min(vma->size, vma->node.size); if (vma->flags & I915_VMA_GLOBAL_BIND) { intel_runtime_pm_get(i915); - vma->vm->clear_range(vma->vm, - vma->node.start, size); + vma->vm->clear_range(vma->vm, vma->node.start, vma->size); intel_runtime_pm_put(i915); } - if (vma->flags & I915_VMA_LOCAL_BIND && appgtt) - appgtt->base.clear_range(&appgtt->base, - vma->node.start, size); + if (vma->flags & I915_VMA_LOCAL_BIND) { + struct i915_address_space *vm = &i915->mm.aliasing_ppgtt->base; + + vm->clear_range(vm, vma->node.start, vma->size); + } } void i915_gem_gtt_finish_pages(struct drm_i915_gem_object *obj, @@ -2719,14 +2356,76 @@ static void i915_gtt_color_adjust(const struct drm_mm_node *node, u64 *start, u64 *end) { - if (node->color != color) + if (node->allocated && node->color != color) *start += I915_GTT_PAGE_SIZE; + /* Also leave a space between the unallocated reserved node after the + * GTT and any objects within the GTT, i.e. we use the color adjustment + * to insert a guard page to prevent prefetches crossing over the + * GTT boundary. + */ node = list_next_entry(node, node_list); - if (node->allocated && node->color != color) + if (node->color != color) *end -= I915_GTT_PAGE_SIZE; } +int i915_gem_init_aliasing_ppgtt(struct drm_i915_private *i915) +{ + struct i915_ggtt *ggtt = &i915->ggtt; + struct i915_hw_ppgtt *ppgtt; + int err; + + ppgtt = i915_ppgtt_create(i915, ERR_PTR(-EPERM), "[alias]"); + if (IS_ERR(ppgtt)) + return PTR_ERR(ppgtt); + + if (WARN_ON(ppgtt->base.total < ggtt->base.total)) { + err = -ENODEV; + goto err_ppgtt; + } + + if (ppgtt->base.allocate_va_range) { + /* Note we only pre-allocate as far as the end of the global + * GTT. On 48b / 4-level page-tables, the difference is very, + * very significant! We have to preallocate as GVT/vgpu does + * not like the page directory disappearing. + */ + err = ppgtt->base.allocate_va_range(&ppgtt->base, + 0, ggtt->base.total); + if (err) + goto err_ppgtt; + } + + i915->mm.aliasing_ppgtt = ppgtt; + + WARN_ON(ggtt->base.bind_vma != ggtt_bind_vma); + ggtt->base.bind_vma = aliasing_gtt_bind_vma; + + WARN_ON(ggtt->base.unbind_vma != ggtt_unbind_vma); + ggtt->base.unbind_vma = aliasing_gtt_unbind_vma; + + return 0; + +err_ppgtt: + i915_ppgtt_put(ppgtt); + return err; +} + +void i915_gem_fini_aliasing_ppgtt(struct drm_i915_private *i915) +{ + struct i915_ggtt *ggtt = &i915->ggtt; + struct i915_hw_ppgtt *ppgtt; + + ppgtt = fetch_and_zero(&i915->mm.aliasing_ppgtt); + if (!ppgtt) + return; + + i915_ppgtt_put(ppgtt); + + ggtt->base.bind_vma = ggtt_bind_vma; + ggtt->base.unbind_vma = ggtt_unbind_vma; +} + int i915_gem_init_ggtt(struct drm_i915_private *dev_priv) { /* Let GEM Manage all of the aperture. @@ -2740,7 +2439,6 @@ int i915_gem_init_ggtt(struct drm_i915_private *dev_priv) */ struct i915_ggtt *ggtt = &dev_priv->ggtt; unsigned long hole_start, hole_end; - struct i915_hw_ppgtt *ppgtt; struct drm_mm_node *entry; int ret; @@ -2769,38 +2467,13 @@ int i915_gem_init_ggtt(struct drm_i915_private *dev_priv) ggtt->base.total - PAGE_SIZE, PAGE_SIZE); if (USES_PPGTT(dev_priv) && !USES_FULL_PPGTT(dev_priv)) { - ppgtt = kzalloc(sizeof(*ppgtt), GFP_KERNEL); - if (!ppgtt) { - ret = -ENOMEM; - goto err; - } - - ret = __hw_ppgtt_init(ppgtt, dev_priv); + ret = i915_gem_init_aliasing_ppgtt(dev_priv); if (ret) - goto err_ppgtt; - - if (ppgtt->base.allocate_va_range) { - ret = ppgtt->base.allocate_va_range(&ppgtt->base, 0, - ppgtt->base.total); - if (ret) - goto err_ppgtt_cleanup; - } - - ppgtt->base.clear_range(&ppgtt->base, - ppgtt->base.start, - ppgtt->base.total); - - dev_priv->mm.aliasing_ppgtt = ppgtt; - WARN_ON(ggtt->base.bind_vma != ggtt_bind_vma); - ggtt->base.bind_vma = aliasing_gtt_bind_vma; + goto err; } return 0; -err_ppgtt_cleanup: - ppgtt->base.cleanup(&ppgtt->base); -err_ppgtt: - kfree(ppgtt); err: drm_mm_remove_node(&ggtt->error_capture); return ret; @@ -2813,27 +2486,31 @@ err: void i915_ggtt_cleanup_hw(struct drm_i915_private *dev_priv) { struct i915_ggtt *ggtt = &dev_priv->ggtt; + struct i915_vma *vma, *vn; - if (dev_priv->mm.aliasing_ppgtt) { - struct i915_hw_ppgtt *ppgtt = dev_priv->mm.aliasing_ppgtt; - ppgtt->base.cleanup(&ppgtt->base); - kfree(ppgtt); - } + ggtt->base.closed = true; + + mutex_lock(&dev_priv->drm.struct_mutex); + WARN_ON(!list_empty(&ggtt->base.active_list)); + list_for_each_entry_safe(vma, vn, &ggtt->base.inactive_list, vm_link) + WARN_ON(i915_vma_unbind(vma)); + mutex_unlock(&dev_priv->drm.struct_mutex); i915_gem_cleanup_stolen(&dev_priv->drm); + mutex_lock(&dev_priv->drm.struct_mutex); + i915_gem_fini_aliasing_ppgtt(dev_priv); + if (drm_mm_node_allocated(&ggtt->error_capture)) drm_mm_remove_node(&ggtt->error_capture); if (drm_mm_initialized(&ggtt->base.mm)) { intel_vgt_deballoon(dev_priv); - - mutex_lock(&dev_priv->drm.struct_mutex); i915_address_space_fini(&ggtt->base); - mutex_unlock(&dev_priv->drm.struct_mutex); } ggtt->base.cleanup(&ggtt->base); + mutex_unlock(&dev_priv->drm.struct_mutex); arch_phys_wc_del(ggtt->mtrr); io_mapping_fini(&ggtt->mappable); @@ -2943,7 +2620,7 @@ static int ggtt_probe_common(struct i915_ggtt *ggtt, u64 size) return -ENOMEM; } - ret = setup_scratch_page(dev_priv, &ggtt->base.scratch_page, GFP_DMA32); + ret = setup_scratch_page(&ggtt->base, GFP_DMA32); if (ret) { DRM_ERROR("Scratch setup failed\n"); /* iounmap will also get called at remove, but meh */ @@ -2959,7 +2636,7 @@ static int ggtt_probe_common(struct i915_ggtt *ggtt, u64 size) * writing this data shouldn't be harmful even in those cases. */ static void bdw_setup_private_ppat(struct drm_i915_private *dev_priv) { - uint64_t pat; + u64 pat; pat = GEN8_PPAT(0, GEN8_PPAT_WB | GEN8_PPAT_LLC) | /* for normal objects, no eLLC */ GEN8_PPAT(1, GEN8_PPAT_WC | GEN8_PPAT_LLCELLC) | /* for something pointing to ptes? */ @@ -2994,7 +2671,7 @@ static void bdw_setup_private_ppat(struct drm_i915_private *dev_priv) static void chv_setup_private_ppat(struct drm_i915_private *dev_priv) { - uint64_t pat; + u64 pat; /* * Map WB on BDW to snooped on CHV. @@ -3032,7 +2709,7 @@ static void gen6_gmch_remove(struct i915_address_space *vm) struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm); iounmap(ggtt->gsm); - cleanup_scratch_page(vm->i915, &vm->scratch_page); + cleanup_scratch_page(vm); } static int gen8_gmch_probe(struct i915_ggtt *ggtt) @@ -3078,8 +2755,6 @@ static int gen8_gmch_probe(struct i915_ggtt *ggtt) ggtt->base.clear_range = gen8_ggtt_clear_range; ggtt->base.insert_entries = gen8_ggtt_insert_entries; - if (IS_CHERRYVIEW(dev_priv)) - ggtt->base.insert_entries = gen8_ggtt_insert_entries__BKL; ggtt->invalidate = gen6_ggtt_invalidate; @@ -3183,6 +2858,7 @@ int i915_ggtt_probe_hw(struct drm_i915_private *dev_priv) int ret; ggtt->base.i915 = dev_priv; + ggtt->base.dma = &dev_priv->drm.pdev->dev; if (INTEL_GEN(dev_priv) <= 5) ret = i915_gmch_probe(ggtt); @@ -3242,14 +2918,14 @@ int i915_ggtt_init_hw(struct drm_i915_private *dev_priv) INIT_LIST_HEAD(&dev_priv->vm_list); - /* Subtract the guard page before address space initialization to - * shrink the range used by drm_mm. + /* Note that we use page colouring to enforce a guard page at the + * end of the address space. This is required as the CS may prefetch + * beyond the end of the batch buffer, across the page boundary, + * and beyond the end of the GTT if we do not provide a guard. */ mutex_lock(&dev_priv->drm.struct_mutex); - ggtt->base.total -= PAGE_SIZE; i915_address_space_init(&ggtt->base, dev_priv, "[global]"); - ggtt->base.total += PAGE_SIZE; - if (!HAS_LLC(dev_priv)) + if (!HAS_LLC(dev_priv) && !USES_PPGTT(dev_priv)) ggtt->base.mm.color_adjust = i915_gtt_color_adjust; mutex_unlock(&dev_priv->drm.struct_mutex); @@ -3303,7 +2979,7 @@ void i915_gem_restore_gtt_mappings(struct drm_i915_private *dev_priv) i915_check_and_clear_faults(dev_priv); /* First fill our portion of the GTT with scratch pages */ - ggtt->base.clear_range(&ggtt->base, ggtt->base.start, ggtt->base.total); + ggtt->base.clear_range(&ggtt->base, 0, ggtt->base.total); ggtt->base.closed = true; /* skip rewriting PTE on VMA unbind */ @@ -3344,8 +3020,6 @@ void i915_gem_restore_gtt_mappings(struct drm_i915_private *dev_priv) struct i915_address_space *vm; list_for_each_entry(vm, &dev_priv->vm_list, global_link) { - /* TODO: Perhaps it shouldn't be gen6 specific */ - struct i915_hw_ppgtt *ppgtt; if (i915_is_ggtt(vm)) @@ -3353,8 +3027,7 @@ void i915_gem_restore_gtt_mappings(struct drm_i915_private *dev_priv) else ppgtt = i915_vm_to_ppgtt(vm); - gen6_write_page_range(dev_priv, &ppgtt->pd, - 0, ppgtt->base.total); + gen6_write_page_range(ppgtt, 0, ppgtt->base.total); } } @@ -3389,11 +3062,11 @@ rotate_pages(const dma_addr_t *in, unsigned int offset, return sg; } -static struct sg_table * -intel_rotate_fb_obj_pages(const struct intel_rotation_info *rot_info, - struct drm_i915_gem_object *obj) +static noinline struct sg_table * +intel_rotate_pages(struct intel_rotation_info *rot_info, + struct drm_i915_gem_object *obj) { - const size_t n_pages = obj->base.size / PAGE_SIZE; + const unsigned long n_pages = obj->base.size / PAGE_SIZE; unsigned int size = intel_rotation_info_size(rot_info); struct sgt_iter sgt_iter; dma_addr_t dma_addr; @@ -3452,7 +3125,7 @@ err_st_alloc: return ERR_PTR(ret); } -static struct sg_table * +static noinline struct sg_table * intel_partial_pages(const struct i915_ggtt_view *view, struct drm_i915_gem_object *obj) { @@ -3506,7 +3179,7 @@ err_st_alloc: static int i915_get_ggtt_vma_pages(struct i915_vma *vma) { - int ret = 0; + int ret; /* The vma->pages are only valid within the lifespan of the borrowed * obj->mm.pages. When the obj->mm.pages sg_table is regenerated, so @@ -3515,32 +3188,33 @@ i915_get_ggtt_vma_pages(struct i915_vma *vma) */ GEM_BUG_ON(!i915_gem_object_has_pinned_pages(vma->obj)); - if (vma->pages) + switch (vma->ggtt_view.type) { + case I915_GGTT_VIEW_NORMAL: + vma->pages = vma->obj->mm.pages; return 0; - if (vma->ggtt_view.type == I915_GGTT_VIEW_NORMAL) - vma->pages = vma->obj->mm.pages; - else if (vma->ggtt_view.type == I915_GGTT_VIEW_ROTATED) + case I915_GGTT_VIEW_ROTATED: vma->pages = - intel_rotate_fb_obj_pages(&vma->ggtt_view.rotated, - vma->obj); - else if (vma->ggtt_view.type == I915_GGTT_VIEW_PARTIAL) + intel_rotate_pages(&vma->ggtt_view.rotated, vma->obj); + break; + + case I915_GGTT_VIEW_PARTIAL: vma->pages = intel_partial_pages(&vma->ggtt_view, vma->obj); - else + break; + + default: WARN_ONCE(1, "GGTT view %u not implemented!\n", vma->ggtt_view.type); + return -EINVAL; + } - if (!vma->pages) { - DRM_ERROR("Failed to get pages for GGTT view type %u!\n", - vma->ggtt_view.type); - ret = -EINVAL; - } else if (IS_ERR(vma->pages)) { + ret = 0; + if (unlikely(IS_ERR(vma->pages))) { ret = PTR_ERR(vma->pages); vma->pages = NULL; DRM_ERROR("Failed to get pages for VMA view type %u (%d)!\n", vma->ggtt_view.type, ret); } - return ret; } @@ -3743,3 +3417,8 @@ int i915_gem_gtt_insert(struct i915_address_space *vm, size, alignment, color, start, end, DRM_MM_INSERT_EVICT); } + +#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) +#include "selftests/mock_gtt.c" +#include "selftests/i915_gem_gtt.c" +#endif diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h index 3c5ef5358cef..f7d4e194a227 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.h +++ b/drivers/gpu/drm/i915/i915_gem_gtt.h @@ -36,9 +36,11 @@ #include <linux/io-mapping.h> #include <linux/mm.h> +#include <linux/pagevec.h> #include "i915_gem_timeline.h" #include "i915_gem_request.h" +#include "i915_selftest.h" #define I915_GTT_PAGE_SIZE 4096UL #define I915_GTT_MIN_ALIGNMENT I915_GTT_PAGE_SIZE @@ -51,11 +53,11 @@ struct drm_i915_file_private; struct drm_i915_fence_reg; -typedef uint32_t gen6_pte_t; -typedef uint64_t gen8_pte_t; -typedef uint64_t gen8_pde_t; -typedef uint64_t gen8_ppgtt_pdpe_t; -typedef uint64_t gen8_ppgtt_pml4e_t; +typedef u32 gen6_pte_t; +typedef u64 gen8_pte_t; +typedef u64 gen8_pde_t; +typedef u64 gen8_ppgtt_pdpe_t; +typedef u64 gen8_ppgtt_pml4e_t; #define ggtt_total_entries(ggtt) ((ggtt)->base.total >> PAGE_SHIFT) @@ -67,7 +69,7 @@ typedef uint64_t gen8_ppgtt_pml4e_t; #define GEN6_PTE_UNCACHED (1 << 1) #define GEN6_PTE_VALID (1 << 0) -#define I915_PTES(pte_len) (PAGE_SIZE / (pte_len)) +#define I915_PTES(pte_len) ((unsigned int)(PAGE_SIZE / (pte_len))) #define I915_PTE_MASK(pte_len) (I915_PTES(pte_len) - 1) #define I915_PDES 512 #define I915_PDE_MASK (I915_PDES - 1) @@ -141,7 +143,7 @@ typedef uint64_t gen8_ppgtt_pml4e_t; #define GEN8_PPAT_WC (1<<0) #define GEN8_PPAT_UC (0<<0) #define GEN8_PPAT_ELLC_OVERRIDE (0<<2) -#define GEN8_PPAT(i, x) ((uint64_t) (x) << ((i) * 8)) +#define GEN8_PPAT(i, x) ((u64)(x) << ((i) * 8)) struct sg_table; @@ -208,7 +210,7 @@ struct i915_page_dma { /* For gen6/gen7 only. This is the offset in the GGTT * where the page directory entries for PPGTT begin */ - uint32_t ggtt_offset; + u32 ggtt_offset; }; }; @@ -218,28 +220,24 @@ struct i915_page_dma { struct i915_page_table { struct i915_page_dma base; - - unsigned long *used_ptes; + unsigned int used_ptes; }; struct i915_page_directory { struct i915_page_dma base; - unsigned long *used_pdes; struct i915_page_table *page_table[I915_PDES]; /* PDEs */ + unsigned int used_pdes; }; struct i915_page_directory_pointer { struct i915_page_dma base; - - unsigned long *used_pdpes; struct i915_page_directory **page_directory; + unsigned int used_pdpes; }; struct i915_pml4 { struct i915_page_dma base; - - DECLARE_BITMAP(used_pml4es, GEN8_PML4ES_PER_PML4); struct i915_page_directory_pointer *pdps[GEN8_PML4ES_PER_PML4]; }; @@ -247,6 +245,7 @@ struct i915_address_space { struct drm_mm mm; struct i915_gem_timeline timeline; struct drm_i915_private *i915; + struct device *dma; /* Every address space belongs to a struct file - except for the global * GTT that is owned by the driver (and so @file is set to NULL). In * principle, no information should leak from one context to another @@ -257,7 +256,6 @@ struct i915_address_space { */ struct drm_i915_file_private *file; struct list_head global_link; - u64 start; /* Start offset always 0 for dri2 */ u64 total; /* size addr space maps (ex. 2GB for ggtt) */ bool closed; @@ -297,6 +295,9 @@ struct i915_address_space { */ struct list_head unbound_list; + struct pagevec free_pages; + bool pt_kmap_wc; + /* FIXME: Need a more generic return type */ gen6_pte_t (*pte_encode)(dma_addr_t addr, enum i915_cache_level level, @@ -304,20 +305,19 @@ struct i915_address_space { /* flags for pte_encode */ #define PTE_READ_ONLY (1<<0) int (*allocate_va_range)(struct i915_address_space *vm, - uint64_t start, - uint64_t length); + u64 start, u64 length); void (*clear_range)(struct i915_address_space *vm, - uint64_t start, - uint64_t length); + u64 start, u64 length); void (*insert_page)(struct i915_address_space *vm, dma_addr_t addr, - uint64_t offset, + u64 offset, enum i915_cache_level cache_level, u32 flags); void (*insert_entries)(struct i915_address_space *vm, struct sg_table *st, - uint64_t start, - enum i915_cache_level cache_level, u32 flags); + u64 start, + enum i915_cache_level cache_level, + u32 flags); void (*cleanup)(struct i915_address_space *vm); /** Unmap an object from an address space. This usually consists of * setting the valid PTE entries to a reserved scratch page. */ @@ -326,6 +326,8 @@ struct i915_address_space { int (*bind_vma)(struct i915_vma *vma, enum i915_cache_level cache_level, u32 flags); + + I915_SELFTEST_DECLARE(struct fault_attr fault_attr); }; #define i915_is_ggtt(V) (!(V)->file) @@ -381,7 +383,6 @@ struct i915_hw_ppgtt { gen6_pte_t __iomem *pd_addr; - int (*enable)(struct i915_hw_ppgtt *ppgtt); int (*switch_mm)(struct i915_hw_ppgtt *ppgtt, struct drm_i915_gem_request *req); void (*debug_dump)(struct i915_hw_ppgtt *ppgtt, struct seq_file *m); @@ -409,9 +410,9 @@ struct i915_hw_ppgtt { (pt = (pd)->page_table[iter], true); \ ++iter) -static inline uint32_t i915_pte_index(uint64_t address, uint32_t pde_shift) +static inline u32 i915_pte_index(u64 address, unsigned int pde_shift) { - const uint32_t mask = NUM_PTE(pde_shift) - 1; + const u32 mask = NUM_PTE(pde_shift) - 1; return (address >> PAGE_SHIFT) & mask; } @@ -420,11 +421,10 @@ static inline uint32_t i915_pte_index(uint64_t address, uint32_t pde_shift) * does not cross a page table boundary, so the max value would be * GEN6_PTES for GEN6, and GEN8_PTES for GEN8. */ -static inline uint32_t i915_pte_count(uint64_t addr, size_t length, - uint32_t pde_shift) +static inline u32 i915_pte_count(u64 addr, u64 length, unsigned int pde_shift) { - const uint64_t mask = ~((1ULL << pde_shift) - 1); - uint64_t end; + const u64 mask = ~((1ULL << pde_shift) - 1); + u64 end; WARN_ON(length == 0); WARN_ON(offset_in_page(addr|length)); @@ -437,22 +437,22 @@ static inline uint32_t i915_pte_count(uint64_t addr, size_t length, return i915_pte_index(end, pde_shift) - i915_pte_index(addr, pde_shift); } -static inline uint32_t i915_pde_index(uint64_t addr, uint32_t shift) +static inline u32 i915_pde_index(u64 addr, u32 shift) { return (addr >> shift) & I915_PDE_MASK; } -static inline uint32_t gen6_pte_index(uint32_t addr) +static inline u32 gen6_pte_index(u32 addr) { return i915_pte_index(addr, GEN6_PDE_SHIFT); } -static inline size_t gen6_pte_count(uint32_t addr, uint32_t length) +static inline u32 gen6_pte_count(u32 addr, u32 length) { return i915_pte_count(addr, length, GEN6_PDE_SHIFT); } -static inline uint32_t gen6_pde_index(uint32_t addr) +static inline u32 gen6_pde_index(u32 addr) { return i915_pde_index(addr, GEN6_PDE_SHIFT); } @@ -485,27 +485,27 @@ static inline uint32_t gen6_pde_index(uint32_t addr) temp = min(temp - start, length); \ start += temp, length -= temp; }), ++iter) -static inline uint32_t gen8_pte_index(uint64_t address) +static inline u32 gen8_pte_index(u64 address) { return i915_pte_index(address, GEN8_PDE_SHIFT); } -static inline uint32_t gen8_pde_index(uint64_t address) +static inline u32 gen8_pde_index(u64 address) { return i915_pde_index(address, GEN8_PDE_SHIFT); } -static inline uint32_t gen8_pdpe_index(uint64_t address) +static inline u32 gen8_pdpe_index(u64 address) { return (address >> GEN8_PDPE_SHIFT) & GEN8_PDPE_MASK; } -static inline uint32_t gen8_pml4e_index(uint64_t address) +static inline u32 gen8_pml4e_index(u64 address) { return (address >> GEN8_PML4E_SHIFT) & GEN8_PML4E_MASK; } -static inline size_t gen8_pte_count(uint64_t address, uint64_t length) +static inline u64 gen8_pte_count(u64 address, u64 length) { return i915_pte_count(address, length, GEN8_PDE_SHIFT); } @@ -513,9 +513,7 @@ static inline size_t gen8_pte_count(uint64_t address, uint64_t length) static inline dma_addr_t i915_page_dir_dma_addr(const struct i915_hw_ppgtt *ppgtt, const unsigned n) { - return test_bit(n, ppgtt->pdp.used_pdpes) ? - px_dma(ppgtt->pdp.page_directory[n]) : - px_dma(ppgtt->base.scratch_pd); + return px_dma(ppgtt->pdp.page_directory[n]); } static inline struct i915_ggtt * @@ -525,6 +523,15 @@ i915_vm_to_ggtt(struct i915_address_space *vm) return container_of(vm, struct i915_ggtt, base); } +static inline bool +i915_vm_is_48bit(const struct i915_address_space *vm) +{ + return (vm->total - 1) >> 32; +} + +int i915_gem_init_aliasing_ppgtt(struct drm_i915_private *i915); +void i915_gem_fini_aliasing_ppgtt(struct drm_i915_private *i915); + int i915_ggtt_probe_hw(struct drm_i915_private *dev_priv); int i915_ggtt_init_hw(struct drm_i915_private *dev_priv); int i915_ggtt_enable_hw(struct drm_i915_private *dev_priv); diff --git a/drivers/gpu/drm/i915/i915_gem_internal.c b/drivers/gpu/drm/i915/i915_gem_internal.c index 933019e1b206..fc950abbe400 100644 --- a/drivers/gpu/drm/i915/i915_gem_internal.c +++ b/drivers/gpu/drm/i915/i915_gem_internal.c @@ -35,8 +35,10 @@ static void internal_free_pages(struct sg_table *st) { struct scatterlist *sg; - for (sg = st->sgl; sg; sg = __sg_next(sg)) - __free_pages(sg_page(sg), get_order(sg->length)); + for (sg = st->sgl; sg; sg = __sg_next(sg)) { + if (sg_page(sg)) + __free_pages(sg_page(sg), get_order(sg->length)); + } sg_free_table(st); kfree(st); @@ -133,6 +135,7 @@ create_st: return st; err: + sg_set_page(sg, NULL, 0, 0); sg_mark_end(sg); internal_free_pages(st); return ERR_PTR(-ENOMEM); diff --git a/drivers/gpu/drm/i915/i915_gem_object.h b/drivers/gpu/drm/i915/i915_gem_object.h index 290eaa7fc9eb..2fb30a5eb510 100644 --- a/drivers/gpu/drm/i915/i915_gem_object.h +++ b/drivers/gpu/drm/i915/i915_gem_object.h @@ -33,6 +33,8 @@ #include <drm/i915_drm.h> +#include "i915_selftest.h" + struct drm_i915_gem_object_ops { unsigned int flags; #define I915_GEM_OBJECT_HAS_STRUCT_PAGE 0x1 @@ -84,6 +86,7 @@ struct drm_i915_gem_object { struct list_head obj_exec_link; struct list_head batch_pool_link; + I915_SELFTEST_DECLARE(struct list_head st_link); unsigned long flags; @@ -162,19 +165,23 @@ struct drm_i915_gem_object { struct reservation_object *resv; /** References from framebuffers, locks out tiling changes. */ - unsigned long framebuffer_references; + atomic_t framebuffer_references; /** Record of address bit 17 of each page at last unbind. */ unsigned long *bit_17; - struct i915_gem_userptr { - uintptr_t ptr; - unsigned read_only :1; + union { + struct i915_gem_userptr { + uintptr_t ptr; + unsigned read_only :1; - struct i915_mm_struct *mm; - struct i915_mmu_object *mmu_object; - struct work_struct *work; - } userptr; + struct i915_mm_struct *mm; + struct i915_mmu_object *mmu_object; + struct work_struct *work; + } userptr; + + unsigned long scratch; + }; /** for phys allocated objects */ struct drm_dma_handle *phys_handle; @@ -357,5 +364,7 @@ i915_gem_object_last_write_engine(struct drm_i915_gem_object *obj) return engine; } +void i915_gem_object_flush_if_display(struct drm_i915_gem_object *obj); + #endif diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c index f31deeb72703..fbfeb5f8d069 100644 --- a/drivers/gpu/drm/i915/i915_gem_request.c +++ b/drivers/gpu/drm/i915/i915_gem_request.c @@ -69,7 +69,6 @@ static void i915_fence_release(struct dma_fence *fence) * caught trying to reuse dead objects. */ i915_sw_fence_fini(&req->submit); - i915_sw_fence_fini(&req->execute); kmem_cache_free(req->i915->requests, req); } @@ -198,6 +197,89 @@ i915_priotree_init(struct i915_priotree *pt) pt->priority = INT_MIN; } +static int reset_all_global_seqno(struct drm_i915_private *i915, u32 seqno) +{ + struct i915_gem_timeline *timeline = &i915->gt.global_timeline; + struct intel_engine_cs *engine; + enum intel_engine_id id; + int ret; + + /* Carefully retire all requests without writing to the rings */ + ret = i915_gem_wait_for_idle(i915, + I915_WAIT_INTERRUPTIBLE | + I915_WAIT_LOCKED); + if (ret) + return ret; + + i915_gem_retire_requests(i915); + GEM_BUG_ON(i915->gt.active_requests > 1); + + /* If the seqno wraps around, we need to clear the breadcrumb rbtree */ + for_each_engine(engine, i915, id) { + struct intel_timeline *tl = &timeline->engine[id]; + + if (!i915_seqno_passed(seqno, tl->seqno)) { + /* spin until threads are complete */ + while (intel_breadcrumbs_busy(engine)) + cond_resched(); + } + + /* Finally reset hw state */ + tl->seqno = seqno; + intel_engine_init_global_seqno(engine, seqno); + } + + list_for_each_entry(timeline, &i915->gt.timelines, link) { + for_each_engine(engine, i915, id) { + struct intel_timeline *tl = &timeline->engine[id]; + + memset(tl->sync_seqno, 0, sizeof(tl->sync_seqno)); + } + } + + return 0; +} + +int i915_gem_set_global_seqno(struct drm_device *dev, u32 seqno) +{ + struct drm_i915_private *dev_priv = to_i915(dev); + + lockdep_assert_held(&dev_priv->drm.struct_mutex); + + if (seqno == 0) + return -EINVAL; + + /* HWS page needs to be set less than what we + * will inject to ring + */ + return reset_all_global_seqno(dev_priv, seqno - 1); +} + +static int reserve_seqno(struct intel_engine_cs *engine) +{ + u32 active = ++engine->timeline->inflight_seqnos; + u32 seqno = engine->timeline->seqno; + int ret; + + /* Reservation is fine until we need to wrap around */ + if (likely(!add_overflows(seqno, active))) + return 0; + + ret = reset_all_global_seqno(engine->i915, 0); + if (ret) { + engine->timeline->inflight_seqnos--; + return ret; + } + + return 0; +} + +static void unreserve_seqno(struct intel_engine_cs *engine) +{ + GEM_BUG_ON(!engine->timeline->inflight_seqnos); + engine->timeline->inflight_seqnos--; +} + void i915_gem_retire_noop(struct i915_gem_active *active, struct drm_i915_gem_request *request) { @@ -211,7 +293,6 @@ static void i915_gem_request_retire(struct drm_i915_gem_request *request) lockdep_assert_held(&request->i915->drm.struct_mutex); GEM_BUG_ON(!i915_sw_fence_signaled(&request->submit)); - GEM_BUG_ON(!i915_sw_fence_signaled(&request->execute)); GEM_BUG_ON(!i915_gem_request_completed(request)); GEM_BUG_ON(!request->i915->gt.active_requests); @@ -237,6 +318,7 @@ static void i915_gem_request_retire(struct drm_i915_gem_request *request) &request->i915->gt.idle_work, msecs_to_jiffies(100)); } + unreserve_seqno(request->engine); /* Walk through the active list, calling retire on each. This allows * objects to track their GPU activity and mark themselves as idle @@ -307,88 +389,9 @@ void i915_gem_request_retire_upto(struct drm_i915_gem_request *req) } while (tmp != req); } -static int i915_gem_init_global_seqno(struct drm_i915_private *i915, u32 seqno) +static u32 timeline_get_seqno(struct intel_timeline *tl) { - struct i915_gem_timeline *timeline = &i915->gt.global_timeline; - struct intel_engine_cs *engine; - enum intel_engine_id id; - int ret; - - /* Carefully retire all requests without writing to the rings */ - ret = i915_gem_wait_for_idle(i915, - I915_WAIT_INTERRUPTIBLE | - I915_WAIT_LOCKED); - if (ret) - return ret; - - i915_gem_retire_requests(i915); - GEM_BUG_ON(i915->gt.active_requests > 1); - - /* If the seqno wraps around, we need to clear the breadcrumb rbtree */ - if (!i915_seqno_passed(seqno, atomic_read(&timeline->seqno))) { - while (intel_breadcrumbs_busy(i915)) - cond_resched(); /* spin until threads are complete */ - } - atomic_set(&timeline->seqno, seqno); - - /* Finally reset hw state */ - for_each_engine(engine, i915, id) - intel_engine_init_global_seqno(engine, seqno); - - list_for_each_entry(timeline, &i915->gt.timelines, link) { - for_each_engine(engine, i915, id) { - struct intel_timeline *tl = &timeline->engine[id]; - - memset(tl->sync_seqno, 0, sizeof(tl->sync_seqno)); - } - } - - return 0; -} - -int i915_gem_set_global_seqno(struct drm_device *dev, u32 seqno) -{ - struct drm_i915_private *dev_priv = to_i915(dev); - - lockdep_assert_held(&dev_priv->drm.struct_mutex); - - if (seqno == 0) - return -EINVAL; - - /* HWS page needs to be set less than what we - * will inject to ring - */ - return i915_gem_init_global_seqno(dev_priv, seqno - 1); -} - -static int reserve_global_seqno(struct drm_i915_private *i915) -{ - u32 active_requests = ++i915->gt.active_requests; - u32 seqno = atomic_read(&i915->gt.global_timeline.seqno); - int ret; - - /* Reservation is fine until we need to wrap around */ - if (likely(seqno + active_requests > seqno)) - return 0; - - ret = i915_gem_init_global_seqno(i915, 0); - if (ret) { - i915->gt.active_requests--; - return ret; - } - - return 0; -} - -static u32 __timeline_get_seqno(struct i915_gem_timeline *tl) -{ - /* seqno only incremented under a mutex */ - return ++tl->seqno.counter; -} - -static u32 timeline_get_seqno(struct i915_gem_timeline *tl) -{ - return atomic_inc_return(&tl->seqno); + return ++tl->seqno; } void __i915_gem_request_submit(struct drm_i915_gem_request *request) @@ -397,19 +400,17 @@ void __i915_gem_request_submit(struct drm_i915_gem_request *request) struct intel_timeline *timeline; u32 seqno; + trace_i915_gem_request_execute(request); + /* Transfer from per-context onto the global per-engine timeline */ timeline = engine->timeline; GEM_BUG_ON(timeline == request->timeline); assert_spin_locked(&timeline->lock); - seqno = timeline_get_seqno(timeline->common); + seqno = timeline_get_seqno(timeline); GEM_BUG_ON(!seqno); GEM_BUG_ON(i915_seqno_passed(intel_engine_get_seqno(engine), seqno)); - GEM_BUG_ON(i915_seqno_passed(timeline->last_submitted_seqno, seqno)); - request->previous_seqno = timeline->last_submitted_seqno; - timeline->last_submitted_seqno = seqno; - /* We may be recursing from the signal callback of another i915 fence */ spin_lock_nested(&request->lock, SINGLE_DEPTH_NESTING); request->global_seqno = seqno; @@ -417,7 +418,6 @@ void __i915_gem_request_submit(struct drm_i915_gem_request *request) intel_engine_enable_signaling(request); spin_unlock(&request->lock); - GEM_BUG_ON(!request->global_seqno); engine->emit_breadcrumb(request, request->ring->vaddr + request->postfix); @@ -425,7 +425,7 @@ void __i915_gem_request_submit(struct drm_i915_gem_request *request) list_move_tail(&request->link, &timeline->requests); spin_unlock(&request->timeline->lock); - i915_sw_fence_commit(&request->execute); + wake_up_all(&request->execute); } void i915_gem_request_submit(struct drm_i915_gem_request *request) @@ -441,33 +441,65 @@ void i915_gem_request_submit(struct drm_i915_gem_request *request) spin_unlock_irqrestore(&engine->timeline->lock, flags); } -static int __i915_sw_fence_call -submit_notify(struct i915_sw_fence *fence, enum i915_sw_fence_notify state) +void __i915_gem_request_unsubmit(struct drm_i915_gem_request *request) { - struct drm_i915_gem_request *request = - container_of(fence, typeof(*request), submit); + struct intel_engine_cs *engine = request->engine; + struct intel_timeline *timeline; - switch (state) { - case FENCE_COMPLETE: - request->engine->submit_request(request); - break; + assert_spin_locked(&engine->timeline->lock); - case FENCE_FREE: - i915_gem_request_put(request); - break; - } + /* Only unwind in reverse order, required so that the per-context list + * is kept in seqno/ring order. + */ + GEM_BUG_ON(request->global_seqno != engine->timeline->seqno); + engine->timeline->seqno--; - return NOTIFY_DONE; + /* We may be recursing from the signal callback of another i915 fence */ + spin_lock_nested(&request->lock, SINGLE_DEPTH_NESTING); + request->global_seqno = 0; + if (test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT, &request->fence.flags)) + intel_engine_cancel_signaling(request); + spin_unlock(&request->lock); + + /* Transfer back from the global per-engine timeline to per-context */ + timeline = request->timeline; + GEM_BUG_ON(timeline == engine->timeline); + + spin_lock(&timeline->lock); + list_move(&request->link, &timeline->requests); + spin_unlock(&timeline->lock); + + /* We don't need to wake_up any waiters on request->execute, they + * will get woken by any other event or us re-adding this request + * to the engine timeline (__i915_gem_request_submit()). The waiters + * should be quite adapt at finding that the request now has a new + * global_seqno to the one they went to sleep on. + */ +} + +void i915_gem_request_unsubmit(struct drm_i915_gem_request *request) +{ + struct intel_engine_cs *engine = request->engine; + unsigned long flags; + + /* Will be called from irq-context when using foreign fences. */ + spin_lock_irqsave(&engine->timeline->lock, flags); + + __i915_gem_request_unsubmit(request); + + spin_unlock_irqrestore(&engine->timeline->lock, flags); } static int __i915_sw_fence_call -execute_notify(struct i915_sw_fence *fence, enum i915_sw_fence_notify state) +submit_notify(struct i915_sw_fence *fence, enum i915_sw_fence_notify state) { struct drm_i915_gem_request *request = - container_of(fence, typeof(*request), execute); + container_of(fence, typeof(*request), submit); switch (state) { case FENCE_COMPLETE: + trace_i915_gem_request_submit(request); + request->engine->submit_request(request); break; case FENCE_FREE: @@ -514,14 +546,14 @@ i915_gem_request_alloc(struct intel_engine_cs *engine, if (ret) return ERR_PTR(ret); - ret = reserve_global_seqno(dev_priv); + ret = reserve_seqno(engine); if (ret) goto err_unpin; /* Move the oldest request to the slab-cache (if not in use!) */ req = list_first_entry_or_null(&engine->timeline->requests, typeof(*req), link); - if (req && __i915_gem_request_completed(req)) + if (req && i915_gem_request_completed(req)) i915_gem_request_retire(req); /* Beware: Dragons be flying overhead. @@ -566,17 +598,11 @@ i915_gem_request_alloc(struct intel_engine_cs *engine, &i915_fence_ops, &req->lock, req->timeline->fence_context, - __timeline_get_seqno(req->timeline->common)); + timeline_get_seqno(req->timeline)); /* We bump the ref for the fence chain */ i915_sw_fence_init(&i915_gem_request_get(req)->submit, submit_notify); - i915_sw_fence_init(&i915_gem_request_get(req)->execute, execute_notify); - - /* Ensure that the execute fence completes after the submit fence - - * as we complete the execute fence from within the submit fence - * callback, its completion would otherwise be visible first. - */ - i915_sw_fence_await_sw_fence(&req->execute, &req->submit, &req->execq); + init_waitqueue_head(&req->execute); i915_priotree_init(&req->priotree); @@ -611,6 +637,8 @@ i915_gem_request_alloc(struct intel_engine_cs *engine, */ req->head = req->ring->tail; + /* Check that we didn't interrupt ourselves with a new request */ + GEM_BUG_ON(req->timeline->seqno != req->fence.seqno); return req; err_ctx: @@ -621,7 +649,7 @@ err_ctx: kmem_cache_free(dev_priv->requests, req); err_unreserve: - dev_priv->gt.active_requests--; + unreserve_seqno(engine); err_unpin: engine->context_unpin(engine, ctx); return ERR_PTR(ret); @@ -631,6 +659,7 @@ static int i915_gem_request_await_request(struct drm_i915_gem_request *to, struct drm_i915_gem_request *from) { + u32 seqno; int ret; GEM_BUG_ON(to == from); @@ -653,14 +682,15 @@ i915_gem_request_await_request(struct drm_i915_gem_request *to, return ret < 0 ? ret : 0; } - if (!from->global_seqno) { + seqno = i915_gem_request_global_seqno(from); + if (!seqno) { ret = i915_sw_fence_await_dma_fence(&to->submit, &from->fence, 0, GFP_KERNEL); return ret < 0 ? ret : 0; } - if (from->global_seqno <= to->timeline->sync_seqno[from->engine->id]) + if (seqno <= to->timeline->sync_seqno[from->engine->id]) return 0; trace_i915_gem_ring_sync_to(to, from); @@ -678,7 +708,7 @@ i915_gem_request_await_request(struct drm_i915_gem_request *to, return ret; } - to->timeline->sync_seqno[from->engine->id] = from->global_seqno; + to->timeline->sync_seqno[from->engine->id] = seqno; return 0; } @@ -824,6 +854,7 @@ void __i915_add_request(struct drm_i915_gem_request *request, bool flush_caches) struct intel_ring *ring = request->ring; struct intel_timeline *timeline = request->timeline; struct drm_i915_gem_request *prev; + u32 *cs; int err; lockdep_assert_held(&request->i915->drm.struct_mutex); @@ -833,8 +864,7 @@ void __i915_add_request(struct drm_i915_gem_request *request, bool flush_caches) * our i915_gem_request_alloc() and called __i915_add_request() before * us, the timeline will hold its seqno which is later than ours. */ - GEM_BUG_ON(i915_seqno_passed(timeline->last_submitted_seqno, - request->fence.seqno)); + GEM_BUG_ON(timeline->seqno != request->fence.seqno); /* * To ensure that this call will not fail, space for its emissions @@ -862,10 +892,9 @@ void __i915_add_request(struct drm_i915_gem_request *request, bool flush_caches) * GPU processing the request, we never over-estimate the * position of the ring's HEAD. */ - err = intel_ring_begin(request, engine->emit_breadcrumb_sz); - GEM_BUG_ON(err); - request->postfix = ring->tail; - ring->tail += engine->emit_breadcrumb_sz * sizeof(u32); + cs = intel_ring_begin(request, engine->emit_breadcrumb_sz); + GEM_BUG_ON(IS_ERR(cs)); + request->postfix = intel_ring_offset(request, cs); /* Seal the request and mark it as pending execution. Note that * we may inspect this state, without holding any locks, during @@ -889,16 +918,14 @@ void __i915_add_request(struct drm_i915_gem_request *request, bool flush_caches) list_add_tail(&request->link, &timeline->requests); spin_unlock_irq(&timeline->lock); - GEM_BUG_ON(i915_seqno_passed(timeline->last_submitted_seqno, - request->fence.seqno)); - - timeline->last_submitted_seqno = request->fence.seqno; + GEM_BUG_ON(timeline->seqno != request->fence.seqno); i915_gem_active_set(&timeline->last_request, request); list_add_tail(&request->ring_link, &ring->request_list); request->emitted_jiffies = jiffies; - i915_gem_mark_busy(engine); + if (!request->i915->gt.active_requests++) + i915_gem_mark_busy(engine); /* Let the backend know a new request has arrived that may need * to adjust the existing execution schedule due to a high priority @@ -918,16 +945,6 @@ void __i915_add_request(struct drm_i915_gem_request *request, bool flush_caches) local_bh_enable(); /* Kick the execlists tasklet if just scheduled */ } -static void reset_wait_queue(wait_queue_head_t *q, wait_queue_t *wait) -{ - unsigned long flags; - - spin_lock_irqsave(&q->lock, flags); - if (list_empty(&wait->task_list)) - __add_wait_queue(q, wait); - spin_unlock_irqrestore(&q->lock, flags); -} - static unsigned long local_clock_us(unsigned int *cpu) { unsigned long t; @@ -961,9 +978,10 @@ static bool busywait_stop(unsigned long timeout, unsigned int cpu) } bool __i915_spin_request(const struct drm_i915_gem_request *req, - int state, unsigned long timeout_us) + u32 seqno, int state, unsigned long timeout_us) { - unsigned int cpu; + struct intel_engine_cs *engine = req->engine; + unsigned int irq, cpu; /* When waiting for high frequency requests, e.g. during synchronous * rendering split between the CPU and GPU, the finite amount of time @@ -975,11 +993,24 @@ bool __i915_spin_request(const struct drm_i915_gem_request *req, * takes to sleep on a request, on the order of a microsecond. */ + irq = atomic_read(&engine->irq_count); timeout_us += local_clock_us(&cpu); do { - if (__i915_gem_request_completed(req)) + if (seqno != i915_gem_request_global_seqno(req)) + break; + + if (i915_seqno_passed(intel_engine_get_seqno(req->engine), + seqno)) return true; + /* Seqno are meant to be ordered *before* the interrupt. If + * we see an interrupt without a corresponding seqno advance, + * assume we won't see one in the near future but require + * the engine->seqno_barrier() to fixup coherency. + */ + if (atomic_read(&engine->irq_count) != irq) + break; + if (signal_pending_state(state, current)) break; @@ -992,52 +1023,14 @@ bool __i915_spin_request(const struct drm_i915_gem_request *req, return false; } -static long -__i915_request_wait_for_execute(struct drm_i915_gem_request *request, - unsigned int flags, - long timeout) +static bool __i915_wait_request_check_and_reset(struct drm_i915_gem_request *request) { - const int state = flags & I915_WAIT_INTERRUPTIBLE ? - TASK_INTERRUPTIBLE : TASK_UNINTERRUPTIBLE; - wait_queue_head_t *q = &request->i915->gpu_error.wait_queue; - DEFINE_WAIT(reset); - DEFINE_WAIT(wait); - - if (flags & I915_WAIT_LOCKED) - add_wait_queue(q, &reset); - - do { - prepare_to_wait(&request->execute.wait, &wait, state); - - if (i915_sw_fence_done(&request->execute)) - break; - - if (flags & I915_WAIT_LOCKED && - i915_reset_in_progress(&request->i915->gpu_error)) { - __set_current_state(TASK_RUNNING); - i915_reset(request->i915); - reset_wait_queue(q, &reset); - continue; - } - - if (signal_pending_state(state, current)) { - timeout = -ERESTARTSYS; - break; - } - - if (!timeout) { - timeout = -ETIME; - break; - } - - timeout = io_schedule_timeout(timeout); - } while (1); - finish_wait(&request->execute.wait, &wait); - - if (flags & I915_WAIT_LOCKED) - remove_wait_queue(q, &reset); + if (likely(!i915_reset_in_progress(&request->i915->gpu_error))) + return false; - return timeout; + __set_current_state(TASK_RUNNING); + i915_reset(request->i915); + return true; } /** @@ -1065,7 +1058,9 @@ long i915_wait_request(struct drm_i915_gem_request *req, { const int state = flags & I915_WAIT_INTERRUPTIBLE ? TASK_INTERRUPTIBLE : TASK_UNINTERRUPTIBLE; - DEFINE_WAIT(reset); + wait_queue_head_t *errq = &req->i915->gpu_error.wait_queue; + DEFINE_WAIT_FUNC(reset, default_wake_function); + DEFINE_WAIT_FUNC(exec, default_wake_function); struct intel_wait wait; might_sleep(); @@ -1082,27 +1077,45 @@ long i915_wait_request(struct drm_i915_gem_request *req, if (!timeout) return -ETIME; - trace_i915_gem_request_wait_begin(req); + trace_i915_gem_request_wait_begin(req, flags); + + add_wait_queue(&req->execute, &exec); + if (flags & I915_WAIT_LOCKED) + add_wait_queue(errq, &reset); + + intel_wait_init(&wait); - if (!i915_sw_fence_done(&req->execute)) { - timeout = __i915_request_wait_for_execute(req, flags, timeout); - if (timeout < 0) +restart: + do { + set_current_state(state); + if (intel_wait_update_request(&wait, req)) + break; + + if (flags & I915_WAIT_LOCKED && + __i915_wait_request_check_and_reset(req)) + continue; + + if (signal_pending_state(state, current)) { + timeout = -ERESTARTSYS; goto complete; + } - GEM_BUG_ON(!i915_sw_fence_done(&req->execute)); - } - GEM_BUG_ON(!i915_sw_fence_done(&req->submit)); - GEM_BUG_ON(!req->global_seqno); + if (!timeout) { + timeout = -ETIME; + goto complete; + } + + timeout = io_schedule_timeout(timeout); + } while (1); + + GEM_BUG_ON(!intel_wait_has_seqno(&wait)); + GEM_BUG_ON(!i915_sw_fence_signaled(&req->submit)); /* Optimistic short spin before touching IRQs */ if (i915_spin_request(req, state, 5)) goto complete; set_current_state(state); - if (flags & I915_WAIT_LOCKED) - add_wait_queue(&req->i915->gpu_error.wait_queue, &reset); - - intel_wait_init(&wait, req->global_seqno); if (intel_engine_add_wait(req->engine, &wait)) /* In order to check that we haven't missed the interrupt * as we enabled it, we need to kick ourselves to do a @@ -1110,6 +1123,9 @@ long i915_wait_request(struct drm_i915_gem_request *req, */ goto wakeup; + if (flags & I915_WAIT_LOCKED) + __i915_wait_request_check_and_reset(req); + for (;;) { if (signal_pending_state(state, current)) { timeout = -ERESTARTSYS; @@ -1123,7 +1139,8 @@ long i915_wait_request(struct drm_i915_gem_request *req, timeout = io_schedule_timeout(timeout); - if (intel_wait_complete(&wait)) + if (intel_wait_complete(&wait) && + intel_wait_check_request(&wait, req)) break; set_current_state(state); @@ -1148,25 +1165,25 @@ wakeup: * itself, or indirectly by recovering the GPU). */ if (flags & I915_WAIT_LOCKED && - i915_reset_in_progress(&req->i915->gpu_error)) { - __set_current_state(TASK_RUNNING); - i915_reset(req->i915); - reset_wait_queue(&req->i915->gpu_error.wait_queue, - &reset); + __i915_wait_request_check_and_reset(req)) continue; - } /* Only spin if we know the GPU is processing this request */ if (i915_spin_request(req, state, 2)) break; + + if (!intel_wait_check_request(&wait, req)) { + intel_engine_remove_wait(req->engine, &wait); + goto restart; + } } intel_engine_remove_wait(req->engine, &wait); - if (flags & I915_WAIT_LOCKED) - remove_wait_queue(&req->i915->gpu_error.wait_queue, &reset); - __set_current_state(TASK_RUNNING); - complete: + __set_current_state(TASK_RUNNING); + if (flags & I915_WAIT_LOCKED) + remove_wait_queue(errq, &reset); + remove_wait_queue(&req->execute, &exec); trace_i915_gem_request_wait_end(req); return timeout; @@ -1175,14 +1192,21 @@ complete: static void engine_retire_requests(struct intel_engine_cs *engine) { struct drm_i915_gem_request *request, *next; + u32 seqno = intel_engine_get_seqno(engine); + LIST_HEAD(retire); + spin_lock_irq(&engine->timeline->lock); list_for_each_entry_safe(request, next, &engine->timeline->requests, link) { - if (!__i915_gem_request_completed(request)) - return; + if (!i915_seqno_passed(seqno, request->global_seqno)) + break; - i915_gem_request_retire(request); + list_move_tail(&request->link, &retire); } + spin_unlock_irq(&engine->timeline->lock); + + list_for_each_entry_safe(request, next, &retire, link) + i915_gem_request_retire(request); } void i915_gem_retire_requests(struct drm_i915_private *dev_priv) @@ -1198,3 +1222,8 @@ void i915_gem_retire_requests(struct drm_i915_private *dev_priv) for_each_engine(engine, dev_priv, id) engine_retire_requests(engine); } + +#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) +#include "selftests/mock_request.c" +#include "selftests/i915_gem_request.c" +#endif diff --git a/drivers/gpu/drm/i915/i915_gem_request.h b/drivers/gpu/drm/i915/i915_gem_request.h index ea511f06efaf..5f73d8c0a38a 100644 --- a/drivers/gpu/drm/i915/i915_gem_request.h +++ b/drivers/gpu/drm/i915/i915_gem_request.h @@ -119,18 +119,10 @@ struct drm_i915_gem_request { * The submit fence is used to await upon all of the request's * dependencies. When it is signaled, the request is ready to run. * It is used by the driver to then queue the request for execution. - * - * The execute fence is used to signal when the request has been - * sent to hardware. - * - * It is illegal for the submit fence of one request to wait upon the - * execute fence of an earlier request. It should be sufficient to - * wait upon the submit fence of the earlier request. */ struct i915_sw_fence submit; - struct i915_sw_fence execute; wait_queue_t submitq; - wait_queue_t execq; + wait_queue_head_t execute; /* A list of everyone we wait upon, and everyone who waits upon us. * Even though we will not be submitted to the hardware before the @@ -143,13 +135,12 @@ struct drm_i915_gem_request { struct i915_priotree priotree; struct i915_dependency dep; - u32 global_seqno; - - /** GEM sequence number associated with the previous request, - * when the HWS breadcrumb is equal to this the GPU is processing - * this request. + /** GEM sequence number associated with this request on the + * global execution timeline. It is zero when the request is not + * on the HW queue (i.e. not on the engine timeline list). + * Its value is guarded by the timeline spinlock. */ - u32 previous_seqno; + u32 global_seqno; /** Position in the ring of the start of the request */ u32 head; @@ -243,6 +234,30 @@ static inline void i915_gem_request_assign(struct drm_i915_gem_request **pdst, *pdst = src; } +/** + * i915_gem_request_global_seqno - report the current global seqno + * @request - the request + * + * A request is assigned a global seqno only when it is on the hardware + * execution queue. The global seqno can be used to maintain a list of + * requests on the same engine in retirement order, for example for + * constructing a priority queue for waiting. Prior to its execution, or + * if it is subsequently removed in the event of preemption, its global + * seqno is zero. As both insertion and removal from the execution queue + * may operate in IRQ context, it is not guarded by the usual struct_mutex + * BKL. Instead those relying on the global seqno must be prepared for its + * value to change between reads. Only when the request is complete can + * the global seqno be stable (due to the memory barriers on submitting + * the commands to the hardware to write the breadcrumb, if the HWS shows + * that it has passed the global seqno and the global seqno is unchanged + * after the read, it is indeed complete). + */ +static u32 +i915_gem_request_global_seqno(const struct drm_i915_gem_request *request) +{ + return READ_ONCE(request->global_seqno); +} + int i915_gem_request_await_object(struct drm_i915_gem_request *to, struct drm_i915_gem_object *obj, @@ -259,6 +274,9 @@ void __i915_add_request(struct drm_i915_gem_request *req, bool flush_caches); void __i915_gem_request_submit(struct drm_i915_gem_request *request); void i915_gem_request_submit(struct drm_i915_gem_request *request); +void __i915_gem_request_unsubmit(struct drm_i915_gem_request *request); +void i915_gem_request_unsubmit(struct drm_i915_gem_request *request); + struct intel_rps_client; #define NO_WAITBOOST ERR_PTR(-1) #define IS_RPS_CLIENT(p) (!IS_ERR(p)) @@ -283,46 +301,55 @@ static inline bool i915_seqno_passed(u32 seq1, u32 seq2) } static inline bool -__i915_gem_request_started(const struct drm_i915_gem_request *req) +__i915_gem_request_started(const struct drm_i915_gem_request *req, u32 seqno) { - GEM_BUG_ON(!req->global_seqno); + GEM_BUG_ON(!seqno); return i915_seqno_passed(intel_engine_get_seqno(req->engine), - req->previous_seqno); + seqno - 1); } static inline bool i915_gem_request_started(const struct drm_i915_gem_request *req) { - if (!req->global_seqno) + u32 seqno; + + seqno = i915_gem_request_global_seqno(req); + if (!seqno) return false; - return __i915_gem_request_started(req); + return __i915_gem_request_started(req, seqno); } static inline bool -__i915_gem_request_completed(const struct drm_i915_gem_request *req) +__i915_gem_request_completed(const struct drm_i915_gem_request *req, u32 seqno) { - GEM_BUG_ON(!req->global_seqno); - return i915_seqno_passed(intel_engine_get_seqno(req->engine), - req->global_seqno); + GEM_BUG_ON(!seqno); + return i915_seqno_passed(intel_engine_get_seqno(req->engine), seqno) && + seqno == i915_gem_request_global_seqno(req); } static inline bool i915_gem_request_completed(const struct drm_i915_gem_request *req) { - if (!req->global_seqno) + u32 seqno; + + seqno = i915_gem_request_global_seqno(req); + if (!seqno) return false; - return __i915_gem_request_completed(req); + return __i915_gem_request_completed(req, seqno); } bool __i915_spin_request(const struct drm_i915_gem_request *request, - int state, unsigned long timeout_us); + u32 seqno, int state, unsigned long timeout_us); static inline bool i915_spin_request(const struct drm_i915_gem_request *request, int state, unsigned long timeout_us) { - return (__i915_gem_request_started(request) && - __i915_spin_request(request, state, timeout_us)); + u32 seqno; + + seqno = i915_gem_request_global_seqno(request); + return (__i915_gem_request_started(request, seqno) && + __i915_spin_request(request, seqno, state, timeout_us)); } /* We treat requests as fences. This is not be to confused with our diff --git a/drivers/gpu/drm/i915/i915_gem_shrinker.c b/drivers/gpu/drm/i915/i915_gem_shrinker.c index 401006b4c6a3..8bc515e8b2a2 100644 --- a/drivers/gpu/drm/i915/i915_gem_shrinker.c +++ b/drivers/gpu/drm/i915/i915_gem_shrinker.c @@ -207,7 +207,7 @@ i915_gem_shrink(struct drm_i915_private *dev_priv, if (!(flags & I915_SHRINK_ACTIVE) && (i915_gem_object_is_active(obj) || - obj->framebuffer_references)) + atomic_read(&obj->framebuffer_references))) continue; if (!can_release_pages(obj)) @@ -259,10 +259,13 @@ unsigned long i915_gem_shrink_all(struct drm_i915_private *dev_priv) { unsigned long freed; + intel_runtime_pm_get(dev_priv); freed = i915_gem_shrink(dev_priv, -1UL, I915_SHRINK_BOUND | I915_SHRINK_UNBOUND | I915_SHRINK_ACTIVE); + intel_runtime_pm_put(dev_priv); + rcu_barrier(); /* wait until our RCU delayed slab frees are completed */ return freed; @@ -380,9 +383,7 @@ i915_gem_shrinker_oom(struct notifier_block *nb, unsigned long event, void *ptr) if (!i915_gem_shrinker_lock_uninterruptible(dev_priv, &slu, 5000)) return NOTIFY_DONE; - intel_runtime_pm_get(dev_priv); freed_pages = i915_gem_shrink_all(dev_priv); - intel_runtime_pm_put(dev_priv); /* Because we may be allocating inside our own driver, we cannot * assert that there are no objects with pinned pages that are not diff --git a/drivers/gpu/drm/i915/i915_gem_stolen.c b/drivers/gpu/drm/i915/i915_gem_stolen.c index 9673bcc3b6ad..f3abdc27c5dd 100644 --- a/drivers/gpu/drm/i915/i915_gem_stolen.c +++ b/drivers/gpu/drm/i915/i915_gem_stolen.c @@ -79,12 +79,12 @@ void i915_gem_stolen_remove_node(struct drm_i915_private *dev_priv, mutex_unlock(&dev_priv->mm.stolen_lock); } -static unsigned long i915_stolen_to_physical(struct drm_i915_private *dev_priv) +static dma_addr_t i915_stolen_to_dma(struct drm_i915_private *dev_priv) { struct pci_dev *pdev = dev_priv->drm.pdev; struct i915_ggtt *ggtt = &dev_priv->ggtt; struct resource *r; - u32 base; + dma_addr_t base; /* Almost universally we can find the Graphics Base of Stolen Memory * at register BSM (0x5c) in the igfx configuration space. On a few @@ -189,14 +189,14 @@ static unsigned long i915_stolen_to_physical(struct drm_i915_private *dev_priv) base = tom - tseg_size - ggtt->stolen_size; } - if (base == 0) + if (base == 0 || add_overflows(base, ggtt->stolen_size)) return 0; /* make sure we don't clobber the GTT if it's within stolen memory */ if (INTEL_GEN(dev_priv) <= 4 && !IS_G33(dev_priv) && !IS_PINEVIEW(dev_priv) && !IS_G4X(dev_priv)) { struct { - u32 start, end; + dma_addr_t start, end; } stolen[2] = { { .start = base, .end = base + ggtt->stolen_size, }, { .start = base, .end = base + ggtt->stolen_size, }, @@ -228,11 +228,13 @@ static unsigned long i915_stolen_to_physical(struct drm_i915_private *dev_priv) if (stolen[0].start != stolen[1].start || stolen[0].end != stolen[1].end) { + dma_addr_t end = base + ggtt->stolen_size - 1; + DRM_DEBUG_KMS("GTT within stolen memory at 0x%llx-0x%llx\n", (unsigned long long)ggtt_start, (unsigned long long)ggtt_end - 1); - DRM_DEBUG_KMS("Stolen memory adjusted to 0x%x-0x%x\n", - base, base + (u32)ggtt->stolen_size - 1); + DRM_DEBUG_KMS("Stolen memory adjusted to %pad - %pad\n", + &base, &end); } } @@ -261,8 +263,10 @@ static unsigned long i915_stolen_to_physical(struct drm_i915_private *dev_priv) * range. Apparently this works. */ if (r == NULL && !IS_GEN3(dev_priv)) { - DRM_ERROR("conflict detected with stolen region: [0x%08x - 0x%08x]\n", - base, base + (uint32_t)ggtt->stolen_size); + dma_addr_t end = base + ggtt->stolen_size; + + DRM_ERROR("conflict detected with stolen region: [%pad - %pad]\n", + &base, &end); base = 0; } } @@ -281,13 +285,13 @@ void i915_gem_cleanup_stolen(struct drm_device *dev) } static void g4x_get_stolen_reserved(struct drm_i915_private *dev_priv, - phys_addr_t *base, u32 *size) + dma_addr_t *base, u32 *size) { struct i915_ggtt *ggtt = &dev_priv->ggtt; uint32_t reg_val = I915_READ(IS_GM45(dev_priv) ? CTG_STOLEN_RESERVED : ELK_STOLEN_RESERVED); - phys_addr_t stolen_top = dev_priv->mm.stolen_base + ggtt->stolen_size; + dma_addr_t stolen_top = dev_priv->mm.stolen_base + ggtt->stolen_size; *base = (reg_val & G4X_STOLEN_RESERVED_ADDR2_MASK) << 16; @@ -304,7 +308,7 @@ static void g4x_get_stolen_reserved(struct drm_i915_private *dev_priv, } static void gen6_get_stolen_reserved(struct drm_i915_private *dev_priv, - phys_addr_t *base, u32 *size) + dma_addr_t *base, u32 *size) { uint32_t reg_val = I915_READ(GEN6_STOLEN_RESERVED); @@ -330,7 +334,7 @@ static void gen6_get_stolen_reserved(struct drm_i915_private *dev_priv, } static void gen7_get_stolen_reserved(struct drm_i915_private *dev_priv, - phys_addr_t *base, u32 *size) + dma_addr_t *base, u32 *size) { uint32_t reg_val = I915_READ(GEN6_STOLEN_RESERVED); @@ -350,7 +354,7 @@ static void gen7_get_stolen_reserved(struct drm_i915_private *dev_priv, } static void chv_get_stolen_reserved(struct drm_i915_private *dev_priv, - phys_addr_t *base, u32 *size) + dma_addr_t *base, u32 *size) { uint32_t reg_val = I915_READ(GEN6_STOLEN_RESERVED); @@ -376,11 +380,11 @@ static void chv_get_stolen_reserved(struct drm_i915_private *dev_priv, } static void bdw_get_stolen_reserved(struct drm_i915_private *dev_priv, - phys_addr_t *base, u32 *size) + dma_addr_t *base, u32 *size) { struct i915_ggtt *ggtt = &dev_priv->ggtt; uint32_t reg_val = I915_READ(GEN6_STOLEN_RESERVED); - phys_addr_t stolen_top; + dma_addr_t stolen_top; stolen_top = dev_priv->mm.stolen_base + ggtt->stolen_size; @@ -399,7 +403,7 @@ static void bdw_get_stolen_reserved(struct drm_i915_private *dev_priv, int i915_gem_init_stolen(struct drm_i915_private *dev_priv) { struct i915_ggtt *ggtt = &dev_priv->ggtt; - phys_addr_t reserved_base, stolen_top; + dma_addr_t reserved_base, stolen_top; u32 reserved_total, reserved_size; u32 stolen_usable_start; @@ -420,7 +424,7 @@ int i915_gem_init_stolen(struct drm_i915_private *dev_priv) if (ggtt->stolen_size == 0) return 0; - dev_priv->mm.stolen_base = i915_stolen_to_physical(dev_priv); + dev_priv->mm.stolen_base = i915_stolen_to_dma(dev_priv); if (dev_priv->mm.stolen_base == 0) return 0; @@ -469,8 +473,8 @@ int i915_gem_init_stolen(struct drm_i915_private *dev_priv) if (reserved_base < dev_priv->mm.stolen_base || reserved_base + reserved_size > stolen_top) { - phys_addr_t reserved_top = reserved_base + reserved_size; - DRM_DEBUG_KMS("Stolen reserved area [%pa - %pa] outside stolen memory [%pa - %pa]\n", + dma_addr_t reserved_top = reserved_base + reserved_size; + DRM_DEBUG_KMS("Stolen reserved area [%pad - %pad] outside stolen memory [%pad - %pad]\n", &reserved_base, &reserved_top, &dev_priv->mm.stolen_base, &stolen_top); return 0; diff --git a/drivers/gpu/drm/i915/i915_gem_tiling.c b/drivers/gpu/drm/i915/i915_gem_tiling.c index 974ac08df473..46ade36dcee6 100644 --- a/drivers/gpu/drm/i915/i915_gem_tiling.c +++ b/drivers/gpu/drm/i915/i915_gem_tiling.c @@ -158,13 +158,8 @@ i915_tiling_ok(struct drm_i915_gem_object *obj, if (stride > 8192) return false; - if (IS_GEN3(i915)) { - if (obj->base.size > I830_FENCE_MAX_SIZE_VAL << 20) - return false; - } else { - if (obj->base.size > I830_FENCE_MAX_SIZE_VAL << 19) - return false; - } + if (!is_power_of_2(stride)) + return false; } if (IS_GEN2(i915) || @@ -176,12 +171,7 @@ i915_tiling_ok(struct drm_i915_gem_object *obj, if (!stride || !IS_ALIGNED(stride, tile_width)) return false; - /* 965+ just needs multiples of tile width */ - if (INTEL_GEN(i915) >= 4) - return true; - - /* Pre-965 needs power of two tile widths */ - return is_power_of_2(stride); + return true; } static bool i915_vma_fence_prepare(struct i915_vma *vma, @@ -248,7 +238,7 @@ i915_gem_object_set_tiling(struct drm_i915_gem_object *obj, if ((tiling | stride) == obj->tiling_and_stride) return 0; - if (obj->framebuffer_references) + if (atomic_read(&obj->framebuffer_references)) return -EBUSY; /* We need to rebind the object if its current allocation diff --git a/drivers/gpu/drm/i915/i915_gem_timeline.h b/drivers/gpu/drm/i915/i915_gem_timeline.h index f2e51f42cc2f..6c53e14cab2a 100644 --- a/drivers/gpu/drm/i915/i915_gem_timeline.h +++ b/drivers/gpu/drm/i915/i915_gem_timeline.h @@ -33,7 +33,13 @@ struct i915_gem_timeline; struct intel_timeline { u64 fence_context; - u32 last_submitted_seqno; + u32 seqno; + + /** + * Count of outstanding requests, from the time they are constructed + * to the moment they are retired. Loosely coupled to hardware. + */ + u32 inflight_seqnos; spinlock_t lock; @@ -56,7 +62,6 @@ struct intel_timeline { struct i915_gem_timeline { struct list_head link; - atomic_t seqno; struct drm_i915_private *i915; const char *name; diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c index 9cd22cda17af..2b1d15668192 100644 --- a/drivers/gpu/drm/i915/i915_gpu_error.c +++ b/drivers/gpu/drm/i915/i915_gpu_error.c @@ -342,7 +342,7 @@ static void print_error_buffers(struct drm_i915_error_state_buf *m, } static void error_print_instdone(struct drm_i915_error_state_buf *m, - struct drm_i915_error_engine *ee) + const struct drm_i915_error_engine *ee) { int slice; int subslice; @@ -372,7 +372,7 @@ static void error_print_instdone(struct drm_i915_error_state_buf *m, static void error_print_request(struct drm_i915_error_state_buf *m, const char *prefix, - struct drm_i915_error_request *erq) + const struct drm_i915_error_request *erq) { if (!erq->seqno) return; @@ -384,8 +384,17 @@ static void error_print_request(struct drm_i915_error_state_buf *m, erq->head, erq->tail); } +static void error_print_context(struct drm_i915_error_state_buf *m, + const char *header, + const struct drm_i915_error_context *ctx) +{ + err_printf(m, "%s%s[%d] user_handle %d hw_id %d, ban score %d guilty %d active %d\n", + header, ctx->comm, ctx->pid, ctx->handle, ctx->hw_id, + ctx->ban_score, ctx->guilty, ctx->active); +} + static void error_print_engine(struct drm_i915_error_state_buf *m, - struct drm_i915_error_engine *ee) + const struct drm_i915_error_engine *ee) { err_printf(m, "%s command stream:\n", engine_str(ee->engine_id)); err_printf(m, " START: 0x%08x\n", ee->start); @@ -457,6 +466,7 @@ static void error_print_engine(struct drm_i915_error_state_buf *m, error_print_request(m, " ELSP[0]: ", &ee->execlist[0]); error_print_request(m, " ELSP[1]: ", &ee->execlist[1]); + error_print_context(m, " Active context: ", &ee->context); } void i915_error_printf(struct drm_i915_error_state_buf *e, const char *f, ...) @@ -536,21 +546,57 @@ static void err_print_capabilities(struct drm_i915_error_state_buf *m, #undef PRINT_FLAG } +static __always_inline void err_print_param(struct drm_i915_error_state_buf *m, + const char *name, + const char *type, + const void *x) +{ + if (!__builtin_strcmp(type, "bool")) + err_printf(m, "i915.%s=%s\n", name, yesno(*(const bool *)x)); + else if (!__builtin_strcmp(type, "int")) + err_printf(m, "i915.%s=%d\n", name, *(const int *)x); + else if (!__builtin_strcmp(type, "unsigned int")) + err_printf(m, "i915.%s=%u\n", name, *(const unsigned int *)x); + else if (!__builtin_strcmp(type, "char *")) + err_printf(m, "i915.%s=%s\n", name, *(const char **)x); + else + BUILD_BUG(); +} + +static void err_print_params(struct drm_i915_error_state_buf *m, + const struct i915_params *p) +{ +#define PRINT(T, x) err_print_param(m, #x, #T, &p->x); + I915_PARAMS_FOR_EACH(PRINT); +#undef PRINT +} + +static void err_print_pciid(struct drm_i915_error_state_buf *m, + struct drm_i915_private *i915) +{ + struct pci_dev *pdev = i915->drm.pdev; + + err_printf(m, "PCI ID: 0x%04x\n", pdev->device); + err_printf(m, "PCI Revision: 0x%02x\n", pdev->revision); + err_printf(m, "PCI Subsystem: %04x:%04x\n", + pdev->subsystem_vendor, + pdev->subsystem_device); +} + int i915_error_state_to_str(struct drm_i915_error_state_buf *m, - const struct i915_error_state_file_priv *error_priv) + const struct i915_gpu_state *error) { - struct drm_i915_private *dev_priv = error_priv->i915; - struct pci_dev *pdev = dev_priv->drm.pdev; - struct drm_i915_error_state *error = error_priv->error; + struct drm_i915_private *dev_priv = m->i915; struct drm_i915_error_object *obj; int i, j; if (!error) { - err_printf(m, "no error state collected\n"); - goto out; + err_printf(m, "No error state collected\n"); + return 0; } - err_printf(m, "%s\n", error->error_msg); + if (*error->error_msg) + err_printf(m, "%s\n", error->error_msg); err_printf(m, "Kernel: " UTS_RELEASE "\n"); err_printf(m, "Time: %ld s %ld us\n", error->time.tv_sec, error->time.tv_usec); @@ -558,26 +604,22 @@ int i915_error_state_to_str(struct drm_i915_error_state_buf *m, error->boottime.tv_sec, error->boottime.tv_usec); err_printf(m, "Uptime: %ld s %ld us\n", error->uptime.tv_sec, error->uptime.tv_usec); - err_print_capabilities(m, &error->device_info); for (i = 0; i < ARRAY_SIZE(error->engine); i++) { if (error->engine[i].hangcheck_stalled && - error->engine[i].pid != -1) { - err_printf(m, "Active process (on ring %s): %s [%d], context bans %d\n", + error->engine[i].context.pid) { + err_printf(m, "Active process (on ring %s): %s [%d], score %d\n", engine_str(i), - error->engine[i].comm, - error->engine[i].pid, - error->engine[i].context_bans); + error->engine[i].context.comm, + error->engine[i].context.pid, + error->engine[i].context.ban_score); } } err_printf(m, "Reset count: %u\n", error->reset_count); err_printf(m, "Suspend count: %u\n", error->suspend_count); err_printf(m, "Platform: %s\n", intel_platform_name(error->device_info.platform)); - err_printf(m, "PCI ID: 0x%04x\n", pdev->device); - err_printf(m, "PCI Revision: 0x%02x\n", pdev->revision); - err_printf(m, "PCI Subsystem: %04x:%04x\n", - pdev->subsystem_vendor, - pdev->subsystem_device); + err_print_pciid(m, error->i915); + err_printf(m, "IOMMU enabled?: %d\n", error->iommu); if (HAS_CSR(dev_priv)) { @@ -592,19 +634,15 @@ int i915_error_state_to_str(struct drm_i915_error_state_buf *m, err_printf(m, "EIR: 0x%08x\n", error->eir); err_printf(m, "IER: 0x%08x\n", error->ier); - if (INTEL_GEN(dev_priv) >= 8) { - for (i = 0; i < 4; i++) - err_printf(m, "GTIER gt %d: 0x%08x\n", i, - error->gtier[i]); - } else if (HAS_PCH_SPLIT(dev_priv) || IS_VALLEYVIEW(dev_priv)) - err_printf(m, "GTIER: 0x%08x\n", error->gtier[0]); + for (i = 0; i < error->ngtier; i++) + err_printf(m, "GTIER[%d]: 0x%08x\n", i, error->gtier[i]); err_printf(m, "PGTBL_ER: 0x%08x\n", error->pgtbl_er); err_printf(m, "FORCEWAKE: 0x%08x\n", error->forcewake); err_printf(m, "DERRMR: 0x%08x\n", error->derrmr); err_printf(m, "CCID: 0x%08x\n", error->ccid); err_printf(m, "Missed interrupts: 0x%08lx\n", dev_priv->gpu_error.missed_irq_rings); - for (i = 0; i < dev_priv->num_fence_regs; i++) + for (i = 0; i < error->nfence; i++) err_printf(m, " fence[%d] = %08llx\n", i, error->fence[i]); if (INTEL_GEN(dev_priv) >= 6) { @@ -653,16 +691,18 @@ int i915_error_state_to_str(struct drm_i915_error_state_buf *m, error->pinned_bo_count); for (i = 0; i < ARRAY_SIZE(error->engine); i++) { - struct drm_i915_error_engine *ee = &error->engine[i]; + const struct drm_i915_error_engine *ee = &error->engine[i]; obj = ee->batchbuffer; if (obj) { err_puts(m, dev_priv->engine[i]->name); - if (ee->pid != -1) - err_printf(m, " (submitted by %s [%d], bans %d)", - ee->comm, - ee->pid, - ee->context_bans); + if (ee->context.pid) + err_printf(m, " (submitted by %s [%d], ctx %d [%d], score %d)", + ee->context.comm, + ee->context.pid, + ee->context.handle, + ee->context.hw_id, + ee->context.ban_score); err_printf(m, " --- gtt_offset = 0x%08x %08x\n", upper_32_bits(obj->gtt_offset), lower_32_bits(obj->gtt_offset)); @@ -716,9 +756,11 @@ int i915_error_state_to_str(struct drm_i915_error_state_buf *m, intel_overlay_print_error_state(m, error->overlay); if (error->display) - intel_display_print_error_state(m, dev_priv, error->display); + intel_display_print_error_state(m, error->display); + + err_print_capabilities(m, &error->device_info); + err_print_params(m, &error->params); -out: if (m->bytes == 0 && m->err) return m->err; @@ -770,10 +812,16 @@ static void i915_error_object_free(struct drm_i915_error_object *obj) kfree(obj); } -static void i915_error_state_free(struct kref *error_ref) +static __always_inline void free_param(const char *type, void *x) +{ + if (!__builtin_strcmp(type, "char *")) + kfree(*(void **)x); +} + +void __i915_gpu_state_free(struct kref *error_ref) { - struct drm_i915_error_state *error = container_of(error_ref, - typeof(*error), ref); + struct i915_gpu_state *error = + container_of(error_ref, typeof(*error), ref); int i; for (i = 0; i < ARRAY_SIZE(error->engine); i++) { @@ -800,6 +848,11 @@ static void i915_error_state_free(struct kref *error_ref) kfree(error->overlay); kfree(error->display); + +#define FREE(T, x) free_param(#T, &error->params.x); + I915_PARAMS_FOR_EACH(FREE); +#undef FREE + kfree(error); } @@ -938,7 +991,7 @@ static u32 capture_error_bo(struct drm_i915_error_buffer *err, * It's only a small step better than a random number in its current form. */ static uint32_t i915_error_generate_code(struct drm_i915_private *dev_priv, - struct drm_i915_error_state *error, + struct i915_gpu_state *error, int *engine_id) { uint32_t error_code = 0; @@ -963,20 +1016,21 @@ static uint32_t i915_error_generate_code(struct drm_i915_private *dev_priv, } static void i915_gem_record_fences(struct drm_i915_private *dev_priv, - struct drm_i915_error_state *error) + struct i915_gpu_state *error) { int i; - if (IS_GEN3(dev_priv) || IS_GEN2(dev_priv)) { + if (INTEL_GEN(dev_priv) >= 6) { for (i = 0; i < dev_priv->num_fence_regs; i++) - error->fence[i] = I915_READ(FENCE_REG(i)); - } else if (IS_GEN5(dev_priv) || IS_GEN4(dev_priv)) { + error->fence[i] = I915_READ64(FENCE_REG_GEN6_LO(i)); + } else if (INTEL_GEN(dev_priv) >= 4) { for (i = 0; i < dev_priv->num_fence_regs; i++) error->fence[i] = I915_READ64(FENCE_REG_965_LO(i)); - } else if (INTEL_GEN(dev_priv) >= 6) { + } else { for (i = 0; i < dev_priv->num_fence_regs; i++) - error->fence[i] = I915_READ64(FENCE_REG_GEN6_LO(i)); + error->fence[i] = I915_READ(FENCE_REG(i)); } + error->nfence = i; } static inline u32 @@ -1000,7 +1054,7 @@ gen8_engine_sync_index(struct intel_engine_cs *engine, return idx; } -static void gen8_record_semaphore_state(struct drm_i915_error_state *error, +static void gen8_record_semaphore_state(struct i915_gpu_state *error, struct intel_engine_cs *engine, struct drm_i915_error_engine *ee) { @@ -1080,7 +1134,7 @@ static void error_record_engine_waiters(struct intel_engine_cs *engine, ee->waiters = waiter; for (rb = rb_first(&b->waiters); rb; rb = rb_next(rb)) { - struct intel_wait *w = container_of(rb, typeof(*w), node); + struct intel_wait *w = rb_entry(rb, typeof(*w), node); strcpy(waiter->comm, w->tsk->comm); waiter->pid = w->tsk->pid; @@ -1093,7 +1147,7 @@ static void error_record_engine_waiters(struct intel_engine_cs *engine, spin_unlock_irq(&b->lock); } -static void error_record_engine_registers(struct drm_i915_error_state *error, +static void error_record_engine_registers(struct i915_gpu_state *error, struct intel_engine_cs *engine, struct drm_i915_error_engine *ee) { @@ -1267,8 +1321,30 @@ static void error_record_engine_execlists(struct intel_engine_cs *engine, &ee->execlist[n]); } +static void record_context(struct drm_i915_error_context *e, + struct i915_gem_context *ctx) +{ + if (ctx->pid) { + struct task_struct *task; + + rcu_read_lock(); + task = pid_task(ctx->pid, PIDTYPE_PID); + if (task) { + strcpy(e->comm, task->comm); + e->pid = task->pid; + } + rcu_read_unlock(); + } + + e->handle = ctx->user_handle; + e->hw_id = ctx->hw_id; + e->ban_score = ctx->ban_score; + e->guilty = ctx->guilty_count; + e->active = ctx->active_count; +} + static void i915_gem_record_rings(struct drm_i915_private *dev_priv, - struct drm_i915_error_state *error) + struct i915_gpu_state *error) { struct i915_ggtt *ggtt = &dev_priv->ggtt; int i; @@ -1281,7 +1357,6 @@ static void i915_gem_record_rings(struct drm_i915_private *dev_priv, struct drm_i915_error_engine *ee = &error->engine[i]; struct drm_i915_gem_request *request; - ee->pid = -1; ee->engine_id = -1; if (!engine) @@ -1296,11 +1371,12 @@ static void i915_gem_record_rings(struct drm_i915_private *dev_priv, request = i915_gem_find_active_request(engine); if (request) { struct intel_ring *ring; - struct pid *pid; ee->vm = request->ctx->ppgtt ? &request->ctx->ppgtt->base : &ggtt->base; + record_context(&ee->context, request->ctx); + /* We need to copy these to an anonymous buffer * as the simplest method to avoid being overwritten * by userspace. @@ -1318,19 +1394,6 @@ static void i915_gem_record_rings(struct drm_i915_private *dev_priv, i915_error_object_create(dev_priv, request->ctx->engine[i].state); - pid = request->ctx->pid; - if (pid) { - struct task_struct *task; - - rcu_read_lock(); - task = pid_task(pid, PIDTYPE_PID); - if (task) { - strcpy(ee->comm, task->comm); - ee->pid = task->pid; - } - rcu_read_unlock(); - } - error->simulated |= i915_gem_context_no_error_capture(request->ctx); @@ -1357,7 +1420,7 @@ static void i915_gem_record_rings(struct drm_i915_private *dev_priv, } static void i915_gem_capture_vm(struct drm_i915_private *dev_priv, - struct drm_i915_error_state *error, + struct i915_gpu_state *error, struct i915_address_space *vm, int idx) { @@ -1383,7 +1446,7 @@ static void i915_gem_capture_vm(struct drm_i915_private *dev_priv, } static void i915_capture_active_buffers(struct drm_i915_private *dev_priv, - struct drm_i915_error_state *error) + struct i915_gpu_state *error) { int cnt = 0, i, j; @@ -1408,7 +1471,7 @@ static void i915_capture_active_buffers(struct drm_i915_private *dev_priv, } static void i915_capture_pinned_buffers(struct drm_i915_private *dev_priv, - struct drm_i915_error_state *error) + struct i915_gpu_state *error) { struct i915_address_space *vm = &dev_priv->ggtt.base; struct drm_i915_error_buffer *bo; @@ -1439,7 +1502,7 @@ static void i915_capture_pinned_buffers(struct drm_i915_private *dev_priv, } static void i915_gem_capture_guc_log_buffer(struct drm_i915_private *dev_priv, - struct drm_i915_error_state *error) + struct i915_gpu_state *error) { /* Capturing log buf contents won't be useful if logging was disabled */ if (!dev_priv->guc.log.vma || (i915.guc_log_level < 0)) @@ -1451,7 +1514,7 @@ static void i915_gem_capture_guc_log_buffer(struct drm_i915_private *dev_priv, /* Capture all registers which don't fit into another category. */ static void i915_capture_reg_state(struct drm_i915_private *dev_priv, - struct drm_i915_error_state *error) + struct i915_gpu_state *error) { int i; @@ -1508,9 +1571,11 @@ static void i915_capture_reg_state(struct drm_i915_private *dev_priv, error->ier = I915_READ(GEN8_DE_MISC_IER); for (i = 0; i < 4; i++) error->gtier[i] = I915_READ(GEN8_GT_IER(i)); + error->ngtier = 4; } else if (HAS_PCH_SPLIT(dev_priv)) { error->ier = I915_READ(DEIER); error->gtier[0] = I915_READ(GTIER); + error->ngtier = 1; } else if (IS_GEN2(dev_priv)) { error->ier = I915_READ16(IER); } else if (!IS_VALLEYVIEW(dev_priv)) { @@ -1521,7 +1586,7 @@ static void i915_capture_reg_state(struct drm_i915_private *dev_priv, } static void i915_error_capture_msg(struct drm_i915_private *dev_priv, - struct drm_i915_error_state *error, + struct i915_gpu_state *error, u32 engine_mask, const char *error_msg) { @@ -1534,12 +1599,12 @@ static void i915_error_capture_msg(struct drm_i915_private *dev_priv, "GPU HANG: ecode %d:%d:0x%08x", INTEL_GEN(dev_priv), engine_id, ecode); - if (engine_id != -1 && error->engine[engine_id].pid != -1) + if (engine_id != -1 && error->engine[engine_id].context.pid) len += scnprintf(error->error_msg + len, sizeof(error->error_msg) - len, ", in %s [%d]", - error->engine[engine_id].comm, - error->engine[engine_id].pid); + error->engine[engine_id].context.comm, + error->engine[engine_id].context.pid); scnprintf(error->error_msg + len, sizeof(error->error_msg) - len, ", reason: %s, action: %s", @@ -1548,7 +1613,7 @@ static void i915_error_capture_msg(struct drm_i915_private *dev_priv, } static void i915_capture_gen_state(struct drm_i915_private *dev_priv, - struct drm_i915_error_state *error) + struct i915_gpu_state *error) { error->iommu = -1; #ifdef CONFIG_INTEL_IOMMU @@ -1562,9 +1627,26 @@ static void i915_capture_gen_state(struct drm_i915_private *dev_priv, sizeof(error->device_info)); } +static __always_inline void dup_param(const char *type, void *x) +{ + if (!__builtin_strcmp(type, "char *")) + *(void **)x = kstrdup(*(void **)x, GFP_ATOMIC); +} + static int capture(void *data) { - struct drm_i915_error_state *error = data; + struct i915_gpu_state *error = data; + + do_gettimeofday(&error->time); + error->boottime = ktime_to_timeval(ktime_get_boottime()); + error->uptime = + ktime_to_timeval(ktime_sub(ktime_get(), + error->i915->gt.last_init_time)); + + error->params = i915; +#define DUP(T, x) dup_param(#T, &error->params.x); + I915_PARAMS_FOR_EACH(DUP); +#undef DUP i915_capture_gen_state(error->i915, error); i915_capture_reg_state(error->i915, error); @@ -1574,12 +1656,6 @@ static int capture(void *data) i915_capture_pinned_buffers(error->i915, error); i915_gem_capture_guc_log_buffer(error->i915, error); - do_gettimeofday(&error->time); - error->boottime = ktime_to_timeval(ktime_get_boottime()); - error->uptime = - ktime_to_timeval(ktime_sub(ktime_get(), - error->i915->gt.last_init_time)); - error->overlay = intel_overlay_capture_error_state(error->i915); error->display = intel_display_capture_error_state(error->i915); @@ -1588,6 +1664,23 @@ static int capture(void *data) #define DAY_AS_SECONDS(x) (24 * 60 * 60 * (x)) +struct i915_gpu_state * +i915_capture_gpu_state(struct drm_i915_private *i915) +{ + struct i915_gpu_state *error; + + error = kzalloc(sizeof(*error), GFP_ATOMIC); + if (!error) + return NULL; + + kref_init(&error->ref); + error->i915 = i915; + + stop_machine(capture, error, NULL); + + return error; +} + /** * i915_capture_error_state - capture an error record for later analysis * @dev: drm device @@ -1602,7 +1695,7 @@ void i915_capture_error_state(struct drm_i915_private *dev_priv, const char *error_msg) { static bool warned; - struct drm_i915_error_state *error; + struct i915_gpu_state *error; unsigned long flags; if (!i915.error_capture) @@ -1611,18 +1704,12 @@ void i915_capture_error_state(struct drm_i915_private *dev_priv, if (READ_ONCE(dev_priv->gpu_error.first_error)) return; - /* Account for pipe specific data like PIPE*STAT */ - error = kzalloc(sizeof(*error), GFP_ATOMIC); + error = i915_capture_gpu_state(dev_priv); if (!error) { DRM_DEBUG_DRIVER("out of memory, not capturing error state\n"); return; } - kref_init(&error->ref); - error->i915 = dev_priv; - - stop_machine(capture, error, NULL); - i915_error_capture_msg(dev_priv, error, engine_mask, error_msg); DRM_INFO("%s\n", error->error_msg); @@ -1636,7 +1723,7 @@ void i915_capture_error_state(struct drm_i915_private *dev_priv, } if (error) { - i915_error_state_free(&error->ref); + __i915_gpu_state_free(&error->ref); return; } @@ -1652,33 +1739,28 @@ void i915_capture_error_state(struct drm_i915_private *dev_priv, } } -void i915_error_state_get(struct drm_device *dev, - struct i915_error_state_file_priv *error_priv) +struct i915_gpu_state * +i915_first_error_state(struct drm_i915_private *i915) { - struct drm_i915_private *dev_priv = to_i915(dev); + struct i915_gpu_state *error; - spin_lock_irq(&dev_priv->gpu_error.lock); - error_priv->error = dev_priv->gpu_error.first_error; - if (error_priv->error) - kref_get(&error_priv->error->ref); - spin_unlock_irq(&dev_priv->gpu_error.lock); -} + spin_lock_irq(&i915->gpu_error.lock); + error = i915->gpu_error.first_error; + if (error) + i915_gpu_state_get(error); + spin_unlock_irq(&i915->gpu_error.lock); -void i915_error_state_put(struct i915_error_state_file_priv *error_priv) -{ - if (error_priv->error) - kref_put(&error_priv->error->ref, i915_error_state_free); + return error; } -void i915_destroy_error_state(struct drm_i915_private *dev_priv) +void i915_reset_error_state(struct drm_i915_private *i915) { - struct drm_i915_error_state *error; + struct i915_gpu_state *error; - spin_lock_irq(&dev_priv->gpu_error.lock); - error = dev_priv->gpu_error.first_error; - dev_priv->gpu_error.first_error = NULL; - spin_unlock_irq(&dev_priv->gpu_error.lock); + spin_lock_irq(&i915->gpu_error.lock); + error = i915->gpu_error.first_error; + i915->gpu_error.first_error = NULL; + spin_unlock_irq(&i915->gpu_error.lock); - if (error) - kref_put(&error->ref, i915_error_state_free); + i915_gpu_state_put(error); } diff --git a/drivers/gpu/drm/i915/i915_guc_submission.c b/drivers/gpu/drm/i915/i915_guc_submission.c index 8ced9e26f075..beec88a30347 100644 --- a/drivers/gpu/drm/i915/i915_guc_submission.c +++ b/drivers/gpu/drm/i915/i915_guc_submission.c @@ -518,6 +518,8 @@ static void __i915_guc_submit(struct drm_i915_gem_request *rq) if (i915_vma_is_map_and_fenceable(rq->ring->vma)) POSTING_READ_FW(GUC_STATUS); + trace_i915_gem_request_in(rq, 0); + b_ret = guc_ring_doorbell(client); client->submissions[engine_id] += 1; diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c index 7dba148ca792..312d30e96dc2 100644 --- a/drivers/gpu/drm/i915/i915_irq.c +++ b/drivers/gpu/drm/i915/i915_irq.c @@ -1033,9 +1033,12 @@ static void ironlake_rps_change_irq_handler(struct drm_i915_private *dev_priv) static void notify_ring(struct intel_engine_cs *engine) { - smp_store_mb(engine->breadcrumbs.irq_posted, true); - if (intel_engine_wakeup(engine)) - trace_i915_gem_request_notify(engine); + bool waiters; + + atomic_inc(&engine->irq_count); + set_bit(ENGINE_IRQ_BREADCRUMB, &engine->irq_posted); + waiters = intel_engine_wakeup(engine); + trace_intel_engine_notify(engine, waiters); } static void vlv_c0_read(struct drm_i915_private *dev_priv, @@ -1173,20 +1176,12 @@ static void gen6_pm_rps_work(struct work_struct *work) if (new_delay >= dev_priv->rps.max_freq_softlimit) adj = 0; - /* - * For better performance, jump directly - * to RPe if we're below it. - */ - if (new_delay < dev_priv->rps.efficient_freq - adj) { - new_delay = dev_priv->rps.efficient_freq; - adj = 0; - } } else if (client_boost || any_waiters(dev_priv)) { adj = 0; } else if (pm_iir & GEN6_PM_RP_DOWN_TIMEOUT) { if (dev_priv->rps.cur_freq > dev_priv->rps.efficient_freq) new_delay = dev_priv->rps.efficient_freq; - else + else if (dev_priv->rps.cur_freq > dev_priv->rps.min_freq_softlimit) new_delay = dev_priv->rps.min_freq_softlimit; adj = 0; } else if (pm_iir & GEN6_PM_RP_DOWN_THRESHOLD) { @@ -1209,7 +1204,10 @@ static void gen6_pm_rps_work(struct work_struct *work) new_delay += adj; new_delay = clamp_t(int, new_delay, min, max); - intel_set_rps(dev_priv, new_delay); + if (intel_set_rps(dev_priv, new_delay)) { + DRM_DEBUG_DRIVER("Failed to set new GPU frequency\n"); + dev_priv->rps.last_adj = 0; + } mutex_unlock(&dev_priv->rps.hw_lock); } @@ -1349,8 +1347,11 @@ gen8_cs_irq_handler(struct intel_engine_cs *engine, u32 iir, int test_shift) { if (iir & (GT_RENDER_USER_INTERRUPT << test_shift)) notify_ring(engine); - if (iir & (GT_CONTEXT_SWITCH_INTERRUPT << test_shift)) - tasklet_schedule(&engine->irq_tasklet); + + if (iir & (GT_CONTEXT_SWITCH_INTERRUPT << test_shift)) { + set_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted); + tasklet_hi_schedule(&engine->irq_tasklet); + } } static irqreturn_t gen8_gt_irq_ack(struct drm_i915_private *dev_priv, @@ -3090,19 +3091,9 @@ static u32 intel_hpd_enabled_irqs(struct drm_i915_private *dev_priv, return enabled_irqs; } -static void ibx_hpd_irq_setup(struct drm_i915_private *dev_priv) +static void ibx_hpd_detection_setup(struct drm_i915_private *dev_priv) { - u32 hotplug_irqs, hotplug, enabled_irqs; - - if (HAS_PCH_IBX(dev_priv)) { - hotplug_irqs = SDE_HOTPLUG_MASK; - enabled_irqs = intel_hpd_enabled_irqs(dev_priv, hpd_ibx); - } else { - hotplug_irqs = SDE_HOTPLUG_MASK_CPT; - enabled_irqs = intel_hpd_enabled_irqs(dev_priv, hpd_cpt); - } - - ibx_display_interrupt_update(dev_priv, hotplug_irqs, enabled_irqs); + u32 hotplug; /* * Enable digital hotplug on the PCH, and configure the DP short pulse @@ -3110,10 +3101,12 @@ static void ibx_hpd_irq_setup(struct drm_i915_private *dev_priv) * The pulse duration bits are reserved on LPT+. */ hotplug = I915_READ(PCH_PORT_HOTPLUG); - hotplug &= ~(PORTD_PULSE_DURATION_MASK|PORTC_PULSE_DURATION_MASK|PORTB_PULSE_DURATION_MASK); - hotplug |= PORTD_HOTPLUG_ENABLE | PORTD_PULSE_DURATION_2ms; - hotplug |= PORTC_HOTPLUG_ENABLE | PORTC_PULSE_DURATION_2ms; + hotplug &= ~(PORTB_PULSE_DURATION_MASK | + PORTC_PULSE_DURATION_MASK | + PORTD_PULSE_DURATION_MASK); hotplug |= PORTB_HOTPLUG_ENABLE | PORTB_PULSE_DURATION_2ms; + hotplug |= PORTC_HOTPLUG_ENABLE | PORTC_PULSE_DURATION_2ms; + hotplug |= PORTD_HOTPLUG_ENABLE | PORTD_PULSE_DURATION_2ms; /* * When CPU and PCH are on the same package, port A * HPD must be enabled in both north and south. @@ -3123,6 +3116,23 @@ static void ibx_hpd_irq_setup(struct drm_i915_private *dev_priv) I915_WRITE(PCH_PORT_HOTPLUG, hotplug); } +static void ibx_hpd_irq_setup(struct drm_i915_private *dev_priv) +{ + u32 hotplug_irqs, enabled_irqs; + + if (HAS_PCH_IBX(dev_priv)) { + hotplug_irqs = SDE_HOTPLUG_MASK; + enabled_irqs = intel_hpd_enabled_irqs(dev_priv, hpd_ibx); + } else { + hotplug_irqs = SDE_HOTPLUG_MASK_CPT; + enabled_irqs = intel_hpd_enabled_irqs(dev_priv, hpd_cpt); + } + + ibx_display_interrupt_update(dev_priv, hotplug_irqs, enabled_irqs); + + ibx_hpd_detection_setup(dev_priv); +} + static void spt_hpd_detection_setup(struct drm_i915_private *dev_priv) { u32 hotplug; @@ -3152,9 +3162,25 @@ static void spt_hpd_irq_setup(struct drm_i915_private *dev_priv) spt_hpd_detection_setup(dev_priv); } +static void ilk_hpd_detection_setup(struct drm_i915_private *dev_priv) +{ + u32 hotplug; + + /* + * Enable digital hotplug on the CPU, and configure the DP short pulse + * duration to 2ms (which is the minimum in the Display Port spec) + * The pulse duration bits are reserved on HSW+. + */ + hotplug = I915_READ(DIGITAL_PORT_HOTPLUG_CNTRL); + hotplug &= ~DIGITAL_PORTA_PULSE_DURATION_MASK; + hotplug |= DIGITAL_PORTA_HOTPLUG_ENABLE | + DIGITAL_PORTA_PULSE_DURATION_2ms; + I915_WRITE(DIGITAL_PORT_HOTPLUG_CNTRL, hotplug); +} + static void ilk_hpd_irq_setup(struct drm_i915_private *dev_priv) { - u32 hotplug_irqs, hotplug, enabled_irqs; + u32 hotplug_irqs, enabled_irqs; if (INTEL_GEN(dev_priv) >= 8) { hotplug_irqs = GEN8_PORT_DP_A_HOTPLUG; @@ -3173,15 +3199,7 @@ static void ilk_hpd_irq_setup(struct drm_i915_private *dev_priv) ilk_update_display_irq(dev_priv, hotplug_irqs, enabled_irqs); } - /* - * Enable digital hotplug on the CPU, and configure the DP short pulse - * duration to 2ms (which is the minimum in the Display Port spec) - * The pulse duration bits are reserved on HSW+. - */ - hotplug = I915_READ(DIGITAL_PORT_HOTPLUG_CNTRL); - hotplug &= ~DIGITAL_PORTA_PULSE_DURATION_MASK; - hotplug |= DIGITAL_PORTA_HOTPLUG_ENABLE | DIGITAL_PORTA_PULSE_DURATION_2ms; - I915_WRITE(DIGITAL_PORT_HOTPLUG_CNTRL, hotplug); + ilk_hpd_detection_setup(dev_priv); ibx_hpd_irq_setup(dev_priv); } @@ -3252,7 +3270,7 @@ static void ibx_irq_postinstall(struct drm_device *dev) if (HAS_PCH_IBX(dev_priv) || HAS_PCH_CPT(dev_priv) || HAS_PCH_LPT(dev_priv)) - ; /* TODO: Enable HPD detection on older PCH platforms too */ + ibx_hpd_detection_setup(dev_priv); else spt_hpd_detection_setup(dev_priv); } @@ -3329,6 +3347,8 @@ static int ironlake_irq_postinstall(struct drm_device *dev) gen5_gt_irq_postinstall(dev); + ilk_hpd_detection_setup(dev_priv); + ibx_irq_postinstall(dev); if (IS_IRONLAKE_M(dev_priv)) { @@ -3469,6 +3489,8 @@ static void gen8_de_irq_postinstall(struct drm_i915_private *dev_priv) if (IS_GEN9_LP(dev_priv)) bxt_hpd_detection_setup(dev_priv); + else if (IS_BROADWELL(dev_priv)) + ilk_hpd_detection_setup(dev_priv); } static int gen8_irq_postinstall(struct drm_device *dev) @@ -4250,6 +4272,18 @@ void intel_irq_init(struct drm_i915_private *dev_priv) if (!IS_GEN2(dev_priv)) dev->vblank_disable_immediate = true; + /* Most platforms treat the display irq block as an always-on + * power domain. vlv/chv can disable it at runtime and need + * special care to avoid writing any of the display block registers + * outside of the power domain. We defer setting up the display irqs + * in this case to the runtime pm. + */ + dev_priv->display_irqs_enabled = true; + if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) + dev_priv->display_irqs_enabled = false; + + dev_priv->hotplug.hpd_storm_threshold = HPD_STORM_DEFAULT_THRESHOLD; + dev->driver->get_vblank_timestamp = i915_get_vblank_timestamp; dev->driver->get_scanout_position = i915_get_crtc_scanoutpos; diff --git a/drivers/gpu/drm/i915/i915_params.c b/drivers/gpu/drm/i915/i915_params.c index 0e280fbd52f1..2e9645e6555a 100644 --- a/drivers/gpu/drm/i915/i915_params.c +++ b/drivers/gpu/drm/i915/i915_params.c @@ -145,7 +145,7 @@ MODULE_PARM_DESC(enable_psr, "Enable PSR " "(0=disabled, 1=enabled - link mode chosen per-platform, 2=force link-standby mode, 3=force link-off mode) " "Default: -1 (use per-chip default)"); -module_param_named_unsafe(alpha_support, i915.alpha_support, int, 0400); +module_param_named_unsafe(alpha_support, i915.alpha_support, bool, 0400); MODULE_PARM_DESC(alpha_support, "Enable alpha quality driver support for latest hardware. " "See also CONFIG_DRM_I915_ALPHA_SUPPORT."); @@ -205,9 +205,9 @@ module_param_named(verbose_state_checks, i915.verbose_state_checks, bool, 0600); MODULE_PARM_DESC(verbose_state_checks, "Enable verbose logs (ie. WARN_ON()) in case of unexpected hw state conditions."); -module_param_named_unsafe(nuclear_pageflip, i915.nuclear_pageflip, bool, 0600); +module_param_named_unsafe(nuclear_pageflip, i915.nuclear_pageflip, bool, 0400); MODULE_PARM_DESC(nuclear_pageflip, - "Force atomic modeset functionality; asynchronous mode is not yet supported. (default: false)."); + "Force enable atomic functionality on platforms that don't have full support yet."); /* WA to get away with the default setting in VBT for early platforms.Will be removed */ module_param_named_unsafe(edp_vswing, i915.edp_vswing, int, 0400); diff --git a/drivers/gpu/drm/i915/i915_params.h b/drivers/gpu/drm/i915/i915_params.h index 8e433de04679..55d47eea172e 100644 --- a/drivers/gpu/drm/i915/i915_params.h +++ b/drivers/gpu/drm/i915/i915_params.h @@ -27,46 +27,51 @@ #include <linux/cache.h> /* for __read_mostly */ +#define I915_PARAMS_FOR_EACH(func) \ + func(int, modeset); \ + func(int, panel_ignore_lid); \ + func(int, semaphores); \ + func(int, lvds_channel_mode); \ + func(int, panel_use_ssc); \ + func(int, vbt_sdvo_panel_type); \ + func(int, enable_rc6); \ + func(int, enable_dc); \ + func(int, enable_fbc); \ + func(int, enable_ppgtt); \ + func(int, enable_execlists); \ + func(int, enable_psr); \ + func(int, disable_power_well); \ + func(int, enable_ips); \ + func(int, invert_brightness); \ + func(int, enable_guc_loading); \ + func(int, enable_guc_submission); \ + func(int, guc_log_level); \ + func(int, use_mmio_flip); \ + func(int, mmio_debug); \ + func(int, edp_vswing); \ + func(unsigned int, inject_load_failure); \ + /* leave bools at the end to not create holes */ \ + func(bool, alpha_support); \ + func(bool, enable_cmd_parser); \ + func(bool, enable_hangcheck); \ + func(bool, fastboot); \ + func(bool, prefault_disable); \ + func(bool, load_detect_test); \ + func(bool, force_reset_modeset_test); \ + func(bool, reset); \ + func(bool, error_capture); \ + func(bool, disable_display); \ + func(bool, verbose_state_checks); \ + func(bool, nuclear_pageflip); \ + func(bool, enable_dp_mst); \ + func(bool, enable_dpcd_backlight); \ + func(bool, enable_gvt) + +#define MEMBER(T, member) T member struct i915_params { - int modeset; - int panel_ignore_lid; - int semaphores; - int lvds_channel_mode; - int panel_use_ssc; - int vbt_sdvo_panel_type; - int enable_rc6; - int enable_dc; - int enable_fbc; - int enable_ppgtt; - int enable_execlists; - int enable_psr; - unsigned int alpha_support; - int disable_power_well; - int enable_ips; - int invert_brightness; - int enable_guc_loading; - int enable_guc_submission; - int guc_log_level; - int use_mmio_flip; - int mmio_debug; - int edp_vswing; - unsigned int inject_load_failure; - /* leave bools at the end to not create holes */ - bool enable_cmd_parser; - bool enable_hangcheck; - bool fastboot; - bool prefault_disable; - bool load_detect_test; - bool force_reset_modeset_test; - bool reset; - bool error_capture; - bool disable_display; - bool verbose_state_checks; - bool nuclear_pageflip; - bool enable_dp_mst; - bool enable_dpcd_backlight; - bool enable_gvt; + I915_PARAMS_FOR_EACH(MEMBER); }; +#undef MEMBER extern struct i915_params i915 __read_mostly; diff --git a/drivers/gpu/drm/i915/i915_pci.c b/drivers/gpu/drm/i915/i915_pci.c index ecb487b5356f..732101ed57fb 100644 --- a/drivers/gpu/drm/i915/i915_pci.c +++ b/drivers/gpu/drm/i915/i915_pci.c @@ -27,6 +27,7 @@ #include <linux/vga_switcheroo.h> #include "i915_drv.h" +#include "i915_selftest.h" #define GEN_DEFAULT_PIPEOFFSETS \ .pipe_offsets = { PIPE_A_OFFSET, PIPE_B_OFFSET, \ @@ -403,6 +404,7 @@ static const struct intel_device_info intel_geminilake_info = { .platform = INTEL_GEMINILAKE, .is_alpha_support = 1, .ddb_size = 1024, + .color = { .degamma_lut_size = 0, .gamma_lut_size = 1024 } }; static const struct intel_device_info intel_kabylake_info = { @@ -472,10 +474,19 @@ static const struct pci_device_id pciidlist[] = { }; MODULE_DEVICE_TABLE(pci, pciidlist); +static void i915_pci_remove(struct pci_dev *pdev) +{ + struct drm_device *dev = pci_get_drvdata(pdev); + + i915_driver_unload(dev); + drm_dev_unref(dev); +} + static int i915_pci_probe(struct pci_dev *pdev, const struct pci_device_id *ent) { struct intel_device_info *intel_info = (struct intel_device_info *) ent->driver_data; + int err; if (IS_ALPHA_SUPPORT(intel_info) && !i915.alpha_support) { DRM_INFO("The driver support for your hardware in this kernel version is alpha quality\n" @@ -499,15 +510,17 @@ static int i915_pci_probe(struct pci_dev *pdev, const struct pci_device_id *ent) if (vga_switcheroo_client_probe_defer(pdev)) return -EPROBE_DEFER; - return i915_driver_load(pdev, ent); -} + err = i915_driver_load(pdev, ent); + if (err) + return err; -static void i915_pci_remove(struct pci_dev *pdev) -{ - struct drm_device *dev = pci_get_drvdata(pdev); + err = i915_live_selftests(pdev); + if (err) { + i915_pci_remove(pdev); + return err > 0 ? -ENOTTY : err; + } - i915_driver_unload(dev); - drm_dev_unref(dev); + return 0; } static struct pci_driver i915_pci_driver = { @@ -521,6 +534,11 @@ static struct pci_driver i915_pci_driver = { static int __init i915_init(void) { bool use_kms = true; + int err; + + err = i915_mock_selftests(); + if (err) + return err > 0 ? 0 : err; /* * Enable KMS by default, unless explicitly overriden by diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index 675323189f2c..bdce90084d43 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -48,6 +48,8 @@ static inline bool i915_mmio_reg_valid(i915_reg_t reg) return !i915_mmio_reg_equal(reg, INVALID_MMIO_REG); } +#define _PICK(__index, ...) (((const u32 []){ __VA_ARGS__ })[__index]) + #define _PIPE(pipe, a, b) ((a) + (pipe)*((b)-(a))) #define _MMIO_PIPE(pipe, a, b) _MMIO(_PIPE(pipe, a, b)) #define _PLANE(plane, a, b) _PIPE(plane, a, b) @@ -56,14 +58,11 @@ static inline bool i915_mmio_reg_valid(i915_reg_t reg) #define _MMIO_TRANS(tran, a, b) _MMIO(_TRANS(tran, a, b)) #define _PORT(port, a, b) ((a) + (port)*((b)-(a))) #define _MMIO_PORT(port, a, b) _MMIO(_PORT(port, a, b)) -#define _PIPE3(pipe, a, b, c) ((pipe) == PIPE_A ? (a) : \ - (pipe) == PIPE_B ? (b) : (c)) +#define _PIPE3(pipe, ...) _PICK(pipe, __VA_ARGS__) #define _MMIO_PIPE3(pipe, a, b, c) _MMIO(_PIPE3(pipe, a, b, c)) -#define _PORT3(port, a, b, c) ((port) == PORT_A ? (a) : \ - (port) == PORT_B ? (b) : (c)) +#define _PORT3(port, ...) _PICK(port, __VA_ARGS__) #define _MMIO_PORT3(pipe, a, b, c) _MMIO(_PORT3(pipe, a, b, c)) -#define _PHY3(phy, a, b, c) ((phy) == DPIO_PHY0 ? (a) : \ - (phy) == DPIO_PHY1 ? (b) : (c)) +#define _PHY3(phy, ...) _PICK(phy, __VA_ARGS__) #define _MMIO_PHY3(phy, a, b, c) _MMIO(_PHY3(phy, a, b, c)) #define _MASKED_FIELD(mask, value) ({ \ @@ -120,7 +119,7 @@ static inline bool i915_mmio_reg_valid(i915_reg_t reg) #define GCFGC 0xf0 /* 915+ only */ #define GC_LOW_FREQUENCY_ENABLE (1 << 7) #define GC_DISPLAY_CLOCK_190_200_MHZ (0 << 4) -#define GC_DISPLAY_CLOCK_333_MHZ (4 << 4) +#define GC_DISPLAY_CLOCK_333_320_MHZ (4 << 4) #define GC_DISPLAY_CLOCK_267_MHZ_PNV (0 << 4) #define GC_DISPLAY_CLOCK_333_MHZ_PNV (1 << 4) #define GC_DISPLAY_CLOCK_444_MHZ_PNV (2 << 4) @@ -1553,6 +1552,7 @@ enum skl_disp_power_wells { _MMIO(_BXT_PHY_CH(phy, ch, reg_ch0, reg_ch1)) #define BXT_P_CR_GT_DISP_PWRON _MMIO(0x138090) +#define MIPIO_RST_CTRL (1 << 2) #define _BXT_PHY_CTL_DDI_A 0x64C00 #define _BXT_PHY_CTL_DDI_B 0x64C10 @@ -3360,10 +3360,22 @@ enum { INTEL_LEGACY_64B_CONTEXT }; +enum { + FAULT_AND_HANG = 0, + FAULT_AND_HALT, /* Debug only */ + FAULT_AND_STREAM, + FAULT_AND_CONTINUE /* Unsupported */ +}; + +#define GEN8_CTX_VALID (1<<0) +#define GEN8_CTX_FORCE_PD_RESTORE (1<<1) +#define GEN8_CTX_FORCE_RESTORE (1<<2) +#define GEN8_CTX_L3LLC_COHERENT (1<<5) +#define GEN8_CTX_PRIVILEGE (1<<8) #define GEN8_CTX_ADDRESSING_MODE_SHIFT 3 -#define GEN8_CTX_ADDRESSING_MODE(dev_priv) (USES_FULL_48BIT_PPGTT(dev_priv) ?\ - INTEL_LEGACY_64B_CONTEXT : \ - INTEL_LEGACY_32B_CONTEXT) + +#define GEN8_CTX_ID_SHIFT 32 +#define GEN8_CTX_ID_WIDTH 21 #define CHV_CLK_CTL1 _MMIO(0x101100) #define VLV_CLK_CTL2 _MMIO(0x101104) @@ -5871,11 +5883,18 @@ enum { #define _PLANE_KEYMSK_2_A 0x70298 #define _PLANE_KEYMAX_1_A 0x701a0 #define _PLANE_KEYMAX_2_A 0x702a0 +#define _PLANE_COLOR_CTL_1_A 0x701CC /* GLK+ */ +#define _PLANE_COLOR_CTL_2_A 0x702CC /* GLK+ */ +#define _PLANE_COLOR_CTL_3_A 0x703CC /* GLK+ */ +#define PLANE_COLOR_PIPE_GAMMA_ENABLE (1 << 30) +#define PLANE_COLOR_PIPE_CSC_ENABLE (1 << 23) +#define PLANE_COLOR_PLANE_GAMMA_DISABLE (1 << 13) #define _PLANE_BUF_CFG_1_A 0x7027c #define _PLANE_BUF_CFG_2_A 0x7037c #define _PLANE_NV12_BUF_CFG_1_A 0x70278 #define _PLANE_NV12_BUF_CFG_2_A 0x70378 + #define _PLANE_CTL_1_B 0x71180 #define _PLANE_CTL_2_B 0x71280 #define _PLANE_CTL_3_B 0x71380 @@ -5970,7 +5989,17 @@ enum { #define PLANE_NV12_BUF_CFG(pipe, plane) \ _MMIO_PLANE(plane, _PLANE_NV12_BUF_CFG_1(pipe), _PLANE_NV12_BUF_CFG_2(pipe)) -/* SKL new cursor registers */ +#define _PLANE_COLOR_CTL_1_B 0x711CC +#define _PLANE_COLOR_CTL_2_B 0x712CC +#define _PLANE_COLOR_CTL_3_B 0x713CC +#define _PLANE_COLOR_CTL_1(pipe) \ + _PIPE(pipe, _PLANE_COLOR_CTL_1_A, _PLANE_COLOR_CTL_1_B) +#define _PLANE_COLOR_CTL_2(pipe) \ + _PIPE(pipe, _PLANE_COLOR_CTL_2_A, _PLANE_COLOR_CTL_2_B) +#define PLANE_COLOR_CTL(pipe, plane) \ + _MMIO_PLANE(plane, _PLANE_COLOR_CTL_1(pipe), _PLANE_COLOR_CTL_2(pipe)) + +#/* SKL new cursor registers */ #define _CUR_BUF_CFG_A 0x7017c #define _CUR_BUF_CFG_B 0x7117c #define CUR_BUF_CFG(pipe) _MMIO_PIPE(pipe, _CUR_BUF_CFG_A, _CUR_BUF_CFG_B) @@ -6450,6 +6479,11 @@ enum { #define CHICKEN_PAR2_1 _MMIO(0x42090) #define KVM_CONFIG_CHANGE_NOTIFICATION_SELECT (1 << 14) +#define CHICKEN_MISC_2 _MMIO(0x42084) +#define GLK_CL0_PWR_DOWN (1 << 10) +#define GLK_CL1_PWR_DOWN (1 << 11) +#define GLK_CL2_PWR_DOWN (1 << 12) + #define _CHICKEN_PIPESL_1_A 0x420b0 #define _CHICKEN_PIPESL_1_B 0x420b4 #define HSW_FBCQ_DIS (1 << 22) @@ -8151,6 +8185,7 @@ enum { #define PAL_PREC_10_12_BIT (0 << 31) #define PAL_PREC_SPLIT_MODE (1 << 31) #define PAL_PREC_AUTO_INCREMENT (1 << 15) +#define PAL_PREC_INDEX_VALUE_MASK (0x3ff << 0) #define _PAL_PREC_DATA_A 0x4A404 #define _PAL_PREC_DATA_B 0x4AC04 #define _PAL_PREC_DATA_C 0x4B404 @@ -8160,12 +8195,26 @@ enum { #define _PAL_PREC_EXT_GC_MAX_A 0x4A420 #define _PAL_PREC_EXT_GC_MAX_B 0x4AC20 #define _PAL_PREC_EXT_GC_MAX_C 0x4B420 +#define _PAL_PREC_EXT2_GC_MAX_A 0x4A430 +#define _PAL_PREC_EXT2_GC_MAX_B 0x4AC30 +#define _PAL_PREC_EXT2_GC_MAX_C 0x4B430 #define PREC_PAL_INDEX(pipe) _MMIO_PIPE(pipe, _PAL_PREC_INDEX_A, _PAL_PREC_INDEX_B) #define PREC_PAL_DATA(pipe) _MMIO_PIPE(pipe, _PAL_PREC_DATA_A, _PAL_PREC_DATA_B) #define PREC_PAL_GC_MAX(pipe, i) _MMIO(_PIPE(pipe, _PAL_PREC_GC_MAX_A, _PAL_PREC_GC_MAX_B) + (i) * 4) #define PREC_PAL_EXT_GC_MAX(pipe, i) _MMIO(_PIPE(pipe, _PAL_PREC_EXT_GC_MAX_A, _PAL_PREC_EXT_GC_MAX_B) + (i) * 4) +#define _PRE_CSC_GAMC_INDEX_A 0x4A484 +#define _PRE_CSC_GAMC_INDEX_B 0x4AC84 +#define _PRE_CSC_GAMC_INDEX_C 0x4B484 +#define PRE_CSC_GAMC_AUTO_INCREMENT (1 << 10) +#define _PRE_CSC_GAMC_DATA_A 0x4A488 +#define _PRE_CSC_GAMC_DATA_B 0x4AC88 +#define _PRE_CSC_GAMC_DATA_C 0x4B488 + +#define PRE_CSC_GAMC_INDEX(pipe) _MMIO_PIPE(pipe, _PRE_CSC_GAMC_INDEX_A, _PRE_CSC_GAMC_INDEX_B) +#define PRE_CSC_GAMC_DATA(pipe) _MMIO_PIPE(pipe, _PRE_CSC_GAMC_DATA_A, _PRE_CSC_GAMC_DATA_B) + /* pipe CSC & degamma/gamma LUTs on CHV */ #define _CGM_PIPE_A_CSC_COEFF01 (VLV_DISPLAY_BASE + 0x67900) #define _CGM_PIPE_A_CSC_COEFF23 (VLV_DISPLAY_BASE + 0x67904) @@ -8317,6 +8366,12 @@ enum { #define _BXT_MIPIC_PORT_CTRL 0x6B8C0 #define BXT_MIPI_PORT_CTRL(tc) _MMIO_MIPI(tc, _BXT_MIPIA_PORT_CTRL, _BXT_MIPIC_PORT_CTRL) +#define BXT_P_DSI_REGULATOR_CFG _MMIO(0x160020) +#define STAP_SELECT (1 << 0) + +#define BXT_P_DSI_REGULATOR_TX_CTRL _MMIO(0x160054) +#define HS_IO_CTRL_SELECT (1 << 0) + #define DPI_ENABLE (1 << 31) /* A + C */ #define MIPIA_MIPI4DPHY_DELAY_COUNT_SHIFT 27 #define MIPIA_MIPI4DPHY_DELAY_COUNT_MASK (0xf << 27) diff --git a/drivers/gpu/drm/i915/i915_selftest.h b/drivers/gpu/drm/i915/i915_selftest.h new file mode 100644 index 000000000000..9d7d86f1733d --- /dev/null +++ b/drivers/gpu/drm/i915/i915_selftest.h @@ -0,0 +1,106 @@ +/* + * Copyright © 2016 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#ifndef __I915_SELFTEST_H__ +#define __I915_SELFTEST_H__ + +struct pci_dev; +struct drm_i915_private; + +struct i915_selftest { + unsigned long timeout_jiffies; + unsigned int timeout_ms; + unsigned int random_seed; + int mock; + int live; +}; + +#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) +#include <linux/fault-inject.h> + +extern struct i915_selftest i915_selftest; + +int i915_mock_selftests(void); +int i915_live_selftests(struct pci_dev *pdev); + +/* We extract the function declarations from i915_mock_selftests.h and + * i915_live_selftests.h Add your unit test declarations there! + * + * Mock unit tests are run very early upon module load, before the driver + * is probed. All hardware interactions, as well as other subsystems, must + * be "mocked". + * + * Live unit tests are run after the driver is loaded - all hardware + * interactions are real. + */ +#define selftest(name, func) int func(void); +#include "selftests/i915_mock_selftests.h" +#undef selftest +#define selftest(name, func) int func(struct drm_i915_private *i915); +#include "selftests/i915_live_selftests.h" +#undef selftest + +struct i915_subtest { + int (*func)(void *data); + const char *name; +}; + +int __i915_subtests(const char *caller, + const struct i915_subtest *st, + unsigned int count, + void *data); +#define i915_subtests(T, data) \ + __i915_subtests(__func__, T, ARRAY_SIZE(T), data) + +#define SUBTEST(x) { x, #x } + +#define I915_SELFTEST_DECLARE(x) x +#define I915_SELFTEST_ONLY(x) unlikely(x) + +#else /* !IS_ENABLED(CONFIG_DRM_I915_SELFTEST) */ + +static inline int i915_mock_selftests(void) { return 0; } +static inline int i915_live_selftests(struct pci_dev *pdev) { return 0; } + +#define I915_SELFTEST_DECLARE(x) +#define I915_SELFTEST_ONLY(x) 0 + +#endif + +/* Using the i915_selftest_ prefix becomes a little unwieldy with the helpers. + * Instead we use the igt_ shorthand, in reference to the intel-gpu-tools + * suite of uabi test cases (which includes a test runner for our selftests). + */ + +#define IGT_TIMEOUT(name__) \ + unsigned long name__ = jiffies + i915_selftest.timeout_jiffies + +__printf(2, 3) +bool __igt_timeout(unsigned long timeout, const char *fmt, ...); + +#define igt_timeout(t, fmt, ...) \ + __igt_timeout((t), KERN_WARNING pr_fmt(fmt), ##__VA_ARGS__) + +#define igt_can_mi_store_dword_imm(D) (INTEL_GEN(D) > 2) + +#endif /* !__I915_SELFTEST_H__ */ diff --git a/drivers/gpu/drm/i915/i915_sysfs.c b/drivers/gpu/drm/i915/i915_sysfs.c index 376ac957cd1c..af0ac9f261fd 100644 --- a/drivers/gpu/drm/i915/i915_sysfs.c +++ b/drivers/gpu/drm/i915/i915_sysfs.c @@ -395,13 +395,13 @@ static ssize_t gt_max_freq_mhz_store(struct device *kdev, /* We still need *_set_rps to process the new max_delay and * update the interrupt limits and PMINTRMSK even though * frequency request may be unchanged. */ - intel_set_rps(dev_priv, val); + ret = intel_set_rps(dev_priv, val); mutex_unlock(&dev_priv->rps.hw_lock); intel_runtime_pm_put(dev_priv); - return count; + return ret ?: count; } static ssize_t gt_min_freq_mhz_show(struct device *kdev, struct device_attribute *attr, char *buf) @@ -448,14 +448,13 @@ static ssize_t gt_min_freq_mhz_store(struct device *kdev, /* We still need *_set_rps to process the new min_delay and * update the interrupt limits and PMINTRMSK even though * frequency request may be unchanged. */ - intel_set_rps(dev_priv, val); + ret = intel_set_rps(dev_priv, val); mutex_unlock(&dev_priv->rps.hw_lock); intel_runtime_pm_put(dev_priv); - return count; - + return ret ?: count; } static DEVICE_ATTR(gt_act_freq_mhz, S_IRUGO, gt_act_freq_mhz_show, NULL); @@ -523,33 +522,27 @@ static ssize_t error_state_read(struct file *filp, struct kobject *kobj, struct device *kdev = kobj_to_dev(kobj); struct drm_i915_private *dev_priv = kdev_minor_to_i915(kdev); - struct drm_device *dev = &dev_priv->drm; - struct i915_error_state_file_priv error_priv; struct drm_i915_error_state_buf error_str; - ssize_t ret_count = 0; - int ret; - - memset(&error_priv, 0, sizeof(error_priv)); + struct i915_gpu_state *gpu; + ssize_t ret; - ret = i915_error_state_buf_init(&error_str, to_i915(dev), count, off); + ret = i915_error_state_buf_init(&error_str, dev_priv, count, off); if (ret) return ret; - error_priv.i915 = dev_priv; - i915_error_state_get(dev, &error_priv); - - ret = i915_error_state_to_str(&error_str, &error_priv); + gpu = i915_first_error_state(dev_priv); + ret = i915_error_state_to_str(&error_str, gpu); if (ret) goto out; - ret_count = count < error_str.bytes ? count : error_str.bytes; + ret = count < error_str.bytes ? count : error_str.bytes; + memcpy(buf, error_str.buf, ret); - memcpy(buf, error_str.buf, ret_count); out: - i915_error_state_put(&error_priv); + i915_gpu_state_put(gpu); i915_error_state_buf_release(&error_str); - return ret ?: ret_count; + return ret; } static ssize_t error_state_write(struct file *file, struct kobject *kobj, @@ -560,7 +553,7 @@ static ssize_t error_state_write(struct file *file, struct kobject *kobj, struct drm_i915_private *dev_priv = kdev_minor_to_i915(kdev); DRM_DEBUG_DRIVER("Resetting error state\n"); - i915_destroy_error_state(dev_priv); + i915_reset_error_state(dev_priv); return count; } diff --git a/drivers/gpu/drm/i915/i915_trace.h b/drivers/gpu/drm/i915/i915_trace.h index 4461df5a94fe..2dbef3147a60 100644 --- a/drivers/gpu/drm/i915/i915_trace.h +++ b/drivers/gpu/drm/i915/i915_trace.h @@ -175,134 +175,6 @@ TRACE_EVENT(i915_vma_unbind, __entry->obj, __entry->offset, __entry->size, __entry->vm) ); -TRACE_EVENT(i915_va_alloc, - TP_PROTO(struct i915_vma *vma), - TP_ARGS(vma), - - TP_STRUCT__entry( - __field(struct i915_address_space *, vm) - __field(u64, start) - __field(u64, end) - ), - - TP_fast_assign( - __entry->vm = vma->vm; - __entry->start = vma->node.start; - __entry->end = vma->node.start + vma->node.size - 1; - ), - - TP_printk("vm=%p (%c), 0x%llx-0x%llx", - __entry->vm, i915_is_ggtt(__entry->vm) ? 'G' : 'P', __entry->start, __entry->end) -); - -DECLARE_EVENT_CLASS(i915_px_entry, - TP_PROTO(struct i915_address_space *vm, u32 px, u64 start, u64 px_shift), - TP_ARGS(vm, px, start, px_shift), - - TP_STRUCT__entry( - __field(struct i915_address_space *, vm) - __field(u32, px) - __field(u64, start) - __field(u64, end) - ), - - TP_fast_assign( - __entry->vm = vm; - __entry->px = px; - __entry->start = start; - __entry->end = ((start + (1ULL << px_shift)) & ~((1ULL << px_shift)-1)) - 1; - ), - - TP_printk("vm=%p, pde=%d (0x%llx-0x%llx)", - __entry->vm, __entry->px, __entry->start, __entry->end) -); - -DEFINE_EVENT(i915_px_entry, i915_page_table_entry_alloc, - TP_PROTO(struct i915_address_space *vm, u32 pde, u64 start, u64 pde_shift), - TP_ARGS(vm, pde, start, pde_shift) -); - -DEFINE_EVENT_PRINT(i915_px_entry, i915_page_directory_entry_alloc, - TP_PROTO(struct i915_address_space *vm, u32 pdpe, u64 start, u64 pdpe_shift), - TP_ARGS(vm, pdpe, start, pdpe_shift), - - TP_printk("vm=%p, pdpe=%d (0x%llx-0x%llx)", - __entry->vm, __entry->px, __entry->start, __entry->end) -); - -DEFINE_EVENT_PRINT(i915_px_entry, i915_page_directory_pointer_entry_alloc, - TP_PROTO(struct i915_address_space *vm, u32 pml4e, u64 start, u64 pml4e_shift), - TP_ARGS(vm, pml4e, start, pml4e_shift), - - TP_printk("vm=%p, pml4e=%d (0x%llx-0x%llx)", - __entry->vm, __entry->px, __entry->start, __entry->end) -); - -/* Avoid extra math because we only support two sizes. The format is defined by - * bitmap_scnprintf. Each 32 bits is 8 HEX digits followed by comma */ -#define TRACE_PT_SIZE(bits) \ - ((((bits) == 1024) ? 288 : 144) + 1) - -DECLARE_EVENT_CLASS(i915_page_table_entry_update, - TP_PROTO(struct i915_address_space *vm, u32 pde, - struct i915_page_table *pt, u32 first, u32 count, u32 bits), - TP_ARGS(vm, pde, pt, first, count, bits), - - TP_STRUCT__entry( - __field(struct i915_address_space *, vm) - __field(u32, pde) - __field(u32, first) - __field(u32, last) - __dynamic_array(char, cur_ptes, TRACE_PT_SIZE(bits)) - ), - - TP_fast_assign( - __entry->vm = vm; - __entry->pde = pde; - __entry->first = first; - __entry->last = first + count - 1; - scnprintf(__get_str(cur_ptes), - TRACE_PT_SIZE(bits), - "%*pb", - bits, - pt->used_ptes); - ), - - TP_printk("vm=%p, pde=%d, updating %u:%u\t%s", - __entry->vm, __entry->pde, __entry->last, __entry->first, - __get_str(cur_ptes)) -); - -DEFINE_EVENT(i915_page_table_entry_update, i915_page_table_entry_map, - TP_PROTO(struct i915_address_space *vm, u32 pde, - struct i915_page_table *pt, u32 first, u32 count, u32 bits), - TP_ARGS(vm, pde, pt, first, count, bits) -); - -TRACE_EVENT(i915_gem_object_change_domain, - TP_PROTO(struct drm_i915_gem_object *obj, u32 old_read, u32 old_write), - TP_ARGS(obj, old_read, old_write), - - TP_STRUCT__entry( - __field(struct drm_i915_gem_object *, obj) - __field(u32, read_domains) - __field(u32, write_domain) - ), - - TP_fast_assign( - __entry->obj = obj; - __entry->read_domains = obj->base.read_domains | (old_read << 16); - __entry->write_domain = obj->base.write_domain | (old_write << 16); - ), - - TP_printk("obj=%p, read=%02x=>%02x, write=%02x=>%02x", - __entry->obj, - __entry->read_domains >> 16, - __entry->read_domains & 0xffff, - __entry->write_domain >> 16, - __entry->write_domain & 0xffff) -); - TRACE_EVENT(i915_gem_object_pwrite, TP_PROTO(struct drm_i915_gem_object *obj, u32 offset, u32 len), TP_ARGS(obj, offset, len), @@ -503,13 +375,14 @@ TRACE_EVENT(i915_gem_ring_sync_to, __entry->seqno) ); -TRACE_EVENT(i915_gem_ring_dispatch, +TRACE_EVENT(i915_gem_request_queue, TP_PROTO(struct drm_i915_gem_request *req, u32 flags), TP_ARGS(req, flags), TP_STRUCT__entry( __field(u32, dev) __field(u32, ring) + __field(u32, ctx) __field(u32, seqno) __field(u32, flags) ), @@ -517,13 +390,14 @@ TRACE_EVENT(i915_gem_ring_dispatch, TP_fast_assign( __entry->dev = req->i915->drm.primary->index; __entry->ring = req->engine->id; - __entry->seqno = req->global_seqno; + __entry->ctx = req->ctx->hw_id; + __entry->seqno = req->fence.seqno; __entry->flags = flags; - dma_fence_enable_sw_signaling(&req->fence); ), - TP_printk("dev=%u, ring=%u, seqno=%u, flags=%x", - __entry->dev, __entry->ring, __entry->seqno, __entry->flags) + TP_printk("dev=%u, ring=%u, ctx=%u, seqno=%u, flags=0x%x", + __entry->dev, __entry->ring, __entry->ctx, __entry->seqno, + __entry->flags) ); TRACE_EVENT(i915_gem_ring_flush, @@ -555,18 +429,23 @@ DECLARE_EVENT_CLASS(i915_gem_request, TP_STRUCT__entry( __field(u32, dev) + __field(u32, ctx) __field(u32, ring) __field(u32, seqno) + __field(u32, global) ), TP_fast_assign( __entry->dev = req->i915->drm.primary->index; + __entry->ctx = req->ctx->hw_id; __entry->ring = req->engine->id; - __entry->seqno = req->global_seqno; + __entry->seqno = req->fence.seqno; + __entry->global = req->global_seqno; ), - TP_printk("dev=%u, ring=%u, seqno=%u", - __entry->dev, __entry->ring, __entry->seqno) + TP_printk("dev=%u, ring=%u, ctx=%u, seqno=%u, global=%u", + __entry->dev, __entry->ring, __entry->ctx, __entry->seqno, + __entry->global) ); DEFINE_EVENT(i915_gem_request, i915_gem_request_add, @@ -574,24 +453,100 @@ DEFINE_EVENT(i915_gem_request, i915_gem_request_add, TP_ARGS(req) ); -TRACE_EVENT(i915_gem_request_notify, - TP_PROTO(struct intel_engine_cs *engine), - TP_ARGS(engine), +#if defined(CONFIG_DRM_I915_LOW_LEVEL_TRACEPOINTS) +DEFINE_EVENT(i915_gem_request, i915_gem_request_submit, + TP_PROTO(struct drm_i915_gem_request *req), + TP_ARGS(req) +); + +DEFINE_EVENT(i915_gem_request, i915_gem_request_execute, + TP_PROTO(struct drm_i915_gem_request *req), + TP_ARGS(req) +); + +DECLARE_EVENT_CLASS(i915_gem_request_hw, + TP_PROTO(struct drm_i915_gem_request *req, + unsigned int port), + TP_ARGS(req, port), + + TP_STRUCT__entry( + __field(u32, dev) + __field(u32, ring) + __field(u32, seqno) + __field(u32, global_seqno) + __field(u32, ctx) + __field(u32, port) + ), + + TP_fast_assign( + __entry->dev = req->i915->drm.primary->index; + __entry->ring = req->engine->id; + __entry->ctx = req->ctx->hw_id; + __entry->seqno = req->fence.seqno; + __entry->global_seqno = req->global_seqno; + __entry->port = port; + ), + + TP_printk("dev=%u, ring=%u, ctx=%u, seqno=%u, global=%u, port=%u", + __entry->dev, __entry->ring, __entry->ctx, + __entry->seqno, __entry->global_seqno, + __entry->port) +); + +DEFINE_EVENT(i915_gem_request_hw, i915_gem_request_in, + TP_PROTO(struct drm_i915_gem_request *req, unsigned int port), + TP_ARGS(req, port) +); + +DEFINE_EVENT(i915_gem_request, i915_gem_request_out, + TP_PROTO(struct drm_i915_gem_request *req), + TP_ARGS(req) +); +#else +#if !defined(TRACE_HEADER_MULTI_READ) +static inline void +trace_i915_gem_request_submit(struct drm_i915_gem_request *req) +{ +} + +static inline void +trace_i915_gem_request_execute(struct drm_i915_gem_request *req) +{ +} + +static inline void +trace_i915_gem_request_in(struct drm_i915_gem_request *req, unsigned int port) +{ +} + +static inline void +trace_i915_gem_request_out(struct drm_i915_gem_request *req) +{ +} +#endif +#endif + +TRACE_EVENT(intel_engine_notify, + TP_PROTO(struct intel_engine_cs *engine, bool waiters), + TP_ARGS(engine, waiters), TP_STRUCT__entry( __field(u32, dev) __field(u32, ring) __field(u32, seqno) + __field(bool, waiters) ), TP_fast_assign( __entry->dev = engine->i915->drm.primary->index; __entry->ring = engine->id; __entry->seqno = intel_engine_get_seqno(engine); + __entry->waiters = waiters; ), - TP_printk("dev=%u, ring=%u, seqno=%u", - __entry->dev, __entry->ring, __entry->seqno) + TP_printk("dev=%u, ring=%u, seqno=%u, waiters=%u", + __entry->dev, __entry->ring, __entry->seqno, + __entry->waiters) ); DEFINE_EVENT(i915_gem_request, i915_gem_request_retire, @@ -599,20 +554,17 @@ DEFINE_EVENT(i915_gem_request, i915_gem_request_retire, TP_ARGS(req) ); -DEFINE_EVENT(i915_gem_request, i915_gem_request_complete, - TP_PROTO(struct drm_i915_gem_request *req), - TP_ARGS(req) -); - TRACE_EVENT(i915_gem_request_wait_begin, - TP_PROTO(struct drm_i915_gem_request *req), - TP_ARGS(req), + TP_PROTO(struct drm_i915_gem_request *req, unsigned int flags), + TP_ARGS(req, flags), TP_STRUCT__entry( __field(u32, dev) __field(u32, ring) + __field(u32, ctx) __field(u32, seqno) - __field(bool, blocking) + __field(u32, global) + __field(unsigned int, flags) ), /* NB: the blocking information is racy since mutex_is_locked @@ -624,14 +576,16 @@ TRACE_EVENT(i915_gem_request_wait_begin, TP_fast_assign( __entry->dev = req->i915->drm.primary->index; __entry->ring = req->engine->id; - __entry->seqno = req->global_seqno; - __entry->blocking = - mutex_is_locked(&req->i915->drm.struct_mutex); + __entry->ctx = req->ctx->hw_id; + __entry->seqno = req->fence.seqno; + __entry->global = req->global_seqno; + __entry->flags = flags; ), - TP_printk("dev=%u, ring=%u, seqno=%u, blocking=%s", - __entry->dev, __entry->ring, - __entry->seqno, __entry->blocking ? "yes (NB)" : "no") + TP_printk("dev=%u, ring=%u, ctx=%u, seqno=%u, global=%u, blocking=%u, flags=0x%x", + __entry->dev, __entry->ring, __entry->ctx, __entry->seqno, + __entry->global, !!(__entry->flags & I915_WAIT_LOCKED), + __entry->flags) ); DEFINE_EVENT(i915_gem_request, i915_gem_request_wait_end, @@ -769,17 +723,19 @@ DECLARE_EVENT_CLASS(i915_context, TP_STRUCT__entry( __field(u32, dev) __field(struct i915_gem_context *, ctx) + __field(u32, hw_id) __field(struct i915_address_space *, vm) ), TP_fast_assign( + __entry->dev = ctx->i915->drm.primary->index; __entry->ctx = ctx; + __entry->hw_id = ctx->hw_id; __entry->vm = ctx->ppgtt ? &ctx->ppgtt->base : NULL; - __entry->dev = ctx->i915->drm.primary->index; ), - TP_printk("dev=%u, ctx=%p, ctx_vm=%p", - __entry->dev, __entry->ctx, __entry->vm) + TP_printk("dev=%u, ctx=%p, ctx_vm=%p, hw_id=%u", + __entry->dev, __entry->ctx, __entry->vm, __entry->hw_id) ) DEFINE_EVENT(i915_context, i915_context_create, diff --git a/drivers/gpu/drm/i915/i915_utils.h b/drivers/gpu/drm/i915/i915_utils.h index 34020873e1f6..b8ba0f2f92af 100644 --- a/drivers/gpu/drm/i915/i915_utils.h +++ b/drivers/gpu/drm/i915/i915_utils.h @@ -25,6 +25,17 @@ #ifndef __I915_UTILS_H #define __I915_UTILS_H +#if GCC_VERSION >= 70000 +#define add_overflows(A, B) \ + __builtin_add_overflow_p((A), (B), (typeof((A) + (B)))0) +#else +#define add_overflows(A, B) ({ \ + typeof(A) a = (A); \ + typeof(B) b = (B); \ + a + b < a; \ +}) +#endif + #define range_overflows(start, size, max) ({ \ typeof(start) start__ = (start); \ typeof(size) size__ = (size); \ diff --git a/drivers/gpu/drm/i915/i915_vgpu.c b/drivers/gpu/drm/i915/i915_vgpu.c index d0abfd08a01c..14014068dfcf 100644 --- a/drivers/gpu/drm/i915/i915_vgpu.c +++ b/drivers/gpu/drm/i915/i915_vgpu.c @@ -179,7 +179,7 @@ static int vgt_balloon_space(struct i915_ggtt *ggtt, int intel_vgt_balloon(struct drm_i915_private *dev_priv) { struct i915_ggtt *ggtt = &dev_priv->ggtt; - unsigned long ggtt_end = ggtt->base.start + ggtt->base.total; + unsigned long ggtt_end = ggtt->base.total; unsigned long mappable_base, mappable_size, mappable_end; unsigned long unmappable_base, unmappable_size, unmappable_end; @@ -202,8 +202,7 @@ int intel_vgt_balloon(struct drm_i915_private *dev_priv) DRM_INFO("Unmappable graphic memory: base 0x%lx size %ldKiB\n", unmappable_base, unmappable_size / 1024); - if (mappable_base < ggtt->base.start || - mappable_end > ggtt->mappable_end || + if (mappable_end > ggtt->mappable_end || unmappable_base < ggtt->mappable_end || unmappable_end > ggtt_end) { DRM_ERROR("Invalid ballooning configuration!\n"); @@ -231,9 +230,9 @@ int intel_vgt_balloon(struct drm_i915_private *dev_priv) } /* Mappable graphic memory ballooning */ - if (mappable_base > ggtt->base.start) { + if (mappable_base) { ret = vgt_balloon_space(ggtt, &bl_info.space[0], - ggtt->base.start, mappable_base); + 0, mappable_base); if (ret) goto err; diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c index 155906e84812..6e9eade304b8 100644 --- a/drivers/gpu/drm/i915/i915_vma.c +++ b/drivers/gpu/drm/i915/i915_vma.c @@ -78,6 +78,9 @@ vma_create(struct drm_i915_gem_object *obj, struct rb_node *rb, **p; int i; + /* The aliasing_ppgtt should never be used directly! */ + GEM_BUG_ON(vm == &vm->i915->mm.aliasing_ppgtt->base); + vma = kmem_cache_zalloc(vm->i915->vmas, GFP_KERNEL); if (vma == NULL) return ERR_PTR(-ENOMEM); @@ -238,7 +241,15 @@ int i915_vma_bind(struct i915_vma *vma, enum i915_cache_level cache_level, u32 vma_flags; int ret; - if (WARN_ON(flags == 0)) + GEM_BUG_ON(!drm_mm_node_allocated(&vma->node)); + GEM_BUG_ON(vma->size > vma->node.size); + + if (GEM_WARN_ON(range_overflows(vma->node.start, + vma->node.size, + vma->vm->total))) + return -ENODEV; + + if (GEM_WARN_ON(!flags)) return -EINVAL; bind_flags = 0; @@ -255,20 +266,6 @@ int i915_vma_bind(struct i915_vma *vma, enum i915_cache_level cache_level, if (bind_flags == 0) return 0; - if (GEM_WARN_ON(range_overflows(vma->node.start, - vma->node.size, - vma->vm->total))) - return -ENODEV; - - if (vma_flags == 0 && vma->vm->allocate_va_range) { - trace_i915_va_alloc(vma); - ret = vma->vm->allocate_va_range(vma->vm, - vma->node.start, - vma->node.size); - if (ret) - return ret; - } - trace_i915_vma_bind(vma, bind_flags); ret = vma->vm->bind_vma(vma, cache_level, bind_flags); if (ret) @@ -324,8 +321,8 @@ void i915_vma_unpin_and_release(struct i915_vma **p_vma) __i915_gem_object_release_unless_active(obj); } -bool -i915_vma_misplaced(struct i915_vma *vma, u64 size, u64 alignment, u64 flags) +bool i915_vma_misplaced(const struct i915_vma *vma, + u64 size, u64 alignment, u64 flags) { if (!drm_mm_node_allocated(&vma->node)) return false; @@ -687,3 +684,6 @@ destroy: return 0; } +#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) +#include "selftests/i915_vma.c" +#endif diff --git a/drivers/gpu/drm/i915/i915_vma.h b/drivers/gpu/drm/i915/i915_vma.h index e39d922cfb6f..2e03f81dddbe 100644 --- a/drivers/gpu/drm/i915/i915_vma.h +++ b/drivers/gpu/drm/i915/i915_vma.h @@ -228,8 +228,8 @@ i915_vma_compare(struct i915_vma *vma, int i915_vma_bind(struct i915_vma *vma, enum i915_cache_level cache_level, u32 flags); bool i915_gem_valid_gtt_space(struct i915_vma *vma, unsigned long cache_level); -bool -i915_vma_misplaced(struct i915_vma *vma, u64 size, u64 alignment, u64 flags); +bool i915_vma_misplaced(const struct i915_vma *vma, + u64 size, u64 alignment, u64 flags); void __i915_vma_set_map_and_fenceable(struct i915_vma *vma); int __must_check i915_vma_unbind(struct i915_vma *vma); void i915_vma_close(struct i915_vma *vma); diff --git a/drivers/gpu/drm/i915/intel_atomic.c b/drivers/gpu/drm/i915/intel_atomic.c index aa9160e7f1d8..e65818e5d2ab 100644 --- a/drivers/gpu/drm/i915/intel_atomic.c +++ b/drivers/gpu/drm/i915/intel_atomic.c @@ -121,7 +121,7 @@ intel_crtc_destroy_state(struct drm_crtc *crtc, /** * intel_atomic_setup_scalers() - setup scalers for crtc per staged requests - * @dev: DRM device + * @dev_priv: i915 device * @crtc: intel crtc * @crtc_state: incoming crtc_state to validate and setup scalers * @@ -136,9 +136,9 @@ intel_crtc_destroy_state(struct drm_crtc *crtc, * 0 - scalers were setup succesfully * error code - otherwise */ -int intel_atomic_setup_scalers(struct drm_device *dev, - struct intel_crtc *intel_crtc, - struct intel_crtc_state *crtc_state) +int intel_atomic_setup_scalers(struct drm_i915_private *dev_priv, + struct intel_crtc *intel_crtc, + struct intel_crtc_state *crtc_state) { struct drm_plane *plane = NULL; struct intel_plane *intel_plane; @@ -199,7 +199,7 @@ int intel_atomic_setup_scalers(struct drm_device *dev, */ if (!plane) { struct drm_plane_state *state; - plane = drm_plane_from_index(dev, i); + plane = drm_plane_from_index(&dev_priv->drm, i); state = drm_atomic_get_plane_state(drm_state, plane); if (IS_ERR(state)) { DRM_DEBUG_KMS("Failed to add [PLANE:%d] to drm_state\n", @@ -247,7 +247,9 @@ int intel_atomic_setup_scalers(struct drm_device *dev, } /* set scaler mode */ - if (num_scalers_need == 1 && intel_crtc->pipe != PIPE_C) { + if (IS_GEMINILAKE(dev_priv)) { + scaler_state->scalers[*scaler_id].mode = 0; + } else if (num_scalers_need == 1 && intel_crtc->pipe != PIPE_C) { /* * when only 1 scaler is in use on either pipe A or B, * scaler 0 operates in high quality (HQ) mode. diff --git a/drivers/gpu/drm/i915/intel_audio.c b/drivers/gpu/drm/i915/intel_audio.c index 16c202781db0..1ab401faed34 100644 --- a/drivers/gpu/drm/i915/intel_audio.c +++ b/drivers/gpu/drm/i915/intel_audio.c @@ -702,7 +702,7 @@ static void i915_audio_component_codec_wake_override(struct device *kdev, struct drm_i915_private *dev_priv = kdev_to_i915(kdev); u32 tmp; - if (!IS_SKYLAKE(dev_priv) && !IS_KABYLAKE(dev_priv)) + if (!IS_GEN9_BC(dev_priv)) return; i915_audio_component_get_power(kdev); @@ -734,7 +734,7 @@ static int i915_audio_component_get_cdclk_freq(struct device *kdev) if (WARN_ON_ONCE(!HAS_DDI(dev_priv))) return -ENODEV; - return dev_priv->cdclk_freq; + return dev_priv->cdclk.hw.cdclk; } /* diff --git a/drivers/gpu/drm/i915/intel_breadcrumbs.c b/drivers/gpu/drm/i915/intel_breadcrumbs.c index fcfa423d08bd..027c93e34c97 100644 --- a/drivers/gpu/drm/i915/intel_breadcrumbs.c +++ b/drivers/gpu/drm/i915/intel_breadcrumbs.c @@ -26,6 +26,11 @@ #include "i915_drv.h" +static unsigned long wait_timeout(void) +{ + return round_jiffies_up(jiffies + DRM_I915_HANGCHECK_JIFFIES); +} + static void intel_breadcrumbs_hangcheck(unsigned long data) { struct intel_engine_cs *engine = (struct intel_engine_cs *)data; @@ -34,8 +39,18 @@ static void intel_breadcrumbs_hangcheck(unsigned long data) if (!b->irq_enabled) return; - if (time_before(jiffies, b->timeout)) { - mod_timer(&b->hangcheck, b->timeout); + if (b->hangcheck_interrupts != atomic_read(&engine->irq_count)) { + b->hangcheck_interrupts = atomic_read(&engine->irq_count); + mod_timer(&b->hangcheck, wait_timeout()); + return; + } + + /* If the waiter was currently running, assume it hasn't had a chance + * to process the pending interrupt (e.g, low priority task on a loaded + * system) and wait until it sleeps before declaring a missed interrupt. + */ + if (!intel_engine_wakeup(engine)) { + mod_timer(&b->hangcheck, wait_timeout()); return; } @@ -55,11 +70,6 @@ static void intel_breadcrumbs_hangcheck(unsigned long data) i915_queue_hangcheck(engine->i915); } -static unsigned long wait_timeout(void) -{ - return round_jiffies_up(jiffies + DRM_I915_HANGCHECK_JIFFIES); -} - static void intel_breadcrumbs_fake_irq(unsigned long data) { struct intel_engine_cs *engine = (struct intel_engine_cs *)data; @@ -81,7 +91,7 @@ static void irq_enable(struct intel_engine_cs *engine) * we still need to force the barrier before reading the seqno, * just in case. */ - engine->breadcrumbs.irq_posted = true; + set_bit(ENGINE_IRQ_BREADCRUMB, &engine->irq_posted); /* Caller disables interrupts */ spin_lock(&engine->i915->irq_lock); @@ -95,8 +105,23 @@ static void irq_disable(struct intel_engine_cs *engine) spin_lock(&engine->i915->irq_lock); engine->irq_disable(engine); spin_unlock(&engine->i915->irq_lock); +} + +static bool use_fake_irq(const struct intel_breadcrumbs *b) +{ + const struct intel_engine_cs *engine = + container_of(b, struct intel_engine_cs, breadcrumbs); - engine->breadcrumbs.irq_posted = false; + if (!test_bit(engine->id, &engine->i915->gpu_error.missed_irq_rings)) + return false; + + /* Only start with the heavy weight fake irq timer if we have not + * seen any interrupts since enabling it the first time. If the + * interrupts are still arriving, it means we made a mistake in our + * engine->seqno_barrier(), a timing error that should be transient + * and unlikely to reoccur. + */ + return atomic_read(&engine->irq_count) == b->hangcheck_interrupts; } static void __intel_breadcrumbs_enable_irq(struct intel_breadcrumbs *b) @@ -109,6 +134,18 @@ static void __intel_breadcrumbs_enable_irq(struct intel_breadcrumbs *b) if (b->rpm_wakelock) return; + if (I915_SELFTEST_ONLY(b->mock)) { + /* For our mock objects we want to avoid interaction + * with the real hardware (which is not set up). So + * we simply pretend we have enabled the powerwell + * and the irq, and leave it up to the mock + * implementation to call intel_engine_wakeup() + * itself when it wants to simulate a user interrupt, + */ + b->rpm_wakelock = true; + return; + } + /* Since we are waiting on a request, the GPU should be busy * and should have its own rpm reference. For completeness, * record an rpm reference for ourselves to cover the @@ -124,13 +161,12 @@ static void __intel_breadcrumbs_enable_irq(struct intel_breadcrumbs *b) b->irq_enabled = true; } - if (!b->irq_enabled || - test_bit(engine->id, &i915->gpu_error.missed_irq_rings)) { + if (!b->irq_enabled || use_fake_irq(b)) { mod_timer(&b->fake_irq, jiffies + 1); + i915_queue_hangcheck(i915); } else { /* Ensure we never sleep indefinitely */ - GEM_BUG_ON(!time_after(b->timeout, jiffies)); - mod_timer(&b->hangcheck, b->timeout); + mod_timer(&b->hangcheck, wait_timeout()); } } @@ -143,6 +179,11 @@ static void __intel_breadcrumbs_disable_irq(struct intel_breadcrumbs *b) if (!b->rpm_wakelock) return; + if (I915_SELFTEST_ONLY(b->mock)) { + b->rpm_wakelock = false; + return; + } + if (b->irq_enabled) { irq_disable(engine); b->irq_enabled = false; @@ -242,7 +283,6 @@ static bool __intel_engine_add_wait(struct intel_engine_cs *engine, GEM_BUG_ON(!next && !first); if (next && next != &wait->node) { GEM_BUG_ON(first); - b->timeout = wait_timeout(); b->first_wait = to_wait(next); rcu_assign_pointer(b->irq_seqno_bh, b->first_wait->tsk); /* As there is a delay between reading the current @@ -256,7 +296,8 @@ static bool __intel_engine_add_wait(struct intel_engine_cs *engine, * in case the seqno passed. */ __intel_breadcrumbs_enable_irq(b); - if (READ_ONCE(b->irq_posted)) + if (test_bit(ENGINE_IRQ_BREADCRUMB, + &engine->irq_posted)) wake_up_process(to_wait(next)->tsk); } @@ -269,7 +310,6 @@ static bool __intel_engine_add_wait(struct intel_engine_cs *engine, if (first) { GEM_BUG_ON(rb_first(&b->waiters) != &wait->node); - b->timeout = wait_timeout(); b->first_wait = wait; rcu_assign_pointer(b->irq_seqno_bh, wait->tsk); /* After assigning ourselves as the new bottom-half, we must @@ -316,22 +356,15 @@ static inline int wakeup_priority(struct intel_breadcrumbs *b, return tsk->prio; } -void intel_engine_remove_wait(struct intel_engine_cs *engine, - struct intel_wait *wait) +static void __intel_engine_remove_wait(struct intel_engine_cs *engine, + struct intel_wait *wait) { struct intel_breadcrumbs *b = &engine->breadcrumbs; - /* Quick check to see if this waiter was already decoupled from - * the tree by the bottom-half to avoid contention on the spinlock - * by the herd. - */ - if (RB_EMPTY_NODE(&wait->node)) - return; - - spin_lock_irq(&b->lock); + assert_spin_locked(&b->lock); if (RB_EMPTY_NODE(&wait->node)) - goto out_unlock; + goto out; if (b->first_wait == wait) { const int priority = wakeup_priority(b, wait->tsk); @@ -379,7 +412,6 @@ void intel_engine_remove_wait(struct intel_engine_cs *engine, * the interrupt, or if we have to handle an * exception rather than a seqno completion. */ - b->timeout = wait_timeout(); b->first_wait = to_wait(next); rcu_assign_pointer(b->irq_seqno_bh, b->first_wait->tsk); if (b->first_wait->seqno != wait->seqno) @@ -397,15 +429,36 @@ void intel_engine_remove_wait(struct intel_engine_cs *engine, GEM_BUG_ON(RB_EMPTY_NODE(&wait->node)); rb_erase(&wait->node, &b->waiters); -out_unlock: +out: GEM_BUG_ON(b->first_wait == wait); GEM_BUG_ON(rb_first(&b->waiters) != (b->first_wait ? &b->first_wait->node : NULL)); GEM_BUG_ON(!rcu_access_pointer(b->irq_seqno_bh) ^ RB_EMPTY_ROOT(&b->waiters)); +} + +void intel_engine_remove_wait(struct intel_engine_cs *engine, + struct intel_wait *wait) +{ + struct intel_breadcrumbs *b = &engine->breadcrumbs; + + /* Quick check to see if this waiter was already decoupled from + * the tree by the bottom-half to avoid contention on the spinlock + * by the herd. + */ + if (RB_EMPTY_NODE(&wait->node)) + return; + + spin_lock_irq(&b->lock); + __intel_engine_remove_wait(engine, wait); spin_unlock_irq(&b->lock); } -static bool signal_complete(struct drm_i915_gem_request *request) +static bool signal_valid(const struct drm_i915_gem_request *request) +{ + return intel_wait_check_request(&request->signaling.wait, request); +} + +static bool signal_complete(const struct drm_i915_gem_request *request) { if (!request) return false; @@ -414,7 +467,7 @@ static bool signal_complete(struct drm_i915_gem_request *request) * signalled that this wait is already completed. */ if (intel_wait_complete(&request->signaling.wait)) - return true; + return signal_valid(request); /* Carefully check if the request is complete, giving time for the * seqno to be visible or if the GPU hung. @@ -457,40 +510,62 @@ static int intel_breadcrumbs_signaler(void *arg) * need to wait for a new interrupt from the GPU or for * a new client. */ - request = READ_ONCE(b->first_signal); + rcu_read_lock(); + request = rcu_dereference(b->first_signal); + if (request) + request = i915_gem_request_get_rcu(request); + rcu_read_unlock(); if (signal_complete(request)) { - /* Wake up all other completed waiters and select the - * next bottom-half for the next user interrupt. - */ - intel_engine_remove_wait(engine, - &request->signaling.wait); - local_bh_disable(); dma_fence_signal(&request->fence); local_bh_enable(); /* kick start the tasklets */ + spin_lock_irq(&b->lock); + + /* Wake up all other completed waiters and select the + * next bottom-half for the next user interrupt. + */ + __intel_engine_remove_wait(engine, + &request->signaling.wait); + /* Find the next oldest signal. Note that as we have * not been holding the lock, another client may * have installed an even older signal than the one * we just completed - so double check we are still * the oldest before picking the next one. */ - spin_lock_irq(&b->lock); - if (request == b->first_signal) { + if (request == rcu_access_pointer(b->first_signal)) { struct rb_node *rb = rb_next(&request->signaling.node); - b->first_signal = rb ? to_signaler(rb) : NULL; + rcu_assign_pointer(b->first_signal, + rb ? to_signaler(rb) : NULL); } rb_erase(&request->signaling.node, &b->signals); + RB_CLEAR_NODE(&request->signaling.node); + spin_unlock_irq(&b->lock); i915_gem_request_put(request); } else { - if (kthread_should_stop()) + DEFINE_WAIT(exec); + + if (kthread_should_stop()) { + GEM_BUG_ON(request); break; + } + + if (request) + add_wait_queue(&request->execute, &exec); schedule(); + + if (request) + remove_wait_queue(&request->execute, &exec); + + if (kthread_should_park()) + kthread_parkme(); } + i915_gem_request_put(request); } while (1); __set_current_state(TASK_RUNNING); @@ -503,6 +578,7 @@ void intel_engine_enable_signaling(struct drm_i915_gem_request *request) struct intel_breadcrumbs *b = &engine->breadcrumbs; struct rb_node *parent, **p; bool first, wakeup; + u32 seqno; /* Note that we may be called from an interrupt handler on another * device (e.g. nouveau signaling a fence completion causing us @@ -513,11 +589,13 @@ void intel_engine_enable_signaling(struct drm_i915_gem_request *request) /* locked by dma_fence_enable_sw_signaling() (irqsafe fence->lock) */ assert_spin_locked(&request->lock); - if (!request->global_seqno) + + seqno = i915_gem_request_global_seqno(request); + if (!seqno) return; request->signaling.wait.tsk = b->signaler; - request->signaling.wait.seqno = request->global_seqno; + request->signaling.wait.seqno = seqno; i915_gem_request_get(request); spin_lock(&b->lock); @@ -541,8 +619,8 @@ void intel_engine_enable_signaling(struct drm_i915_gem_request *request) p = &b->signals.rb_node; while (*p) { parent = *p; - if (i915_seqno_passed(request->global_seqno, - to_signaler(parent)->global_seqno)) { + if (i915_seqno_passed(seqno, + to_signaler(parent)->signaling.wait.seqno)) { p = &parent->rb_right; first = false; } else { @@ -552,7 +630,7 @@ void intel_engine_enable_signaling(struct drm_i915_gem_request *request) rb_link_node(&request->signaling.node, parent, p); rb_insert_color(&request->signaling.node, &b->signals); if (first) - smp_store_mb(b->first_signal, request); + rcu_assign_pointer(b->first_signal, request); spin_unlock(&b->lock); @@ -560,6 +638,35 @@ void intel_engine_enable_signaling(struct drm_i915_gem_request *request) wake_up_process(b->signaler); } +void intel_engine_cancel_signaling(struct drm_i915_gem_request *request) +{ + struct intel_engine_cs *engine = request->engine; + struct intel_breadcrumbs *b = &engine->breadcrumbs; + + assert_spin_locked(&request->lock); + GEM_BUG_ON(!request->signaling.wait.seqno); + + spin_lock(&b->lock); + + if (!RB_EMPTY_NODE(&request->signaling.node)) { + if (request == rcu_access_pointer(b->first_signal)) { + struct rb_node *rb = + rb_next(&request->signaling.node); + rcu_assign_pointer(b->first_signal, + rb ? to_signaler(rb) : NULL); + } + rb_erase(&request->signaling.node, &b->signals); + RB_CLEAR_NODE(&request->signaling.node); + i915_gem_request_put(request); + } + + __intel_engine_remove_wait(engine, &request->signaling.wait); + + spin_unlock(&b->lock); + + request->signaling.wait.seqno = 0; +} + int intel_engine_init_breadcrumbs(struct intel_engine_cs *engine) { struct intel_breadcrumbs *b = &engine->breadcrumbs; @@ -607,9 +714,8 @@ void intel_engine_reset_breadcrumbs(struct intel_engine_cs *engine) __intel_breadcrumbs_disable_irq(b); if (intel_engine_has_waiter(engine)) { - b->timeout = wait_timeout(); __intel_breadcrumbs_enable_irq(b); - if (READ_ONCE(b->irq_posted)) + if (test_bit(ENGINE_IRQ_BREADCRUMB, &engine->irq_posted)) wake_up_process(b->first_wait->tsk); } else { /* sanitize the IMR and unmask any auxiliary interrupts */ @@ -626,7 +732,7 @@ void intel_engine_fini_breadcrumbs(struct intel_engine_cs *engine) /* The engines should be idle and all requests accounted for! */ WARN_ON(READ_ONCE(b->first_wait)); WARN_ON(!RB_EMPTY_ROOT(&b->waiters)); - WARN_ON(READ_ONCE(b->first_signal)); + WARN_ON(rcu_access_pointer(b->first_signal)); WARN_ON(!RB_EMPTY_ROOT(&b->signals)); if (!IS_ERR_OR_NULL(b->signaler)) @@ -635,29 +741,28 @@ void intel_engine_fini_breadcrumbs(struct intel_engine_cs *engine) cancel_fake_irq(engine); } -unsigned int intel_breadcrumbs_busy(struct drm_i915_private *i915) +bool intel_breadcrumbs_busy(struct intel_engine_cs *engine) { - struct intel_engine_cs *engine; - enum intel_engine_id id; - unsigned int mask = 0; - - for_each_engine(engine, i915, id) { - struct intel_breadcrumbs *b = &engine->breadcrumbs; - - spin_lock_irq(&b->lock); + struct intel_breadcrumbs *b = &engine->breadcrumbs; + bool busy = false; - if (b->first_wait) { - wake_up_process(b->first_wait->tsk); - mask |= intel_engine_flag(engine); - } + spin_lock_irq(&b->lock); - if (b->first_signal) { - wake_up_process(b->signaler); - mask |= intel_engine_flag(engine); - } + if (b->first_wait) { + wake_up_process(b->first_wait->tsk); + busy |= intel_engine_flag(engine); + } - spin_unlock_irq(&b->lock); + if (rcu_access_pointer(b->first_signal)) { + wake_up_process(b->signaler); + busy |= intel_engine_flag(engine); } - return mask; + spin_unlock_irq(&b->lock); + + return busy; } + +#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) +#include "selftests/intel_breadcrumbs.c" +#endif diff --git a/drivers/gpu/drm/i915/intel_cdclk.c b/drivers/gpu/drm/i915/intel_cdclk.c new file mode 100644 index 000000000000..d643c0c5321b --- /dev/null +++ b/drivers/gpu/drm/i915/intel_cdclk.c @@ -0,0 +1,1891 @@ +/* + * Copyright © 2006-2017 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#include "intel_drv.h" + +/** + * DOC: CDCLK / RAWCLK + * + * The display engine uses several different clocks to do its work. There + * are two main clocks involved that aren't directly related to the actual + * pixel clock or any symbol/bit clock of the actual output port. These + * are the core display clock (CDCLK) and RAWCLK. + * + * CDCLK clocks most of the display pipe logic, and thus its frequency + * must be high enough to support the rate at which pixels are flowing + * through the pipes. Downscaling must also be accounted as that increases + * the effective pixel rate. + * + * On several platforms the CDCLK frequency can be changed dynamically + * to minimize power consumption for a given display configuration. + * Typically changes to the CDCLK frequency require all the display pipes + * to be shut down while the frequency is being changed. + * + * On SKL+ the DMC will toggle the CDCLK off/on during DC5/6 entry/exit. + * DMC will not change the active CDCLK frequency however, so that part + * will still be performed by the driver directly. + * + * RAWCLK is a fixed frequency clock, often used by various auxiliary + * blocks such as AUX CH or backlight PWM. Hence the only thing we + * really need to know about RAWCLK is its frequency so that various + * dividers can be programmed correctly. + */ + +static void fixed_133mhz_get_cdclk(struct drm_i915_private *dev_priv, + struct intel_cdclk_state *cdclk_state) +{ + cdclk_state->cdclk = 133333; +} + +static void fixed_200mhz_get_cdclk(struct drm_i915_private *dev_priv, + struct intel_cdclk_state *cdclk_state) +{ + cdclk_state->cdclk = 200000; +} + +static void fixed_266mhz_get_cdclk(struct drm_i915_private *dev_priv, + struct intel_cdclk_state *cdclk_state) +{ + cdclk_state->cdclk = 266667; +} + +static void fixed_333mhz_get_cdclk(struct drm_i915_private *dev_priv, + struct intel_cdclk_state *cdclk_state) +{ + cdclk_state->cdclk = 333333; +} + +static void fixed_400mhz_get_cdclk(struct drm_i915_private *dev_priv, + struct intel_cdclk_state *cdclk_state) +{ + cdclk_state->cdclk = 400000; +} + +static void fixed_450mhz_get_cdclk(struct drm_i915_private *dev_priv, + struct intel_cdclk_state *cdclk_state) +{ + cdclk_state->cdclk = 450000; +} + +static void i85x_get_cdclk(struct drm_i915_private *dev_priv, + struct intel_cdclk_state *cdclk_state) +{ + struct pci_dev *pdev = dev_priv->drm.pdev; + u16 hpllcc = 0; + + /* + * 852GM/852GMV only supports 133 MHz and the HPLLCC + * encoding is different :( + * FIXME is this the right way to detect 852GM/852GMV? + */ + if (pdev->revision == 0x1) { + cdclk_state->cdclk = 133333; + return; + } + + pci_bus_read_config_word(pdev->bus, + PCI_DEVFN(0, 3), HPLLCC, &hpllcc); + + /* Assume that the hardware is in the high speed state. This + * should be the default. + */ + switch (hpllcc & GC_CLOCK_CONTROL_MASK) { + case GC_CLOCK_133_200: + case GC_CLOCK_133_200_2: + case GC_CLOCK_100_200: + cdclk_state->cdclk = 200000; + break; + case GC_CLOCK_166_250: + cdclk_state->cdclk = 250000; + break; + case GC_CLOCK_100_133: + cdclk_state->cdclk = 133333; + break; + case GC_CLOCK_133_266: + case GC_CLOCK_133_266_2: + case GC_CLOCK_166_266: + cdclk_state->cdclk = 266667; + break; + } +} + +static void i915gm_get_cdclk(struct drm_i915_private *dev_priv, + struct intel_cdclk_state *cdclk_state) +{ + struct pci_dev *pdev = dev_priv->drm.pdev; + u16 gcfgc = 0; + + pci_read_config_word(pdev, GCFGC, &gcfgc); + + if (gcfgc & GC_LOW_FREQUENCY_ENABLE) { + cdclk_state->cdclk = 133333; + return; + } + + switch (gcfgc & GC_DISPLAY_CLOCK_MASK) { + case GC_DISPLAY_CLOCK_333_320_MHZ: + cdclk_state->cdclk = 333333; + break; + default: + case GC_DISPLAY_CLOCK_190_200_MHZ: + cdclk_state->cdclk = 190000; + break; + } +} + +static void i945gm_get_cdclk(struct drm_i915_private *dev_priv, + struct intel_cdclk_state *cdclk_state) +{ + struct pci_dev *pdev = dev_priv->drm.pdev; + u16 gcfgc = 0; + + pci_read_config_word(pdev, GCFGC, &gcfgc); + + if (gcfgc & GC_LOW_FREQUENCY_ENABLE) { + cdclk_state->cdclk = 133333; + return; + } + + switch (gcfgc & GC_DISPLAY_CLOCK_MASK) { + case GC_DISPLAY_CLOCK_333_320_MHZ: + cdclk_state->cdclk = 320000; + break; + default: + case GC_DISPLAY_CLOCK_190_200_MHZ: + cdclk_state->cdclk = 200000; + break; + } +} + +static unsigned int intel_hpll_vco(struct drm_i915_private *dev_priv) +{ + static const unsigned int blb_vco[8] = { + [0] = 3200000, + [1] = 4000000, + [2] = 5333333, + [3] = 4800000, + [4] = 6400000, + }; + static const unsigned int pnv_vco[8] = { + [0] = 3200000, + [1] = 4000000, + [2] = 5333333, + [3] = 4800000, + [4] = 2666667, + }; + static const unsigned int cl_vco[8] = { + [0] = 3200000, + [1] = 4000000, + [2] = 5333333, + [3] = 6400000, + [4] = 3333333, + [5] = 3566667, + [6] = 4266667, + }; + static const unsigned int elk_vco[8] = { + [0] = 3200000, + [1] = 4000000, + [2] = 5333333, + [3] = 4800000, + }; + static const unsigned int ctg_vco[8] = { + [0] = 3200000, + [1] = 4000000, + [2] = 5333333, + [3] = 6400000, + [4] = 2666667, + [5] = 4266667, + }; + const unsigned int *vco_table; + unsigned int vco; + uint8_t tmp = 0; + + /* FIXME other chipsets? */ + if (IS_GM45(dev_priv)) + vco_table = ctg_vco; + else if (IS_G4X(dev_priv)) + vco_table = elk_vco; + else if (IS_I965GM(dev_priv)) + vco_table = cl_vco; + else if (IS_PINEVIEW(dev_priv)) + vco_table = pnv_vco; + else if (IS_G33(dev_priv)) + vco_table = blb_vco; + else + return 0; + + tmp = I915_READ(IS_MOBILE(dev_priv) ? HPLLVCO_MOBILE : HPLLVCO); + + vco = vco_table[tmp & 0x7]; + if (vco == 0) + DRM_ERROR("Bad HPLL VCO (HPLLVCO=0x%02x)\n", tmp); + else + DRM_DEBUG_KMS("HPLL VCO %u kHz\n", vco); + + return vco; +} + +static void g33_get_cdclk(struct drm_i915_private *dev_priv, + struct intel_cdclk_state *cdclk_state) +{ + struct pci_dev *pdev = dev_priv->drm.pdev; + static const uint8_t div_3200[] = { 12, 10, 8, 7, 5, 16 }; + static const uint8_t div_4000[] = { 14, 12, 10, 8, 6, 20 }; + static const uint8_t div_4800[] = { 20, 14, 12, 10, 8, 24 }; + static const uint8_t div_5333[] = { 20, 16, 12, 12, 8, 28 }; + const uint8_t *div_table; + unsigned int cdclk_sel; + uint16_t tmp = 0; + + cdclk_state->vco = intel_hpll_vco(dev_priv); + + pci_read_config_word(pdev, GCFGC, &tmp); + + cdclk_sel = (tmp >> 4) & 0x7; + + if (cdclk_sel >= ARRAY_SIZE(div_3200)) + goto fail; + + switch (cdclk_state->vco) { + case 3200000: + div_table = div_3200; + break; + case 4000000: + div_table = div_4000; + break; + case 4800000: + div_table = div_4800; + break; + case 5333333: + div_table = div_5333; + break; + default: + goto fail; + } + + cdclk_state->cdclk = DIV_ROUND_CLOSEST(cdclk_state->vco, + div_table[cdclk_sel]); + return; + +fail: + DRM_ERROR("Unable to determine CDCLK. HPLL VCO=%u kHz, CFGC=0x%08x\n", + cdclk_state->vco, tmp); + cdclk_state->cdclk = 190476; +} + +static void pnv_get_cdclk(struct drm_i915_private *dev_priv, + struct intel_cdclk_state *cdclk_state) +{ + struct pci_dev *pdev = dev_priv->drm.pdev; + u16 gcfgc = 0; + + pci_read_config_word(pdev, GCFGC, &gcfgc); + + switch (gcfgc & GC_DISPLAY_CLOCK_MASK) { + case GC_DISPLAY_CLOCK_267_MHZ_PNV: + cdclk_state->cdclk = 266667; + break; + case GC_DISPLAY_CLOCK_333_MHZ_PNV: + cdclk_state->cdclk = 333333; + break; + case GC_DISPLAY_CLOCK_444_MHZ_PNV: + cdclk_state->cdclk = 444444; + break; + case GC_DISPLAY_CLOCK_200_MHZ_PNV: + cdclk_state->cdclk = 200000; + break; + default: + DRM_ERROR("Unknown pnv display core clock 0x%04x\n", gcfgc); + case GC_DISPLAY_CLOCK_133_MHZ_PNV: + cdclk_state->cdclk = 133333; + break; + case GC_DISPLAY_CLOCK_167_MHZ_PNV: + cdclk_state->cdclk = 166667; + break; + } +} + +static void i965gm_get_cdclk(struct drm_i915_private *dev_priv, + struct intel_cdclk_state *cdclk_state) +{ + struct pci_dev *pdev = dev_priv->drm.pdev; + static const uint8_t div_3200[] = { 16, 10, 8 }; + static const uint8_t div_4000[] = { 20, 12, 10 }; + static const uint8_t div_5333[] = { 24, 16, 14 }; + const uint8_t *div_table; + unsigned int cdclk_sel; + uint16_t tmp = 0; + + cdclk_state->vco = intel_hpll_vco(dev_priv); + + pci_read_config_word(pdev, GCFGC, &tmp); + + cdclk_sel = ((tmp >> 8) & 0x1f) - 1; + + if (cdclk_sel >= ARRAY_SIZE(div_3200)) + goto fail; + + switch (cdclk_state->vco) { + case 3200000: + div_table = div_3200; + break; + case 4000000: + div_table = div_4000; + break; + case 5333333: + div_table = div_5333; + break; + default: + goto fail; + } + + cdclk_state->cdclk = DIV_ROUND_CLOSEST(cdclk_state->vco, + div_table[cdclk_sel]); + return; + +fail: + DRM_ERROR("Unable to determine CDCLK. HPLL VCO=%u kHz, CFGC=0x%04x\n", + cdclk_state->vco, tmp); + cdclk_state->cdclk = 200000; +} + +static void gm45_get_cdclk(struct drm_i915_private *dev_priv, + struct intel_cdclk_state *cdclk_state) +{ + struct pci_dev *pdev = dev_priv->drm.pdev; + unsigned int cdclk_sel; + uint16_t tmp = 0; + + cdclk_state->vco = intel_hpll_vco(dev_priv); + + pci_read_config_word(pdev, GCFGC, &tmp); + + cdclk_sel = (tmp >> 12) & 0x1; + + switch (cdclk_state->vco) { + case 2666667: + case 4000000: + case 5333333: + cdclk_state->cdclk = cdclk_sel ? 333333 : 222222; + break; + case 3200000: + cdclk_state->cdclk = cdclk_sel ? 320000 : 228571; + break; + default: + DRM_ERROR("Unable to determine CDCLK. HPLL VCO=%u, CFGC=0x%04x\n", + cdclk_state->vco, tmp); + cdclk_state->cdclk = 222222; + break; + } +} + +static void hsw_get_cdclk(struct drm_i915_private *dev_priv, + struct intel_cdclk_state *cdclk_state) +{ + uint32_t lcpll = I915_READ(LCPLL_CTL); + uint32_t freq = lcpll & LCPLL_CLK_FREQ_MASK; + + if (lcpll & LCPLL_CD_SOURCE_FCLK) + cdclk_state->cdclk = 800000; + else if (I915_READ(FUSE_STRAP) & HSW_CDCLK_LIMIT) + cdclk_state->cdclk = 450000; + else if (freq == LCPLL_CLK_FREQ_450) + cdclk_state->cdclk = 450000; + else if (IS_HSW_ULT(dev_priv)) + cdclk_state->cdclk = 337500; + else + cdclk_state->cdclk = 540000; +} + +static int vlv_calc_cdclk(struct drm_i915_private *dev_priv, + int max_pixclk) +{ + int freq_320 = (dev_priv->hpll_freq << 1) % 320000 != 0 ? + 333333 : 320000; + int limit = IS_CHERRYVIEW(dev_priv) ? 95 : 90; + + /* + * We seem to get an unstable or solid color picture at 200MHz. + * Not sure what's wrong. For now use 200MHz only when all pipes + * are off. + */ + if (!IS_CHERRYVIEW(dev_priv) && + max_pixclk > freq_320*limit/100) + return 400000; + else if (max_pixclk > 266667*limit/100) + return freq_320; + else if (max_pixclk > 0) + return 266667; + else + return 200000; +} + +static void vlv_get_cdclk(struct drm_i915_private *dev_priv, + struct intel_cdclk_state *cdclk_state) +{ + cdclk_state->vco = vlv_get_hpll_vco(dev_priv); + cdclk_state->cdclk = vlv_get_cck_clock(dev_priv, "cdclk", + CCK_DISPLAY_CLOCK_CONTROL, + cdclk_state->vco); +} + +static void vlv_program_pfi_credits(struct drm_i915_private *dev_priv) +{ + unsigned int credits, default_credits; + + if (IS_CHERRYVIEW(dev_priv)) + default_credits = PFI_CREDIT(12); + else + default_credits = PFI_CREDIT(8); + + if (dev_priv->cdclk.hw.cdclk >= dev_priv->czclk_freq) { + /* CHV suggested value is 31 or 63 */ + if (IS_CHERRYVIEW(dev_priv)) + credits = PFI_CREDIT_63; + else + credits = PFI_CREDIT(15); + } else { + credits = default_credits; + } + + /* + * WA - write default credits before re-programming + * FIXME: should we also set the resend bit here? + */ + I915_WRITE(GCI_CONTROL, VGA_FAST_MODE_DISABLE | + default_credits); + + I915_WRITE(GCI_CONTROL, VGA_FAST_MODE_DISABLE | + credits | PFI_CREDIT_RESEND); + + /* + * FIXME is this guaranteed to clear + * immediately or should we poll for it? + */ + WARN_ON(I915_READ(GCI_CONTROL) & PFI_CREDIT_RESEND); +} + +static void vlv_set_cdclk(struct drm_i915_private *dev_priv, + const struct intel_cdclk_state *cdclk_state) +{ + int cdclk = cdclk_state->cdclk; + u32 val, cmd; + + if (cdclk >= 320000) /* jump to highest voltage for 400MHz too */ + cmd = 2; + else if (cdclk == 266667) + cmd = 1; + else + cmd = 0; + + mutex_lock(&dev_priv->rps.hw_lock); + val = vlv_punit_read(dev_priv, PUNIT_REG_DSPFREQ); + val &= ~DSPFREQGUAR_MASK; + val |= (cmd << DSPFREQGUAR_SHIFT); + vlv_punit_write(dev_priv, PUNIT_REG_DSPFREQ, val); + if (wait_for((vlv_punit_read(dev_priv, PUNIT_REG_DSPFREQ) & + DSPFREQSTAT_MASK) == (cmd << DSPFREQSTAT_SHIFT), + 50)) { + DRM_ERROR("timed out waiting for CDclk change\n"); + } + mutex_unlock(&dev_priv->rps.hw_lock); + + mutex_lock(&dev_priv->sb_lock); + + if (cdclk == 400000) { + u32 divider; + + divider = DIV_ROUND_CLOSEST(dev_priv->hpll_freq << 1, + cdclk) - 1; + + /* adjust cdclk divider */ + val = vlv_cck_read(dev_priv, CCK_DISPLAY_CLOCK_CONTROL); + val &= ~CCK_FREQUENCY_VALUES; + val |= divider; + vlv_cck_write(dev_priv, CCK_DISPLAY_CLOCK_CONTROL, val); + + if (wait_for((vlv_cck_read(dev_priv, CCK_DISPLAY_CLOCK_CONTROL) & + CCK_FREQUENCY_STATUS) == (divider << CCK_FREQUENCY_STATUS_SHIFT), + 50)) + DRM_ERROR("timed out waiting for CDclk change\n"); + } + + /* adjust self-refresh exit latency value */ + val = vlv_bunit_read(dev_priv, BUNIT_REG_BISOC); + val &= ~0x7f; + + /* + * For high bandwidth configs, we set a higher latency in the bunit + * so that the core display fetch happens in time to avoid underruns. + */ + if (cdclk == 400000) + val |= 4500 / 250; /* 4.5 usec */ + else + val |= 3000 / 250; /* 3.0 usec */ + vlv_bunit_write(dev_priv, BUNIT_REG_BISOC, val); + + mutex_unlock(&dev_priv->sb_lock); + + intel_update_cdclk(dev_priv); + + vlv_program_pfi_credits(dev_priv); +} + +static void chv_set_cdclk(struct drm_i915_private *dev_priv, + const struct intel_cdclk_state *cdclk_state) +{ + int cdclk = cdclk_state->cdclk; + u32 val, cmd; + + switch (cdclk) { + case 333333: + case 320000: + case 266667: + case 200000: + break; + default: + MISSING_CASE(cdclk); + return; + } + + /* + * Specs are full of misinformation, but testing on actual + * hardware has shown that we just need to write the desired + * CCK divider into the Punit register. + */ + cmd = DIV_ROUND_CLOSEST(dev_priv->hpll_freq << 1, cdclk) - 1; + + mutex_lock(&dev_priv->rps.hw_lock); + val = vlv_punit_read(dev_priv, PUNIT_REG_DSPFREQ); + val &= ~DSPFREQGUAR_MASK_CHV; + val |= (cmd << DSPFREQGUAR_SHIFT_CHV); + vlv_punit_write(dev_priv, PUNIT_REG_DSPFREQ, val); + if (wait_for((vlv_punit_read(dev_priv, PUNIT_REG_DSPFREQ) & + DSPFREQSTAT_MASK_CHV) == (cmd << DSPFREQSTAT_SHIFT_CHV), + 50)) { + DRM_ERROR("timed out waiting for CDclk change\n"); + } + mutex_unlock(&dev_priv->rps.hw_lock); + + intel_update_cdclk(dev_priv); + + vlv_program_pfi_credits(dev_priv); +} + +static int bdw_calc_cdclk(int max_pixclk) +{ + if (max_pixclk > 540000) + return 675000; + else if (max_pixclk > 450000) + return 540000; + else if (max_pixclk > 337500) + return 450000; + else + return 337500; +} + +static void bdw_get_cdclk(struct drm_i915_private *dev_priv, + struct intel_cdclk_state *cdclk_state) +{ + uint32_t lcpll = I915_READ(LCPLL_CTL); + uint32_t freq = lcpll & LCPLL_CLK_FREQ_MASK; + + if (lcpll & LCPLL_CD_SOURCE_FCLK) + cdclk_state->cdclk = 800000; + else if (I915_READ(FUSE_STRAP) & HSW_CDCLK_LIMIT) + cdclk_state->cdclk = 450000; + else if (freq == LCPLL_CLK_FREQ_450) + cdclk_state->cdclk = 450000; + else if (freq == LCPLL_CLK_FREQ_54O_BDW) + cdclk_state->cdclk = 540000; + else if (freq == LCPLL_CLK_FREQ_337_5_BDW) + cdclk_state->cdclk = 337500; + else + cdclk_state->cdclk = 675000; +} + +static void bdw_set_cdclk(struct drm_i915_private *dev_priv, + const struct intel_cdclk_state *cdclk_state) +{ + int cdclk = cdclk_state->cdclk; + uint32_t val, data; + int ret; + + if (WARN((I915_READ(LCPLL_CTL) & + (LCPLL_PLL_DISABLE | LCPLL_PLL_LOCK | + LCPLL_CD_CLOCK_DISABLE | LCPLL_ROOT_CD_CLOCK_DISABLE | + LCPLL_CD2X_CLOCK_DISABLE | LCPLL_POWER_DOWN_ALLOW | + LCPLL_CD_SOURCE_FCLK)) != LCPLL_PLL_LOCK, + "trying to change cdclk frequency with cdclk not enabled\n")) + return; + + mutex_lock(&dev_priv->rps.hw_lock); + ret = sandybridge_pcode_write(dev_priv, + BDW_PCODE_DISPLAY_FREQ_CHANGE_REQ, 0x0); + mutex_unlock(&dev_priv->rps.hw_lock); + if (ret) { + DRM_ERROR("failed to inform pcode about cdclk change\n"); + return; + } + + val = I915_READ(LCPLL_CTL); + val |= LCPLL_CD_SOURCE_FCLK; + I915_WRITE(LCPLL_CTL, val); + + if (wait_for_us(I915_READ(LCPLL_CTL) & + LCPLL_CD_SOURCE_FCLK_DONE, 1)) + DRM_ERROR("Switching to FCLK failed\n"); + + val = I915_READ(LCPLL_CTL); + val &= ~LCPLL_CLK_FREQ_MASK; + + switch (cdclk) { + case 450000: + val |= LCPLL_CLK_FREQ_450; + data = 0; + break; + case 540000: + val |= LCPLL_CLK_FREQ_54O_BDW; + data = 1; + break; + case 337500: + val |= LCPLL_CLK_FREQ_337_5_BDW; + data = 2; + break; + case 675000: + val |= LCPLL_CLK_FREQ_675_BDW; + data = 3; + break; + default: + WARN(1, "invalid cdclk frequency\n"); + return; + } + + I915_WRITE(LCPLL_CTL, val); + + val = I915_READ(LCPLL_CTL); + val &= ~LCPLL_CD_SOURCE_FCLK; + I915_WRITE(LCPLL_CTL, val); + + if (wait_for_us((I915_READ(LCPLL_CTL) & + LCPLL_CD_SOURCE_FCLK_DONE) == 0, 1)) + DRM_ERROR("Switching back to LCPLL failed\n"); + + mutex_lock(&dev_priv->rps.hw_lock); + sandybridge_pcode_write(dev_priv, HSW_PCODE_DE_WRITE_FREQ_REQ, data); + mutex_unlock(&dev_priv->rps.hw_lock); + + I915_WRITE(CDCLK_FREQ, DIV_ROUND_CLOSEST(cdclk, 1000) - 1); + + intel_update_cdclk(dev_priv); + + WARN(cdclk != dev_priv->cdclk.hw.cdclk, + "cdclk requested %d kHz but got %d kHz\n", + cdclk, dev_priv->cdclk.hw.cdclk); +} + +static int skl_calc_cdclk(int max_pixclk, int vco) +{ + if (vco == 8640000) { + if (max_pixclk > 540000) + return 617143; + else if (max_pixclk > 432000) + return 540000; + else if (max_pixclk > 308571) + return 432000; + else + return 308571; + } else { + if (max_pixclk > 540000) + return 675000; + else if (max_pixclk > 450000) + return 540000; + else if (max_pixclk > 337500) + return 450000; + else + return 337500; + } +} + +static void skl_dpll0_update(struct drm_i915_private *dev_priv, + struct intel_cdclk_state *cdclk_state) +{ + u32 val; + + cdclk_state->ref = 24000; + cdclk_state->vco = 0; + + val = I915_READ(LCPLL1_CTL); + if ((val & LCPLL_PLL_ENABLE) == 0) + return; + + if (WARN_ON((val & LCPLL_PLL_LOCK) == 0)) + return; + + val = I915_READ(DPLL_CTRL1); + + if (WARN_ON((val & (DPLL_CTRL1_HDMI_MODE(SKL_DPLL0) | + DPLL_CTRL1_SSC(SKL_DPLL0) | + DPLL_CTRL1_OVERRIDE(SKL_DPLL0))) != + DPLL_CTRL1_OVERRIDE(SKL_DPLL0))) + return; + + switch (val & DPLL_CTRL1_LINK_RATE_MASK(SKL_DPLL0)) { + case DPLL_CTRL1_LINK_RATE(DPLL_CTRL1_LINK_RATE_810, SKL_DPLL0): + case DPLL_CTRL1_LINK_RATE(DPLL_CTRL1_LINK_RATE_1350, SKL_DPLL0): + case DPLL_CTRL1_LINK_RATE(DPLL_CTRL1_LINK_RATE_1620, SKL_DPLL0): + case DPLL_CTRL1_LINK_RATE(DPLL_CTRL1_LINK_RATE_2700, SKL_DPLL0): + cdclk_state->vco = 8100000; + break; + case DPLL_CTRL1_LINK_RATE(DPLL_CTRL1_LINK_RATE_1080, SKL_DPLL0): + case DPLL_CTRL1_LINK_RATE(DPLL_CTRL1_LINK_RATE_2160, SKL_DPLL0): + cdclk_state->vco = 8640000; + break; + default: + MISSING_CASE(val & DPLL_CTRL1_LINK_RATE_MASK(SKL_DPLL0)); + break; + } +} + +static void skl_get_cdclk(struct drm_i915_private *dev_priv, + struct intel_cdclk_state *cdclk_state) +{ + u32 cdctl; + + skl_dpll0_update(dev_priv, cdclk_state); + + cdclk_state->cdclk = cdclk_state->ref; + + if (cdclk_state->vco == 0) + return; + + cdctl = I915_READ(CDCLK_CTL); + + if (cdclk_state->vco == 8640000) { + switch (cdctl & CDCLK_FREQ_SEL_MASK) { + case CDCLK_FREQ_450_432: + cdclk_state->cdclk = 432000; + break; + case CDCLK_FREQ_337_308: + cdclk_state->cdclk = 308571; + break; + case CDCLK_FREQ_540: + cdclk_state->cdclk = 540000; + break; + case CDCLK_FREQ_675_617: + cdclk_state->cdclk = 617143; + break; + default: + MISSING_CASE(cdctl & CDCLK_FREQ_SEL_MASK); + break; + } + } else { + switch (cdctl & CDCLK_FREQ_SEL_MASK) { + case CDCLK_FREQ_450_432: + cdclk_state->cdclk = 450000; + break; + case CDCLK_FREQ_337_308: + cdclk_state->cdclk = 337500; + break; + case CDCLK_FREQ_540: + cdclk_state->cdclk = 540000; + break; + case CDCLK_FREQ_675_617: + cdclk_state->cdclk = 675000; + break; + default: + MISSING_CASE(cdctl & CDCLK_FREQ_SEL_MASK); + break; + } + } +} + +/* convert from kHz to .1 fixpoint MHz with -1MHz offset */ +static int skl_cdclk_decimal(int cdclk) +{ + return DIV_ROUND_CLOSEST(cdclk - 1000, 500); +} + +static void skl_set_preferred_cdclk_vco(struct drm_i915_private *dev_priv, + int vco) +{ + bool changed = dev_priv->skl_preferred_vco_freq != vco; + + dev_priv->skl_preferred_vco_freq = vco; + + if (changed) + intel_update_max_cdclk(dev_priv); +} + +static void skl_dpll0_enable(struct drm_i915_private *dev_priv, int vco) +{ + int min_cdclk = skl_calc_cdclk(0, vco); + u32 val; + + WARN_ON(vco != 8100000 && vco != 8640000); + + /* select the minimum CDCLK before enabling DPLL 0 */ + val = CDCLK_FREQ_337_308 | skl_cdclk_decimal(min_cdclk); + I915_WRITE(CDCLK_CTL, val); + POSTING_READ(CDCLK_CTL); + + /* + * We always enable DPLL0 with the lowest link rate possible, but still + * taking into account the VCO required to operate the eDP panel at the + * desired frequency. The usual DP link rates operate with a VCO of + * 8100 while the eDP 1.4 alternate link rates need a VCO of 8640. + * The modeset code is responsible for the selection of the exact link + * rate later on, with the constraint of choosing a frequency that + * works with vco. + */ + val = I915_READ(DPLL_CTRL1); + + val &= ~(DPLL_CTRL1_HDMI_MODE(SKL_DPLL0) | DPLL_CTRL1_SSC(SKL_DPLL0) | + DPLL_CTRL1_LINK_RATE_MASK(SKL_DPLL0)); + val |= DPLL_CTRL1_OVERRIDE(SKL_DPLL0); + if (vco == 8640000) + val |= DPLL_CTRL1_LINK_RATE(DPLL_CTRL1_LINK_RATE_1080, + SKL_DPLL0); + else + val |= DPLL_CTRL1_LINK_RATE(DPLL_CTRL1_LINK_RATE_810, + SKL_DPLL0); + + I915_WRITE(DPLL_CTRL1, val); + POSTING_READ(DPLL_CTRL1); + + I915_WRITE(LCPLL1_CTL, I915_READ(LCPLL1_CTL) | LCPLL_PLL_ENABLE); + + if (intel_wait_for_register(dev_priv, + LCPLL1_CTL, LCPLL_PLL_LOCK, LCPLL_PLL_LOCK, + 5)) + DRM_ERROR("DPLL0 not locked\n"); + + dev_priv->cdclk.hw.vco = vco; + + /* We'll want to keep using the current vco from now on. */ + skl_set_preferred_cdclk_vco(dev_priv, vco); +} + +static void skl_dpll0_disable(struct drm_i915_private *dev_priv) +{ + I915_WRITE(LCPLL1_CTL, I915_READ(LCPLL1_CTL) & ~LCPLL_PLL_ENABLE); + if (intel_wait_for_register(dev_priv, + LCPLL1_CTL, LCPLL_PLL_LOCK, 0, + 1)) + DRM_ERROR("Couldn't disable DPLL0\n"); + + dev_priv->cdclk.hw.vco = 0; +} + +static void skl_set_cdclk(struct drm_i915_private *dev_priv, + const struct intel_cdclk_state *cdclk_state) +{ + int cdclk = cdclk_state->cdclk; + int vco = cdclk_state->vco; + u32 freq_select, pcu_ack; + int ret; + + WARN_ON((cdclk == 24000) != (vco == 0)); + + mutex_lock(&dev_priv->rps.hw_lock); + ret = skl_pcode_request(dev_priv, SKL_PCODE_CDCLK_CONTROL, + SKL_CDCLK_PREPARE_FOR_CHANGE, + SKL_CDCLK_READY_FOR_CHANGE, + SKL_CDCLK_READY_FOR_CHANGE, 3); + mutex_unlock(&dev_priv->rps.hw_lock); + if (ret) { + DRM_ERROR("Failed to inform PCU about cdclk change (%d)\n", + ret); + return; + } + + /* set CDCLK_CTL */ + switch (cdclk) { + case 450000: + case 432000: + freq_select = CDCLK_FREQ_450_432; + pcu_ack = 1; + break; + case 540000: + freq_select = CDCLK_FREQ_540; + pcu_ack = 2; + break; + case 308571: + case 337500: + default: + freq_select = CDCLK_FREQ_337_308; + pcu_ack = 0; + break; + case 617143: + case 675000: + freq_select = CDCLK_FREQ_675_617; + pcu_ack = 3; + break; + } + + if (dev_priv->cdclk.hw.vco != 0 && + dev_priv->cdclk.hw.vco != vco) + skl_dpll0_disable(dev_priv); + + if (dev_priv->cdclk.hw.vco != vco) + skl_dpll0_enable(dev_priv, vco); + + I915_WRITE(CDCLK_CTL, freq_select | skl_cdclk_decimal(cdclk)); + POSTING_READ(CDCLK_CTL); + + /* inform PCU of the change */ + mutex_lock(&dev_priv->rps.hw_lock); + sandybridge_pcode_write(dev_priv, SKL_PCODE_CDCLK_CONTROL, pcu_ack); + mutex_unlock(&dev_priv->rps.hw_lock); + + intel_update_cdclk(dev_priv); +} + +static void skl_sanitize_cdclk(struct drm_i915_private *dev_priv) +{ + uint32_t cdctl, expected; + + /* + * check if the pre-os initialized the display + * There is SWF18 scratchpad register defined which is set by the + * pre-os which can be used by the OS drivers to check the status + */ + if ((I915_READ(SWF_ILK(0x18)) & 0x00FFFFFF) == 0) + goto sanitize; + + intel_update_cdclk(dev_priv); + /* Is PLL enabled and locked ? */ + if (dev_priv->cdclk.hw.vco == 0 || + dev_priv->cdclk.hw.cdclk == dev_priv->cdclk.hw.ref) + goto sanitize; + + /* DPLL okay; verify the cdclock + * + * Noticed in some instances that the freq selection is correct but + * decimal part is programmed wrong from BIOS where pre-os does not + * enable display. Verify the same as well. + */ + cdctl = I915_READ(CDCLK_CTL); + expected = (cdctl & CDCLK_FREQ_SEL_MASK) | + skl_cdclk_decimal(dev_priv->cdclk.hw.cdclk); + if (cdctl == expected) + /* All well; nothing to sanitize */ + return; + +sanitize: + DRM_DEBUG_KMS("Sanitizing cdclk programmed by pre-os\n"); + + /* force cdclk programming */ + dev_priv->cdclk.hw.cdclk = 0; + /* force full PLL disable + enable */ + dev_priv->cdclk.hw.vco = -1; +} + +/** + * skl_init_cdclk - Initialize CDCLK on SKL + * @dev_priv: i915 device + * + * Initialize CDCLK for SKL and derivatives. This is generally + * done only during the display core initialization sequence, + * after which the DMC will take care of turning CDCLK off/on + * as needed. + */ +void skl_init_cdclk(struct drm_i915_private *dev_priv) +{ + struct intel_cdclk_state cdclk_state; + + skl_sanitize_cdclk(dev_priv); + + if (dev_priv->cdclk.hw.cdclk != 0 && + dev_priv->cdclk.hw.vco != 0) { + /* + * Use the current vco as our initial + * guess as to what the preferred vco is. + */ + if (dev_priv->skl_preferred_vco_freq == 0) + skl_set_preferred_cdclk_vco(dev_priv, + dev_priv->cdclk.hw.vco); + return; + } + + cdclk_state = dev_priv->cdclk.hw; + + cdclk_state.vco = dev_priv->skl_preferred_vco_freq; + if (cdclk_state.vco == 0) + cdclk_state.vco = 8100000; + cdclk_state.cdclk = skl_calc_cdclk(0, cdclk_state.vco); + + skl_set_cdclk(dev_priv, &cdclk_state); +} + +/** + * skl_uninit_cdclk - Uninitialize CDCLK on SKL + * @dev_priv: i915 device + * + * Uninitialize CDCLK for SKL and derivatives. This is done only + * during the display core uninitialization sequence. + */ +void skl_uninit_cdclk(struct drm_i915_private *dev_priv) +{ + struct intel_cdclk_state cdclk_state = dev_priv->cdclk.hw; + + cdclk_state.cdclk = cdclk_state.ref; + cdclk_state.vco = 0; + + skl_set_cdclk(dev_priv, &cdclk_state); +} + +static int bxt_calc_cdclk(int max_pixclk) +{ + if (max_pixclk > 576000) + return 624000; + else if (max_pixclk > 384000) + return 576000; + else if (max_pixclk > 288000) + return 384000; + else if (max_pixclk > 144000) + return 288000; + else + return 144000; +} + +static int glk_calc_cdclk(int max_pixclk) +{ + if (max_pixclk > 2 * 158400) + return 316800; + else if (max_pixclk > 2 * 79200) + return 158400; + else + return 79200; +} + +static int bxt_de_pll_vco(struct drm_i915_private *dev_priv, int cdclk) +{ + int ratio; + + if (cdclk == dev_priv->cdclk.hw.ref) + return 0; + + switch (cdclk) { + default: + MISSING_CASE(cdclk); + case 144000: + case 288000: + case 384000: + case 576000: + ratio = 60; + break; + case 624000: + ratio = 65; + break; + } + + return dev_priv->cdclk.hw.ref * ratio; +} + +static int glk_de_pll_vco(struct drm_i915_private *dev_priv, int cdclk) +{ + int ratio; + + if (cdclk == dev_priv->cdclk.hw.ref) + return 0; + + switch (cdclk) { + default: + MISSING_CASE(cdclk); + case 79200: + case 158400: + case 316800: + ratio = 33; + break; + } + + return dev_priv->cdclk.hw.ref * ratio; +} + +static void bxt_de_pll_update(struct drm_i915_private *dev_priv, + struct intel_cdclk_state *cdclk_state) +{ + u32 val; + + cdclk_state->ref = 19200; + cdclk_state->vco = 0; + + val = I915_READ(BXT_DE_PLL_ENABLE); + if ((val & BXT_DE_PLL_PLL_ENABLE) == 0) + return; + + if (WARN_ON((val & BXT_DE_PLL_LOCK) == 0)) + return; + + val = I915_READ(BXT_DE_PLL_CTL); + cdclk_state->vco = (val & BXT_DE_PLL_RATIO_MASK) * cdclk_state->ref; +} + +static void bxt_get_cdclk(struct drm_i915_private *dev_priv, + struct intel_cdclk_state *cdclk_state) +{ + u32 divider; + int div; + + bxt_de_pll_update(dev_priv, cdclk_state); + + cdclk_state->cdclk = cdclk_state->ref; + + if (cdclk_state->vco == 0) + return; + + divider = I915_READ(CDCLK_CTL) & BXT_CDCLK_CD2X_DIV_SEL_MASK; + + switch (divider) { + case BXT_CDCLK_CD2X_DIV_SEL_1: + div = 2; + break; + case BXT_CDCLK_CD2X_DIV_SEL_1_5: + WARN(IS_GEMINILAKE(dev_priv), "Unsupported divider\n"); + div = 3; + break; + case BXT_CDCLK_CD2X_DIV_SEL_2: + div = 4; + break; + case BXT_CDCLK_CD2X_DIV_SEL_4: + div = 8; + break; + default: + MISSING_CASE(divider); + return; + } + + cdclk_state->cdclk = DIV_ROUND_CLOSEST(cdclk_state->vco, div); +} + +static void bxt_de_pll_disable(struct drm_i915_private *dev_priv) +{ + I915_WRITE(BXT_DE_PLL_ENABLE, 0); + + /* Timeout 200us */ + if (intel_wait_for_register(dev_priv, + BXT_DE_PLL_ENABLE, BXT_DE_PLL_LOCK, 0, + 1)) + DRM_ERROR("timeout waiting for DE PLL unlock\n"); + + dev_priv->cdclk.hw.vco = 0; +} + +static void bxt_de_pll_enable(struct drm_i915_private *dev_priv, int vco) +{ + int ratio = DIV_ROUND_CLOSEST(vco, dev_priv->cdclk.hw.ref); + u32 val; + + val = I915_READ(BXT_DE_PLL_CTL); + val &= ~BXT_DE_PLL_RATIO_MASK; + val |= BXT_DE_PLL_RATIO(ratio); + I915_WRITE(BXT_DE_PLL_CTL, val); + + I915_WRITE(BXT_DE_PLL_ENABLE, BXT_DE_PLL_PLL_ENABLE); + + /* Timeout 200us */ + if (intel_wait_for_register(dev_priv, + BXT_DE_PLL_ENABLE, + BXT_DE_PLL_LOCK, + BXT_DE_PLL_LOCK, + 1)) + DRM_ERROR("timeout waiting for DE PLL lock\n"); + + dev_priv->cdclk.hw.vco = vco; +} + +static void bxt_set_cdclk(struct drm_i915_private *dev_priv, + const struct intel_cdclk_state *cdclk_state) +{ + int cdclk = cdclk_state->cdclk; + int vco = cdclk_state->vco; + u32 val, divider; + int ret; + + /* cdclk = vco / 2 / div{1,1.5,2,4} */ + switch (DIV_ROUND_CLOSEST(vco, cdclk)) { + case 8: + divider = BXT_CDCLK_CD2X_DIV_SEL_4; + break; + case 4: + divider = BXT_CDCLK_CD2X_DIV_SEL_2; + break; + case 3: + WARN(IS_GEMINILAKE(dev_priv), "Unsupported divider\n"); + divider = BXT_CDCLK_CD2X_DIV_SEL_1_5; + break; + case 2: + divider = BXT_CDCLK_CD2X_DIV_SEL_1; + break; + default: + WARN_ON(cdclk != dev_priv->cdclk.hw.ref); + WARN_ON(vco != 0); + + divider = BXT_CDCLK_CD2X_DIV_SEL_1; + break; + } + + /* Inform power controller of upcoming frequency change */ + mutex_lock(&dev_priv->rps.hw_lock); + ret = sandybridge_pcode_write(dev_priv, HSW_PCODE_DE_WRITE_FREQ_REQ, + 0x80000000); + mutex_unlock(&dev_priv->rps.hw_lock); + + if (ret) { + DRM_ERROR("PCode CDCLK freq change notify failed (err %d, freq %d)\n", + ret, cdclk); + return; + } + + if (dev_priv->cdclk.hw.vco != 0 && + dev_priv->cdclk.hw.vco != vco) + bxt_de_pll_disable(dev_priv); + + if (dev_priv->cdclk.hw.vco != vco) + bxt_de_pll_enable(dev_priv, vco); + + val = divider | skl_cdclk_decimal(cdclk); + /* + * FIXME if only the cd2x divider needs changing, it could be done + * without shutting off the pipe (if only one pipe is active). + */ + val |= BXT_CDCLK_CD2X_PIPE_NONE; + /* + * Disable SSA Precharge when CD clock frequency < 500 MHz, + * enable otherwise. + */ + if (cdclk >= 500000) + val |= BXT_CDCLK_SSA_PRECHARGE_ENABLE; + I915_WRITE(CDCLK_CTL, val); + + mutex_lock(&dev_priv->rps.hw_lock); + ret = sandybridge_pcode_write(dev_priv, HSW_PCODE_DE_WRITE_FREQ_REQ, + DIV_ROUND_UP(cdclk, 25000)); + mutex_unlock(&dev_priv->rps.hw_lock); + + if (ret) { + DRM_ERROR("PCode CDCLK freq set failed, (err %d, freq %d)\n", + ret, cdclk); + return; + } + + intel_update_cdclk(dev_priv); +} + +static void bxt_sanitize_cdclk(struct drm_i915_private *dev_priv) +{ + u32 cdctl, expected; + + intel_update_cdclk(dev_priv); + + if (dev_priv->cdclk.hw.vco == 0 || + dev_priv->cdclk.hw.cdclk == dev_priv->cdclk.hw.ref) + goto sanitize; + + /* DPLL okay; verify the cdclock + * + * Some BIOS versions leave an incorrect decimal frequency value and + * set reserved MBZ bits in CDCLK_CTL at least during exiting from S4, + * so sanitize this register. + */ + cdctl = I915_READ(CDCLK_CTL); + /* + * Let's ignore the pipe field, since BIOS could have configured the + * dividers both synching to an active pipe, or asynchronously + * (PIPE_NONE). + */ + cdctl &= ~BXT_CDCLK_CD2X_PIPE_NONE; + + expected = (cdctl & BXT_CDCLK_CD2X_DIV_SEL_MASK) | + skl_cdclk_decimal(dev_priv->cdclk.hw.cdclk); + /* + * Disable SSA Precharge when CD clock frequency < 500 MHz, + * enable otherwise. + */ + if (dev_priv->cdclk.hw.cdclk >= 500000) + expected |= BXT_CDCLK_SSA_PRECHARGE_ENABLE; + + if (cdctl == expected) + /* All well; nothing to sanitize */ + return; + +sanitize: + DRM_DEBUG_KMS("Sanitizing cdclk programmed by pre-os\n"); + + /* force cdclk programming */ + dev_priv->cdclk.hw.cdclk = 0; + + /* force full PLL disable + enable */ + dev_priv->cdclk.hw.vco = -1; +} + +/** + * bxt_init_cdclk - Initialize CDCLK on BXT + * @dev_priv: i915 device + * + * Initialize CDCLK for BXT and derivatives. This is generally + * done only during the display core initialization sequence, + * after which the DMC will take care of turning CDCLK off/on + * as needed. + */ +void bxt_init_cdclk(struct drm_i915_private *dev_priv) +{ + struct intel_cdclk_state cdclk_state; + + bxt_sanitize_cdclk(dev_priv); + + if (dev_priv->cdclk.hw.cdclk != 0 && + dev_priv->cdclk.hw.vco != 0) + return; + + cdclk_state = dev_priv->cdclk.hw; + + /* + * FIXME: + * - The initial CDCLK needs to be read from VBT. + * Need to make this change after VBT has changes for BXT. + */ + if (IS_GEMINILAKE(dev_priv)) { + cdclk_state.cdclk = glk_calc_cdclk(0); + cdclk_state.vco = glk_de_pll_vco(dev_priv, cdclk_state.cdclk); + } else { + cdclk_state.cdclk = bxt_calc_cdclk(0); + cdclk_state.vco = bxt_de_pll_vco(dev_priv, cdclk_state.cdclk); + } + + bxt_set_cdclk(dev_priv, &cdclk_state); +} + +/** + * bxt_uninit_cdclk - Uninitialize CDCLK on BXT + * @dev_priv: i915 device + * + * Uninitialize CDCLK for BXT and derivatives. This is done only + * during the display core uninitialization sequence. + */ +void bxt_uninit_cdclk(struct drm_i915_private *dev_priv) +{ + struct intel_cdclk_state cdclk_state = dev_priv->cdclk.hw; + + cdclk_state.cdclk = cdclk_state.ref; + cdclk_state.vco = 0; + + bxt_set_cdclk(dev_priv, &cdclk_state); +} + +/** + * intel_cdclk_state_compare - Determine if two CDCLK states differ + * @a: first CDCLK state + * @b: second CDCLK state + * + * Returns: + * True if the CDCLK states are identical, false if they differ. + */ +bool intel_cdclk_state_compare(const struct intel_cdclk_state *a, + const struct intel_cdclk_state *b) +{ + return memcmp(a, b, sizeof(*a)) == 0; +} + +/** + * intel_set_cdclk - Push the CDCLK state to the hardware + * @dev_priv: i915 device + * @cdclk_state: new CDCLK state + * + * Program the hardware based on the passed in CDCLK state, + * if necessary. + */ +void intel_set_cdclk(struct drm_i915_private *dev_priv, + const struct intel_cdclk_state *cdclk_state) +{ + if (intel_cdclk_state_compare(&dev_priv->cdclk.hw, cdclk_state)) + return; + + if (WARN_ON_ONCE(!dev_priv->display.set_cdclk)) + return; + + DRM_DEBUG_DRIVER("Changing CDCLK to %d kHz, VCO %d kHz, ref %d kHz\n", + cdclk_state->cdclk, cdclk_state->vco, + cdclk_state->ref); + + dev_priv->display.set_cdclk(dev_priv, cdclk_state); +} + +static int bdw_adjust_min_pipe_pixel_rate(struct intel_crtc_state *crtc_state, + int pixel_rate) +{ + struct drm_i915_private *dev_priv = + to_i915(crtc_state->base.crtc->dev); + + /* pixel rate mustn't exceed 95% of cdclk with IPS on BDW */ + if (IS_BROADWELL(dev_priv) && crtc_state->ips_enabled) + pixel_rate = DIV_ROUND_UP(pixel_rate * 100, 95); + + /* BSpec says "Do not use DisplayPort with CDCLK less than + * 432 MHz, audio enabled, port width x4, and link rate + * HBR2 (5.4 GHz), or else there may be audio corruption or + * screen corruption." + */ + if (intel_crtc_has_dp_encoder(crtc_state) && + crtc_state->has_audio && + crtc_state->port_clock >= 540000 && + crtc_state->lane_count == 4) + pixel_rate = max(432000, pixel_rate); + + return pixel_rate; +} + +/* compute the max rate for new configuration */ +static int intel_max_pixel_rate(struct drm_atomic_state *state) +{ + struct intel_atomic_state *intel_state = to_intel_atomic_state(state); + struct drm_i915_private *dev_priv = to_i915(state->dev); + struct drm_crtc *crtc; + struct drm_crtc_state *cstate; + struct intel_crtc_state *crtc_state; + unsigned int max_pixel_rate = 0, i; + enum pipe pipe; + + memcpy(intel_state->min_pixclk, dev_priv->min_pixclk, + sizeof(intel_state->min_pixclk)); + + for_each_crtc_in_state(state, crtc, cstate, i) { + int pixel_rate; + + crtc_state = to_intel_crtc_state(cstate); + if (!crtc_state->base.enable) { + intel_state->min_pixclk[i] = 0; + continue; + } + + pixel_rate = crtc_state->pixel_rate; + + if (IS_BROADWELL(dev_priv) || IS_GEN9(dev_priv)) + pixel_rate = + bdw_adjust_min_pipe_pixel_rate(crtc_state, + pixel_rate); + + intel_state->min_pixclk[i] = pixel_rate; + } + + for_each_pipe(dev_priv, pipe) + max_pixel_rate = max(intel_state->min_pixclk[pipe], + max_pixel_rate); + + return max_pixel_rate; +} + +static int vlv_modeset_calc_cdclk(struct drm_atomic_state *state) +{ + struct drm_i915_private *dev_priv = to_i915(state->dev); + int max_pixclk = intel_max_pixel_rate(state); + struct intel_atomic_state *intel_state = + to_intel_atomic_state(state); + int cdclk; + + cdclk = vlv_calc_cdclk(dev_priv, max_pixclk); + + if (cdclk > dev_priv->max_cdclk_freq) { + DRM_DEBUG_KMS("requested cdclk (%d kHz) exceeds max (%d kHz)\n", + cdclk, dev_priv->max_cdclk_freq); + return -EINVAL; + } + + intel_state->cdclk.logical.cdclk = cdclk; + + if (!intel_state->active_crtcs) { + cdclk = vlv_calc_cdclk(dev_priv, 0); + + intel_state->cdclk.actual.cdclk = cdclk; + } else { + intel_state->cdclk.actual = + intel_state->cdclk.logical; + } + + return 0; +} + +static int bdw_modeset_calc_cdclk(struct drm_atomic_state *state) +{ + struct drm_i915_private *dev_priv = to_i915(state->dev); + struct intel_atomic_state *intel_state = to_intel_atomic_state(state); + int max_pixclk = intel_max_pixel_rate(state); + int cdclk; + + /* + * FIXME should also account for plane ratio + * once 64bpp pixel formats are supported. + */ + cdclk = bdw_calc_cdclk(max_pixclk); + + if (cdclk > dev_priv->max_cdclk_freq) { + DRM_DEBUG_KMS("requested cdclk (%d kHz) exceeds max (%d kHz)\n", + cdclk, dev_priv->max_cdclk_freq); + return -EINVAL; + } + + intel_state->cdclk.logical.cdclk = cdclk; + + if (!intel_state->active_crtcs) { + cdclk = bdw_calc_cdclk(0); + + intel_state->cdclk.actual.cdclk = cdclk; + } else { + intel_state->cdclk.actual = + intel_state->cdclk.logical; + } + + return 0; +} + +static int skl_modeset_calc_cdclk(struct drm_atomic_state *state) +{ + struct intel_atomic_state *intel_state = to_intel_atomic_state(state); + struct drm_i915_private *dev_priv = to_i915(state->dev); + const int max_pixclk = intel_max_pixel_rate(state); + int cdclk, vco; + + vco = intel_state->cdclk.logical.vco; + if (!vco) + vco = dev_priv->skl_preferred_vco_freq; + + /* + * FIXME should also account for plane ratio + * once 64bpp pixel formats are supported. + */ + cdclk = skl_calc_cdclk(max_pixclk, vco); + + if (cdclk > dev_priv->max_cdclk_freq) { + DRM_DEBUG_KMS("requested cdclk (%d kHz) exceeds max (%d kHz)\n", + cdclk, dev_priv->max_cdclk_freq); + return -EINVAL; + } + + intel_state->cdclk.logical.vco = vco; + intel_state->cdclk.logical.cdclk = cdclk; + + if (!intel_state->active_crtcs) { + cdclk = skl_calc_cdclk(0, vco); + + intel_state->cdclk.actual.vco = vco; + intel_state->cdclk.actual.cdclk = cdclk; + } else { + intel_state->cdclk.actual = + intel_state->cdclk.logical; + } + + return 0; +} + +static int bxt_modeset_calc_cdclk(struct drm_atomic_state *state) +{ + struct drm_i915_private *dev_priv = to_i915(state->dev); + int max_pixclk = intel_max_pixel_rate(state); + struct intel_atomic_state *intel_state = + to_intel_atomic_state(state); + int cdclk, vco; + + if (IS_GEMINILAKE(dev_priv)) { + cdclk = glk_calc_cdclk(max_pixclk); + vco = glk_de_pll_vco(dev_priv, cdclk); + } else { + cdclk = bxt_calc_cdclk(max_pixclk); + vco = bxt_de_pll_vco(dev_priv, cdclk); + } + + if (cdclk > dev_priv->max_cdclk_freq) { + DRM_DEBUG_KMS("requested cdclk (%d kHz) exceeds max (%d kHz)\n", + cdclk, dev_priv->max_cdclk_freq); + return -EINVAL; + } + + intel_state->cdclk.logical.vco = vco; + intel_state->cdclk.logical.cdclk = cdclk; + + if (!intel_state->active_crtcs) { + if (IS_GEMINILAKE(dev_priv)) { + cdclk = glk_calc_cdclk(0); + vco = glk_de_pll_vco(dev_priv, cdclk); + } else { + cdclk = bxt_calc_cdclk(0); + vco = bxt_de_pll_vco(dev_priv, cdclk); + } + + intel_state->cdclk.actual.vco = vco; + intel_state->cdclk.actual.cdclk = cdclk; + } else { + intel_state->cdclk.actual = + intel_state->cdclk.logical; + } + + return 0; +} + +static int intel_compute_max_dotclk(struct drm_i915_private *dev_priv) +{ + int max_cdclk_freq = dev_priv->max_cdclk_freq; + + if (IS_GEMINILAKE(dev_priv)) + return 2 * max_cdclk_freq; + else if (INTEL_INFO(dev_priv)->gen >= 9 || + IS_HASWELL(dev_priv) || IS_BROADWELL(dev_priv)) + return max_cdclk_freq; + else if (IS_CHERRYVIEW(dev_priv)) + return max_cdclk_freq*95/100; + else if (INTEL_INFO(dev_priv)->gen < 4) + return 2*max_cdclk_freq*90/100; + else + return max_cdclk_freq*90/100; +} + +/** + * intel_update_max_cdclk - Determine the maximum support CDCLK frequency + * @dev_priv: i915 device + * + * Determine the maximum CDCLK frequency the platform supports, and also + * derive the maximum dot clock frequency the maximum CDCLK frequency + * allows. + */ +void intel_update_max_cdclk(struct drm_i915_private *dev_priv) +{ + if (IS_GEN9_BC(dev_priv)) { + u32 limit = I915_READ(SKL_DFSM) & SKL_DFSM_CDCLK_LIMIT_MASK; + int max_cdclk, vco; + + vco = dev_priv->skl_preferred_vco_freq; + WARN_ON(vco != 8100000 && vco != 8640000); + + /* + * Use the lower (vco 8640) cdclk values as a + * first guess. skl_calc_cdclk() will correct it + * if the preferred vco is 8100 instead. + */ + if (limit == SKL_DFSM_CDCLK_LIMIT_675) + max_cdclk = 617143; + else if (limit == SKL_DFSM_CDCLK_LIMIT_540) + max_cdclk = 540000; + else if (limit == SKL_DFSM_CDCLK_LIMIT_450) + max_cdclk = 432000; + else + max_cdclk = 308571; + + dev_priv->max_cdclk_freq = skl_calc_cdclk(max_cdclk, vco); + } else if (IS_GEMINILAKE(dev_priv)) { + dev_priv->max_cdclk_freq = 316800; + } else if (IS_BROXTON(dev_priv)) { + dev_priv->max_cdclk_freq = 624000; + } else if (IS_BROADWELL(dev_priv)) { + /* + * FIXME with extra cooling we can allow + * 540 MHz for ULX and 675 Mhz for ULT. + * How can we know if extra cooling is + * available? PCI ID, VTB, something else? + */ + if (I915_READ(FUSE_STRAP) & HSW_CDCLK_LIMIT) + dev_priv->max_cdclk_freq = 450000; + else if (IS_BDW_ULX(dev_priv)) + dev_priv->max_cdclk_freq = 450000; + else if (IS_BDW_ULT(dev_priv)) + dev_priv->max_cdclk_freq = 540000; + else + dev_priv->max_cdclk_freq = 675000; + } else if (IS_CHERRYVIEW(dev_priv)) { + dev_priv->max_cdclk_freq = 320000; + } else if (IS_VALLEYVIEW(dev_priv)) { + dev_priv->max_cdclk_freq = 400000; + } else { + /* otherwise assume cdclk is fixed */ + dev_priv->max_cdclk_freq = dev_priv->cdclk.hw.cdclk; + } + + dev_priv->max_dotclk_freq = intel_compute_max_dotclk(dev_priv); + + DRM_DEBUG_DRIVER("Max CD clock rate: %d kHz\n", + dev_priv->max_cdclk_freq); + + DRM_DEBUG_DRIVER("Max dotclock rate: %d kHz\n", + dev_priv->max_dotclk_freq); +} + +/** + * intel_update_cdclk - Determine the current CDCLK frequency + * @dev_priv: i915 device + * + * Determine the current CDCLK frequency. + */ +void intel_update_cdclk(struct drm_i915_private *dev_priv) +{ + dev_priv->display.get_cdclk(dev_priv, &dev_priv->cdclk.hw); + + DRM_DEBUG_DRIVER("Current CD clock rate: %d kHz, VCO: %d kHz, ref: %d kHz\n", + dev_priv->cdclk.hw.cdclk, dev_priv->cdclk.hw.vco, + dev_priv->cdclk.hw.ref); + + /* + * 9:0 CMBUS [sic] CDCLK frequency (cdfreq): + * Programmng [sic] note: bit[9:2] should be programmed to the number + * of cdclk that generates 4MHz reference clock freq which is used to + * generate GMBus clock. This will vary with the cdclk freq. + */ + if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) + I915_WRITE(GMBUSFREQ_VLV, + DIV_ROUND_UP(dev_priv->cdclk.hw.cdclk, 1000)); +} + +static int pch_rawclk(struct drm_i915_private *dev_priv) +{ + return (I915_READ(PCH_RAWCLK_FREQ) & RAWCLK_FREQ_MASK) * 1000; +} + +static int vlv_hrawclk(struct drm_i915_private *dev_priv) +{ + /* RAWCLK_FREQ_VLV register updated from power well code */ + return vlv_get_cck_clock_hpll(dev_priv, "hrawclk", + CCK_DISPLAY_REF_CLOCK_CONTROL); +} + +static int g4x_hrawclk(struct drm_i915_private *dev_priv) +{ + uint32_t clkcfg; + + /* hrawclock is 1/4 the FSB frequency */ + clkcfg = I915_READ(CLKCFG); + switch (clkcfg & CLKCFG_FSB_MASK) { + case CLKCFG_FSB_400: + return 100000; + case CLKCFG_FSB_533: + return 133333; + case CLKCFG_FSB_667: + return 166667; + case CLKCFG_FSB_800: + return 200000; + case CLKCFG_FSB_1067: + return 266667; + case CLKCFG_FSB_1333: + return 333333; + /* these two are just a guess; one of them might be right */ + case CLKCFG_FSB_1600: + case CLKCFG_FSB_1600_ALT: + return 400000; + default: + return 133333; + } +} + +/** + * intel_update_rawclk - Determine the current RAWCLK frequency + * @dev_priv: i915 device + * + * Determine the current RAWCLK frequency. RAWCLK is a fixed + * frequency clock so this needs to done only once. + */ +void intel_update_rawclk(struct drm_i915_private *dev_priv) +{ + if (HAS_PCH_SPLIT(dev_priv)) + dev_priv->rawclk_freq = pch_rawclk(dev_priv); + else if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) + dev_priv->rawclk_freq = vlv_hrawclk(dev_priv); + else if (IS_G4X(dev_priv) || IS_PINEVIEW(dev_priv)) + dev_priv->rawclk_freq = g4x_hrawclk(dev_priv); + else + /* no rawclk on other platforms, or no need to know it */ + return; + + DRM_DEBUG_DRIVER("rawclk rate: %d kHz\n", dev_priv->rawclk_freq); +} + +/** + * intel_init_cdclk_hooks - Initialize CDCLK related modesetting hooks + * @dev_priv: i915 device + */ +void intel_init_cdclk_hooks(struct drm_i915_private *dev_priv) +{ + if (IS_CHERRYVIEW(dev_priv)) { + dev_priv->display.set_cdclk = chv_set_cdclk; + dev_priv->display.modeset_calc_cdclk = + vlv_modeset_calc_cdclk; + } else if (IS_VALLEYVIEW(dev_priv)) { + dev_priv->display.set_cdclk = vlv_set_cdclk; + dev_priv->display.modeset_calc_cdclk = + vlv_modeset_calc_cdclk; + } else if (IS_BROADWELL(dev_priv)) { + dev_priv->display.set_cdclk = bdw_set_cdclk; + dev_priv->display.modeset_calc_cdclk = + bdw_modeset_calc_cdclk; + } else if (IS_GEN9_LP(dev_priv)) { + dev_priv->display.set_cdclk = bxt_set_cdclk; + dev_priv->display.modeset_calc_cdclk = + bxt_modeset_calc_cdclk; + } else if (IS_GEN9_BC(dev_priv)) { + dev_priv->display.set_cdclk = skl_set_cdclk; + dev_priv->display.modeset_calc_cdclk = + skl_modeset_calc_cdclk; + } + + if (IS_GEN9_BC(dev_priv)) + dev_priv->display.get_cdclk = skl_get_cdclk; + else if (IS_GEN9_LP(dev_priv)) + dev_priv->display.get_cdclk = bxt_get_cdclk; + else if (IS_BROADWELL(dev_priv)) + dev_priv->display.get_cdclk = bdw_get_cdclk; + else if (IS_HASWELL(dev_priv)) + dev_priv->display.get_cdclk = hsw_get_cdclk; + else if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) + dev_priv->display.get_cdclk = vlv_get_cdclk; + else if (IS_GEN6(dev_priv) || IS_IVYBRIDGE(dev_priv)) + dev_priv->display.get_cdclk = fixed_400mhz_get_cdclk; + else if (IS_GEN5(dev_priv)) + dev_priv->display.get_cdclk = fixed_450mhz_get_cdclk; + else if (IS_GM45(dev_priv)) + dev_priv->display.get_cdclk = gm45_get_cdclk; + else if (IS_G4X(dev_priv)) + dev_priv->display.get_cdclk = g33_get_cdclk; + else if (IS_I965GM(dev_priv)) + dev_priv->display.get_cdclk = i965gm_get_cdclk; + else if (IS_I965G(dev_priv)) + dev_priv->display.get_cdclk = fixed_400mhz_get_cdclk; + else if (IS_PINEVIEW(dev_priv)) + dev_priv->display.get_cdclk = pnv_get_cdclk; + else if (IS_G33(dev_priv)) + dev_priv->display.get_cdclk = g33_get_cdclk; + else if (IS_I945GM(dev_priv)) + dev_priv->display.get_cdclk = i945gm_get_cdclk; + else if (IS_I945G(dev_priv)) + dev_priv->display.get_cdclk = fixed_400mhz_get_cdclk; + else if (IS_I915GM(dev_priv)) + dev_priv->display.get_cdclk = i915gm_get_cdclk; + else if (IS_I915G(dev_priv)) + dev_priv->display.get_cdclk = fixed_333mhz_get_cdclk; + else if (IS_I865G(dev_priv)) + dev_priv->display.get_cdclk = fixed_266mhz_get_cdclk; + else if (IS_I85X(dev_priv)) + dev_priv->display.get_cdclk = i85x_get_cdclk; + else if (IS_I845G(dev_priv)) + dev_priv->display.get_cdclk = fixed_200mhz_get_cdclk; + else { /* 830 */ + WARN(!IS_I830(dev_priv), + "Unknown platform. Assuming 133 MHz CDCLK\n"); + dev_priv->display.get_cdclk = fixed_133mhz_get_cdclk; + } +} diff --git a/drivers/gpu/drm/i915/intel_color.c b/drivers/gpu/drm/i915/intel_color.c index d81232b79f00..b9e5266d933b 100644 --- a/drivers/gpu/drm/i915/intel_color.c +++ b/drivers/gpu/drm/i915/intel_color.c @@ -340,20 +340,12 @@ static void haswell_load_luts(struct drm_crtc_state *crtc_state) hsw_enable_ips(intel_crtc); } -/* Loads the palette/gamma unit for the CRTC on Broadwell+. */ -static void broadwell_load_luts(struct drm_crtc_state *state) +static void bdw_load_degamma_lut(struct drm_crtc_state *state) { - struct drm_crtc *crtc = state->crtc; - struct drm_i915_private *dev_priv = to_i915(crtc->dev); - struct intel_crtc_state *intel_state = to_intel_crtc_state(state); - enum pipe pipe = to_intel_crtc(crtc)->pipe; + struct drm_i915_private *dev_priv = to_i915(state->crtc->dev); + enum pipe pipe = to_intel_crtc(state->crtc)->pipe; uint32_t i, lut_size = INTEL_INFO(dev_priv)->color.degamma_lut_size; - if (crtc_state_is_legacy(state)) { - haswell_load_luts(state); - return; - } - I915_WRITE(PREC_PAL_INDEX(pipe), PAL_PREC_SPLIT_MODE | PAL_PREC_AUTO_INCREMENT); @@ -377,6 +369,20 @@ static void broadwell_load_luts(struct drm_crtc_state *state) (v << 20) | (v << 10) | v); } } +} + +static void bdw_load_gamma_lut(struct drm_crtc_state *state, u32 offset) +{ + struct drm_i915_private *dev_priv = to_i915(state->crtc->dev); + enum pipe pipe = to_intel_crtc(state->crtc)->pipe; + uint32_t i, lut_size = INTEL_INFO(dev_priv)->color.gamma_lut_size; + + WARN_ON(offset & ~PAL_PREC_INDEX_VALUE_MASK); + + I915_WRITE(PREC_PAL_INDEX(pipe), + (offset ? PAL_PREC_SPLIT_MODE : 0) | + PAL_PREC_AUTO_INCREMENT | + offset); if (state->gamma_lut) { struct drm_color_lut *lut = @@ -410,6 +416,23 @@ static void broadwell_load_luts(struct drm_crtc_state *state) I915_WRITE(PREC_PAL_GC_MAX(pipe, 1), (1 << 16) - 1); I915_WRITE(PREC_PAL_GC_MAX(pipe, 2), (1 << 16) - 1); } +} + +/* Loads the palette/gamma unit for the CRTC on Broadwell+. */ +static void broadwell_load_luts(struct drm_crtc_state *state) +{ + struct drm_i915_private *dev_priv = to_i915(state->crtc->dev); + struct intel_crtc_state *intel_state = to_intel_crtc_state(state); + enum pipe pipe = to_intel_crtc(state->crtc)->pipe; + + if (crtc_state_is_legacy(state)) { + haswell_load_luts(state); + return; + } + + bdw_load_degamma_lut(state); + bdw_load_gamma_lut(state, + INTEL_INFO(dev_priv)->color.degamma_lut_size); intel_state->gamma_mode = GAMMA_MODE_MODE_SPLIT; I915_WRITE(GAMMA_MODE(pipe), GAMMA_MODE_MODE_SPLIT); @@ -422,6 +445,58 @@ static void broadwell_load_luts(struct drm_crtc_state *state) I915_WRITE(PREC_PAL_INDEX(pipe), 0); } +static void glk_load_degamma_lut(struct drm_crtc_state *state) +{ + struct drm_i915_private *dev_priv = to_i915(state->crtc->dev); + enum pipe pipe = to_intel_crtc(state->crtc)->pipe; + const uint32_t lut_size = 33; + uint32_t i; + + /* + * When setting the auto-increment bit, the hardware seems to + * ignore the index bits, so we need to reset it to index 0 + * separately. + */ + I915_WRITE(PRE_CSC_GAMC_INDEX(pipe), 0); + I915_WRITE(PRE_CSC_GAMC_INDEX(pipe), PRE_CSC_GAMC_AUTO_INCREMENT); + + /* + * FIXME: The pipe degamma table in geminilake doesn't support + * different values per channel, so this just loads a linear table. + */ + for (i = 0; i < lut_size; i++) { + uint32_t v = (i * ((1 << 16) - 1)) / (lut_size - 1); + + I915_WRITE(PRE_CSC_GAMC_DATA(pipe), v); + } + + /* Clamp values > 1.0. */ + while (i++ < 35) + I915_WRITE(PRE_CSC_GAMC_DATA(pipe), (1 << 16) - 1); +} + +static void glk_load_luts(struct drm_crtc_state *state) +{ + struct drm_crtc *crtc = state->crtc; + struct drm_device *dev = crtc->dev; + struct drm_i915_private *dev_priv = to_i915(dev); + struct intel_crtc_state *intel_state = to_intel_crtc_state(state); + enum pipe pipe = to_intel_crtc(crtc)->pipe; + + glk_load_degamma_lut(state); + + if (crtc_state_is_legacy(state)) { + haswell_load_luts(state); + return; + } + + bdw_load_gamma_lut(state, 0); + + intel_state->gamma_mode = GAMMA_MODE_MODE_10BIT; + I915_WRITE(GAMMA_MODE(pipe), GAMMA_MODE_MODE_10BIT); + POSTING_READ(GAMMA_MODE(pipe)); +} + /* Loads the palette/gamma unit for the CRTC on CherryView. */ static void cherryview_load_luts(struct drm_crtc_state *state) { @@ -536,10 +611,13 @@ void intel_color_init(struct drm_crtc *crtc) } else if (IS_HASWELL(dev_priv)) { dev_priv->display.load_csc_matrix = i9xx_load_csc_matrix; dev_priv->display.load_luts = haswell_load_luts; - } else if (IS_BROADWELL(dev_priv) || IS_SKYLAKE(dev_priv) || - IS_BROXTON(dev_priv) || IS_KABYLAKE(dev_priv)) { + } else if (IS_BROADWELL(dev_priv) || IS_GEN9_BC(dev_priv) || + IS_BROXTON(dev_priv)) { dev_priv->display.load_csc_matrix = i9xx_load_csc_matrix; dev_priv->display.load_luts = broadwell_load_luts; + } else if (IS_GEMINILAKE(dev_priv)) { + dev_priv->display.load_csc_matrix = i9xx_load_csc_matrix; + dev_priv->display.load_luts = glk_load_luts; } else { dev_priv->display.load_luts = i9xx_load_luts; } diff --git a/drivers/gpu/drm/i915/intel_crt.c b/drivers/gpu/drm/i915/intel_crt.c index 2bf5aca6e37c..8c82607294c6 100644 --- a/drivers/gpu/drm/i915/intel_crt.c +++ b/drivers/gpu/drm/i915/intel_crt.c @@ -69,12 +69,11 @@ static bool intel_crt_get_hw_state(struct intel_encoder *encoder, struct drm_device *dev = encoder->base.dev; struct drm_i915_private *dev_priv = to_i915(dev); struct intel_crt *crt = intel_encoder_to_crt(encoder); - enum intel_display_power_domain power_domain; u32 tmp; bool ret; - power_domain = intel_display_port_power_domain(encoder); - if (!intel_display_power_get_if_enabled(dev_priv, power_domain)) + if (!intel_display_power_get_if_enabled(dev_priv, + encoder->power_domain)) return false; ret = false; @@ -91,7 +90,7 @@ static bool intel_crt_get_hw_state(struct intel_encoder *encoder, ret = true; out: - intel_display_power_put(dev_priv, power_domain); + intel_display_power_put(dev_priv, encoder->power_domain); return ret; } @@ -676,7 +675,6 @@ intel_crt_detect(struct drm_connector *connector, bool force) struct drm_i915_private *dev_priv = to_i915(connector->dev); struct intel_crt *crt = intel_attached_crt(connector); struct intel_encoder *intel_encoder = &crt->base; - enum intel_display_power_domain power_domain; enum drm_connector_status status; struct intel_load_detect_pipe tmp; struct drm_modeset_acquire_ctx ctx; @@ -689,8 +687,7 @@ intel_crt_detect(struct drm_connector *connector, bool force) if (dmi_check_system(intel_spurious_crt_detect)) return connector_status_disconnected; - power_domain = intel_display_port_power_domain(intel_encoder); - intel_display_power_get(dev_priv, power_domain); + intel_display_power_get(dev_priv, intel_encoder->power_domain); if (I915_HAS_HOTPLUG(dev_priv)) { /* We can not rely on the HPD pin always being correctly wired @@ -745,7 +742,7 @@ intel_crt_detect(struct drm_connector *connector, bool force) drm_modeset_acquire_fini(&ctx); out: - intel_display_power_put(dev_priv, power_domain); + intel_display_power_put(dev_priv, intel_encoder->power_domain); return status; } @@ -761,12 +758,10 @@ static int intel_crt_get_modes(struct drm_connector *connector) struct drm_i915_private *dev_priv = to_i915(dev); struct intel_crt *crt = intel_attached_crt(connector); struct intel_encoder *intel_encoder = &crt->base; - enum intel_display_power_domain power_domain; int ret; struct i2c_adapter *i2c; - power_domain = intel_display_port_power_domain(intel_encoder); - intel_display_power_get(dev_priv, power_domain); + intel_display_power_get(dev_priv, intel_encoder->power_domain); i2c = intel_gmbus_get_adapter(dev_priv, dev_priv->vbt.crt_ddc_pin); ret = intel_crt_ddc_get_modes(connector, i2c); @@ -778,7 +773,7 @@ static int intel_crt_get_modes(struct drm_connector *connector) ret = intel_crt_ddc_get_modes(connector, i2c); out: - intel_display_power_put(dev_priv, power_domain); + intel_display_power_put(dev_priv, intel_encoder->power_domain); return ret; } @@ -904,6 +899,8 @@ void intel_crt_init(struct drm_i915_private *dev_priv) crt->adpa_reg = adpa_reg; + crt->base.power_domain = POWER_DOMAIN_PORT_CRT; + crt->base.compute_config = intel_crt_compute_config; if (HAS_PCH_SPLIT(dev_priv)) { crt->base.disable = pch_disable_crt; diff --git a/drivers/gpu/drm/i915/intel_csr.c b/drivers/gpu/drm/i915/intel_csr.c index 0085bc745f6a..14659c7e2858 100644 --- a/drivers/gpu/drm/i915/intel_csr.c +++ b/drivers/gpu/drm/i915/intel_csr.c @@ -34,9 +34,9 @@ * low-power state and comes back to normal. */ -#define I915_CSR_GLK "i915/glk_dmc_ver1_01.bin" +#define I915_CSR_GLK "i915/glk_dmc_ver1_03.bin" MODULE_FIRMWARE(I915_CSR_GLK); -#define GLK_CSR_VERSION_REQUIRED CSR_VERSION(1, 1) +#define GLK_CSR_VERSION_REQUIRED CSR_VERSION(1, 3) #define I915_CSR_KBL "i915/kbl_dmc_ver1_01.bin" MODULE_FIRMWARE(I915_CSR_KBL); @@ -396,13 +396,11 @@ static void csr_load_work_fn(struct work_struct *work) struct drm_i915_private *dev_priv; struct intel_csr *csr; const struct firmware *fw = NULL; - int ret; dev_priv = container_of(work, typeof(*dev_priv), csr.work); csr = &dev_priv->csr; - ret = request_firmware(&fw, dev_priv->csr.fw_path, - &dev_priv->drm.pdev->dev); + request_firmware(&fw, dev_priv->csr.fw_path, &dev_priv->drm.pdev->dev); if (fw) dev_priv->csr.dmc_payload = parse_csr_fw(dev_priv, fw); diff --git a/drivers/gpu/drm/i915/intel_ddi.c b/drivers/gpu/drm/i915/intel_ddi.c index 66b367d0771a..a7c08d7027fa 100644 --- a/drivers/gpu/drm/i915/intel_ddi.c +++ b/drivers/gpu/drm/i915/intel_ddi.c @@ -34,6 +34,19 @@ struct ddi_buf_trans { u8 i_boost; /* SKL: I_boost; valid: 0x0, 0x1, 0x3, 0x7 */ }; +static const u8 index_to_dp_signal_levels[] = { + [0] = DP_TRAIN_VOLTAGE_SWING_LEVEL_0 | DP_TRAIN_PRE_EMPH_LEVEL_0, + [1] = DP_TRAIN_VOLTAGE_SWING_LEVEL_0 | DP_TRAIN_PRE_EMPH_LEVEL_1, + [2] = DP_TRAIN_VOLTAGE_SWING_LEVEL_0 | DP_TRAIN_PRE_EMPH_LEVEL_2, + [3] = DP_TRAIN_VOLTAGE_SWING_LEVEL_0 | DP_TRAIN_PRE_EMPH_LEVEL_3, + [4] = DP_TRAIN_VOLTAGE_SWING_LEVEL_1 | DP_TRAIN_PRE_EMPH_LEVEL_0, + [5] = DP_TRAIN_VOLTAGE_SWING_LEVEL_1 | DP_TRAIN_PRE_EMPH_LEVEL_1, + [6] = DP_TRAIN_VOLTAGE_SWING_LEVEL_1 | DP_TRAIN_PRE_EMPH_LEVEL_2, + [7] = DP_TRAIN_VOLTAGE_SWING_LEVEL_2 | DP_TRAIN_PRE_EMPH_LEVEL_0, + [8] = DP_TRAIN_VOLTAGE_SWING_LEVEL_2 | DP_TRAIN_PRE_EMPH_LEVEL_1, + [9] = DP_TRAIN_VOLTAGE_SWING_LEVEL_3 | DP_TRAIN_PRE_EMPH_LEVEL_0, +}; + /* HDMI/DVI modes ignore everything but the last 2 items. So we share * them for both DP and FDI transports, allowing those ports to * automatically adapt to HDMI connections as well @@ -445,7 +458,7 @@ static int intel_ddi_hdmi_level(struct drm_i915_private *dev_priv, enum port por if (IS_GEN9_LP(dev_priv)) return hdmi_level; - if (IS_SKYLAKE(dev_priv) || IS_KABYLAKE(dev_priv)) { + if (IS_GEN9_BC(dev_priv)) { skl_get_buf_trans_hdmi(dev_priv, &n_hdmi_entries); hdmi_default_entry = 8; } else if (IS_BROADWELL(dev_priv)) { @@ -468,6 +481,59 @@ static int intel_ddi_hdmi_level(struct drm_i915_private *dev_priv, enum port por return hdmi_level; } +static const struct ddi_buf_trans * +intel_ddi_get_buf_trans_dp(struct drm_i915_private *dev_priv, + int *n_entries) +{ + if (IS_KABYLAKE(dev_priv)) { + return kbl_get_buf_trans_dp(dev_priv, n_entries); + } else if (IS_SKYLAKE(dev_priv)) { + return skl_get_buf_trans_dp(dev_priv, n_entries); + } else if (IS_BROADWELL(dev_priv)) { + *n_entries = ARRAY_SIZE(bdw_ddi_translations_dp); + return bdw_ddi_translations_dp; + } else if (IS_HASWELL(dev_priv)) { + *n_entries = ARRAY_SIZE(hsw_ddi_translations_dp); + return hsw_ddi_translations_dp; + } + + *n_entries = 0; + return NULL; +} + +static const struct ddi_buf_trans * +intel_ddi_get_buf_trans_edp(struct drm_i915_private *dev_priv, + int *n_entries) +{ + if (IS_KABYLAKE(dev_priv) || IS_SKYLAKE(dev_priv)) { + return skl_get_buf_trans_edp(dev_priv, n_entries); + } else if (IS_BROADWELL(dev_priv)) { + return bdw_get_buf_trans_edp(dev_priv, n_entries); + } else if (IS_HASWELL(dev_priv)) { + *n_entries = ARRAY_SIZE(hsw_ddi_translations_dp); + return hsw_ddi_translations_dp; + } + + *n_entries = 0; + return NULL; +} + +static const struct ddi_buf_trans * +intel_ddi_get_buf_trans_fdi(struct drm_i915_private *dev_priv, + int *n_entries) +{ + if (IS_BROADWELL(dev_priv)) { + *n_entries = ARRAY_SIZE(hsw_ddi_translations_fdi); + return hsw_ddi_translations_fdi; + } else if (IS_HASWELL(dev_priv)) { + *n_entries = ARRAY_SIZE(hsw_ddi_translations_fdi); + return hsw_ddi_translations_fdi; + } + + *n_entries = 0; + return NULL; +} + /* * Starting with Haswell, DDI port buffers must be programmed with correct * values in advance. This function programs the correct values for @@ -477,76 +543,43 @@ void intel_prepare_dp_ddi_buffers(struct intel_encoder *encoder) { struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); u32 iboost_bit = 0; - int i, n_dp_entries, n_edp_entries, size; + int i, n_entries; enum port port = intel_ddi_get_encoder_port(encoder); - const struct ddi_buf_trans *ddi_translations_fdi; - const struct ddi_buf_trans *ddi_translations_dp; - const struct ddi_buf_trans *ddi_translations_edp; const struct ddi_buf_trans *ddi_translations; if (IS_GEN9_LP(dev_priv)) return; - if (IS_KABYLAKE(dev_priv)) { - ddi_translations_fdi = NULL; - ddi_translations_dp = - kbl_get_buf_trans_dp(dev_priv, &n_dp_entries); - ddi_translations_edp = - skl_get_buf_trans_edp(dev_priv, &n_edp_entries); - } else if (IS_SKYLAKE(dev_priv)) { - ddi_translations_fdi = NULL; - ddi_translations_dp = - skl_get_buf_trans_dp(dev_priv, &n_dp_entries); - ddi_translations_edp = - skl_get_buf_trans_edp(dev_priv, &n_edp_entries); - } else if (IS_BROADWELL(dev_priv)) { - ddi_translations_fdi = bdw_ddi_translations_fdi; - ddi_translations_dp = bdw_ddi_translations_dp; - ddi_translations_edp = bdw_get_buf_trans_edp(dev_priv, &n_edp_entries); - n_dp_entries = ARRAY_SIZE(bdw_ddi_translations_dp); - } else if (IS_HASWELL(dev_priv)) { - ddi_translations_fdi = hsw_ddi_translations_fdi; - ddi_translations_dp = hsw_ddi_translations_dp; - ddi_translations_edp = hsw_ddi_translations_dp; - n_dp_entries = n_edp_entries = ARRAY_SIZE(hsw_ddi_translations_dp); - } else { - WARN(1, "ddi translation table missing\n"); - ddi_translations_edp = bdw_ddi_translations_dp; - ddi_translations_fdi = bdw_ddi_translations_fdi; - ddi_translations_dp = bdw_ddi_translations_dp; - n_edp_entries = ARRAY_SIZE(bdw_ddi_translations_edp); - n_dp_entries = ARRAY_SIZE(bdw_ddi_translations_dp); - } - - if (IS_SKYLAKE(dev_priv) || IS_KABYLAKE(dev_priv)) { - /* If we're boosting the current, set bit 31 of trans1 */ - if (dev_priv->vbt.ddi_port_info[port].dp_boost_level) - iboost_bit = DDI_BUF_BALANCE_LEG_ENABLE; - - if (WARN_ON(encoder->type == INTEL_OUTPUT_EDP && - port != PORT_A && port != PORT_E && - n_edp_entries > 9)) - n_edp_entries = 9; - } - switch (encoder->type) { case INTEL_OUTPUT_EDP: - ddi_translations = ddi_translations_edp; - size = n_edp_entries; + ddi_translations = intel_ddi_get_buf_trans_edp(dev_priv, + &n_entries); break; case INTEL_OUTPUT_DP: - ddi_translations = ddi_translations_dp; - size = n_dp_entries; + ddi_translations = intel_ddi_get_buf_trans_dp(dev_priv, + &n_entries); break; case INTEL_OUTPUT_ANALOG: - ddi_translations = ddi_translations_fdi; - size = n_dp_entries; + ddi_translations = intel_ddi_get_buf_trans_fdi(dev_priv, + &n_entries); break; default: - BUG(); + MISSING_CASE(encoder->type); + return; + } + + if (IS_GEN9_BC(dev_priv)) { + /* If we're boosting the current, set bit 31 of trans1 */ + if (dev_priv->vbt.ddi_port_info[port].dp_boost_level) + iboost_bit = DDI_BUF_BALANCE_LEG_ENABLE; + + if (WARN_ON(encoder->type == INTEL_OUTPUT_EDP && + port != PORT_A && port != PORT_E && + n_entries > 9)) + n_entries = 9; } - for (i = 0; i < size; i++) { + for (i = 0; i < n_entries; i++) { I915_WRITE(DDI_BUF_TRANS_LO(port, i), ddi_translations[i].trans1 | iboost_bit); I915_WRITE(DDI_BUF_TRANS_HI(port, i), @@ -572,7 +605,7 @@ static void intel_prepare_hdmi_ddi_buffers(struct intel_encoder *encoder) hdmi_level = intel_ddi_hdmi_level(dev_priv, port); - if (IS_SKYLAKE(dev_priv) || IS_KABYLAKE(dev_priv)) { + if (IS_GEN9_BC(dev_priv)) { ddi_translations_hdmi = skl_get_buf_trans_hdmi(dev_priv, &n_hdmi_entries); /* If we're boosting the current, set bit 31 of trans1 */ @@ -1089,7 +1122,7 @@ void intel_ddi_clock_get(struct intel_encoder *encoder, if (INTEL_GEN(dev_priv) <= 8) hsw_ddi_clock_get(encoder, pipe_config); - else if (IS_SKYLAKE(dev_priv) || IS_KABYLAKE(dev_priv)) + else if (IS_GEN9_BC(dev_priv)) skl_ddi_clock_get(encoder, pipe_config); else if (IS_GEN9_LP(dev_priv)) bxt_ddi_clock_get(encoder, pipe_config); @@ -1150,7 +1183,7 @@ bool intel_ddi_pll_select(struct intel_crtc *intel_crtc, struct intel_encoder *intel_encoder = intel_ddi_get_crtc_new_encoder(crtc_state); - if (IS_SKYLAKE(dev_priv) || IS_KABYLAKE(dev_priv)) + if (IS_GEN9_BC(dev_priv)) return skl_ddi_pll_select(intel_crtc, crtc_state, intel_encoder); else if (IS_GEN9_LP(dev_priv)) @@ -1316,12 +1349,11 @@ bool intel_ddi_connector_get_hw_state(struct intel_connector *intel_connector) enum port port = intel_ddi_get_encoder_port(intel_encoder); enum pipe pipe = 0; enum transcoder cpu_transcoder; - enum intel_display_power_domain power_domain; uint32_t tmp; bool ret; - power_domain = intel_display_port_power_domain(intel_encoder); - if (!intel_display_power_get_if_enabled(dev_priv, power_domain)) + if (!intel_display_power_get_if_enabled(dev_priv, + intel_encoder->power_domain)) return false; if (!intel_encoder->get_hw_state(intel_encoder, &pipe)) { @@ -1363,7 +1395,7 @@ bool intel_ddi_connector_get_hw_state(struct intel_connector *intel_connector) } out: - intel_display_power_put(dev_priv, power_domain); + intel_display_power_put(dev_priv, intel_encoder->power_domain); return ret; } @@ -1374,13 +1406,12 @@ bool intel_ddi_get_hw_state(struct intel_encoder *encoder, struct drm_device *dev = encoder->base.dev; struct drm_i915_private *dev_priv = to_i915(dev); enum port port = intel_ddi_get_encoder_port(encoder); - enum intel_display_power_domain power_domain; u32 tmp; int i; bool ret; - power_domain = intel_display_port_power_domain(encoder); - if (!intel_display_power_get_if_enabled(dev_priv, power_domain)) + if (!intel_display_power_get_if_enabled(dev_priv, + encoder->power_domain)) return false; ret = false; @@ -1437,11 +1468,22 @@ out: "(PHY_CTL %08x)\n", port_name(port), tmp); } - intel_display_power_put(dev_priv, power_domain); + intel_display_power_put(dev_priv, encoder->power_domain); return ret; } +static u64 intel_ddi_get_power_domains(struct intel_encoder *encoder) +{ + struct intel_digital_port *dig_port = enc_to_dig_port(&encoder->base); + enum pipe pipe; + + if (intel_ddi_get_hw_state(encoder, &pipe)) + return BIT_ULL(dig_port->ddi_io_power_domain); + + return 0; +} + void intel_ddi_enable_pipe_clock(struct intel_crtc *intel_crtc) { struct drm_crtc *crtc = &intel_crtc->base; @@ -1582,50 +1624,38 @@ static void bxt_ddi_vswing_sequence(struct drm_i915_private *dev_priv, ddi_translations[level].deemphasis); } -static uint32_t translate_signal_level(int signal_levels) +u8 intel_ddi_dp_voltage_max(struct intel_encoder *encoder) { - uint32_t level; + struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); + int n_entries; - switch (signal_levels) { - default: - DRM_DEBUG_KMS("Unsupported voltage swing/pre-emphasis level: 0x%x\n", - signal_levels); - case DP_TRAIN_VOLTAGE_SWING_LEVEL_0 | DP_TRAIN_PRE_EMPH_LEVEL_0: - level = 0; - break; - case DP_TRAIN_VOLTAGE_SWING_LEVEL_0 | DP_TRAIN_PRE_EMPH_LEVEL_1: - level = 1; - break; - case DP_TRAIN_VOLTAGE_SWING_LEVEL_0 | DP_TRAIN_PRE_EMPH_LEVEL_2: - level = 2; - break; - case DP_TRAIN_VOLTAGE_SWING_LEVEL_0 | DP_TRAIN_PRE_EMPH_LEVEL_3: - level = 3; - break; + if (encoder->type == INTEL_OUTPUT_EDP) + intel_ddi_get_buf_trans_edp(dev_priv, &n_entries); + else + intel_ddi_get_buf_trans_dp(dev_priv, &n_entries); - case DP_TRAIN_VOLTAGE_SWING_LEVEL_1 | DP_TRAIN_PRE_EMPH_LEVEL_0: - level = 4; - break; - case DP_TRAIN_VOLTAGE_SWING_LEVEL_1 | DP_TRAIN_PRE_EMPH_LEVEL_1: - level = 5; - break; - case DP_TRAIN_VOLTAGE_SWING_LEVEL_1 | DP_TRAIN_PRE_EMPH_LEVEL_2: - level = 6; - break; + if (WARN_ON(n_entries < 1)) + n_entries = 1; + if (WARN_ON(n_entries > ARRAY_SIZE(index_to_dp_signal_levels))) + n_entries = ARRAY_SIZE(index_to_dp_signal_levels); - case DP_TRAIN_VOLTAGE_SWING_LEVEL_2 | DP_TRAIN_PRE_EMPH_LEVEL_0: - level = 7; - break; - case DP_TRAIN_VOLTAGE_SWING_LEVEL_2 | DP_TRAIN_PRE_EMPH_LEVEL_1: - level = 8; - break; + return index_to_dp_signal_levels[n_entries - 1] & + DP_TRAIN_VOLTAGE_SWING_MASK; +} - case DP_TRAIN_VOLTAGE_SWING_LEVEL_3 | DP_TRAIN_PRE_EMPH_LEVEL_0: - level = 9; - break; +static uint32_t translate_signal_level(int signal_levels) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(index_to_dp_signal_levels); i++) { + if (index_to_dp_signal_levels[i] == signal_levels) + return i; } - return level; + WARN(1, "Unsupported voltage swing/pre-emphasis level: 0x%x\n", + signal_levels); + + return 0; } uint32_t ddi_signal_levels(struct intel_dp *intel_dp) @@ -1641,7 +1671,7 @@ uint32_t ddi_signal_levels(struct intel_dp *intel_dp) level = translate_signal_level(signal_levels); - if (IS_SKYLAKE(dev_priv) || IS_KABYLAKE(dev_priv)) + if (IS_GEN9_BC(dev_priv)) skl_ddi_set_iboost(encoder, level); else if (IS_GEN9_LP(dev_priv)) bxt_ddi_vswing_sequence(dev_priv, level, port, encoder->type); @@ -1658,7 +1688,7 @@ void intel_ddi_clk_select(struct intel_encoder *encoder, if (WARN_ON(!pll)) return; - if (IS_SKYLAKE(dev_priv) || IS_KABYLAKE(dev_priv)) { + if (IS_GEN9_BC(dev_priv)) { uint32_t val; /* DDI -> PLL mapping */ @@ -1684,6 +1714,7 @@ static void intel_ddi_pre_enable_dp(struct intel_encoder *encoder, struct intel_dp *intel_dp = enc_to_intel_dp(&encoder->base); struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); enum port port = intel_ddi_get_encoder_port(encoder); + struct intel_digital_port *dig_port = enc_to_dig_port(&encoder->base); intel_dp_set_link_params(intel_dp, link_rate, lane_count, link_mst); @@ -1691,6 +1722,9 @@ static void intel_ddi_pre_enable_dp(struct intel_encoder *encoder, intel_edp_panel_on(intel_dp); intel_ddi_clk_select(encoder, pll); + + intel_display_power_get(dev_priv, dig_port->ddi_io_power_domain); + intel_prepare_dp_ddi_buffers(encoder); intel_ddi_init_dp_buf_reg(encoder); intel_dp_sink_dpms(intel_dp, DRM_MODE_DPMS_ON); @@ -1710,11 +1744,15 @@ static void intel_ddi_pre_enable_hdmi(struct intel_encoder *encoder, struct drm_encoder *drm_encoder = &encoder->base; enum port port = intel_ddi_get_encoder_port(encoder); int level = intel_ddi_hdmi_level(dev_priv, port); + struct intel_digital_port *dig_port = enc_to_dig_port(&encoder->base); intel_dp_dual_mode_set_tmds_output(intel_hdmi, true); intel_ddi_clk_select(encoder, pll); + + intel_display_power_get(dev_priv, dig_port->ddi_io_power_domain); + intel_prepare_hdmi_ddi_buffers(encoder); - if (IS_SKYLAKE(dev_priv) || IS_KABYLAKE(dev_priv)) + if (IS_GEN9_BC(dev_priv)) skl_ddi_set_iboost(encoder, level); else if (IS_GEN9_LP(dev_priv)) bxt_ddi_vswing_sequence(dev_priv, level, port, @@ -1756,6 +1794,7 @@ static void intel_ddi_post_disable(struct intel_encoder *intel_encoder, struct drm_encoder *encoder = &intel_encoder->base; struct drm_i915_private *dev_priv = to_i915(encoder->dev); enum port port = intel_ddi_get_encoder_port(intel_encoder); + struct intel_digital_port *dig_port = enc_to_dig_port(encoder); int type = intel_encoder->type; uint32_t val; bool wait = false; @@ -1784,7 +1823,10 @@ static void intel_ddi_post_disable(struct intel_encoder *intel_encoder, intel_edp_panel_off(intel_dp); } - if (IS_SKYLAKE(dev_priv) || IS_KABYLAKE(dev_priv)) + if (dig_port) + intel_display_power_put(dev_priv, dig_port->ddi_io_power_domain); + + if (IS_GEN9_BC(dev_priv)) I915_WRITE(DPLL_CTRL2, (I915_READ(DPLL_CTRL2) | DPLL_CTRL2_DDI_CLK_OFF(port))); else if (INTEL_GEN(dev_priv) < 9) @@ -1835,8 +1877,6 @@ static void intel_enable_ddi(struct intel_encoder *intel_encoder, struct drm_connector_state *conn_state) { struct drm_encoder *encoder = &intel_encoder->base; - struct drm_crtc *crtc = encoder->crtc; - struct intel_crtc *intel_crtc = to_intel_crtc(crtc); struct drm_i915_private *dev_priv = to_i915(encoder->dev); enum port port = intel_ddi_get_encoder_port(intel_encoder); int type = intel_encoder->type; @@ -1863,10 +1903,8 @@ static void intel_enable_ddi(struct intel_encoder *intel_encoder, intel_edp_drrs_enable(intel_dp, pipe_config); } - if (intel_crtc->config->has_audio) { - intel_display_power_get(dev_priv, POWER_DOMAIN_AUDIO); + if (pipe_config->has_audio) intel_audio_codec_enable(intel_encoder, pipe_config, conn_state); - } } static void intel_disable_ddi(struct intel_encoder *intel_encoder, @@ -1874,16 +1912,10 @@ static void intel_disable_ddi(struct intel_encoder *intel_encoder, struct drm_connector_state *old_conn_state) { struct drm_encoder *encoder = &intel_encoder->base; - struct drm_crtc *crtc = encoder->crtc; - struct intel_crtc *intel_crtc = to_intel_crtc(crtc); int type = intel_encoder->type; - struct drm_device *dev = encoder->dev; - struct drm_i915_private *dev_priv = to_i915(dev); - if (intel_crtc->config->has_audio) { + if (old_crtc_state->has_audio) intel_audio_codec_disable(intel_encoder); - intel_display_power_put(dev_priv, POWER_DOMAIN_AUDIO); - } if (type == INTEL_OUTPUT_EDP) { struct intel_dp *intel_dp = enc_to_intel_dp(encoder); @@ -2126,45 +2158,6 @@ intel_ddi_init_hdmi_connector(struct intel_digital_port *intel_dig_port) return connector; } -struct intel_shared_dpll * -intel_ddi_get_link_dpll(struct intel_dp *intel_dp, int clock) -{ - struct intel_connector *connector = intel_dp->attached_connector; - struct intel_encoder *encoder = connector->encoder; - struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); - struct intel_digital_port *dig_port = dp_to_dig_port(intel_dp); - struct intel_shared_dpll *pll = NULL; - struct intel_shared_dpll_state tmp_pll_state; - enum intel_dpll_id dpll_id; - - if (IS_GEN9_LP(dev_priv)) { - dpll_id = (enum intel_dpll_id)dig_port->port; - /* - * Select the required PLL. This works for platforms where - * there is no shared DPLL. - */ - pll = &dev_priv->shared_dplls[dpll_id]; - if (WARN_ON(pll->active_mask)) { - - DRM_ERROR("Shared DPLL in use. active_mask:%x\n", - pll->active_mask); - return NULL; - } - tmp_pll_state = pll->state; - if (!bxt_ddi_dp_set_dpll_hw_state(clock, - &pll->state.hw_state)) { - DRM_ERROR("Could not setup DPLL\n"); - pll->state = tmp_pll_state; - return NULL; - } - } else if (IS_SKYLAKE(dev_priv) || IS_KABYLAKE(dev_priv)) { - pll = skl_find_link_pll(dev_priv, clock); - } else if (IS_HASWELL(dev_priv) || IS_BROADWELL(dev_priv)) { - pll = hsw_ddi_dp_get_dpll(encoder, clock); - } - return pll; -} - void intel_ddi_init(struct drm_i915_private *dev_priv, enum port port) { struct intel_digital_port *intel_dig_port; @@ -2241,12 +2234,38 @@ void intel_ddi_init(struct drm_i915_private *dev_priv, enum port port) intel_encoder->get_hw_state = intel_ddi_get_hw_state; intel_encoder->get_config = intel_ddi_get_config; intel_encoder->suspend = intel_dp_encoder_suspend; + intel_encoder->get_power_domains = intel_ddi_get_power_domains; intel_dig_port->port = port; intel_dig_port->saved_port_bits = I915_READ(DDI_BUF_CTL(port)) & (DDI_BUF_PORT_REVERSAL | DDI_A_4_LANES); + switch (port) { + case PORT_A: + intel_dig_port->ddi_io_power_domain = + POWER_DOMAIN_PORT_DDI_A_IO; + break; + case PORT_B: + intel_dig_port->ddi_io_power_domain = + POWER_DOMAIN_PORT_DDI_B_IO; + break; + case PORT_C: + intel_dig_port->ddi_io_power_domain = + POWER_DOMAIN_PORT_DDI_C_IO; + break; + case PORT_D: + intel_dig_port->ddi_io_power_domain = + POWER_DOMAIN_PORT_DDI_D_IO; + break; + case PORT_E: + intel_dig_port->ddi_io_power_domain = + POWER_DOMAIN_PORT_DDI_E_IO; + break; + default: + MISSING_CASE(port); + } + /* * Bspec says that DDI_A_4_LANES is the only supported configuration * for Broxton. Yet some BIOS fail to set this bit on port A if eDP @@ -2265,6 +2284,7 @@ void intel_ddi_init(struct drm_i915_private *dev_priv, enum port port) intel_dig_port->max_lanes = max_lanes; intel_encoder->type = INTEL_OUTPUT_UNKNOWN; + intel_encoder->power_domain = intel_port_to_power_domain(port); intel_encoder->port = port; intel_encoder->crtc_mask = (1 << 0) | (1 << 1) | (1 << 2); intel_encoder->cloneable = 0; @@ -2274,14 +2294,7 @@ void intel_ddi_init(struct drm_i915_private *dev_priv, enum port port) goto err; intel_dig_port->hpd_pulse = intel_dp_hpd_pulse; - /* - * On BXT A0/A1, sw needs to activate DDIA HPD logic and - * interrupts to check the external panel connection. - */ - if (IS_BXT_REVID(dev_priv, 0, BXT_REVID_A1) && port == PORT_B) - dev_priv->hotplug.irq_port[PORT_A] = intel_dig_port; - else - dev_priv->hotplug.irq_port[port] = intel_dig_port; + dev_priv->hotplug.irq_port[port] = intel_dig_port; } /* In theory we don't need the encoder->type check, but leave it just in diff --git a/drivers/gpu/drm/i915/intel_device_info.c b/drivers/gpu/drm/i915/intel_device_info.c index fcf81815daff..2e1fd857e625 100644 --- a/drivers/gpu/drm/i915/intel_device_info.c +++ b/drivers/gpu/drm/i915/intel_device_info.c @@ -234,7 +234,7 @@ static void broadwell_sseu_info_init(struct drm_i915_private *dev_priv) * The subslice disable field is global, i.e. it applies * to each of the enabled slices. */ - sseu->subslice_mask = BIT(ss_max) - 1; + sseu->subslice_mask = GENMASK(ss_max - 1, 0); sseu->subslice_mask &= ~((fuse2 & GEN8_F2_SS_DIS_MASK) >> GEN8_F2_SS_DIS_SHIFT); @@ -410,10 +410,6 @@ void intel_device_info_runtime_init(struct drm_i915_private *dev_priv) info->has_snoop = !info->has_llc; - /* Snooping is broken on BXT A stepping. */ - if (IS_BXT_REVID(dev_priv, 0, BXT_REVID_A1)) - info->has_snoop = false; - DRM_DEBUG_DRIVER("slice mask: %04x\n", info->sseu.slice_mask); DRM_DEBUG_DRIVER("slice total: %u\n", hweight8(info->sseu.slice_mask)); DRM_DEBUG_DRIVER("subslice total: %u\n", diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 01341670738f..93371c2377b2 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -37,6 +37,7 @@ #include "intel_frontbuffer.h" #include <drm/i915_drm.h> #include "i915_drv.h" +#include "i915_gem_clflush.h" #include "intel_dsi.h" #include "i915_trace.h" #include <drm/drm_atomic.h> @@ -96,10 +97,9 @@ static void i9xx_crtc_clock_get(struct intel_crtc *crtc, static void ironlake_pch_clock_get(struct intel_crtc *crtc, struct intel_crtc_state *pipe_config); -static int intel_framebuffer_init(struct drm_device *dev, - struct intel_framebuffer *ifb, - struct drm_mode_fb_cmd2 *mode_cmd, - struct drm_i915_gem_object *obj); +static int intel_framebuffer_init(struct intel_framebuffer *ifb, + struct drm_i915_gem_object *obj, + struct drm_mode_fb_cmd2 *mode_cmd); static void i9xx_set_pipeconf(struct intel_crtc *intel_crtc); static void intel_set_pipe_timings(struct intel_crtc *intel_crtc); static void intel_set_pipe_src_size(struct intel_crtc *intel_crtc); @@ -122,9 +122,6 @@ static void ironlake_pfit_disable(struct intel_crtc *crtc, bool force); static void ironlake_pfit_enable(struct intel_crtc *crtc); static void intel_modeset_setup_hw_state(struct drm_device *dev); static void intel_pre_disable_primary_noatomic(struct drm_crtc *crtc); -static int ilk_max_pixel_rate(struct drm_atomic_state *state); -static int glk_calc_cdclk(int max_pixclk); -static int bxt_calc_cdclk(int max_pixclk); struct intel_limit { struct { @@ -138,7 +135,7 @@ struct intel_limit { }; /* returns HPLL frequency in kHz */ -static int valleyview_get_vco(struct drm_i915_private *dev_priv) +int vlv_get_hpll_vco(struct drm_i915_private *dev_priv) { int hpll_freq, vco_freq[] = { 800, 1600, 2000, 2400 }; @@ -170,73 +167,16 @@ int vlv_get_cck_clock(struct drm_i915_private *dev_priv, return DIV_ROUND_CLOSEST(ref_freq << 1, divider + 1); } -static int vlv_get_cck_clock_hpll(struct drm_i915_private *dev_priv, - const char *name, u32 reg) +int vlv_get_cck_clock_hpll(struct drm_i915_private *dev_priv, + const char *name, u32 reg) { if (dev_priv->hpll_freq == 0) - dev_priv->hpll_freq = valleyview_get_vco(dev_priv); + dev_priv->hpll_freq = vlv_get_hpll_vco(dev_priv); return vlv_get_cck_clock(dev_priv, name, reg, dev_priv->hpll_freq); } -static int -intel_pch_rawclk(struct drm_i915_private *dev_priv) -{ - return (I915_READ(PCH_RAWCLK_FREQ) & RAWCLK_FREQ_MASK) * 1000; -} - -static int -intel_vlv_hrawclk(struct drm_i915_private *dev_priv) -{ - /* RAWCLK_FREQ_VLV register updated from power well code */ - return vlv_get_cck_clock_hpll(dev_priv, "hrawclk", - CCK_DISPLAY_REF_CLOCK_CONTROL); -} - -static int -intel_g4x_hrawclk(struct drm_i915_private *dev_priv) -{ - uint32_t clkcfg; - - /* hrawclock is 1/4 the FSB frequency */ - clkcfg = I915_READ(CLKCFG); - switch (clkcfg & CLKCFG_FSB_MASK) { - case CLKCFG_FSB_400: - return 100000; - case CLKCFG_FSB_533: - return 133333; - case CLKCFG_FSB_667: - return 166667; - case CLKCFG_FSB_800: - return 200000; - case CLKCFG_FSB_1067: - return 266667; - case CLKCFG_FSB_1333: - return 333333; - /* these two are just a guess; one of them might be right */ - case CLKCFG_FSB_1600: - case CLKCFG_FSB_1600_ALT: - return 400000; - default: - return 133333; - } -} - -void intel_update_rawclk(struct drm_i915_private *dev_priv) -{ - if (HAS_PCH_SPLIT(dev_priv)) - dev_priv->rawclk_freq = intel_pch_rawclk(dev_priv); - else if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) - dev_priv->rawclk_freq = intel_vlv_hrawclk(dev_priv); - else if (IS_G4X(dev_priv) || IS_PINEVIEW(dev_priv)) - dev_priv->rawclk_freq = intel_g4x_hrawclk(dev_priv); - else - return; /* no rawclk on other platforms, or no need to know it */ - - DRM_DEBUG_DRIVER("rawclk rate: %d kHz\n", dev_priv->rawclk_freq); -} - static void intel_update_czclk(struct drm_i915_private *dev_priv) { if (!(IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv))) @@ -2112,11 +2052,13 @@ static void intel_tile_dims(const struct drm_i915_private *dev_priv, } unsigned int -intel_fb_align_height(struct drm_device *dev, unsigned int height, - uint32_t pixel_format, uint64_t fb_modifier) +intel_fb_align_height(struct drm_i915_private *dev_priv, + unsigned int height, + uint32_t pixel_format, + uint64_t fb_modifier) { unsigned int cpp = drm_format_plane_cpp(pixel_format, 0); - unsigned int tile_height = intel_tile_height(to_i915(dev), fb_modifier, cpp); + unsigned int tile_height = intel_tile_height(dev_priv, fb_modifier, cpp); return ALIGN(height, tile_height); } @@ -2682,15 +2624,13 @@ intel_alloc_initial_plane_obj(struct intel_crtc *crtc, return false; mutex_lock(&dev->struct_mutex); - obj = i915_gem_object_create_stolen_for_preallocated(dev_priv, base_aligned, base_aligned, size_aligned); - if (!obj) { - mutex_unlock(&dev->struct_mutex); + mutex_unlock(&dev->struct_mutex); + if (!obj) return false; - } if (plane_config->tiling == I915_TILING_X) obj->tiling_and_stride = fb->pitches[0] | I915_TILING_X; @@ -2702,20 +2642,17 @@ intel_alloc_initial_plane_obj(struct intel_crtc *crtc, mode_cmd.modifier[0] = fb->modifier; mode_cmd.flags = DRM_MODE_FB_MODIFIERS; - if (intel_framebuffer_init(dev, to_intel_framebuffer(fb), - &mode_cmd, obj)) { + if (intel_framebuffer_init(to_intel_framebuffer(fb), obj, &mode_cmd)) { DRM_DEBUG_KMS("intel fb init failed\n"); goto out_unref_obj; } - mutex_unlock(&dev->struct_mutex); DRM_DEBUG_KMS("initial plane fb obj %p\n", obj); return true; out_unref_obj: i915_gem_object_put(obj); - mutex_unlock(&dev->struct_mutex); return false; } @@ -3386,13 +3323,22 @@ static void skylake_update_primary_plane(struct drm_plane *plane, int dst_w = drm_rect_width(&plane_state->base.dst); int dst_h = drm_rect_height(&plane_state->base.dst); - plane_ctl = PLANE_CTL_ENABLE | - PLANE_CTL_PIPE_GAMMA_ENABLE | - PLANE_CTL_PIPE_CSC_ENABLE; + plane_ctl = PLANE_CTL_ENABLE; + + if (IS_GEMINILAKE(dev_priv)) { + I915_WRITE(PLANE_COLOR_CTL(pipe, plane_id), + PLANE_COLOR_PIPE_GAMMA_ENABLE | + PLANE_COLOR_PIPE_CSC_ENABLE | + PLANE_COLOR_PLANE_GAMMA_DISABLE); + } else { + plane_ctl |= + PLANE_CTL_PIPE_GAMMA_ENABLE | + PLANE_CTL_PIPE_CSC_ENABLE | + PLANE_CTL_PLANE_GAMMA_DISABLE; + } plane_ctl |= skl_plane_ctl_format(fb->format->format); plane_ctl |= skl_plane_ctl_tiling(fb->modifier); - plane_ctl |= PLANE_CTL_PLANE_GAMMA_DISABLE; plane_ctl |= skl_plane_ctl_rotation(rotation); /* Sizes are 0 based */ @@ -5622,7 +5568,7 @@ static void i9xx_pfit_enable(struct intel_crtc *crtc) I915_WRITE(BCLRPAT(crtc->pipe), 0); } -static enum intel_display_power_domain port_to_power_domain(enum port port) +enum intel_display_power_domain intel_port_to_power_domain(enum port port) { switch (port) { case PORT_A: @@ -5641,91 +5587,15 @@ static enum intel_display_power_domain port_to_power_domain(enum port port) } } -static enum intel_display_power_domain port_to_aux_power_domain(enum port port) -{ - switch (port) { - case PORT_A: - return POWER_DOMAIN_AUX_A; - case PORT_B: - return POWER_DOMAIN_AUX_B; - case PORT_C: - return POWER_DOMAIN_AUX_C; - case PORT_D: - return POWER_DOMAIN_AUX_D; - case PORT_E: - /* FIXME: Check VBT for actual wiring of PORT E */ - return POWER_DOMAIN_AUX_D; - default: - MISSING_CASE(port); - return POWER_DOMAIN_AUX_A; - } -} - -enum intel_display_power_domain -intel_display_port_power_domain(struct intel_encoder *intel_encoder) -{ - struct drm_i915_private *dev_priv = to_i915(intel_encoder->base.dev); - struct intel_digital_port *intel_dig_port; - - switch (intel_encoder->type) { - case INTEL_OUTPUT_UNKNOWN: - /* Only DDI platforms should ever use this output type */ - WARN_ON_ONCE(!HAS_DDI(dev_priv)); - case INTEL_OUTPUT_DP: - case INTEL_OUTPUT_HDMI: - case INTEL_OUTPUT_EDP: - intel_dig_port = enc_to_dig_port(&intel_encoder->base); - return port_to_power_domain(intel_dig_port->port); - case INTEL_OUTPUT_DP_MST: - intel_dig_port = enc_to_mst(&intel_encoder->base)->primary; - return port_to_power_domain(intel_dig_port->port); - case INTEL_OUTPUT_ANALOG: - return POWER_DOMAIN_PORT_CRT; - case INTEL_OUTPUT_DSI: - return POWER_DOMAIN_PORT_DSI; - default: - return POWER_DOMAIN_PORT_OTHER; - } -} - -enum intel_display_power_domain -intel_display_port_aux_power_domain(struct intel_encoder *intel_encoder) -{ - struct drm_i915_private *dev_priv = to_i915(intel_encoder->base.dev); - struct intel_digital_port *intel_dig_port; - - switch (intel_encoder->type) { - case INTEL_OUTPUT_UNKNOWN: - case INTEL_OUTPUT_HDMI: - /* - * Only DDI platforms should ever use these output types. - * We can get here after the HDMI detect code has already set - * the type of the shared encoder. Since we can't be sure - * what's the status of the given connectors, play safe and - * run the DP detection too. - */ - WARN_ON_ONCE(!HAS_DDI(dev_priv)); - case INTEL_OUTPUT_DP: - case INTEL_OUTPUT_EDP: - intel_dig_port = enc_to_dig_port(&intel_encoder->base); - return port_to_aux_power_domain(intel_dig_port->port); - case INTEL_OUTPUT_DP_MST: - intel_dig_port = enc_to_mst(&intel_encoder->base)->primary; - return port_to_aux_power_domain(intel_dig_port->port); - default: - MISSING_CASE(intel_encoder->type); - return POWER_DOMAIN_AUX_A; - } -} - -static unsigned long get_crtc_power_domains(struct drm_crtc *crtc, - struct intel_crtc_state *crtc_state) +static u64 get_crtc_power_domains(struct drm_crtc *crtc, + struct intel_crtc_state *crtc_state) { struct drm_device *dev = crtc->dev; + struct drm_i915_private *dev_priv = to_i915(dev); struct drm_encoder *encoder; struct intel_crtc *intel_crtc = to_intel_crtc(crtc); enum pipe pipe = intel_crtc->pipe; - unsigned long mask; + u64 mask; enum transcoder transcoder = crtc_state->cpu_transcoder; if (!crtc_state->base.active) @@ -5735,28 +5605,31 @@ static unsigned long get_crtc_power_domains(struct drm_crtc *crtc, mask |= BIT(POWER_DOMAIN_TRANSCODER(transcoder)); if (crtc_state->pch_pfit.enabled || crtc_state->pch_pfit.force_thru) - mask |= BIT(POWER_DOMAIN_PIPE_PANEL_FITTER(pipe)); + mask |= BIT_ULL(POWER_DOMAIN_PIPE_PANEL_FITTER(pipe)); drm_for_each_encoder_mask(encoder, dev, crtc_state->base.encoder_mask) { struct intel_encoder *intel_encoder = to_intel_encoder(encoder); - mask |= BIT(intel_display_port_power_domain(intel_encoder)); + mask |= BIT_ULL(intel_encoder->power_domain); } + if (HAS_DDI(dev_priv) && crtc_state->has_audio) + mask |= BIT(POWER_DOMAIN_AUDIO); + if (crtc_state->shared_dpll) - mask |= BIT(POWER_DOMAIN_PLLS); + mask |= BIT_ULL(POWER_DOMAIN_PLLS); return mask; } -static unsigned long +static u64 modeset_get_crtc_power_domains(struct drm_crtc *crtc, struct intel_crtc_state *crtc_state) { struct drm_i915_private *dev_priv = to_i915(crtc->dev); struct intel_crtc *intel_crtc = to_intel_crtc(crtc); enum intel_display_power_domain domain; - unsigned long domains, new_domains, old_domains; + u64 domains, new_domains, old_domains; old_domains = intel_crtc->enabled_power_domains; intel_crtc->enabled_power_domains = new_domains = @@ -5771,7 +5644,7 @@ modeset_get_crtc_power_domains(struct drm_crtc *crtc, } static void modeset_put_power_domains(struct drm_i915_private *dev_priv, - unsigned long domains) + u64 domains) { enum intel_display_power_domain domain; @@ -5779,919 +5652,6 @@ static void modeset_put_power_domains(struct drm_i915_private *dev_priv, intel_display_power_put(dev_priv, domain); } -static int intel_compute_max_dotclk(struct drm_i915_private *dev_priv) -{ - int max_cdclk_freq = dev_priv->max_cdclk_freq; - - if (IS_GEMINILAKE(dev_priv)) - return 2 * max_cdclk_freq; - else if (INTEL_INFO(dev_priv)->gen >= 9 || - IS_HASWELL(dev_priv) || IS_BROADWELL(dev_priv)) - return max_cdclk_freq; - else if (IS_CHERRYVIEW(dev_priv)) - return max_cdclk_freq*95/100; - else if (INTEL_INFO(dev_priv)->gen < 4) - return 2*max_cdclk_freq*90/100; - else - return max_cdclk_freq*90/100; -} - -static int skl_calc_cdclk(int max_pixclk, int vco); - -static void intel_update_max_cdclk(struct drm_i915_private *dev_priv) -{ - if (IS_SKYLAKE(dev_priv) || IS_KABYLAKE(dev_priv)) { - u32 limit = I915_READ(SKL_DFSM) & SKL_DFSM_CDCLK_LIMIT_MASK; - int max_cdclk, vco; - - vco = dev_priv->skl_preferred_vco_freq; - WARN_ON(vco != 8100000 && vco != 8640000); - - /* - * Use the lower (vco 8640) cdclk values as a - * first guess. skl_calc_cdclk() will correct it - * if the preferred vco is 8100 instead. - */ - if (limit == SKL_DFSM_CDCLK_LIMIT_675) - max_cdclk = 617143; - else if (limit == SKL_DFSM_CDCLK_LIMIT_540) - max_cdclk = 540000; - else if (limit == SKL_DFSM_CDCLK_LIMIT_450) - max_cdclk = 432000; - else - max_cdclk = 308571; - - dev_priv->max_cdclk_freq = skl_calc_cdclk(max_cdclk, vco); - } else if (IS_GEMINILAKE(dev_priv)) { - dev_priv->max_cdclk_freq = 316800; - } else if (IS_BROXTON(dev_priv)) { - dev_priv->max_cdclk_freq = 624000; - } else if (IS_BROADWELL(dev_priv)) { - /* - * FIXME with extra cooling we can allow - * 540 MHz for ULX and 675 Mhz for ULT. - * How can we know if extra cooling is - * available? PCI ID, VTB, something else? - */ - if (I915_READ(FUSE_STRAP) & HSW_CDCLK_LIMIT) - dev_priv->max_cdclk_freq = 450000; - else if (IS_BDW_ULX(dev_priv)) - dev_priv->max_cdclk_freq = 450000; - else if (IS_BDW_ULT(dev_priv)) - dev_priv->max_cdclk_freq = 540000; - else - dev_priv->max_cdclk_freq = 675000; - } else if (IS_CHERRYVIEW(dev_priv)) { - dev_priv->max_cdclk_freq = 320000; - } else if (IS_VALLEYVIEW(dev_priv)) { - dev_priv->max_cdclk_freq = 400000; - } else { - /* otherwise assume cdclk is fixed */ - dev_priv->max_cdclk_freq = dev_priv->cdclk_freq; - } - - dev_priv->max_dotclk_freq = intel_compute_max_dotclk(dev_priv); - - DRM_DEBUG_DRIVER("Max CD clock rate: %d kHz\n", - dev_priv->max_cdclk_freq); - - DRM_DEBUG_DRIVER("Max dotclock rate: %d kHz\n", - dev_priv->max_dotclk_freq); -} - -static void intel_update_cdclk(struct drm_i915_private *dev_priv) -{ - dev_priv->cdclk_freq = dev_priv->display.get_display_clock_speed(dev_priv); - - if (INTEL_GEN(dev_priv) >= 9) - DRM_DEBUG_DRIVER("Current CD clock rate: %d kHz, VCO: %d kHz, ref: %d kHz\n", - dev_priv->cdclk_freq, dev_priv->cdclk_pll.vco, - dev_priv->cdclk_pll.ref); - else - DRM_DEBUG_DRIVER("Current CD clock rate: %d kHz\n", - dev_priv->cdclk_freq); - - /* - * 9:0 CMBUS [sic] CDCLK frequency (cdfreq): - * Programmng [sic] note: bit[9:2] should be programmed to the number - * of cdclk that generates 4MHz reference clock freq which is used to - * generate GMBus clock. This will vary with the cdclk freq. - */ - if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) - I915_WRITE(GMBUSFREQ_VLV, DIV_ROUND_UP(dev_priv->cdclk_freq, 1000)); -} - -/* convert from kHz to .1 fixpoint MHz with -1MHz offset */ -static int skl_cdclk_decimal(int cdclk) -{ - return DIV_ROUND_CLOSEST(cdclk - 1000, 500); -} - -static int bxt_de_pll_vco(struct drm_i915_private *dev_priv, int cdclk) -{ - int ratio; - - if (cdclk == dev_priv->cdclk_pll.ref) - return 0; - - switch (cdclk) { - default: - MISSING_CASE(cdclk); - case 144000: - case 288000: - case 384000: - case 576000: - ratio = 60; - break; - case 624000: - ratio = 65; - break; - } - - return dev_priv->cdclk_pll.ref * ratio; -} - -static int glk_de_pll_vco(struct drm_i915_private *dev_priv, int cdclk) -{ - int ratio; - - if (cdclk == dev_priv->cdclk_pll.ref) - return 0; - - switch (cdclk) { - default: - MISSING_CASE(cdclk); - case 79200: - case 158400: - case 316800: - ratio = 33; - break; - } - - return dev_priv->cdclk_pll.ref * ratio; -} - -static void bxt_de_pll_disable(struct drm_i915_private *dev_priv) -{ - I915_WRITE(BXT_DE_PLL_ENABLE, 0); - - /* Timeout 200us */ - if (intel_wait_for_register(dev_priv, - BXT_DE_PLL_ENABLE, BXT_DE_PLL_LOCK, 0, - 1)) - DRM_ERROR("timeout waiting for DE PLL unlock\n"); - - dev_priv->cdclk_pll.vco = 0; -} - -static void bxt_de_pll_enable(struct drm_i915_private *dev_priv, int vco) -{ - int ratio = DIV_ROUND_CLOSEST(vco, dev_priv->cdclk_pll.ref); - u32 val; - - val = I915_READ(BXT_DE_PLL_CTL); - val &= ~BXT_DE_PLL_RATIO_MASK; - val |= BXT_DE_PLL_RATIO(ratio); - I915_WRITE(BXT_DE_PLL_CTL, val); - - I915_WRITE(BXT_DE_PLL_ENABLE, BXT_DE_PLL_PLL_ENABLE); - - /* Timeout 200us */ - if (intel_wait_for_register(dev_priv, - BXT_DE_PLL_ENABLE, - BXT_DE_PLL_LOCK, - BXT_DE_PLL_LOCK, - 1)) - DRM_ERROR("timeout waiting for DE PLL lock\n"); - - dev_priv->cdclk_pll.vco = vco; -} - -static void bxt_set_cdclk(struct drm_i915_private *dev_priv, int cdclk) -{ - u32 val, divider; - int vco, ret; - - if (IS_GEMINILAKE(dev_priv)) - vco = glk_de_pll_vco(dev_priv, cdclk); - else - vco = bxt_de_pll_vco(dev_priv, cdclk); - - DRM_DEBUG_DRIVER("Changing CDCLK to %d kHz (VCO %d kHz)\n", cdclk, vco); - - /* cdclk = vco / 2 / div{1,1.5,2,4} */ - switch (DIV_ROUND_CLOSEST(vco, cdclk)) { - case 8: - divider = BXT_CDCLK_CD2X_DIV_SEL_4; - break; - case 4: - divider = BXT_CDCLK_CD2X_DIV_SEL_2; - break; - case 3: - WARN(IS_GEMINILAKE(dev_priv), "Unsupported divider\n"); - divider = BXT_CDCLK_CD2X_DIV_SEL_1_5; - break; - case 2: - divider = BXT_CDCLK_CD2X_DIV_SEL_1; - break; - default: - WARN_ON(cdclk != dev_priv->cdclk_pll.ref); - WARN_ON(vco != 0); - - divider = BXT_CDCLK_CD2X_DIV_SEL_1; - break; - } - - /* Inform power controller of upcoming frequency change */ - mutex_lock(&dev_priv->rps.hw_lock); - ret = sandybridge_pcode_write(dev_priv, HSW_PCODE_DE_WRITE_FREQ_REQ, - 0x80000000); - mutex_unlock(&dev_priv->rps.hw_lock); - - if (ret) { - DRM_ERROR("PCode CDCLK freq change notify failed (err %d, freq %d)\n", - ret, cdclk); - return; - } - - if (dev_priv->cdclk_pll.vco != 0 && - dev_priv->cdclk_pll.vco != vco) - bxt_de_pll_disable(dev_priv); - - if (dev_priv->cdclk_pll.vco != vco) - bxt_de_pll_enable(dev_priv, vco); - - val = divider | skl_cdclk_decimal(cdclk); - /* - * FIXME if only the cd2x divider needs changing, it could be done - * without shutting off the pipe (if only one pipe is active). - */ - val |= BXT_CDCLK_CD2X_PIPE_NONE; - /* - * Disable SSA Precharge when CD clock frequency < 500 MHz, - * enable otherwise. - */ - if (cdclk >= 500000) - val |= BXT_CDCLK_SSA_PRECHARGE_ENABLE; - I915_WRITE(CDCLK_CTL, val); - - mutex_lock(&dev_priv->rps.hw_lock); - ret = sandybridge_pcode_write(dev_priv, HSW_PCODE_DE_WRITE_FREQ_REQ, - DIV_ROUND_UP(cdclk, 25000)); - mutex_unlock(&dev_priv->rps.hw_lock); - - if (ret) { - DRM_ERROR("PCode CDCLK freq set failed, (err %d, freq %d)\n", - ret, cdclk); - return; - } - - intel_update_cdclk(dev_priv); -} - -static void bxt_sanitize_cdclk(struct drm_i915_private *dev_priv) -{ - u32 cdctl, expected; - - intel_update_cdclk(dev_priv); - - if (dev_priv->cdclk_pll.vco == 0 || - dev_priv->cdclk_freq == dev_priv->cdclk_pll.ref) - goto sanitize; - - /* DPLL okay; verify the cdclock - * - * Some BIOS versions leave an incorrect decimal frequency value and - * set reserved MBZ bits in CDCLK_CTL at least during exiting from S4, - * so sanitize this register. - */ - cdctl = I915_READ(CDCLK_CTL); - /* - * Let's ignore the pipe field, since BIOS could have configured the - * dividers both synching to an active pipe, or asynchronously - * (PIPE_NONE). - */ - cdctl &= ~BXT_CDCLK_CD2X_PIPE_NONE; - - expected = (cdctl & BXT_CDCLK_CD2X_DIV_SEL_MASK) | - skl_cdclk_decimal(dev_priv->cdclk_freq); - /* - * Disable SSA Precharge when CD clock frequency < 500 MHz, - * enable otherwise. - */ - if (dev_priv->cdclk_freq >= 500000) - expected |= BXT_CDCLK_SSA_PRECHARGE_ENABLE; - - if (cdctl == expected) - /* All well; nothing to sanitize */ - return; - -sanitize: - DRM_DEBUG_KMS("Sanitizing cdclk programmed by pre-os\n"); - - /* force cdclk programming */ - dev_priv->cdclk_freq = 0; - - /* force full PLL disable + enable */ - dev_priv->cdclk_pll.vco = -1; -} - -void bxt_init_cdclk(struct drm_i915_private *dev_priv) -{ - int cdclk; - - bxt_sanitize_cdclk(dev_priv); - - if (dev_priv->cdclk_freq != 0 && dev_priv->cdclk_pll.vco != 0) - return; - - /* - * FIXME: - * - The initial CDCLK needs to be read from VBT. - * Need to make this change after VBT has changes for BXT. - */ - if (IS_GEMINILAKE(dev_priv)) - cdclk = glk_calc_cdclk(0); - else - cdclk = bxt_calc_cdclk(0); - - bxt_set_cdclk(dev_priv, cdclk); -} - -void bxt_uninit_cdclk(struct drm_i915_private *dev_priv) -{ - bxt_set_cdclk(dev_priv, dev_priv->cdclk_pll.ref); -} - -static int skl_calc_cdclk(int max_pixclk, int vco) -{ - if (vco == 8640000) { - if (max_pixclk > 540000) - return 617143; - else if (max_pixclk > 432000) - return 540000; - else if (max_pixclk > 308571) - return 432000; - else - return 308571; - } else { - if (max_pixclk > 540000) - return 675000; - else if (max_pixclk > 450000) - return 540000; - else if (max_pixclk > 337500) - return 450000; - else - return 337500; - } -} - -static void -skl_dpll0_update(struct drm_i915_private *dev_priv) -{ - u32 val; - - dev_priv->cdclk_pll.ref = 24000; - dev_priv->cdclk_pll.vco = 0; - - val = I915_READ(LCPLL1_CTL); - if ((val & LCPLL_PLL_ENABLE) == 0) - return; - - if (WARN_ON((val & LCPLL_PLL_LOCK) == 0)) - return; - - val = I915_READ(DPLL_CTRL1); - - if (WARN_ON((val & (DPLL_CTRL1_HDMI_MODE(SKL_DPLL0) | - DPLL_CTRL1_SSC(SKL_DPLL0) | - DPLL_CTRL1_OVERRIDE(SKL_DPLL0))) != - DPLL_CTRL1_OVERRIDE(SKL_DPLL0))) - return; - - switch (val & DPLL_CTRL1_LINK_RATE_MASK(SKL_DPLL0)) { - case DPLL_CTRL1_LINK_RATE(DPLL_CTRL1_LINK_RATE_810, SKL_DPLL0): - case DPLL_CTRL1_LINK_RATE(DPLL_CTRL1_LINK_RATE_1350, SKL_DPLL0): - case DPLL_CTRL1_LINK_RATE(DPLL_CTRL1_LINK_RATE_1620, SKL_DPLL0): - case DPLL_CTRL1_LINK_RATE(DPLL_CTRL1_LINK_RATE_2700, SKL_DPLL0): - dev_priv->cdclk_pll.vco = 8100000; - break; - case DPLL_CTRL1_LINK_RATE(DPLL_CTRL1_LINK_RATE_1080, SKL_DPLL0): - case DPLL_CTRL1_LINK_RATE(DPLL_CTRL1_LINK_RATE_2160, SKL_DPLL0): - dev_priv->cdclk_pll.vco = 8640000; - break; - default: - MISSING_CASE(val & DPLL_CTRL1_LINK_RATE_MASK(SKL_DPLL0)); - break; - } -} - -void skl_set_preferred_cdclk_vco(struct drm_i915_private *dev_priv, int vco) -{ - bool changed = dev_priv->skl_preferred_vco_freq != vco; - - dev_priv->skl_preferred_vco_freq = vco; - - if (changed) - intel_update_max_cdclk(dev_priv); -} - -static void -skl_dpll0_enable(struct drm_i915_private *dev_priv, int vco) -{ - int min_cdclk = skl_calc_cdclk(0, vco); - u32 val; - - WARN_ON(vco != 8100000 && vco != 8640000); - - /* select the minimum CDCLK before enabling DPLL 0 */ - val = CDCLK_FREQ_337_308 | skl_cdclk_decimal(min_cdclk); - I915_WRITE(CDCLK_CTL, val); - POSTING_READ(CDCLK_CTL); - - /* - * We always enable DPLL0 with the lowest link rate possible, but still - * taking into account the VCO required to operate the eDP panel at the - * desired frequency. The usual DP link rates operate with a VCO of - * 8100 while the eDP 1.4 alternate link rates need a VCO of 8640. - * The modeset code is responsible for the selection of the exact link - * rate later on, with the constraint of choosing a frequency that - * works with vco. - */ - val = I915_READ(DPLL_CTRL1); - - val &= ~(DPLL_CTRL1_HDMI_MODE(SKL_DPLL0) | DPLL_CTRL1_SSC(SKL_DPLL0) | - DPLL_CTRL1_LINK_RATE_MASK(SKL_DPLL0)); - val |= DPLL_CTRL1_OVERRIDE(SKL_DPLL0); - if (vco == 8640000) - val |= DPLL_CTRL1_LINK_RATE(DPLL_CTRL1_LINK_RATE_1080, - SKL_DPLL0); - else - val |= DPLL_CTRL1_LINK_RATE(DPLL_CTRL1_LINK_RATE_810, - SKL_DPLL0); - - I915_WRITE(DPLL_CTRL1, val); - POSTING_READ(DPLL_CTRL1); - - I915_WRITE(LCPLL1_CTL, I915_READ(LCPLL1_CTL) | LCPLL_PLL_ENABLE); - - if (intel_wait_for_register(dev_priv, - LCPLL1_CTL, LCPLL_PLL_LOCK, LCPLL_PLL_LOCK, - 5)) - DRM_ERROR("DPLL0 not locked\n"); - - dev_priv->cdclk_pll.vco = vco; - - /* We'll want to keep using the current vco from now on. */ - skl_set_preferred_cdclk_vco(dev_priv, vco); -} - -static void -skl_dpll0_disable(struct drm_i915_private *dev_priv) -{ - I915_WRITE(LCPLL1_CTL, I915_READ(LCPLL1_CTL) & ~LCPLL_PLL_ENABLE); - if (intel_wait_for_register(dev_priv, - LCPLL1_CTL, LCPLL_PLL_LOCK, 0, - 1)) - DRM_ERROR("Couldn't disable DPLL0\n"); - - dev_priv->cdclk_pll.vco = 0; -} - -static void skl_set_cdclk(struct drm_i915_private *dev_priv, int cdclk, int vco) -{ - u32 freq_select, pcu_ack; - int ret; - - WARN_ON((cdclk == 24000) != (vco == 0)); - - DRM_DEBUG_DRIVER("Changing CDCLK to %d kHz (VCO %d kHz)\n", cdclk, vco); - - mutex_lock(&dev_priv->rps.hw_lock); - ret = skl_pcode_request(dev_priv, SKL_PCODE_CDCLK_CONTROL, - SKL_CDCLK_PREPARE_FOR_CHANGE, - SKL_CDCLK_READY_FOR_CHANGE, - SKL_CDCLK_READY_FOR_CHANGE, 3); - mutex_unlock(&dev_priv->rps.hw_lock); - if (ret) { - DRM_ERROR("Failed to inform PCU about cdclk change (%d)\n", - ret); - return; - } - - /* set CDCLK_CTL */ - switch (cdclk) { - case 450000: - case 432000: - freq_select = CDCLK_FREQ_450_432; - pcu_ack = 1; - break; - case 540000: - freq_select = CDCLK_FREQ_540; - pcu_ack = 2; - break; - case 308571: - case 337500: - default: - freq_select = CDCLK_FREQ_337_308; - pcu_ack = 0; - break; - case 617143: - case 675000: - freq_select = CDCLK_FREQ_675_617; - pcu_ack = 3; - break; - } - - if (dev_priv->cdclk_pll.vco != 0 && - dev_priv->cdclk_pll.vco != vco) - skl_dpll0_disable(dev_priv); - - if (dev_priv->cdclk_pll.vco != vco) - skl_dpll0_enable(dev_priv, vco); - - I915_WRITE(CDCLK_CTL, freq_select | skl_cdclk_decimal(cdclk)); - POSTING_READ(CDCLK_CTL); - - /* inform PCU of the change */ - mutex_lock(&dev_priv->rps.hw_lock); - sandybridge_pcode_write(dev_priv, SKL_PCODE_CDCLK_CONTROL, pcu_ack); - mutex_unlock(&dev_priv->rps.hw_lock); - - intel_update_cdclk(dev_priv); -} - -static void skl_sanitize_cdclk(struct drm_i915_private *dev_priv); - -void skl_uninit_cdclk(struct drm_i915_private *dev_priv) -{ - skl_set_cdclk(dev_priv, dev_priv->cdclk_pll.ref, 0); -} - -void skl_init_cdclk(struct drm_i915_private *dev_priv) -{ - int cdclk, vco; - - skl_sanitize_cdclk(dev_priv); - - if (dev_priv->cdclk_freq != 0 && dev_priv->cdclk_pll.vco != 0) { - /* - * Use the current vco as our initial - * guess as to what the preferred vco is. - */ - if (dev_priv->skl_preferred_vco_freq == 0) - skl_set_preferred_cdclk_vco(dev_priv, - dev_priv->cdclk_pll.vco); - return; - } - - vco = dev_priv->skl_preferred_vco_freq; - if (vco == 0) - vco = 8100000; - cdclk = skl_calc_cdclk(0, vco); - - skl_set_cdclk(dev_priv, cdclk, vco); -} - -static void skl_sanitize_cdclk(struct drm_i915_private *dev_priv) -{ - uint32_t cdctl, expected; - - /* - * check if the pre-os intialized the display - * There is SWF18 scratchpad register defined which is set by the - * pre-os which can be used by the OS drivers to check the status - */ - if ((I915_READ(SWF_ILK(0x18)) & 0x00FFFFFF) == 0) - goto sanitize; - - intel_update_cdclk(dev_priv); - /* Is PLL enabled and locked ? */ - if (dev_priv->cdclk_pll.vco == 0 || - dev_priv->cdclk_freq == dev_priv->cdclk_pll.ref) - goto sanitize; - - /* DPLL okay; verify the cdclock - * - * Noticed in some instances that the freq selection is correct but - * decimal part is programmed wrong from BIOS where pre-os does not - * enable display. Verify the same as well. - */ - cdctl = I915_READ(CDCLK_CTL); - expected = (cdctl & CDCLK_FREQ_SEL_MASK) | - skl_cdclk_decimal(dev_priv->cdclk_freq); - if (cdctl == expected) - /* All well; nothing to sanitize */ - return; - -sanitize: - DRM_DEBUG_KMS("Sanitizing cdclk programmed by pre-os\n"); - - /* force cdclk programming */ - dev_priv->cdclk_freq = 0; - /* force full PLL disable + enable */ - dev_priv->cdclk_pll.vco = -1; -} - -/* Adjust CDclk dividers to allow high res or save power if possible */ -static void valleyview_set_cdclk(struct drm_device *dev, int cdclk) -{ - struct drm_i915_private *dev_priv = to_i915(dev); - u32 val, cmd; - - WARN_ON(dev_priv->display.get_display_clock_speed(dev_priv) - != dev_priv->cdclk_freq); - - if (cdclk >= 320000) /* jump to highest voltage for 400MHz too */ - cmd = 2; - else if (cdclk == 266667) - cmd = 1; - else - cmd = 0; - - mutex_lock(&dev_priv->rps.hw_lock); - val = vlv_punit_read(dev_priv, PUNIT_REG_DSPFREQ); - val &= ~DSPFREQGUAR_MASK; - val |= (cmd << DSPFREQGUAR_SHIFT); - vlv_punit_write(dev_priv, PUNIT_REG_DSPFREQ, val); - if (wait_for((vlv_punit_read(dev_priv, PUNIT_REG_DSPFREQ) & - DSPFREQSTAT_MASK) == (cmd << DSPFREQSTAT_SHIFT), - 50)) { - DRM_ERROR("timed out waiting for CDclk change\n"); - } - mutex_unlock(&dev_priv->rps.hw_lock); - - mutex_lock(&dev_priv->sb_lock); - - if (cdclk == 400000) { - u32 divider; - - divider = DIV_ROUND_CLOSEST(dev_priv->hpll_freq << 1, cdclk) - 1; - - /* adjust cdclk divider */ - val = vlv_cck_read(dev_priv, CCK_DISPLAY_CLOCK_CONTROL); - val &= ~CCK_FREQUENCY_VALUES; - val |= divider; - vlv_cck_write(dev_priv, CCK_DISPLAY_CLOCK_CONTROL, val); - - if (wait_for((vlv_cck_read(dev_priv, CCK_DISPLAY_CLOCK_CONTROL) & - CCK_FREQUENCY_STATUS) == (divider << CCK_FREQUENCY_STATUS_SHIFT), - 50)) - DRM_ERROR("timed out waiting for CDclk change\n"); - } - - /* adjust self-refresh exit latency value */ - val = vlv_bunit_read(dev_priv, BUNIT_REG_BISOC); - val &= ~0x7f; - - /* - * For high bandwidth configs, we set a higher latency in the bunit - * so that the core display fetch happens in time to avoid underruns. - */ - if (cdclk == 400000) - val |= 4500 / 250; /* 4.5 usec */ - else - val |= 3000 / 250; /* 3.0 usec */ - vlv_bunit_write(dev_priv, BUNIT_REG_BISOC, val); - - mutex_unlock(&dev_priv->sb_lock); - - intel_update_cdclk(dev_priv); -} - -static void cherryview_set_cdclk(struct drm_device *dev, int cdclk) -{ - struct drm_i915_private *dev_priv = to_i915(dev); - u32 val, cmd; - - WARN_ON(dev_priv->display.get_display_clock_speed(dev_priv) - != dev_priv->cdclk_freq); - - switch (cdclk) { - case 333333: - case 320000: - case 266667: - case 200000: - break; - default: - MISSING_CASE(cdclk); - return; - } - - /* - * Specs are full of misinformation, but testing on actual - * hardware has shown that we just need to write the desired - * CCK divider into the Punit register. - */ - cmd = DIV_ROUND_CLOSEST(dev_priv->hpll_freq << 1, cdclk) - 1; - - mutex_lock(&dev_priv->rps.hw_lock); - val = vlv_punit_read(dev_priv, PUNIT_REG_DSPFREQ); - val &= ~DSPFREQGUAR_MASK_CHV; - val |= (cmd << DSPFREQGUAR_SHIFT_CHV); - vlv_punit_write(dev_priv, PUNIT_REG_DSPFREQ, val); - if (wait_for((vlv_punit_read(dev_priv, PUNIT_REG_DSPFREQ) & - DSPFREQSTAT_MASK_CHV) == (cmd << DSPFREQSTAT_SHIFT_CHV), - 50)) { - DRM_ERROR("timed out waiting for CDclk change\n"); - } - mutex_unlock(&dev_priv->rps.hw_lock); - - intel_update_cdclk(dev_priv); -} - -static int valleyview_calc_cdclk(struct drm_i915_private *dev_priv, - int max_pixclk) -{ - int freq_320 = (dev_priv->hpll_freq << 1) % 320000 != 0 ? 333333 : 320000; - int limit = IS_CHERRYVIEW(dev_priv) ? 95 : 90; - - /* - * Really only a few cases to deal with, as only 4 CDclks are supported: - * 200MHz - * 267MHz - * 320/333MHz (depends on HPLL freq) - * 400MHz (VLV only) - * So we check to see whether we're above 90% (VLV) or 95% (CHV) - * of the lower bin and adjust if needed. - * - * We seem to get an unstable or solid color picture at 200MHz. - * Not sure what's wrong. For now use 200MHz only when all pipes - * are off. - */ - if (!IS_CHERRYVIEW(dev_priv) && - max_pixclk > freq_320*limit/100) - return 400000; - else if (max_pixclk > 266667*limit/100) - return freq_320; - else if (max_pixclk > 0) - return 266667; - else - return 200000; -} - -static int glk_calc_cdclk(int max_pixclk) -{ - if (max_pixclk > 2 * 158400) - return 316800; - else if (max_pixclk > 2 * 79200) - return 158400; - else - return 79200; -} - -static int bxt_calc_cdclk(int max_pixclk) -{ - if (max_pixclk > 576000) - return 624000; - else if (max_pixclk > 384000) - return 576000; - else if (max_pixclk > 288000) - return 384000; - else if (max_pixclk > 144000) - return 288000; - else - return 144000; -} - -/* Compute the max pixel clock for new configuration. */ -static int intel_mode_max_pixclk(struct drm_device *dev, - struct drm_atomic_state *state) -{ - struct intel_atomic_state *intel_state = to_intel_atomic_state(state); - struct drm_i915_private *dev_priv = to_i915(dev); - struct drm_crtc *crtc; - struct drm_crtc_state *crtc_state; - unsigned max_pixclk = 0, i; - enum pipe pipe; - - memcpy(intel_state->min_pixclk, dev_priv->min_pixclk, - sizeof(intel_state->min_pixclk)); - - for_each_crtc_in_state(state, crtc, crtc_state, i) { - int pixclk = 0; - - if (crtc_state->enable) - pixclk = crtc_state->adjusted_mode.crtc_clock; - - intel_state->min_pixclk[i] = pixclk; - } - - for_each_pipe(dev_priv, pipe) - max_pixclk = max(intel_state->min_pixclk[pipe], max_pixclk); - - return max_pixclk; -} - -static int valleyview_modeset_calc_cdclk(struct drm_atomic_state *state) -{ - struct drm_device *dev = state->dev; - struct drm_i915_private *dev_priv = to_i915(dev); - int max_pixclk = intel_mode_max_pixclk(dev, state); - struct intel_atomic_state *intel_state = - to_intel_atomic_state(state); - - intel_state->cdclk = intel_state->dev_cdclk = - valleyview_calc_cdclk(dev_priv, max_pixclk); - - if (!intel_state->active_crtcs) - intel_state->dev_cdclk = valleyview_calc_cdclk(dev_priv, 0); - - return 0; -} - -static int bxt_modeset_calc_cdclk(struct drm_atomic_state *state) -{ - struct drm_i915_private *dev_priv = to_i915(state->dev); - int max_pixclk = ilk_max_pixel_rate(state); - struct intel_atomic_state *intel_state = - to_intel_atomic_state(state); - int cdclk; - - if (IS_GEMINILAKE(dev_priv)) - cdclk = glk_calc_cdclk(max_pixclk); - else - cdclk = bxt_calc_cdclk(max_pixclk); - - intel_state->cdclk = intel_state->dev_cdclk = cdclk; - - if (!intel_state->active_crtcs) { - if (IS_GEMINILAKE(dev_priv)) - cdclk = glk_calc_cdclk(0); - else - cdclk = bxt_calc_cdclk(0); - - intel_state->dev_cdclk = cdclk; - } - - return 0; -} - -static void vlv_program_pfi_credits(struct drm_i915_private *dev_priv) -{ - unsigned int credits, default_credits; - - if (IS_CHERRYVIEW(dev_priv)) - default_credits = PFI_CREDIT(12); - else - default_credits = PFI_CREDIT(8); - - if (dev_priv->cdclk_freq >= dev_priv->czclk_freq) { - /* CHV suggested value is 31 or 63 */ - if (IS_CHERRYVIEW(dev_priv)) - credits = PFI_CREDIT_63; - else - credits = PFI_CREDIT(15); - } else { - credits = default_credits; - } - - /* - * WA - write default credits before re-programming - * FIXME: should we also set the resend bit here? - */ - I915_WRITE(GCI_CONTROL, VGA_FAST_MODE_DISABLE | - default_credits); - - I915_WRITE(GCI_CONTROL, VGA_FAST_MODE_DISABLE | - credits | PFI_CREDIT_RESEND); - - /* - * FIXME is this guaranteed to clear - * immediately or should we poll for it? - */ - WARN_ON(I915_READ(GCI_CONTROL) & PFI_CREDIT_RESEND); -} - -static void valleyview_modeset_commit_cdclk(struct drm_atomic_state *old_state) -{ - struct drm_device *dev = old_state->dev; - struct drm_i915_private *dev_priv = to_i915(dev); - struct intel_atomic_state *old_intel_state = - to_intel_atomic_state(old_state); - unsigned req_cdclk = old_intel_state->dev_cdclk; - - /* - * FIXME: We can end up here with all power domains off, yet - * with a CDCLK frequency other than the minimum. To account - * for this take the PIPE-A power domain, which covers the HW - * blocks needed for the following programming. This can be - * removed once it's guaranteed that we get here either with - * the minimum CDCLK set, or the required power domains - * enabled. - */ - intel_display_power_get(dev_priv, POWER_DOMAIN_PIPE_A); - - if (IS_CHERRYVIEW(dev_priv)) - cherryview_set_cdclk(dev, req_cdclk); - else - valleyview_set_cdclk(dev, req_cdclk); - - vlv_program_pfi_credits(dev_priv); - - intel_display_power_put(dev_priv, POWER_DOMAIN_PIPE_A); -} - static void valleyview_crtc_enable(struct intel_crtc_state *pipe_config, struct drm_atomic_state *old_state) { @@ -6864,7 +5824,7 @@ static void intel_crtc_disable_noatomic(struct drm_crtc *crtc) struct intel_crtc *intel_crtc = to_intel_crtc(crtc); struct drm_i915_private *dev_priv = to_i915(crtc->dev); enum intel_display_power_domain domain; - unsigned long domains; + u64 domains; struct drm_atomic_state *state; struct intel_crtc_state *crtc_state; int ret; @@ -7172,7 +6132,7 @@ static bool pipe_config_supports_ips(struct drm_i915_private *dev_priv, * * Should measure whether using a lower cdclk w/o IPS */ - return ilk_pipe_pixel_rate(pipe_config) <= + return pipe_config->pixel_rate <= dev_priv->max_cdclk_freq * 95 / 100; } @@ -7196,6 +6156,54 @@ static bool intel_crtc_supports_double_wide(const struct intel_crtc *crtc) (crtc->pipe == PIPE_A || IS_I915G(dev_priv)); } +static uint32_t ilk_pipe_pixel_rate(const struct intel_crtc_state *pipe_config) +{ + uint32_t pixel_rate; + + pixel_rate = pipe_config->base.adjusted_mode.crtc_clock; + + /* + * We only use IF-ID interlacing. If we ever use + * PF-ID we'll need to adjust the pixel_rate here. + */ + + if (pipe_config->pch_pfit.enabled) { + uint64_t pipe_w, pipe_h, pfit_w, pfit_h; + uint32_t pfit_size = pipe_config->pch_pfit.size; + + pipe_w = pipe_config->pipe_src_w; + pipe_h = pipe_config->pipe_src_h; + + pfit_w = (pfit_size >> 16) & 0xFFFF; + pfit_h = pfit_size & 0xFFFF; + if (pipe_w < pfit_w) + pipe_w = pfit_w; + if (pipe_h < pfit_h) + pipe_h = pfit_h; + + if (WARN_ON(!pfit_w || !pfit_h)) + return pixel_rate; + + pixel_rate = div_u64((uint64_t) pixel_rate * pipe_w * pipe_h, + pfit_w * pfit_h); + } + + return pixel_rate; +} + +static void intel_crtc_compute_pixel_rate(struct intel_crtc_state *crtc_state) +{ + struct drm_i915_private *dev_priv = to_i915(crtc_state->base.crtc->dev); + + if (HAS_GMCH_DISPLAY(dev_priv)) + /* FIXME calculate proper pipe pixel rate for GMCH pfit */ + crtc_state->pixel_rate = + crtc_state->base.adjusted_mode.crtc_clock; + else + crtc_state->pixel_rate = + ilk_pipe_pixel_rate(crtc_state); +} + static int intel_crtc_compute_config(struct intel_crtc *crtc, struct intel_crtc_state *pipe_config) { @@ -7242,6 +6250,8 @@ static int intel_crtc_compute_config(struct intel_crtc *crtc, adjusted_mode->crtc_hsync_start == adjusted_mode->crtc_hdisplay) return -EINVAL; + intel_crtc_compute_pixel_rate(pipe_config); + if (HAS_IPS(dev_priv)) hsw_compute_ips_config(crtc, pipe_config); @@ -7251,428 +6261,6 @@ static int intel_crtc_compute_config(struct intel_crtc *crtc, return 0; } -static int skylake_get_display_clock_speed(struct drm_i915_private *dev_priv) -{ - u32 cdctl; - - skl_dpll0_update(dev_priv); - - if (dev_priv->cdclk_pll.vco == 0) - return dev_priv->cdclk_pll.ref; - - cdctl = I915_READ(CDCLK_CTL); - - if (dev_priv->cdclk_pll.vco == 8640000) { - switch (cdctl & CDCLK_FREQ_SEL_MASK) { - case CDCLK_FREQ_450_432: - return 432000; - case CDCLK_FREQ_337_308: - return 308571; - case CDCLK_FREQ_540: - return 540000; - case CDCLK_FREQ_675_617: - return 617143; - default: - MISSING_CASE(cdctl & CDCLK_FREQ_SEL_MASK); - } - } else { - switch (cdctl & CDCLK_FREQ_SEL_MASK) { - case CDCLK_FREQ_450_432: - return 450000; - case CDCLK_FREQ_337_308: - return 337500; - case CDCLK_FREQ_540: - return 540000; - case CDCLK_FREQ_675_617: - return 675000; - default: - MISSING_CASE(cdctl & CDCLK_FREQ_SEL_MASK); - } - } - - return dev_priv->cdclk_pll.ref; -} - -static void bxt_de_pll_update(struct drm_i915_private *dev_priv) -{ - u32 val; - - dev_priv->cdclk_pll.ref = 19200; - dev_priv->cdclk_pll.vco = 0; - - val = I915_READ(BXT_DE_PLL_ENABLE); - if ((val & BXT_DE_PLL_PLL_ENABLE) == 0) - return; - - if (WARN_ON((val & BXT_DE_PLL_LOCK) == 0)) - return; - - val = I915_READ(BXT_DE_PLL_CTL); - dev_priv->cdclk_pll.vco = (val & BXT_DE_PLL_RATIO_MASK) * - dev_priv->cdclk_pll.ref; -} - -static int broxton_get_display_clock_speed(struct drm_i915_private *dev_priv) -{ - u32 divider; - int div, vco; - - bxt_de_pll_update(dev_priv); - - vco = dev_priv->cdclk_pll.vco; - if (vco == 0) - return dev_priv->cdclk_pll.ref; - - divider = I915_READ(CDCLK_CTL) & BXT_CDCLK_CD2X_DIV_SEL_MASK; - - switch (divider) { - case BXT_CDCLK_CD2X_DIV_SEL_1: - div = 2; - break; - case BXT_CDCLK_CD2X_DIV_SEL_1_5: - WARN(IS_GEMINILAKE(dev_priv), "Unsupported divider\n"); - div = 3; - break; - case BXT_CDCLK_CD2X_DIV_SEL_2: - div = 4; - break; - case BXT_CDCLK_CD2X_DIV_SEL_4: - div = 8; - break; - default: - MISSING_CASE(divider); - return dev_priv->cdclk_pll.ref; - } - - return DIV_ROUND_CLOSEST(vco, div); -} - -static int broadwell_get_display_clock_speed(struct drm_i915_private *dev_priv) -{ - uint32_t lcpll = I915_READ(LCPLL_CTL); - uint32_t freq = lcpll & LCPLL_CLK_FREQ_MASK; - - if (lcpll & LCPLL_CD_SOURCE_FCLK) - return 800000; - else if (I915_READ(FUSE_STRAP) & HSW_CDCLK_LIMIT) - return 450000; - else if (freq == LCPLL_CLK_FREQ_450) - return 450000; - else if (freq == LCPLL_CLK_FREQ_54O_BDW) - return 540000; - else if (freq == LCPLL_CLK_FREQ_337_5_BDW) - return 337500; - else - return 675000; -} - -static int haswell_get_display_clock_speed(struct drm_i915_private *dev_priv) -{ - uint32_t lcpll = I915_READ(LCPLL_CTL); - uint32_t freq = lcpll & LCPLL_CLK_FREQ_MASK; - - if (lcpll & LCPLL_CD_SOURCE_FCLK) - return 800000; - else if (I915_READ(FUSE_STRAP) & HSW_CDCLK_LIMIT) - return 450000; - else if (freq == LCPLL_CLK_FREQ_450) - return 450000; - else if (IS_HSW_ULT(dev_priv)) - return 337500; - else - return 540000; -} - -static int valleyview_get_display_clock_speed(struct drm_i915_private *dev_priv) -{ - return vlv_get_cck_clock_hpll(dev_priv, "cdclk", - CCK_DISPLAY_CLOCK_CONTROL); -} - -static int ilk_get_display_clock_speed(struct drm_i915_private *dev_priv) -{ - return 450000; -} - -static int i945_get_display_clock_speed(struct drm_i915_private *dev_priv) -{ - return 400000; -} - -static int i915_get_display_clock_speed(struct drm_i915_private *dev_priv) -{ - return 333333; -} - -static int i9xx_misc_get_display_clock_speed(struct drm_i915_private *dev_priv) -{ - return 200000; -} - -static int pnv_get_display_clock_speed(struct drm_i915_private *dev_priv) -{ - struct pci_dev *pdev = dev_priv->drm.pdev; - u16 gcfgc = 0; - - pci_read_config_word(pdev, GCFGC, &gcfgc); - - switch (gcfgc & GC_DISPLAY_CLOCK_MASK) { - case GC_DISPLAY_CLOCK_267_MHZ_PNV: - return 266667; - case GC_DISPLAY_CLOCK_333_MHZ_PNV: - return 333333; - case GC_DISPLAY_CLOCK_444_MHZ_PNV: - return 444444; - case GC_DISPLAY_CLOCK_200_MHZ_PNV: - return 200000; - default: - DRM_ERROR("Unknown pnv display core clock 0x%04x\n", gcfgc); - case GC_DISPLAY_CLOCK_133_MHZ_PNV: - return 133333; - case GC_DISPLAY_CLOCK_167_MHZ_PNV: - return 166667; - } -} - -static int i915gm_get_display_clock_speed(struct drm_i915_private *dev_priv) -{ - struct pci_dev *pdev = dev_priv->drm.pdev; - u16 gcfgc = 0; - - pci_read_config_word(pdev, GCFGC, &gcfgc); - - if (gcfgc & GC_LOW_FREQUENCY_ENABLE) - return 133333; - else { - switch (gcfgc & GC_DISPLAY_CLOCK_MASK) { - case GC_DISPLAY_CLOCK_333_MHZ: - return 333333; - default: - case GC_DISPLAY_CLOCK_190_200_MHZ: - return 190000; - } - } -} - -static int i865_get_display_clock_speed(struct drm_i915_private *dev_priv) -{ - return 266667; -} - -static int i85x_get_display_clock_speed(struct drm_i915_private *dev_priv) -{ - struct pci_dev *pdev = dev_priv->drm.pdev; - u16 hpllcc = 0; - - /* - * 852GM/852GMV only supports 133 MHz and the HPLLCC - * encoding is different :( - * FIXME is this the right way to detect 852GM/852GMV? - */ - if (pdev->revision == 0x1) - return 133333; - - pci_bus_read_config_word(pdev->bus, - PCI_DEVFN(0, 3), HPLLCC, &hpllcc); - - /* Assume that the hardware is in the high speed state. This - * should be the default. - */ - switch (hpllcc & GC_CLOCK_CONTROL_MASK) { - case GC_CLOCK_133_200: - case GC_CLOCK_133_200_2: - case GC_CLOCK_100_200: - return 200000; - case GC_CLOCK_166_250: - return 250000; - case GC_CLOCK_100_133: - return 133333; - case GC_CLOCK_133_266: - case GC_CLOCK_133_266_2: - case GC_CLOCK_166_266: - return 266667; - } - - /* Shouldn't happen */ - return 0; -} - -static int i830_get_display_clock_speed(struct drm_i915_private *dev_priv) -{ - return 133333; -} - -static unsigned int intel_hpll_vco(struct drm_i915_private *dev_priv) -{ - static const unsigned int blb_vco[8] = { - [0] = 3200000, - [1] = 4000000, - [2] = 5333333, - [3] = 4800000, - [4] = 6400000, - }; - static const unsigned int pnv_vco[8] = { - [0] = 3200000, - [1] = 4000000, - [2] = 5333333, - [3] = 4800000, - [4] = 2666667, - }; - static const unsigned int cl_vco[8] = { - [0] = 3200000, - [1] = 4000000, - [2] = 5333333, - [3] = 6400000, - [4] = 3333333, - [5] = 3566667, - [6] = 4266667, - }; - static const unsigned int elk_vco[8] = { - [0] = 3200000, - [1] = 4000000, - [2] = 5333333, - [3] = 4800000, - }; - static const unsigned int ctg_vco[8] = { - [0] = 3200000, - [1] = 4000000, - [2] = 5333333, - [3] = 6400000, - [4] = 2666667, - [5] = 4266667, - }; - const unsigned int *vco_table; - unsigned int vco; - uint8_t tmp = 0; - - /* FIXME other chipsets? */ - if (IS_GM45(dev_priv)) - vco_table = ctg_vco; - else if (IS_G4X(dev_priv)) - vco_table = elk_vco; - else if (IS_I965GM(dev_priv)) - vco_table = cl_vco; - else if (IS_PINEVIEW(dev_priv)) - vco_table = pnv_vco; - else if (IS_G33(dev_priv)) - vco_table = blb_vco; - else - return 0; - - tmp = I915_READ(IS_MOBILE(dev_priv) ? HPLLVCO_MOBILE : HPLLVCO); - - vco = vco_table[tmp & 0x7]; - if (vco == 0) - DRM_ERROR("Bad HPLL VCO (HPLLVCO=0x%02x)\n", tmp); - else - DRM_DEBUG_KMS("HPLL VCO %u kHz\n", vco); - - return vco; -} - -static int gm45_get_display_clock_speed(struct drm_i915_private *dev_priv) -{ - struct pci_dev *pdev = dev_priv->drm.pdev; - unsigned int cdclk_sel, vco = intel_hpll_vco(dev_priv); - uint16_t tmp = 0; - - pci_read_config_word(pdev, GCFGC, &tmp); - - cdclk_sel = (tmp >> 12) & 0x1; - - switch (vco) { - case 2666667: - case 4000000: - case 5333333: - return cdclk_sel ? 333333 : 222222; - case 3200000: - return cdclk_sel ? 320000 : 228571; - default: - DRM_ERROR("Unable to determine CDCLK. HPLL VCO=%u, CFGC=0x%04x\n", vco, tmp); - return 222222; - } -} - -static int i965gm_get_display_clock_speed(struct drm_i915_private *dev_priv) -{ - struct pci_dev *pdev = dev_priv->drm.pdev; - static const uint8_t div_3200[] = { 16, 10, 8 }; - static const uint8_t div_4000[] = { 20, 12, 10 }; - static const uint8_t div_5333[] = { 24, 16, 14 }; - const uint8_t *div_table; - unsigned int cdclk_sel, vco = intel_hpll_vco(dev_priv); - uint16_t tmp = 0; - - pci_read_config_word(pdev, GCFGC, &tmp); - - cdclk_sel = ((tmp >> 8) & 0x1f) - 1; - - if (cdclk_sel >= ARRAY_SIZE(div_3200)) - goto fail; - - switch (vco) { - case 3200000: - div_table = div_3200; - break; - case 4000000: - div_table = div_4000; - break; - case 5333333: - div_table = div_5333; - break; - default: - goto fail; - } - - return DIV_ROUND_CLOSEST(vco, div_table[cdclk_sel]); - -fail: - DRM_ERROR("Unable to determine CDCLK. HPLL VCO=%u kHz, CFGC=0x%04x\n", vco, tmp); - return 200000; -} - -static int g33_get_display_clock_speed(struct drm_i915_private *dev_priv) -{ - struct pci_dev *pdev = dev_priv->drm.pdev; - static const uint8_t div_3200[] = { 12, 10, 8, 7, 5, 16 }; - static const uint8_t div_4000[] = { 14, 12, 10, 8, 6, 20 }; - static const uint8_t div_4800[] = { 20, 14, 12, 10, 8, 24 }; - static const uint8_t div_5333[] = { 20, 16, 12, 12, 8, 28 }; - const uint8_t *div_table; - unsigned int cdclk_sel, vco = intel_hpll_vco(dev_priv); - uint16_t tmp = 0; - - pci_read_config_word(pdev, GCFGC, &tmp); - - cdclk_sel = (tmp >> 4) & 0x7; - - if (cdclk_sel >= ARRAY_SIZE(div_3200)) - goto fail; - - switch (vco) { - case 3200000: - div_table = div_3200; - break; - case 4000000: - div_table = div_4000; - break; - case 4800000: - div_table = div_4800; - break; - case 5333333: - div_table = div_5333; - break; - default: - goto fail; - } - - return DIV_ROUND_CLOSEST(vco, div_table[cdclk_sel]); - -fail: - DRM_ERROR("Unable to determine CDCLK. HPLL VCO=%u kHz, CFGC=0x%08x\n", vco, tmp); - return 190476; -} - static void intel_reduce_m_n_ratio(uint32_t *num, uint32_t *den) { @@ -8767,7 +7355,8 @@ i9xx_get_initial_plane_config(struct intel_crtc *crtc, val = I915_READ(DSPSTRIDE(pipe)); fb->pitches[0] = val & 0xffffffc0; - aligned_height = intel_fb_align_height(dev, fb->height, + aligned_height = intel_fb_align_height(dev_priv, + fb->height, fb->format->format, fb->modifier); @@ -9808,7 +8397,8 @@ skylake_get_initial_plane_config(struct intel_crtc *crtc, fb->format->format); fb->pitches[0] = (val & 0x3ff) * stride_mult; - aligned_height = intel_fb_align_height(dev, fb->height, + aligned_height = intel_fb_align_height(dev_priv, + fb->height, fb->format->format, fb->modifier); @@ -9906,7 +8496,8 @@ ironlake_get_initial_plane_config(struct intel_crtc *crtc, val = I915_READ(DSPSTRIDE(pipe)); fb->pitches[0] = val & 0xffffffc0; - aligned_height = intel_fb_align_height(dev, fb->height, + aligned_height = intel_fb_align_height(dev_priv, + fb->height, fb->format->format, fb->modifier); @@ -10234,245 +8825,6 @@ void hsw_disable_pc8(struct drm_i915_private *dev_priv) } } -static void bxt_modeset_commit_cdclk(struct drm_atomic_state *old_state) -{ - struct drm_device *dev = old_state->dev; - struct intel_atomic_state *old_intel_state = - to_intel_atomic_state(old_state); - unsigned int req_cdclk = old_intel_state->dev_cdclk; - - bxt_set_cdclk(to_i915(dev), req_cdclk); -} - -static int bdw_adjust_min_pipe_pixel_rate(struct intel_crtc_state *crtc_state, - int pixel_rate) -{ - struct drm_i915_private *dev_priv = to_i915(crtc_state->base.crtc->dev); - - /* pixel rate mustn't exceed 95% of cdclk with IPS on BDW */ - if (IS_BROADWELL(dev_priv) && crtc_state->ips_enabled) - pixel_rate = DIV_ROUND_UP(pixel_rate * 100, 95); - - /* BSpec says "Do not use DisplayPort with CDCLK less than - * 432 MHz, audio enabled, port width x4, and link rate - * HBR2 (5.4 GHz), or else there may be audio corruption or - * screen corruption." - */ - if (intel_crtc_has_dp_encoder(crtc_state) && - crtc_state->has_audio && - crtc_state->port_clock >= 540000 && - crtc_state->lane_count == 4) - pixel_rate = max(432000, pixel_rate); - - return pixel_rate; -} - -/* compute the max rate for new configuration */ -static int ilk_max_pixel_rate(struct drm_atomic_state *state) -{ - struct intel_atomic_state *intel_state = to_intel_atomic_state(state); - struct drm_i915_private *dev_priv = to_i915(state->dev); - struct drm_crtc *crtc; - struct drm_crtc_state *cstate; - struct intel_crtc_state *crtc_state; - unsigned max_pixel_rate = 0, i; - enum pipe pipe; - - memcpy(intel_state->min_pixclk, dev_priv->min_pixclk, - sizeof(intel_state->min_pixclk)); - - for_each_crtc_in_state(state, crtc, cstate, i) { - int pixel_rate; - - crtc_state = to_intel_crtc_state(cstate); - if (!crtc_state->base.enable) { - intel_state->min_pixclk[i] = 0; - continue; - } - - pixel_rate = ilk_pipe_pixel_rate(crtc_state); - - if (IS_BROADWELL(dev_priv) || IS_GEN9(dev_priv)) - pixel_rate = bdw_adjust_min_pipe_pixel_rate(crtc_state, - pixel_rate); - - intel_state->min_pixclk[i] = pixel_rate; - } - - for_each_pipe(dev_priv, pipe) - max_pixel_rate = max(intel_state->min_pixclk[pipe], max_pixel_rate); - - return max_pixel_rate; -} - -static void broadwell_set_cdclk(struct drm_device *dev, int cdclk) -{ - struct drm_i915_private *dev_priv = to_i915(dev); - uint32_t val, data; - int ret; - - if (WARN((I915_READ(LCPLL_CTL) & - (LCPLL_PLL_DISABLE | LCPLL_PLL_LOCK | - LCPLL_CD_CLOCK_DISABLE | LCPLL_ROOT_CD_CLOCK_DISABLE | - LCPLL_CD2X_CLOCK_DISABLE | LCPLL_POWER_DOWN_ALLOW | - LCPLL_CD_SOURCE_FCLK)) != LCPLL_PLL_LOCK, - "trying to change cdclk frequency with cdclk not enabled\n")) - return; - - mutex_lock(&dev_priv->rps.hw_lock); - ret = sandybridge_pcode_write(dev_priv, - BDW_PCODE_DISPLAY_FREQ_CHANGE_REQ, 0x0); - mutex_unlock(&dev_priv->rps.hw_lock); - if (ret) { - DRM_ERROR("failed to inform pcode about cdclk change\n"); - return; - } - - val = I915_READ(LCPLL_CTL); - val |= LCPLL_CD_SOURCE_FCLK; - I915_WRITE(LCPLL_CTL, val); - - if (wait_for_us(I915_READ(LCPLL_CTL) & - LCPLL_CD_SOURCE_FCLK_DONE, 1)) - DRM_ERROR("Switching to FCLK failed\n"); - - val = I915_READ(LCPLL_CTL); - val &= ~LCPLL_CLK_FREQ_MASK; - - switch (cdclk) { - case 450000: - val |= LCPLL_CLK_FREQ_450; - data = 0; - break; - case 540000: - val |= LCPLL_CLK_FREQ_54O_BDW; - data = 1; - break; - case 337500: - val |= LCPLL_CLK_FREQ_337_5_BDW; - data = 2; - break; - case 675000: - val |= LCPLL_CLK_FREQ_675_BDW; - data = 3; - break; - default: - WARN(1, "invalid cdclk frequency\n"); - return; - } - - I915_WRITE(LCPLL_CTL, val); - - val = I915_READ(LCPLL_CTL); - val &= ~LCPLL_CD_SOURCE_FCLK; - I915_WRITE(LCPLL_CTL, val); - - if (wait_for_us((I915_READ(LCPLL_CTL) & - LCPLL_CD_SOURCE_FCLK_DONE) == 0, 1)) - DRM_ERROR("Switching back to LCPLL failed\n"); - - mutex_lock(&dev_priv->rps.hw_lock); - sandybridge_pcode_write(dev_priv, HSW_PCODE_DE_WRITE_FREQ_REQ, data); - mutex_unlock(&dev_priv->rps.hw_lock); - - I915_WRITE(CDCLK_FREQ, DIV_ROUND_CLOSEST(cdclk, 1000) - 1); - - intel_update_cdclk(dev_priv); - - WARN(cdclk != dev_priv->cdclk_freq, - "cdclk requested %d kHz but got %d kHz\n", - cdclk, dev_priv->cdclk_freq); -} - -static int broadwell_calc_cdclk(int max_pixclk) -{ - if (max_pixclk > 540000) - return 675000; - else if (max_pixclk > 450000) - return 540000; - else if (max_pixclk > 337500) - return 450000; - else - return 337500; -} - -static int broadwell_modeset_calc_cdclk(struct drm_atomic_state *state) -{ - struct drm_i915_private *dev_priv = to_i915(state->dev); - struct intel_atomic_state *intel_state = to_intel_atomic_state(state); - int max_pixclk = ilk_max_pixel_rate(state); - int cdclk; - - /* - * FIXME should also account for plane ratio - * once 64bpp pixel formats are supported. - */ - cdclk = broadwell_calc_cdclk(max_pixclk); - - if (cdclk > dev_priv->max_cdclk_freq) { - DRM_DEBUG_KMS("requested cdclk (%d kHz) exceeds max (%d kHz)\n", - cdclk, dev_priv->max_cdclk_freq); - return -EINVAL; - } - - intel_state->cdclk = intel_state->dev_cdclk = cdclk; - if (!intel_state->active_crtcs) - intel_state->dev_cdclk = broadwell_calc_cdclk(0); - - return 0; -} - -static void broadwell_modeset_commit_cdclk(struct drm_atomic_state *old_state) -{ - struct drm_device *dev = old_state->dev; - struct intel_atomic_state *old_intel_state = - to_intel_atomic_state(old_state); - unsigned req_cdclk = old_intel_state->dev_cdclk; - - broadwell_set_cdclk(dev, req_cdclk); -} - -static int skl_modeset_calc_cdclk(struct drm_atomic_state *state) -{ - struct intel_atomic_state *intel_state = to_intel_atomic_state(state); - struct drm_i915_private *dev_priv = to_i915(state->dev); - const int max_pixclk = ilk_max_pixel_rate(state); - int vco = intel_state->cdclk_pll_vco; - int cdclk; - - /* - * FIXME should also account for plane ratio - * once 64bpp pixel formats are supported. - */ - cdclk = skl_calc_cdclk(max_pixclk, vco); - - /* - * FIXME move the cdclk caclulation to - * compute_config() so we can fail gracegully. - */ - if (cdclk > dev_priv->max_cdclk_freq) { - DRM_ERROR("requested cdclk (%d kHz) exceeds max (%d kHz)\n", - cdclk, dev_priv->max_cdclk_freq); - cdclk = dev_priv->max_cdclk_freq; - } - - intel_state->cdclk = intel_state->dev_cdclk = cdclk; - if (!intel_state->active_crtcs) - intel_state->dev_cdclk = skl_calc_cdclk(0, vco); - - return 0; -} - -static void skl_modeset_commit_cdclk(struct drm_atomic_state *old_state) -{ - struct drm_i915_private *dev_priv = to_i915(old_state->dev); - struct intel_atomic_state *intel_state = to_intel_atomic_state(old_state); - unsigned int req_cdclk = intel_state->dev_cdclk; - unsigned int req_vco = intel_state->cdclk_pll_vco; - - skl_set_cdclk(dev_priv, req_cdclk, req_vco); -} - static int haswell_crtc_compute_clock(struct intel_crtc *crtc, struct intel_crtc_state *crtc_state) { @@ -10564,7 +8916,7 @@ static void haswell_get_ddi_pll(struct drm_i915_private *dev_priv, static bool hsw_get_transcoder_state(struct intel_crtc *crtc, struct intel_crtc_state *pipe_config, - unsigned long *power_domain_mask) + u64 *power_domain_mask) { struct drm_device *dev = crtc->base.dev; struct drm_i915_private *dev_priv = to_i915(dev); @@ -10606,7 +8958,7 @@ static bool hsw_get_transcoder_state(struct intel_crtc *crtc, power_domain = POWER_DOMAIN_TRANSCODER(pipe_config->cpu_transcoder); if (!intel_display_power_get_if_enabled(dev_priv, power_domain)) return false; - *power_domain_mask |= BIT(power_domain); + *power_domain_mask |= BIT_ULL(power_domain); tmp = I915_READ(PIPECONF(pipe_config->cpu_transcoder)); @@ -10615,7 +8967,7 @@ static bool hsw_get_transcoder_state(struct intel_crtc *crtc, static bool bxt_get_dsi_transcoder_state(struct intel_crtc *crtc, struct intel_crtc_state *pipe_config, - unsigned long *power_domain_mask) + u64 *power_domain_mask) { struct drm_device *dev = crtc->base.dev; struct drm_i915_private *dev_priv = to_i915(dev); @@ -10633,7 +8985,7 @@ static bool bxt_get_dsi_transcoder_state(struct intel_crtc *crtc, power_domain = POWER_DOMAIN_TRANSCODER(cpu_transcoder); if (!intel_display_power_get_if_enabled(dev_priv, power_domain)) continue; - *power_domain_mask |= BIT(power_domain); + *power_domain_mask |= BIT_ULL(power_domain); /* * The PLL needs to be enabled with a valid divider @@ -10673,7 +9025,7 @@ static void haswell_get_ddi_port_state(struct intel_crtc *crtc, port = (tmp & TRANS_DDI_PORT_MASK) >> TRANS_DDI_PORT_SHIFT; - if (IS_SKYLAKE(dev_priv) || IS_KABYLAKE(dev_priv)) + if (IS_GEN9_BC(dev_priv)) skylake_get_ddi_pll(dev_priv, port, pipe_config); else if (IS_GEN9_LP(dev_priv)) bxt_get_ddi_pll(dev_priv, port, pipe_config); @@ -10708,13 +9060,13 @@ static bool haswell_get_pipe_config(struct intel_crtc *crtc, { struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); enum intel_display_power_domain power_domain; - unsigned long power_domain_mask; + u64 power_domain_mask; bool active; power_domain = POWER_DOMAIN_PIPE(crtc->pipe); if (!intel_display_power_get_if_enabled(dev_priv, power_domain)) return false; - power_domain_mask = BIT(power_domain); + power_domain_mask = BIT_ULL(power_domain); pipe_config->shared_dpll = NULL; @@ -10748,7 +9100,7 @@ static bool haswell_get_pipe_config(struct intel_crtc *crtc, power_domain = POWER_DOMAIN_PIPE_PANEL_FITTER(crtc->pipe); if (intel_display_power_get_if_enabled(dev_priv, power_domain)) { - power_domain_mask |= BIT(power_domain); + power_domain_mask |= BIT_ULL(power_domain); if (INTEL_GEN(dev_priv) >= 9) skylake_get_pfit_config(crtc, pipe_config); else @@ -10971,9 +9323,8 @@ static struct drm_display_mode load_detect_mode = { }; struct drm_framebuffer * -__intel_framebuffer_create(struct drm_device *dev, - struct drm_mode_fb_cmd2 *mode_cmd, - struct drm_i915_gem_object *obj) +intel_framebuffer_create(struct drm_i915_gem_object *obj, + struct drm_mode_fb_cmd2 *mode_cmd) { struct intel_framebuffer *intel_fb; int ret; @@ -10982,7 +9333,7 @@ __intel_framebuffer_create(struct drm_device *dev, if (!intel_fb) return ERR_PTR(-ENOMEM); - ret = intel_framebuffer_init(dev, intel_fb, mode_cmd, obj); + ret = intel_framebuffer_init(intel_fb, obj, mode_cmd); if (ret) goto err; @@ -10993,23 +9344,6 @@ err: return ERR_PTR(ret); } -static struct drm_framebuffer * -intel_framebuffer_create(struct drm_device *dev, - struct drm_mode_fb_cmd2 *mode_cmd, - struct drm_i915_gem_object *obj) -{ - struct drm_framebuffer *fb; - int ret; - - ret = i915_mutex_lock_interruptible(dev); - if (ret) - return ERR_PTR(ret); - fb = __intel_framebuffer_create(dev, mode_cmd, obj); - mutex_unlock(&dev->struct_mutex); - - return fb; -} - static u32 intel_framebuffer_pitch_for_width(int width, int bpp) { @@ -11044,7 +9378,7 @@ intel_framebuffer_create_for_mode(struct drm_device *dev, bpp); mode_cmd.pixel_format = drm_mode_legacy_fb_format(bpp, depth); - fb = intel_framebuffer_create(dev, &mode_cmd, obj); + fb = intel_framebuffer_create(obj, &mode_cmd); if (IS_ERR(fb)) i915_gem_object_put(obj); @@ -11730,14 +10064,12 @@ static int intel_gen2_queue_flip(struct drm_device *dev, struct drm_i915_gem_request *req, uint32_t flags) { - struct intel_ring *ring = req->ring; struct intel_crtc *intel_crtc = to_intel_crtc(crtc); - u32 flip_mask; - int ret; + u32 flip_mask, *cs; - ret = intel_ring_begin(req, 6); - if (ret) - return ret; + cs = intel_ring_begin(req, 6); + if (IS_ERR(cs)) + return PTR_ERR(cs); /* Can't queue multiple flips, so wait for the previous * one to finish before executing the next. @@ -11746,13 +10078,12 @@ static int intel_gen2_queue_flip(struct drm_device *dev, flip_mask = MI_WAIT_FOR_PLANE_B_FLIP; else flip_mask = MI_WAIT_FOR_PLANE_A_FLIP; - intel_ring_emit(ring, MI_WAIT_FOR_EVENT | flip_mask); - intel_ring_emit(ring, MI_NOOP); - intel_ring_emit(ring, MI_DISPLAY_FLIP | - MI_DISPLAY_FLIP_PLANE(intel_crtc->plane)); - intel_ring_emit(ring, fb->pitches[0]); - intel_ring_emit(ring, intel_crtc->flip_work->gtt_offset); - intel_ring_emit(ring, 0); /* aux display base address, unused */ + *cs++ = MI_WAIT_FOR_EVENT | flip_mask; + *cs++ = MI_NOOP; + *cs++ = MI_DISPLAY_FLIP | MI_DISPLAY_FLIP_PLANE(intel_crtc->plane); + *cs++ = fb->pitches[0]; + *cs++ = intel_crtc->flip_work->gtt_offset; + *cs++ = 0; /* aux display base address, unused */ return 0; } @@ -11764,26 +10095,23 @@ static int intel_gen3_queue_flip(struct drm_device *dev, struct drm_i915_gem_request *req, uint32_t flags) { - struct intel_ring *ring = req->ring; struct intel_crtc *intel_crtc = to_intel_crtc(crtc); - u32 flip_mask; - int ret; + u32 flip_mask, *cs; - ret = intel_ring_begin(req, 6); - if (ret) - return ret; + cs = intel_ring_begin(req, 6); + if (IS_ERR(cs)) + return PTR_ERR(cs); if (intel_crtc->plane) flip_mask = MI_WAIT_FOR_PLANE_B_FLIP; else flip_mask = MI_WAIT_FOR_PLANE_A_FLIP; - intel_ring_emit(ring, MI_WAIT_FOR_EVENT | flip_mask); - intel_ring_emit(ring, MI_NOOP); - intel_ring_emit(ring, MI_DISPLAY_FLIP_I915 | - MI_DISPLAY_FLIP_PLANE(intel_crtc->plane)); - intel_ring_emit(ring, fb->pitches[0]); - intel_ring_emit(ring, intel_crtc->flip_work->gtt_offset); - intel_ring_emit(ring, MI_NOOP); + *cs++ = MI_WAIT_FOR_EVENT | flip_mask; + *cs++ = MI_NOOP; + *cs++ = MI_DISPLAY_FLIP_I915 | MI_DISPLAY_FLIP_PLANE(intel_crtc->plane); + *cs++ = fb->pitches[0]; + *cs++ = intel_crtc->flip_work->gtt_offset; + *cs++ = MI_NOOP; return 0; } @@ -11795,25 +10123,22 @@ static int intel_gen4_queue_flip(struct drm_device *dev, struct drm_i915_gem_request *req, uint32_t flags) { - struct intel_ring *ring = req->ring; struct drm_i915_private *dev_priv = to_i915(dev); struct intel_crtc *intel_crtc = to_intel_crtc(crtc); - uint32_t pf, pipesrc; - int ret; + u32 pf, pipesrc, *cs; - ret = intel_ring_begin(req, 4); - if (ret) - return ret; + cs = intel_ring_begin(req, 4); + if (IS_ERR(cs)) + return PTR_ERR(cs); /* i965+ uses the linear or tiled offsets from the * Display Registers (which do not change across a page-flip) * so we need only reprogram the base address. */ - intel_ring_emit(ring, MI_DISPLAY_FLIP | - MI_DISPLAY_FLIP_PLANE(intel_crtc->plane)); - intel_ring_emit(ring, fb->pitches[0]); - intel_ring_emit(ring, intel_crtc->flip_work->gtt_offset | - intel_fb_modifier_to_tiling(fb->modifier)); + *cs++ = MI_DISPLAY_FLIP | MI_DISPLAY_FLIP_PLANE(intel_crtc->plane); + *cs++ = fb->pitches[0]; + *cs++ = intel_crtc->flip_work->gtt_offset | + intel_fb_modifier_to_tiling(fb->modifier); /* XXX Enabling the panel-fitter across page-flip is so far * untested on non-native modes, so ignore it for now. @@ -11821,7 +10146,7 @@ static int intel_gen4_queue_flip(struct drm_device *dev, */ pf = 0; pipesrc = I915_READ(PIPESRC(intel_crtc->pipe)) & 0x0fff0fff; - intel_ring_emit(ring, pf | pipesrc); + *cs++ = pf | pipesrc; return 0; } @@ -11833,21 +10158,17 @@ static int intel_gen6_queue_flip(struct drm_device *dev, struct drm_i915_gem_request *req, uint32_t flags) { - struct intel_ring *ring = req->ring; struct drm_i915_private *dev_priv = to_i915(dev); struct intel_crtc *intel_crtc = to_intel_crtc(crtc); - uint32_t pf, pipesrc; - int ret; + u32 pf, pipesrc, *cs; - ret = intel_ring_begin(req, 4); - if (ret) - return ret; + cs = intel_ring_begin(req, 4); + if (IS_ERR(cs)) + return PTR_ERR(cs); - intel_ring_emit(ring, MI_DISPLAY_FLIP | - MI_DISPLAY_FLIP_PLANE(intel_crtc->plane)); - intel_ring_emit(ring, fb->pitches[0] | - intel_fb_modifier_to_tiling(fb->modifier)); - intel_ring_emit(ring, intel_crtc->flip_work->gtt_offset); + *cs++ = MI_DISPLAY_FLIP | MI_DISPLAY_FLIP_PLANE(intel_crtc->plane); + *cs++ = fb->pitches[0] | intel_fb_modifier_to_tiling(fb->modifier); + *cs++ = intel_crtc->flip_work->gtt_offset; /* Contrary to the suggestions in the documentation, * "Enable Panel Fitter" does not seem to be required when page @@ -11857,7 +10178,7 @@ static int intel_gen6_queue_flip(struct drm_device *dev, */ pf = 0; pipesrc = I915_READ(PIPESRC(intel_crtc->pipe)) & 0x0fff0fff; - intel_ring_emit(ring, pf | pipesrc); + *cs++ = pf | pipesrc; return 0; } @@ -11870,9 +10191,8 @@ static int intel_gen7_queue_flip(struct drm_device *dev, uint32_t flags) { struct drm_i915_private *dev_priv = to_i915(dev); - struct intel_ring *ring = req->ring; struct intel_crtc *intel_crtc = to_intel_crtc(crtc); - uint32_t plane_bit = 0; + u32 *cs, plane_bit = 0; int len, ret; switch (intel_crtc->plane) { @@ -11916,9 +10236,9 @@ static int intel_gen7_queue_flip(struct drm_device *dev, if (ret) return ret; - ret = intel_ring_begin(req, len); - if (ret) - return ret; + cs = intel_ring_begin(req, len); + if (IS_ERR(cs)) + return PTR_ERR(cs); /* Unmask the flip-done completion message. Note that the bspec says that * we should do this for both the BCS and RCS, and that we must not unmask @@ -11930,31 +10250,28 @@ static int intel_gen7_queue_flip(struct drm_device *dev, * to zero does lead to lockups within MI_DISPLAY_FLIP. */ if (req->engine->id == RCS) { - intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1)); - intel_ring_emit_reg(ring, DERRMR); - intel_ring_emit(ring, ~(DERRMR_PIPEA_PRI_FLIP_DONE | - DERRMR_PIPEB_PRI_FLIP_DONE | - DERRMR_PIPEC_PRI_FLIP_DONE)); + *cs++ = MI_LOAD_REGISTER_IMM(1); + *cs++ = i915_mmio_reg_offset(DERRMR); + *cs++ = ~(DERRMR_PIPEA_PRI_FLIP_DONE | + DERRMR_PIPEB_PRI_FLIP_DONE | + DERRMR_PIPEC_PRI_FLIP_DONE); if (IS_GEN8(dev_priv)) - intel_ring_emit(ring, MI_STORE_REGISTER_MEM_GEN8 | - MI_SRM_LRM_GLOBAL_GTT); + *cs++ = MI_STORE_REGISTER_MEM_GEN8 | + MI_SRM_LRM_GLOBAL_GTT; else - intel_ring_emit(ring, MI_STORE_REGISTER_MEM | - MI_SRM_LRM_GLOBAL_GTT); - intel_ring_emit_reg(ring, DERRMR); - intel_ring_emit(ring, - i915_ggtt_offset(req->engine->scratch) + 256); + *cs++ = MI_STORE_REGISTER_MEM | MI_SRM_LRM_GLOBAL_GTT; + *cs++ = i915_mmio_reg_offset(DERRMR); + *cs++ = i915_ggtt_offset(req->engine->scratch) + 256; if (IS_GEN8(dev_priv)) { - intel_ring_emit(ring, 0); - intel_ring_emit(ring, MI_NOOP); + *cs++ = 0; + *cs++ = MI_NOOP; } } - intel_ring_emit(ring, MI_DISPLAY_FLIP_I915 | plane_bit); - intel_ring_emit(ring, fb->pitches[0] | - intel_fb_modifier_to_tiling(fb->modifier)); - intel_ring_emit(ring, intel_crtc->flip_work->gtt_offset); - intel_ring_emit(ring, (MI_NOOP)); + *cs++ = MI_DISPLAY_FLIP_I915 | plane_bit; + *cs++ = fb->pitches[0] | intel_fb_modifier_to_tiling(fb->modifier); + *cs++ = intel_crtc->flip_work->gtt_offset; + *cs++ = MI_NOOP; return 0; } @@ -12146,6 +10463,7 @@ void intel_check_page_flip(struct drm_i915_private *dev_priv, int pipe) spin_unlock(&dev->event_lock); } +__maybe_unused static int intel_crtc_page_flip(struct drm_crtc *crtc, struct drm_framebuffer *fb, struct drm_pending_vblank_event *event, @@ -12641,7 +10959,7 @@ static int intel_crtc_atomic_check(struct drm_crtc *crtc, ret = skl_update_scaler_crtc(pipe_config); if (!ret) - ret = intel_atomic_setup_scalers(dev, intel_crtc, + ret = intel_atomic_setup_scalers(dev_priv, intel_crtc, pipe_config); } @@ -12799,9 +11117,10 @@ static void intel_dump_pipe_config(struct intel_crtc *crtc, DRM_DEBUG_KMS("adjusted mode:\n"); drm_mode_debug_printmodeline(&pipe_config->base.adjusted_mode); intel_dump_crtc_timings(&pipe_config->base.adjusted_mode); - DRM_DEBUG_KMS("port clock: %d, pipe src size: %dx%d\n", + DRM_DEBUG_KMS("port clock: %d, pipe src size: %dx%d, pixel rate %d\n", pipe_config->port_clock, - pipe_config->pipe_src_w, pipe_config->pipe_src_h); + pipe_config->pipe_src_w, pipe_config->pipe_src_h, + pipe_config->pixel_rate); if (INTEL_GEN(dev_priv) >= 9) DRM_DEBUG_KMS("num_scalers: %d, scaler_users: 0x%x, scaler_id: %d\n", @@ -13059,8 +11378,11 @@ encoder_retry: } /* Dithering seems to not pass-through bits correctly when it should, so - * only enable it on 6bpc panels. */ - pipe_config->dither = pipe_config->pipe_bpp == 6*3; + * only enable it on 6bpc panels and when its not a compliance + * test requesting 6bpc video pattern. + */ + pipe_config->dither = (pipe_config->pipe_bpp == 6*3) && + !pipe_config->dither_force_disable; DRM_DEBUG_KMS("hw max bpp: %i, pipe bpp: %i, dithering: %i\n", base_bpp, pipe_config->pipe_bpp, pipe_config->dither); @@ -13374,6 +11696,7 @@ intel_pipe_config_compare(struct drm_i915_private *dev_priv, } PIPE_CONF_CHECK_I(scaler_state.scaler_id); + PIPE_CONF_CHECK_CLOCK_FUZZY(pixel_rate); } /* BDW+ don't expose a synchronous way to read the state */ @@ -13665,6 +11988,8 @@ verify_crtc_state(struct drm_crtc *crtc, } } + intel_crtc_compute_pixel_rate(pipe_config); + if (!new_crtc_state->active) return; @@ -13992,6 +12317,8 @@ static int intel_modeset_checks(struct drm_atomic_state *state) intel_state->modeset = true; intel_state->active_crtcs = dev_priv->active_crtcs; + intel_state->cdclk.logical = dev_priv->cdclk.logical; + intel_state->cdclk.actual = dev_priv->cdclk.actual; for_each_crtc_in_state(state, crtc, crtc_state, i) { if (crtc_state->active) @@ -14011,38 +12338,35 @@ static int intel_modeset_checks(struct drm_atomic_state *state) * adjusted_mode bits in the crtc directly. */ if (dev_priv->display.modeset_calc_cdclk) { - if (!intel_state->cdclk_pll_vco) - intel_state->cdclk_pll_vco = dev_priv->cdclk_pll.vco; - if (!intel_state->cdclk_pll_vco) - intel_state->cdclk_pll_vco = dev_priv->skl_preferred_vco_freq; - ret = dev_priv->display.modeset_calc_cdclk(state); if (ret < 0) return ret; /* - * Writes to dev_priv->atomic_cdclk_freq must protected by + * Writes to dev_priv->cdclk.logical must protected by * holding all the crtc locks, even if we don't end up * touching the hardware */ - if (intel_state->cdclk != dev_priv->atomic_cdclk_freq) { + if (!intel_cdclk_state_compare(&dev_priv->cdclk.logical, + &intel_state->cdclk.logical)) { ret = intel_lock_all_pipes(state); if (ret < 0) return ret; } /* All pipes must be switched off while we change the cdclk. */ - if (intel_state->dev_cdclk != dev_priv->cdclk_freq || - intel_state->cdclk_pll_vco != dev_priv->cdclk_pll.vco) { + if (!intel_cdclk_state_compare(&dev_priv->cdclk.actual, + &intel_state->cdclk.actual)) { ret = intel_modeset_all_pipes(state); if (ret < 0) return ret; } - DRM_DEBUG_KMS("New cdclk calculated to be atomic %u, actual %u\n", - intel_state->cdclk, intel_state->dev_cdclk); + DRM_DEBUG_KMS("New cdclk calculated to be logical %u kHz, actual %u kHz\n", + intel_state->cdclk.logical.cdclk, + intel_state->cdclk.actual.cdclk); } else { - to_intel_atomic_state(state)->cdclk = dev_priv->atomic_cdclk_freq; + to_intel_atomic_state(state)->cdclk.logical = dev_priv->cdclk.logical; } intel_modeset_clear_plls(state); @@ -14145,7 +12469,7 @@ static int intel_atomic_check(struct drm_device *dev, if (ret) return ret; } else { - intel_state->cdclk = dev_priv->atomic_cdclk_freq; + intel_state->cdclk.logical = dev_priv->cdclk.logical; } ret = drm_atomic_helper_check_planes(dev, state); @@ -14379,6 +12703,24 @@ static void skl_update_crtcs(struct drm_atomic_state *state, } while (progress); } +static void intel_atomic_helper_free_state(struct drm_i915_private *dev_priv) +{ + struct intel_atomic_state *state, *next; + struct llist_node *freed; + + freed = llist_del_all(&dev_priv->atomic_helper.free_list); + llist_for_each_entry_safe(state, next, freed, freed) + drm_atomic_state_put(&state->base); +} + +static void intel_atomic_helper_free_state_worker(struct work_struct *work) +{ + struct drm_i915_private *dev_priv = + container_of(work, typeof(*dev_priv), atomic_helper.free_work); + + intel_atomic_helper_free_state(dev_priv); +} + static void intel_atomic_commit_tail(struct drm_atomic_state *state) { struct drm_device *dev = state->dev; @@ -14388,7 +12730,7 @@ static void intel_atomic_commit_tail(struct drm_atomic_state *state) struct drm_crtc *crtc; struct intel_crtc_state *intel_cstate; bool hw_check = intel_state->modeset; - unsigned long put_domains[I915_MAX_PIPES] = {}; + u64 put_domains[I915_MAX_PIPES] = {}; unsigned crtc_vblank_mask = 0; int i; @@ -14449,10 +12791,7 @@ static void intel_atomic_commit_tail(struct drm_atomic_state *state) if (intel_state->modeset) { drm_atomic_helper_update_legacy_modeset_state(state->dev, state); - if (dev_priv->display.modeset_commit_cdclk && - (intel_state->dev_cdclk != dev_priv->cdclk_freq || - intel_state->cdclk_pll_vco != dev_priv->cdclk_pll.vco)) - dev_priv->display.modeset_commit_cdclk(state); + intel_set_cdclk(dev_priv, &dev_priv->cdclk.actual); /* * SKL workaround: bspec recommends we disable the SAGV when we @@ -14545,6 +12884,8 @@ static void intel_atomic_commit_tail(struct drm_atomic_state *state) * can happen also when the device is completely off. */ intel_uncore_arm_unclaimed_mmio_detection(dev_priv); + + intel_atomic_helper_free_state(dev_priv); } static void intel_atomic_commit_work(struct work_struct *work) @@ -14638,7 +12979,8 @@ static int intel_atomic_commit(struct drm_device *dev, memcpy(dev_priv->min_pixclk, intel_state->min_pixclk, sizeof(intel_state->min_pixclk)); dev_priv->active_crtcs = intel_state->active_crtcs; - dev_priv->atomic_cdclk_freq = intel_state->cdclk; + dev_priv->cdclk.logical = intel_state->cdclk.logical; + dev_priv->cdclk.actual = intel_state->cdclk.actual; } drm_atomic_state_get(state); @@ -14738,7 +13080,7 @@ static const struct drm_crtc_funcs intel_crtc_funcs = { .set_config = drm_atomic_helper_set_config, .set_property = drm_atomic_helper_crtc_set_property, .destroy = intel_crtc_destroy, - .page_flip = intel_crtc_page_flip, + .page_flip = drm_atomic_helper_page_flip, .atomic_duplicate_state = intel_crtc_duplicate_state, .atomic_destroy_state = intel_crtc_destroy_state, .set_crc_source = intel_crtc_set_crc_source, @@ -14770,6 +13112,29 @@ intel_prepare_plane_fb(struct drm_plane *plane, struct drm_i915_gem_object *old_obj = intel_fb_obj(plane->state->fb); int ret; + if (obj) { + if (plane->type == DRM_PLANE_TYPE_CURSOR && + INTEL_INFO(dev_priv)->cursor_needs_physical) { + const int align = IS_I830(dev_priv) ? 16 * 1024 : 256; + + ret = i915_gem_object_attach_phys(obj, align); + if (ret) { + DRM_DEBUG_KMS("failed to attach phys object\n"); + return ret; + } + } else { + struct i915_vma *vma; + + vma = intel_pin_and_fence_fb_obj(fb, new_state->rotation); + if (IS_ERR(vma)) { + DRM_DEBUG_KMS("failed to pin object\n"); + return PTR_ERR(vma); + } + + to_intel_plane_state(new_state)->vma = vma; + } + } + if (!obj && !old_obj) return 0; @@ -14822,26 +13187,6 @@ intel_prepare_plane_fb(struct drm_plane *plane, i915_gem_object_wait_priority(obj, 0, I915_PRIORITY_DISPLAY); } - if (plane->type == DRM_PLANE_TYPE_CURSOR && - INTEL_INFO(dev_priv)->cursor_needs_physical) { - int align = IS_I830(dev_priv) ? 16 * 1024 : 256; - ret = i915_gem_object_attach_phys(obj, align); - if (ret) { - DRM_DEBUG_KMS("failed to attach phys object\n"); - return ret; - } - } else { - struct i915_vma *vma; - - vma = intel_pin_and_fence_fb_obj(fb, new_state->rotation); - if (IS_ERR(vma)) { - DRM_DEBUG_KMS("failed to pin object\n"); - return PTR_ERR(vma); - } - - to_intel_plane_state(new_state)->vma = vma; - } - return 0; } @@ -14869,16 +13214,22 @@ intel_cleanup_plane_fb(struct drm_plane *plane, int skl_max_scale(struct intel_crtc *intel_crtc, struct intel_crtc_state *crtc_state) { + struct drm_i915_private *dev_priv; int max_scale; - int crtc_clock, cdclk; + int crtc_clock, max_dotclk; if (!intel_crtc || !crtc_state->base.enable) return DRM_PLANE_HELPER_NO_SCALING; + dev_priv = to_i915(intel_crtc->base.dev); + crtc_clock = crtc_state->base.adjusted_mode.crtc_clock; - cdclk = to_intel_atomic_state(crtc_state->base.state)->cdclk; + max_dotclk = to_intel_atomic_state(crtc_state->base.state)->cdclk.logical.cdclk; - if (WARN_ON_ONCE(!crtc_clock || cdclk < crtc_clock)) + if (IS_GEMINILAKE(dev_priv)) + max_dotclk *= 2; + + if (WARN_ON_ONCE(!crtc_clock || max_dotclk < crtc_clock)) return DRM_PLANE_HELPER_NO_SCALING; /* @@ -14887,7 +13238,8 @@ skl_max_scale(struct intel_crtc *intel_crtc, struct intel_crtc_state *crtc_state * or * cdclk/crtc_clock */ - max_scale = min((1 << 16) * 3 - 1, (1 << 8) * ((cdclk << 8) / crtc_clock)); + max_scale = min((1 << 16) * 3 - 1, + (1 << 8) * ((max_dotclk << 8) / crtc_clock)); return max_scale; } @@ -15680,7 +14032,7 @@ static void intel_setup_outputs(struct drm_i915_private *dev_priv) */ found = I915_READ(DDI_BUF_CTL(PORT_A)) & DDI_INIT_DISPLAY_DETECTED; /* WaIgnoreDDIAStrap: skl */ - if (found || IS_SKYLAKE(dev_priv) || IS_KABYLAKE(dev_priv)) + if (found || IS_GEN9_BC(dev_priv)) intel_ddi_init(dev_priv, PORT_A); /* DDI B, C and D detection is indicated by the SFUSE_STRAP @@ -15696,7 +14048,7 @@ static void intel_setup_outputs(struct drm_i915_private *dev_priv) /* * On SKL we don't have a way to detect DDI-E so we rely on VBT. */ - if ((IS_SKYLAKE(dev_priv) || IS_KABYLAKE(dev_priv)) && + if (IS_GEN9_BC(dev_priv) && (dev_priv->vbt.ddi_port_info[PORT_E].supports_dp || dev_priv->vbt.ddi_port_info[PORT_E].supports_dvi || dev_priv->vbt.ddi_port_info[PORT_E].supports_hdmi)) @@ -15829,14 +14181,13 @@ static void intel_setup_outputs(struct drm_i915_private *dev_priv) static void intel_user_framebuffer_destroy(struct drm_framebuffer *fb) { - struct drm_device *dev = fb->dev; struct intel_framebuffer *intel_fb = to_intel_framebuffer(fb); drm_framebuffer_cleanup(fb); - mutex_lock(&dev->struct_mutex); - WARN_ON(!intel_fb->obj->framebuffer_references--); + + WARN_ON(atomic_dec_return(&intel_fb->obj->framebuffer_references) < 0); i915_gem_object_put(intel_fb->obj); - mutex_unlock(&dev->struct_mutex); + kfree(intel_fb); } @@ -15861,15 +14212,10 @@ static int intel_user_framebuffer_dirty(struct drm_framebuffer *fb, struct drm_clip_rect *clips, unsigned num_clips) { - struct drm_device *dev = fb->dev; - struct intel_framebuffer *intel_fb = to_intel_framebuffer(fb); - struct drm_i915_gem_object *obj = intel_fb->obj; + struct drm_i915_gem_object *obj = intel_fb_obj(fb); - mutex_lock(&dev->struct_mutex); - if (obj->pin_display && obj->cache_dirty) - i915_gem_clflush_object(obj, true); - intel_fb_obj_flush(obj, false, ORIGIN_DIRTYFB); - mutex_unlock(&dev->struct_mutex); + i915_gem_object_flush_if_display(obj); + intel_fb_obj_flush(obj, ORIGIN_DIRTYFB); return 0; } @@ -15884,7 +14230,7 @@ static u32 intel_fb_pitch_limit(struct drm_i915_private *dev_priv, uint64_t fb_modifier, uint32_t pixel_format) { - u32 gen = INTEL_INFO(dev_priv)->gen; + u32 gen = INTEL_GEN(dev_priv); if (gen >= 9) { int cpp = drm_format_plane_cpp(pixel_format, 0); @@ -15893,8 +14239,7 @@ u32 intel_fb_pitch_limit(struct drm_i915_private *dev_priv, * pixels and 32K bytes." */ return min(8192 * cpp, 32768); - } else if (gen >= 5 && !IS_VALLEYVIEW(dev_priv) && - !IS_CHERRYVIEW(dev_priv)) { + } else if (gen >= 5 && !HAS_GMCH_DISPLAY(dev_priv)) { return 32*1024; } else if (gen >= 4) { if (fb_modifier == I915_FORMAT_MOD_X_TILED) @@ -15912,18 +14257,17 @@ u32 intel_fb_pitch_limit(struct drm_i915_private *dev_priv, } } -static int intel_framebuffer_init(struct drm_device *dev, - struct intel_framebuffer *intel_fb, - struct drm_mode_fb_cmd2 *mode_cmd, - struct drm_i915_gem_object *obj) +static int intel_framebuffer_init(struct intel_framebuffer *intel_fb, + struct drm_i915_gem_object *obj, + struct drm_mode_fb_cmd2 *mode_cmd) { - struct drm_i915_private *dev_priv = to_i915(dev); + struct drm_i915_private *dev_priv = to_i915(obj->base.dev); unsigned int tiling = i915_gem_object_get_tiling(obj); - int ret; u32 pitch_limit, stride_alignment; struct drm_format_name_buf format_name; + int ret = -EINVAL; - WARN_ON(!mutex_is_locked(&dev->struct_mutex)); + atomic_inc(&obj->framebuffer_references); if (mode_cmd->flags & DRM_MODE_FB_MODIFIERS) { /* @@ -15933,14 +14277,14 @@ static int intel_framebuffer_init(struct drm_device *dev, if (tiling != I915_TILING_NONE && tiling != intel_fb_modifier_to_tiling(mode_cmd->modifier[0])) { DRM_DEBUG("tiling_mode doesn't match fb modifier\n"); - return -EINVAL; + goto err; } } else { if (tiling == I915_TILING_X) { mode_cmd->modifier[0] = I915_FORMAT_MOD_X_TILED; } else if (tiling == I915_TILING_Y) { DRM_DEBUG("No Y tiling for legacy addfb\n"); - return -EINVAL; + goto err; } } @@ -15951,7 +14295,7 @@ static int intel_framebuffer_init(struct drm_device *dev, if (INTEL_GEN(dev_priv) < 9) { DRM_DEBUG("Unsupported tiling 0x%llx!\n", mode_cmd->modifier[0]); - return -EINVAL; + goto err; } case DRM_FORMAT_MOD_NONE: case I915_FORMAT_MOD_X_TILED: @@ -15959,7 +14303,7 @@ static int intel_framebuffer_init(struct drm_device *dev, default: DRM_DEBUG("Unsupported fb modifier 0x%llx!\n", mode_cmd->modifier[0]); - return -EINVAL; + goto err; } /* @@ -15978,7 +14322,7 @@ static int intel_framebuffer_init(struct drm_device *dev, if (mode_cmd->pitches[0] & (stride_alignment - 1)) { DRM_DEBUG("pitch (%d) must be at least %u byte aligned\n", mode_cmd->pitches[0], stride_alignment); - return -EINVAL; + goto err; } pitch_limit = intel_fb_pitch_limit(dev_priv, mode_cmd->modifier[0], @@ -15988,7 +14332,7 @@ static int intel_framebuffer_init(struct drm_device *dev, mode_cmd->modifier[0] != DRM_FORMAT_MOD_NONE ? "tiled" : "linear", mode_cmd->pitches[0], pitch_limit); - return -EINVAL; + goto err; } /* @@ -16000,7 +14344,7 @@ static int intel_framebuffer_init(struct drm_device *dev, DRM_DEBUG("pitch (%d) must match tiling stride (%d)\n", mode_cmd->pitches[0], i915_gem_object_get_stride(obj)); - return -EINVAL; + goto err; } /* Reject formats not supported by any plane early. */ @@ -16059,24 +14403,29 @@ static int intel_framebuffer_init(struct drm_device *dev, /* FIXME need to adjust LINOFF/TILEOFF accordingly. */ if (mode_cmd->offsets[0] != 0) - return -EINVAL; + goto err; - drm_helper_mode_fill_fb_struct(dev, &intel_fb->base, mode_cmd); + drm_helper_mode_fill_fb_struct(&dev_priv->drm, + &intel_fb->base, mode_cmd); intel_fb->obj = obj; ret = intel_fill_fb_info(dev_priv, &intel_fb->base); if (ret) return ret; - ret = drm_framebuffer_init(dev, &intel_fb->base, &intel_fb_funcs); + ret = drm_framebuffer_init(obj->base.dev, + &intel_fb->base, + &intel_fb_funcs); if (ret) { DRM_ERROR("framebuffer init failed %d\n", ret); - return ret; + goto err; } - intel_fb->obj->framebuffer_references++; - return 0; + +err: + atomic_dec(&obj->framebuffer_references); + return ret; } static struct drm_framebuffer * @@ -16092,7 +14441,7 @@ intel_user_framebuffer_create(struct drm_device *dev, if (!obj) return ERR_PTR(-ENOENT); - fb = intel_framebuffer_create(dev, &mode_cmd, obj); + fb = intel_framebuffer_create(obj, &mode_cmd); if (IS_ERR(fb)) i915_gem_object_put(obj); @@ -16126,6 +14475,8 @@ static const struct drm_mode_config_funcs intel_mode_funcs = { */ void intel_init_display_hooks(struct drm_i915_private *dev_priv) { + intel_init_cdclk_hooks(dev_priv); + if (INTEL_INFO(dev_priv)->gen >= 9) { dev_priv->display.get_pipe_config = haswell_get_pipe_config; dev_priv->display.get_initial_plane_config = @@ -16194,62 +14545,6 @@ void intel_init_display_hooks(struct drm_i915_private *dev_priv) dev_priv->display.crtc_disable = i9xx_crtc_disable; } - /* Returns the core display clock speed */ - if (IS_SKYLAKE(dev_priv) || IS_KABYLAKE(dev_priv)) - dev_priv->display.get_display_clock_speed = - skylake_get_display_clock_speed; - else if (IS_GEN9_LP(dev_priv)) - dev_priv->display.get_display_clock_speed = - broxton_get_display_clock_speed; - else if (IS_BROADWELL(dev_priv)) - dev_priv->display.get_display_clock_speed = - broadwell_get_display_clock_speed; - else if (IS_HASWELL(dev_priv)) - dev_priv->display.get_display_clock_speed = - haswell_get_display_clock_speed; - else if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) - dev_priv->display.get_display_clock_speed = - valleyview_get_display_clock_speed; - else if (IS_GEN5(dev_priv)) - dev_priv->display.get_display_clock_speed = - ilk_get_display_clock_speed; - else if (IS_I945G(dev_priv) || IS_I965G(dev_priv) || - IS_GEN6(dev_priv) || IS_IVYBRIDGE(dev_priv)) - dev_priv->display.get_display_clock_speed = - i945_get_display_clock_speed; - else if (IS_GM45(dev_priv)) - dev_priv->display.get_display_clock_speed = - gm45_get_display_clock_speed; - else if (IS_I965GM(dev_priv)) - dev_priv->display.get_display_clock_speed = - i965gm_get_display_clock_speed; - else if (IS_PINEVIEW(dev_priv)) - dev_priv->display.get_display_clock_speed = - pnv_get_display_clock_speed; - else if (IS_G33(dev_priv) || IS_G4X(dev_priv)) - dev_priv->display.get_display_clock_speed = - g33_get_display_clock_speed; - else if (IS_I915G(dev_priv)) - dev_priv->display.get_display_clock_speed = - i915_get_display_clock_speed; - else if (IS_I945GM(dev_priv) || IS_I845G(dev_priv)) - dev_priv->display.get_display_clock_speed = - i9xx_misc_get_display_clock_speed; - else if (IS_I915GM(dev_priv)) - dev_priv->display.get_display_clock_speed = - i915gm_get_display_clock_speed; - else if (IS_I865G(dev_priv)) - dev_priv->display.get_display_clock_speed = - i865_get_display_clock_speed; - else if (IS_I85X(dev_priv)) - dev_priv->display.get_display_clock_speed = - i85x_get_display_clock_speed; - else { /* 830 */ - WARN(!IS_I830(dev_priv), "Unknown platform. Assuming 133 MHz CDCLK\n"); - dev_priv->display.get_display_clock_speed = - i830_get_display_clock_speed; - } - if (IS_GEN5(dev_priv)) { dev_priv->display.fdi_link_train = ironlake_fdi_link_train; } else if (IS_GEN6(dev_priv)) { @@ -16261,28 +14556,6 @@ void intel_init_display_hooks(struct drm_i915_private *dev_priv) dev_priv->display.fdi_link_train = hsw_fdi_link_train; } - if (IS_BROADWELL(dev_priv)) { - dev_priv->display.modeset_commit_cdclk = - broadwell_modeset_commit_cdclk; - dev_priv->display.modeset_calc_cdclk = - broadwell_modeset_calc_cdclk; - } else if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) { - dev_priv->display.modeset_commit_cdclk = - valleyview_modeset_commit_cdclk; - dev_priv->display.modeset_calc_cdclk = - valleyview_modeset_calc_cdclk; - } else if (IS_GEN9_LP(dev_priv)) { - dev_priv->display.modeset_commit_cdclk = - bxt_modeset_commit_cdclk; - dev_priv->display.modeset_calc_cdclk = - bxt_modeset_calc_cdclk; - } else if (IS_SKYLAKE(dev_priv) || IS_KABYLAKE(dev_priv)) { - dev_priv->display.modeset_commit_cdclk = - skl_modeset_commit_cdclk; - dev_priv->display.modeset_calc_cdclk = - skl_modeset_calc_cdclk; - } - if (dev_priv->info.gen >= 9) dev_priv->display.update_crtcs = skl_update_crtcs; else @@ -16509,8 +14782,7 @@ void intel_modeset_init_hw(struct drm_device *dev) struct drm_i915_private *dev_priv = to_i915(dev); intel_update_cdclk(dev_priv); - - dev_priv->atomic_cdclk_freq = dev_priv->cdclk_freq; + dev_priv->cdclk.logical = dev_priv->cdclk.actual = dev_priv->cdclk.hw; intel_init_clock_gating(dev_priv); } @@ -16599,18 +14871,6 @@ fail: drm_modeset_acquire_fini(&ctx); } -static void intel_atomic_helper_free_state(struct work_struct *work) -{ - struct drm_i915_private *dev_priv = - container_of(work, typeof(*dev_priv), atomic_helper.free_work); - struct intel_atomic_state *state, *next; - struct llist_node *freed; - - freed = llist_del_all(&dev_priv->atomic_helper.free_list); - llist_for_each_entry_safe(state, next, freed, freed) - drm_atomic_state_put(&state->base); -} - int intel_modeset_init(struct drm_device *dev) { struct drm_i915_private *dev_priv = to_i915(dev); @@ -16631,7 +14891,7 @@ int intel_modeset_init(struct drm_device *dev) dev->mode_config.funcs = &intel_mode_funcs; INIT_WORK(&dev_priv->atomic_helper.free_work, - intel_atomic_helper_free_state); + intel_atomic_helper_free_state_worker); intel_init_quirks(dev); @@ -16698,7 +14958,7 @@ int intel_modeset_init(struct drm_device *dev) intel_update_czclk(dev_priv); intel_update_cdclk(dev_priv); - dev_priv->atomic_cdclk_freq = dev_priv->cdclk_freq; + dev_priv->cdclk.logical = dev_priv->cdclk.actual = dev_priv->cdclk.hw; intel_shared_dpll_init(dev); @@ -17130,10 +15390,11 @@ static void intel_modeset_readout_hw_state(struct drm_device *dev) */ crtc_state->base.mode.private_flags = I915_MODE_FLAG_INHERITED; - if (INTEL_GEN(dev_priv) >= 9 || IS_BROADWELL(dev_priv)) - pixclk = ilk_pipe_pixel_rate(crtc_state); - else if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) - pixclk = crtc_state->base.adjusted_mode.crtc_clock; + intel_crtc_compute_pixel_rate(crtc_state); + + if (INTEL_GEN(dev_priv) >= 9 || IS_BROADWELL(dev_priv) || + IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) + pixclk = crtc_state->pixel_rate; else WARN_ON(dev_priv->display.modeset_calc_cdclk); @@ -17151,6 +15412,24 @@ static void intel_modeset_readout_hw_state(struct drm_device *dev) } } +static void +get_encoder_power_domains(struct drm_i915_private *dev_priv) +{ + struct intel_encoder *encoder; + + for_each_intel_encoder(&dev_priv->drm, encoder) { + u64 get_domains; + enum intel_display_power_domain domain; + + if (!encoder->get_power_domains) + continue; + + get_domains = encoder->get_power_domains(encoder); + for_each_power_domain(domain, get_domains) + intel_display_power_get(dev_priv, domain); + } +} + /* Scan out the current hw modeset state, * and sanitizes it to the current state */ @@ -17166,6 +15445,8 @@ intel_modeset_setup_hw_state(struct drm_device *dev) intel_modeset_readout_hw_state(dev); /* HW state is read out, now we need to sanitize this mess. */ + get_encoder_power_domains(dev_priv); + for_each_intel_encoder(dev, encoder) { intel_sanitize_encoder(encoder); } @@ -17200,7 +15481,7 @@ intel_modeset_setup_hw_state(struct drm_device *dev) ilk_wm_get_hw_state(dev); for_each_intel_crtc(dev, crtc) { - unsigned long put_domains; + u64 put_domains; put_domains = modeset_get_crtc_power_domains(&crtc->base, crtc->config); if (WARN_ON(put_domains)) @@ -17208,6 +15489,8 @@ intel_modeset_setup_hw_state(struct drm_device *dev) } intel_display_set_init_power(dev_priv, false); + intel_power_domains_verify_state(dev_priv); + intel_fbc_init_pipe_state(dev_priv); } @@ -17491,9 +15774,9 @@ intel_display_capture_error_state(struct drm_i915_private *dev_priv) void intel_display_print_error_state(struct drm_i915_error_state_buf *m, - struct drm_i915_private *dev_priv, struct intel_display_error_state *error) { + struct drm_i915_private *dev_priv = m->i915; int i; if (!error) diff --git a/drivers/gpu/drm/i915/intel_dp.c b/drivers/gpu/drm/i915/intel_dp.c index d1670b8afbf5..fd96a6cf7326 100644 --- a/drivers/gpu/drm/i915/intel_dp.c +++ b/drivers/gpu/drm/i915/intel_dp.c @@ -28,8 +28,10 @@ #include <linux/i2c.h> #include <linux/slab.h> #include <linux/export.h> +#include <linux/types.h> #include <linux/notifier.h> #include <linux/reboot.h> +#include <asm/byteorder.h> #include <drm/drmP.h> #include <drm/drm_atomic_helper.h> #include <drm/drm_crtc.h> @@ -226,7 +228,7 @@ intel_dp_source_rates(struct intel_dp *intel_dp, const int **source_rates) if (IS_GEN9_LP(dev_priv)) { *source_rates = bxt_rates; size = ARRAY_SIZE(bxt_rates); - } else if (IS_SKYLAKE(dev_priv) || IS_KABYLAKE(dev_priv)) { + } else if (IS_GEN9_BC(dev_priv)) { *source_rates = skl_rates; size = ARRAY_SIZE(skl_rates); } else { @@ -394,14 +396,12 @@ static void pps_lock(struct intel_dp *intel_dp) struct intel_encoder *encoder = &intel_dig_port->base; struct drm_device *dev = encoder->base.dev; struct drm_i915_private *dev_priv = to_i915(dev); - enum intel_display_power_domain power_domain; /* * See vlv_power_sequencer_reset() why we need * a power domain reference here. */ - power_domain = intel_display_port_aux_power_domain(encoder); - intel_display_power_get(dev_priv, power_domain); + intel_display_power_get(dev_priv, intel_dp->aux_power_domain); mutex_lock(&dev_priv->pps_mutex); } @@ -412,12 +412,10 @@ static void pps_unlock(struct intel_dp *intel_dp) struct intel_encoder *encoder = &intel_dig_port->base; struct drm_device *dev = encoder->base.dev; struct drm_i915_private *dev_priv = to_i915(dev); - enum intel_display_power_domain power_domain; mutex_unlock(&dev_priv->pps_mutex); - power_domain = intel_display_port_aux_power_domain(encoder); - intel_display_power_put(dev_priv, power_domain); + intel_display_power_put(dev_priv, intel_dp->aux_power_domain); } static void @@ -916,7 +914,7 @@ static uint32_t ilk_get_aux_clock_divider(struct intel_dp *intel_dp, int index) * divide by 2000 and use that */ if (intel_dig_port->port == PORT_A) - return DIV_ROUND_CLOSEST(dev_priv->cdclk_freq, 2000); + return DIV_ROUND_CLOSEST(dev_priv->cdclk.hw.cdclk, 2000); else return DIV_ROUND_CLOSEST(dev_priv->rawclk_freq, 2000); } @@ -1593,6 +1591,13 @@ static int intel_dp_compute_bpp(struct intel_dp *intel_dp, if (bpc > 0) bpp = min(bpp, 3*bpc); + /* For DP Compliance we override the computed bpp for the pipe */ + if (intel_dp->compliance.test_data.bpc != 0) { + pipe_config->pipe_bpp = 3*intel_dp->compliance.test_data.bpc; + pipe_config->dither_force_disable = pipe_config->pipe_bpp == 6*3; + DRM_DEBUG_KMS("Setting pipe_bpp to %d\n", + pipe_config->pipe_bpp); + } return bpp; } @@ -1613,6 +1618,7 @@ intel_dp_compute_config(struct intel_encoder *encoder, /* Conveniently, the link BW constants become indices with a shift...*/ int min_clock = 0; int max_clock; + int link_rate_index; int bpp, mode_rate; int link_avail, link_clock; int common_rates[DP_MAX_SUPPORTED_RATES] = {}; @@ -1654,6 +1660,15 @@ intel_dp_compute_config(struct intel_encoder *encoder, if (adjusted_mode->flags & DRM_MODE_FLAG_DBLCLK) return false; + /* Use values requested by Compliance Test Request */ + if (intel_dp->compliance.test_type == DP_TEST_LINK_TRAINING) { + link_rate_index = intel_dp_link_rate_index(intel_dp, + common_rates, + intel_dp->compliance.test_link_rate); + if (link_rate_index >= 0) + min_clock = max_clock = link_rate_index; + min_lane_count = max_lane_count = intel_dp->compliance.test_lane_count; + } DRM_DEBUG_KMS("DP link computation with max lane count %i " "max bw %d pixel clock %iKHz\n", max_lane_count, common_rates[max_clock], @@ -1753,8 +1768,7 @@ found: * DPLL0 VCO may need to be adjusted to get the correct * clock for eDP. This will affect cdclk as well. */ - if (is_edp(intel_dp) && - (IS_SKYLAKE(dev_priv) || IS_KABYLAKE(dev_priv))) { + if (is_edp(intel_dp) && IS_GEN9_BC(dev_priv)) { int vco; switch (pipe_config->port_clock / 2) { @@ -1767,7 +1781,7 @@ found: break; } - to_intel_atomic_state(pipe_config->base.state)->cdclk_pll_vco = vco; + to_intel_atomic_state(pipe_config->base.state)->cdclk.logical.vco = vco; } if (!HAS_DDI(dev_priv)) @@ -1987,9 +2001,7 @@ static bool edp_panel_vdd_on(struct intel_dp *intel_dp) { struct drm_device *dev = intel_dp_to_dev(intel_dp); struct intel_digital_port *intel_dig_port = dp_to_dig_port(intel_dp); - struct intel_encoder *intel_encoder = &intel_dig_port->base; struct drm_i915_private *dev_priv = to_i915(dev); - enum intel_display_power_domain power_domain; u32 pp; i915_reg_t pp_stat_reg, pp_ctrl_reg; bool need_to_disable = !intel_dp->want_panel_vdd; @@ -2005,8 +2017,7 @@ static bool edp_panel_vdd_on(struct intel_dp *intel_dp) if (edp_have_panel_vdd(intel_dp)) return need_to_disable; - power_domain = intel_display_port_aux_power_domain(intel_encoder); - intel_display_power_get(dev_priv, power_domain); + intel_display_power_get(dev_priv, intel_dp->aux_power_domain); DRM_DEBUG_KMS("Turning eDP port %c VDD on\n", port_name(intel_dig_port->port)); @@ -2064,8 +2075,6 @@ static void edp_panel_vdd_off_sync(struct intel_dp *intel_dp) struct drm_i915_private *dev_priv = to_i915(dev); struct intel_digital_port *intel_dig_port = dp_to_dig_port(intel_dp); - struct intel_encoder *intel_encoder = &intel_dig_port->base; - enum intel_display_power_domain power_domain; u32 pp; i915_reg_t pp_stat_reg, pp_ctrl_reg; @@ -2095,8 +2104,7 @@ static void edp_panel_vdd_off_sync(struct intel_dp *intel_dp) if ((pp & PANEL_POWER_ON) == 0) intel_dp->panel_power_off_time = ktime_get_boottime(); - power_domain = intel_display_port_aux_power_domain(intel_encoder); - intel_display_power_put(dev_priv, power_domain); + intel_display_power_put(dev_priv, intel_dp->aux_power_domain); } static void edp_panel_vdd_work(struct work_struct *__work) @@ -2209,11 +2217,8 @@ void intel_edp_panel_on(struct intel_dp *intel_dp) static void edp_panel_off(struct intel_dp *intel_dp) { - struct intel_digital_port *intel_dig_port = dp_to_dig_port(intel_dp); - struct intel_encoder *intel_encoder = &intel_dig_port->base; struct drm_device *dev = intel_dp_to_dev(intel_dp); struct drm_i915_private *dev_priv = to_i915(dev); - enum intel_display_power_domain power_domain; u32 pp; i915_reg_t pp_ctrl_reg; @@ -2245,8 +2250,7 @@ static void edp_panel_off(struct intel_dp *intel_dp) wait_panel_off(intel_dp); /* We got a reference when we enabled the VDD. */ - power_domain = intel_display_port_aux_power_domain(intel_encoder); - intel_display_power_put(dev_priv, power_domain); + intel_display_power_put(dev_priv, intel_dp->aux_power_domain); } void intel_edp_panel_off(struct intel_dp *intel_dp) @@ -2492,12 +2496,11 @@ static bool intel_dp_get_hw_state(struct intel_encoder *encoder, enum port port = dp_to_dig_port(intel_dp)->port; struct drm_device *dev = encoder->base.dev; struct drm_i915_private *dev_priv = to_i915(dev); - enum intel_display_power_domain power_domain; u32 tmp; bool ret; - power_domain = intel_display_port_power_domain(encoder); - if (!intel_display_power_get_if_enabled(dev_priv, power_domain)) + if (!intel_display_power_get_if_enabled(dev_priv, + encoder->power_domain)) return false; ret = false; @@ -2533,7 +2536,7 @@ static bool intel_dp_get_hw_state(struct intel_encoder *encoder, ret = true; out: - intel_display_power_put(dev_priv, power_domain); + intel_display_power_put(dev_priv, encoder->power_domain); return ret; } @@ -3080,9 +3083,8 @@ intel_dp_voltage_max(struct intel_dp *intel_dp) if (IS_GEN9_LP(dev_priv)) return DP_TRAIN_VOLTAGE_SWING_LEVEL_3; else if (INTEL_GEN(dev_priv) >= 9) { - if (dev_priv->vbt.edp.low_vswing && port == PORT_A) - return DP_TRAIN_VOLTAGE_SWING_LEVEL_3; - return DP_TRAIN_VOLTAGE_SWING_LEVEL_2; + struct intel_encoder *encoder = &dp_to_dig_port(intel_dp)->base; + return intel_ddi_dp_voltage_max(encoder); } else if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) return DP_TRAIN_VOLTAGE_SWING_LEVEL_3; else if (IS_GEN7(dev_priv) && port == PORT_A) @@ -3922,19 +3924,112 @@ intel_dp_get_sink_irq_esi(struct intel_dp *intel_dp, u8 *sink_irq_vector) static uint8_t intel_dp_autotest_link_training(struct intel_dp *intel_dp) { - uint8_t test_result = DP_TEST_ACK; - return test_result; + int status = 0; + int min_lane_count = 1; + int common_rates[DP_MAX_SUPPORTED_RATES] = {}; + int link_rate_index, test_link_rate; + uint8_t test_lane_count, test_link_bw; + /* (DP CTS 1.2) + * 4.3.1.11 + */ + /* Read the TEST_LANE_COUNT and TEST_LINK_RTAE fields (DP CTS 3.1.4) */ + status = drm_dp_dpcd_readb(&intel_dp->aux, DP_TEST_LANE_COUNT, + &test_lane_count); + + if (status <= 0) { + DRM_DEBUG_KMS("Lane count read failed\n"); + return DP_TEST_NAK; + } + test_lane_count &= DP_MAX_LANE_COUNT_MASK; + /* Validate the requested lane count */ + if (test_lane_count < min_lane_count || + test_lane_count > intel_dp->max_sink_lane_count) + return DP_TEST_NAK; + + status = drm_dp_dpcd_readb(&intel_dp->aux, DP_TEST_LINK_RATE, + &test_link_bw); + if (status <= 0) { + DRM_DEBUG_KMS("Link Rate read failed\n"); + return DP_TEST_NAK; + } + /* Validate the requested link rate */ + test_link_rate = drm_dp_bw_code_to_link_rate(test_link_bw); + link_rate_index = intel_dp_link_rate_index(intel_dp, + common_rates, + test_link_rate); + if (link_rate_index < 0) + return DP_TEST_NAK; + + intel_dp->compliance.test_lane_count = test_lane_count; + intel_dp->compliance.test_link_rate = test_link_rate; + + return DP_TEST_ACK; } static uint8_t intel_dp_autotest_video_pattern(struct intel_dp *intel_dp) { - uint8_t test_result = DP_TEST_NAK; - return test_result; + uint8_t test_pattern; + uint16_t test_misc; + __be16 h_width, v_height; + int status = 0; + + /* Read the TEST_PATTERN (DP CTS 3.1.5) */ + status = drm_dp_dpcd_read(&intel_dp->aux, DP_TEST_PATTERN, + &test_pattern, 1); + if (status <= 0) { + DRM_DEBUG_KMS("Test pattern read failed\n"); + return DP_TEST_NAK; + } + if (test_pattern != DP_COLOR_RAMP) + return DP_TEST_NAK; + + status = drm_dp_dpcd_read(&intel_dp->aux, DP_TEST_H_WIDTH_HI, + &h_width, 2); + if (status <= 0) { + DRM_DEBUG_KMS("H Width read failed\n"); + return DP_TEST_NAK; + } + + status = drm_dp_dpcd_read(&intel_dp->aux, DP_TEST_V_HEIGHT_HI, + &v_height, 2); + if (status <= 0) { + DRM_DEBUG_KMS("V Height read failed\n"); + return DP_TEST_NAK; + } + + status = drm_dp_dpcd_read(&intel_dp->aux, DP_TEST_MISC0, + &test_misc, 1); + if (status <= 0) { + DRM_DEBUG_KMS("TEST MISC read failed\n"); + return DP_TEST_NAK; + } + if ((test_misc & DP_TEST_COLOR_FORMAT_MASK) != DP_COLOR_FORMAT_RGB) + return DP_TEST_NAK; + if (test_misc & DP_TEST_DYNAMIC_RANGE_CEA) + return DP_TEST_NAK; + switch (test_misc & DP_TEST_BIT_DEPTH_MASK) { + case DP_TEST_BIT_DEPTH_6: + intel_dp->compliance.test_data.bpc = 6; + break; + case DP_TEST_BIT_DEPTH_8: + intel_dp->compliance.test_data.bpc = 8; + break; + default: + return DP_TEST_NAK; + } + + intel_dp->compliance.test_data.video_pattern = test_pattern; + intel_dp->compliance.test_data.hdisplay = be16_to_cpu(h_width); + intel_dp->compliance.test_data.vdisplay = be16_to_cpu(v_height); + /* Set test active flag here so userspace doesn't interrupt things */ + intel_dp->compliance.test_active = 1; + + return DP_TEST_ACK; } static uint8_t intel_dp_autotest_edid(struct intel_dp *intel_dp) { - uint8_t test_result = DP_TEST_NAK; + uint8_t test_result = DP_TEST_ACK; struct intel_connector *intel_connector = intel_dp->attached_connector; struct drm_connector *connector = &intel_connector->base; @@ -3969,7 +4064,7 @@ static uint8_t intel_dp_autotest_edid(struct intel_dp *intel_dp) DRM_DEBUG_KMS("Failed to write EDID checksum\n"); test_result = DP_TEST_ACK | DP_TEST_EDID_CHECKSUM_WRITE; - intel_dp->compliance.test_data.edid = INTEL_DP_RESOLUTION_STANDARD; + intel_dp->compliance.test_data.edid = INTEL_DP_RESOLUTION_PREFERRED; } /* Set test active flag here so userspace doesn't interrupt things */ @@ -3987,45 +4082,42 @@ static uint8_t intel_dp_autotest_phy_pattern(struct intel_dp *intel_dp) static void intel_dp_handle_test_request(struct intel_dp *intel_dp) { uint8_t response = DP_TEST_NAK; - uint8_t rxdata = 0; - int status = 0; + uint8_t request = 0; + int status; - status = drm_dp_dpcd_read(&intel_dp->aux, DP_TEST_REQUEST, &rxdata, 1); + status = drm_dp_dpcd_readb(&intel_dp->aux, DP_TEST_REQUEST, &request); if (status <= 0) { DRM_DEBUG_KMS("Could not read test request from sink\n"); goto update_status; } - switch (rxdata) { + switch (request) { case DP_TEST_LINK_TRAINING: DRM_DEBUG_KMS("LINK_TRAINING test requested\n"); - intel_dp->compliance.test_type = DP_TEST_LINK_TRAINING; response = intel_dp_autotest_link_training(intel_dp); break; case DP_TEST_LINK_VIDEO_PATTERN: DRM_DEBUG_KMS("TEST_PATTERN test requested\n"); - intel_dp->compliance.test_type = DP_TEST_LINK_VIDEO_PATTERN; response = intel_dp_autotest_video_pattern(intel_dp); break; case DP_TEST_LINK_EDID_READ: DRM_DEBUG_KMS("EDID test requested\n"); - intel_dp->compliance.test_type = DP_TEST_LINK_EDID_READ; response = intel_dp_autotest_edid(intel_dp); break; case DP_TEST_LINK_PHY_TEST_PATTERN: DRM_DEBUG_KMS("PHY_PATTERN test requested\n"); - intel_dp->compliance.test_type = DP_TEST_LINK_PHY_TEST_PATTERN; response = intel_dp_autotest_phy_pattern(intel_dp); break; default: - DRM_DEBUG_KMS("Invalid test request '%02x'\n", rxdata); + DRM_DEBUG_KMS("Invalid test request '%02x'\n", request); break; } + if (response & DP_TEST_ACK) + intel_dp->compliance.test_type = request; + update_status: - status = drm_dp_dpcd_write(&intel_dp->aux, - DP_TEST_RESPONSE, - &response, 1); + status = drm_dp_dpcd_writeb(&intel_dp->aux, DP_TEST_RESPONSE, response); if (status <= 0) DRM_DEBUG_KMS("Could not write test response to sink\n"); } @@ -4137,9 +4229,8 @@ intel_dp_check_link_status(struct intel_dp *intel_dp) if (!intel_dp->lane_count) return; - /* if link training is requested we should perform it always */ - if ((intel_dp->compliance.test_type == DP_TEST_LINK_TRAINING) || - (!drm_dp_channel_eq_ok(link_status, intel_dp->lane_count))) { + /* Retrain if Channel EQ or CR not ok */ + if (!drm_dp_channel_eq_ok(link_status, intel_dp->lane_count)) { DRM_DEBUG_KMS("%s: channel EQ not ok, retraining\n", intel_encoder->base.name); @@ -4164,6 +4255,7 @@ static bool intel_dp_short_pulse(struct intel_dp *intel_dp) { struct drm_device *dev = intel_dp_to_dev(intel_dp); + struct intel_encoder *intel_encoder = &dp_to_dig_port(intel_dp)->base; u8 sink_irq_vector = 0; u8 old_sink_count = intel_dp->sink_count; bool ret; @@ -4197,7 +4289,7 @@ intel_dp_short_pulse(struct intel_dp *intel_dp) sink_irq_vector); if (sink_irq_vector & DP_AUTOMATED_TEST_REQUEST) - DRM_DEBUG_DRIVER("Test request in short pulse not handled\n"); + intel_dp_handle_test_request(intel_dp); if (sink_irq_vector & (DP_CP_IRQ | DP_SINK_SPECIFIC_IRQ)) DRM_DEBUG_DRIVER("CP or sink specific irq unhandled\n"); } @@ -4205,6 +4297,11 @@ intel_dp_short_pulse(struct intel_dp *intel_dp) drm_modeset_lock(&dev->mode_config.connection_mutex, NULL); intel_dp_check_link_status(intel_dp); drm_modeset_unlock(&dev->mode_config.connection_mutex); + if (intel_dp->compliance.test_type == DP_TEST_LINK_TRAINING) { + DRM_DEBUG_KMS("Link Training Compliance Test requested\n"); + /* Send a Hotplug Uevent to userspace to start modeset */ + drm_kms_helper_hotplug_event(intel_encoder->base.dev); + } return true; } @@ -4213,9 +4310,13 @@ intel_dp_short_pulse(struct intel_dp *intel_dp) static enum drm_connector_status intel_dp_detect_dpcd(struct intel_dp *intel_dp) { + struct intel_lspcon *lspcon = dp_to_lspcon(intel_dp); uint8_t *dpcd = intel_dp->dpcd; uint8_t type; + if (lspcon->active) + lspcon_resume(lspcon); + if (!intel_dp_get_dpcd(intel_dp)) return connector_status_disconnected; @@ -4474,11 +4575,9 @@ intel_dp_long_pulse(struct intel_connector *intel_connector) struct intel_encoder *intel_encoder = &intel_dig_port->base; struct drm_device *dev = connector->dev; enum drm_connector_status status; - enum intel_display_power_domain power_domain; u8 sink_irq_vector = 0; - power_domain = intel_display_port_aux_power_domain(intel_encoder); - intel_display_power_get(to_i915(dev), power_domain); + intel_display_power_get(to_i915(dev), intel_dp->aux_power_domain); /* Can't disconnect eDP, but you can close the lid... */ if (is_edp(intel_dp)) @@ -4511,11 +4610,15 @@ intel_dp_long_pulse(struct intel_connector *intel_connector) yesno(intel_dp_source_supports_hbr2(intel_dp)), yesno(drm_dp_tps3_supported(intel_dp->dpcd))); - /* Set the max lane count for sink */ - intel_dp->max_sink_lane_count = drm_dp_max_lane_count(intel_dp->dpcd); + if (intel_dp->reset_link_params) { + /* Set the max lane count for sink */ + intel_dp->max_sink_lane_count = drm_dp_max_lane_count(intel_dp->dpcd); + + /* Set the max link BW for sink */ + intel_dp->max_sink_link_bw = intel_dp_max_link_bw(intel_dp); - /* Set the max link BW for sink */ - intel_dp->max_sink_link_bw = intel_dp_max_link_bw(intel_dp); + intel_dp->reset_link_params = false; + } intel_dp_print_rates(intel_dp); @@ -4575,7 +4678,7 @@ out: if (status != connector_status_connected && !intel_dp->is_mst) intel_dp_unset_edid(intel_dp); - intel_display_power_put(to_i915(dev), power_domain); + intel_display_power_put(to_i915(dev), intel_dp->aux_power_domain); return status; } @@ -4603,7 +4706,6 @@ intel_dp_force(struct drm_connector *connector) struct intel_dp *intel_dp = intel_attached_dp(connector); struct intel_encoder *intel_encoder = &dp_to_dig_port(intel_dp)->base; struct drm_i915_private *dev_priv = to_i915(intel_encoder->base.dev); - enum intel_display_power_domain power_domain; DRM_DEBUG_KMS("[CONNECTOR:%d:%s]\n", connector->base.id, connector->name); @@ -4612,12 +4714,11 @@ intel_dp_force(struct drm_connector *connector) if (connector->status != connector_status_connected) return; - power_domain = intel_display_port_aux_power_domain(intel_encoder); - intel_display_power_get(dev_priv, power_domain); + intel_display_power_get(dev_priv, intel_dp->aux_power_domain); intel_dp_set_edid(intel_dp); - intel_display_power_put(dev_priv, power_domain); + intel_display_power_put(dev_priv, intel_dp->aux_power_domain); if (intel_encoder->type != INTEL_OUTPUT_EDP) intel_encoder->type = INTEL_OUTPUT_DP; @@ -4852,7 +4953,6 @@ static void intel_edp_panel_vdd_sanitize(struct intel_dp *intel_dp) struct intel_digital_port *intel_dig_port = dp_to_dig_port(intel_dp); struct drm_device *dev = intel_dig_port->base.base.dev; struct drm_i915_private *dev_priv = to_i915(dev); - enum intel_display_power_domain power_domain; lockdep_assert_held(&dev_priv->pps_mutex); @@ -4866,8 +4966,7 @@ static void intel_edp_panel_vdd_sanitize(struct intel_dp *intel_dp) * indefinitely. */ DRM_DEBUG_KMS("VDD left on by BIOS, adjusting state tracking\n"); - power_domain = intel_display_port_aux_power_domain(&intel_dig_port->base); - intel_display_power_get(dev_priv, power_domain); + intel_display_power_get(dev_priv, intel_dp->aux_power_domain); edp_panel_vdd_schedule_off(intel_dp); } @@ -4897,6 +4996,8 @@ void intel_dp_encoder_reset(struct drm_encoder *encoder) if (lspcon->active) lspcon_resume(lspcon); + intel_dp->reset_link_params = true; + pps_lock(intel_dp); if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) @@ -4939,10 +5040,8 @@ enum irqreturn intel_dp_hpd_pulse(struct intel_digital_port *intel_dig_port, bool long_hpd) { struct intel_dp *intel_dp = &intel_dig_port->dp; - struct intel_encoder *intel_encoder = &intel_dig_port->base; struct drm_device *dev = intel_dig_port->base.base.dev; struct drm_i915_private *dev_priv = to_i915(dev); - enum intel_display_power_domain power_domain; enum irqreturn ret = IRQ_NONE; if (intel_dig_port->base.type != INTEL_OUTPUT_EDP && @@ -4966,12 +5065,12 @@ intel_dp_hpd_pulse(struct intel_digital_port *intel_dig_port, bool long_hpd) long_hpd ? "long" : "short"); if (long_hpd) { + intel_dp->reset_link_params = true; intel_dp->detect_done = false; return IRQ_NONE; } - power_domain = intel_display_port_aux_power_domain(intel_encoder); - intel_display_power_get(dev_priv, power_domain); + intel_display_power_get(dev_priv, intel_dp->aux_power_domain); if (intel_dp->is_mst) { if (intel_dp_check_mst_status(intel_dp) == -EINVAL) { @@ -4999,7 +5098,7 @@ intel_dp_hpd_pulse(struct intel_digital_port *intel_dig_port, bool long_hpd) ret = IRQ_HANDLED; put_power: - intel_display_power_put(dev_priv, power_domain); + intel_display_power_put(dev_priv, intel_dp->aux_power_domain); return ret; } @@ -5790,6 +5889,41 @@ out_vdd_off: return false; } +/* Set up the hotplug pin and aux power domain. */ +static void +intel_dp_init_connector_port_info(struct intel_digital_port *intel_dig_port) +{ + struct intel_encoder *encoder = &intel_dig_port->base; + struct intel_dp *intel_dp = &intel_dig_port->dp; + + switch (intel_dig_port->port) { + case PORT_A: + encoder->hpd_pin = HPD_PORT_A; + intel_dp->aux_power_domain = POWER_DOMAIN_AUX_A; + break; + case PORT_B: + encoder->hpd_pin = HPD_PORT_B; + intel_dp->aux_power_domain = POWER_DOMAIN_AUX_B; + break; + case PORT_C: + encoder->hpd_pin = HPD_PORT_C; + intel_dp->aux_power_domain = POWER_DOMAIN_AUX_C; + break; + case PORT_D: + encoder->hpd_pin = HPD_PORT_D; + intel_dp->aux_power_domain = POWER_DOMAIN_AUX_D; + break; + case PORT_E: + encoder->hpd_pin = HPD_PORT_E; + + /* FIXME: Check VBT for actual wiring of PORT E */ + intel_dp->aux_power_domain = POWER_DOMAIN_AUX_D; + break; + default: + MISSING_CASE(intel_dig_port->port); + } +} + bool intel_dp_init_connector(struct intel_digital_port *intel_dig_port, struct intel_connector *intel_connector) @@ -5807,6 +5941,7 @@ intel_dp_init_connector(struct intel_digital_port *intel_dig_port, intel_dig_port->max_lanes, port_name(port))) return false; + intel_dp->reset_link_params = true; intel_dp->pps_pipe = INVALID_PIPE; intel_dp->active_pipe = INVALID_PIPE; @@ -5863,6 +5998,8 @@ intel_dp_init_connector(struct intel_digital_port *intel_dig_port, connector->interlace_allowed = true; connector->doublescan_allowed = 0; + intel_dp_init_connector_port_info(intel_dig_port); + intel_dp_aux_init(intel_dp); INIT_DELAYED_WORK(&intel_dp->panel_vdd_work, @@ -5875,29 +6012,6 @@ intel_dp_init_connector(struct intel_digital_port *intel_dig_port, else intel_connector->get_hw_state = intel_connector_get_hw_state; - /* Set up the hotplug pin. */ - switch (port) { - case PORT_A: - intel_encoder->hpd_pin = HPD_PORT_A; - break; - case PORT_B: - intel_encoder->hpd_pin = HPD_PORT_B; - if (IS_BXT_REVID(dev_priv, 0, BXT_REVID_A1)) - intel_encoder->hpd_pin = HPD_PORT_A; - break; - case PORT_C: - intel_encoder->hpd_pin = HPD_PORT_C; - break; - case PORT_D: - intel_encoder->hpd_pin = HPD_PORT_D; - break; - case PORT_E: - intel_encoder->hpd_pin = HPD_PORT_E; - break; - default: - BUG(); - } - /* init MST on ports that can support it */ if (HAS_DP_MST(dev_priv) && !is_edp(intel_dp) && (port == PORT_B || port == PORT_C || port == PORT_D)) @@ -5982,6 +6096,7 @@ bool intel_dp_init(struct drm_i915_private *dev_priv, intel_dig_port->max_lanes = 4; intel_encoder->type = INTEL_OUTPUT_DP; + intel_encoder->power_domain = intel_port_to_power_domain(port); if (IS_CHERRYVIEW(dev_priv)) { if (port == PORT_D) intel_encoder->crtc_mask = 1 << 2; diff --git a/drivers/gpu/drm/i915/intel_dp_mst.c b/drivers/gpu/drm/i915/intel_dp_mst.c index 38e3ca2f6f18..d94fd4b80963 100644 --- a/drivers/gpu/drm/i915/intel_dp_mst.c +++ b/drivers/gpu/drm/i915/intel_dp_mst.c @@ -47,6 +47,11 @@ static bool intel_dp_mst_compute_config(struct intel_encoder *encoder, pipe_config->has_pch_encoder = false; bpp = 24; + if (intel_dp->compliance.test_data.bpc) { + bpp = intel_dp->compliance.test_data.bpc * 3; + DRM_DEBUG_KMS("Setting pipe bpp to %d\n", + bpp); + } /* * for MST we always configure max link bw - the spec doesn't * seem to suggest we should do otherwise. @@ -55,7 +60,7 @@ static bool intel_dp_mst_compute_config(struct intel_encoder *encoder, pipe_config->lane_count = lane_count; - pipe_config->pipe_bpp = 24; + pipe_config->pipe_bpp = bpp; pipe_config->port_clock = intel_dp_max_link_rate(intel_dp); state = pipe_config->base.state; @@ -87,7 +92,6 @@ static void intel_mst_disable_dp(struct intel_encoder *encoder, struct intel_dp *intel_dp = &intel_dig_port->dp; struct intel_connector *connector = to_intel_connector(old_conn_state->connector); - struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); int ret; DRM_DEBUG_KMS("%d\n", intel_dp->active_mst_links); @@ -98,10 +102,8 @@ static void intel_mst_disable_dp(struct intel_encoder *encoder, if (ret) { DRM_ERROR("failed to update payload %d\n", ret); } - if (old_crtc_state->has_audio) { + if (old_crtc_state->has_audio) intel_audio_codec_disable(encoder); - intel_display_power_put(dev_priv, POWER_DOMAIN_AUDIO); - } } static void intel_mst_post_disable_dp(struct intel_encoder *encoder, @@ -214,10 +216,8 @@ static void intel_mst_enable_dp(struct intel_encoder *encoder, ret = drm_dp_check_act_status(&intel_dp->mst_mgr); ret = drm_dp_update_payload_part2(&intel_dp->mst_mgr); - if (pipe_config->has_audio) { - intel_display_power_get(dev_priv, POWER_DOMAIN_AUDIO); + if (pipe_config->has_audio) intel_audio_codec_enable(encoder, pipe_config, conn_state); - } } static bool intel_dp_mst_enc_get_hw_state(struct intel_encoder *encoder, @@ -548,6 +548,7 @@ intel_dp_create_fake_mst_encoder(struct intel_digital_port *intel_dig_port, enum DRM_MODE_ENCODER_DPMST, "DP-MST %c", pipe_name(pipe)); intel_encoder->type = INTEL_OUTPUT_DP_MST; + intel_encoder->power_domain = intel_dig_port->base.power_domain; intel_encoder->port = intel_dig_port->port; intel_encoder->crtc_mask = 0x7; intel_encoder->cloneable = 0; diff --git a/drivers/gpu/drm/i915/intel_dpll_mgr.c b/drivers/gpu/drm/i915/intel_dpll_mgr.c index e59e43a9f3a6..b4de632f1158 100644 --- a/drivers/gpu/drm/i915/intel_dpll_mgr.c +++ b/drivers/gpu/drm/i915/intel_dpll_mgr.c @@ -42,44 +42,6 @@ * commit phase. */ -struct intel_shared_dpll * -skl_find_link_pll(struct drm_i915_private *dev_priv, int clock) -{ - struct intel_shared_dpll *pll = NULL; - struct intel_dpll_hw_state dpll_hw_state; - enum intel_dpll_id i; - bool found = false; - - if (!skl_ddi_dp_set_dpll_hw_state(clock, &dpll_hw_state)) - return pll; - - for (i = DPLL_ID_SKL_DPLL1; i <= DPLL_ID_SKL_DPLL3; i++) { - pll = &dev_priv->shared_dplls[i]; - - /* Only want to check enabled timings first */ - if (pll->state.crtc_mask == 0) - continue; - - if (memcmp(&dpll_hw_state, &pll->state.hw_state, - sizeof(pll->state.hw_state)) == 0) { - found = true; - break; - } - } - - /* Ok no matching timings, maybe there's a free one? */ - for (i = DPLL_ID_SKL_DPLL1; - ((found == false) && (i <= DPLL_ID_SKL_DPLL3)); i++) { - pll = &dev_priv->shared_dplls[i]; - if (pll->state.crtc_mask == 0) { - pll->state.hw_state = dpll_hw_state; - break; - } - } - - return pll; -} - static void intel_atomic_duplicate_dpll_state(struct drm_i915_private *dev_priv, struct intel_shared_dpll_state *shared_dpll) @@ -811,8 +773,8 @@ static struct intel_shared_dpll *hsw_ddi_hdmi_get_dpll(int clock, return pll; } -struct intel_shared_dpll *hsw_ddi_dp_get_dpll(struct intel_encoder *encoder, - int clock) +static struct intel_shared_dpll * +hsw_ddi_dp_get_dpll(struct intel_encoder *encoder, int clock) { struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); struct intel_shared_dpll *pll; @@ -1360,8 +1322,9 @@ static bool skl_ddi_hdmi_pll_dividers(struct intel_crtc *crtc, } -bool skl_ddi_dp_set_dpll_hw_state(int clock, - struct intel_dpll_hw_state *dpll_hw_state) +static bool +skl_ddi_dp_set_dpll_hw_state(int clock, + struct intel_dpll_hw_state *dpll_hw_state) { uint32_t ctrl1; @@ -1816,8 +1779,9 @@ static bool bxt_ddi_set_dpll_hw_state(int clock, return true; } -bool bxt_ddi_dp_set_dpll_hw_state(int clock, - struct intel_dpll_hw_state *dpll_hw_state) +static bool +bxt_ddi_dp_set_dpll_hw_state(int clock, + struct intel_dpll_hw_state *dpll_hw_state) { struct bxt_clk_div clk_div = {0}; @@ -2016,7 +1980,7 @@ void intel_shared_dpll_init(struct drm_device *dev) const struct dpll_info *dpll_info; int i; - if (IS_SKYLAKE(dev_priv) || IS_KABYLAKE(dev_priv)) + if (IS_GEN9_BC(dev_priv)) dpll_mgr = &skl_pll_mgr; else if (IS_GEN9_LP(dev_priv)) dpll_mgr = &bxt_pll_mgr; diff --git a/drivers/gpu/drm/i915/intel_dpll_mgr.h b/drivers/gpu/drm/i915/intel_dpll_mgr.h index af1497eb4f9c..f8d13a947c13 100644 --- a/drivers/gpu/drm/i915/intel_dpll_mgr.h +++ b/drivers/gpu/drm/i915/intel_dpll_mgr.h @@ -282,20 +282,4 @@ void intel_shared_dpll_init(struct drm_device *dev); void intel_dpll_dump_hw_state(struct drm_i915_private *dev_priv, struct intel_dpll_hw_state *hw_state); -/* BXT dpll related functions */ -bool bxt_ddi_dp_set_dpll_hw_state(int clock, - struct intel_dpll_hw_state *dpll_hw_state); - - -/* SKL dpll related functions */ -bool skl_ddi_dp_set_dpll_hw_state(int clock, - struct intel_dpll_hw_state *dpll_hw_state); -struct intel_shared_dpll *skl_find_link_pll(struct drm_i915_private *dev_priv, - int clock); - - -/* HSW dpll related functions */ -struct intel_shared_dpll *hsw_ddi_dp_get_dpll(struct intel_encoder *encoder, - int clock); - #endif /* _INTEL_DPLL_MGR_H_ */ diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h index 40fed65a791d..741beba0c9c0 100644 --- a/drivers/gpu/drm/i915/intel_drv.h +++ b/drivers/gpu/drm/i915/intel_drv.h @@ -241,6 +241,9 @@ struct intel_encoder { * be set correctly before calling this function. */ void (*get_config)(struct intel_encoder *, struct intel_crtc_state *pipe_config); + /* Returns a mask of power domains that need to be referenced as part + * of the hardware state readout code. */ + u64 (*get_power_domains)(struct intel_encoder *encoder); /* * Called during system suspend after all pending requests for the * encoder are flushed (for example for DP AUX transactions) and @@ -249,6 +252,7 @@ struct intel_encoder { void (*suspend)(struct intel_encoder *); int crtc_mask; enum hpd_pin hpd_pin; + enum intel_display_power_domain power_domain; /* for communication with audio component; protected by av_mutex */ const struct drm_connector *audio_connector; }; @@ -333,13 +337,20 @@ struct dpll { struct intel_atomic_state { struct drm_atomic_state base; - unsigned int cdclk; - - /* - * Calculated device cdclk, can be different from cdclk - * only when all crtc's are DPMS off. - */ - unsigned int dev_cdclk; + struct { + /* + * Logical state of cdclk (used for all scaling, watermark, + * etc. calculations and checks). This is computed as if all + * enabled crtcs were active. + */ + struct intel_cdclk_state logical; + + /* + * Actual state of cdclk, can be different from the logical + * state only when all crtc's are DPMS off. + */ + struct intel_cdclk_state actual; + } cdclk; bool dpll_set, modeset; @@ -356,9 +367,6 @@ struct intel_atomic_state { unsigned int active_crtcs; unsigned int min_pixclk[I915_MAX_PIPES]; - /* SKL/KBL Only */ - unsigned int cdclk_pll_vco; - struct intel_shared_dpll_state shared_dpll[I915_NUM_PLLS]; /* @@ -544,6 +552,12 @@ struct intel_crtc_state { * and get clipped at the edges. */ int pipe_src_w, pipe_src_h; + /* + * Pipe pixel rate, adjusted for + * panel fitter/pipe scaler downscaling. + */ + unsigned int pixel_rate; + /* Whether to set up the PCH/FDI. Note that we never allow sharing * between pch encoders and cpu encoders. */ bool has_pch_encoder; @@ -580,6 +594,14 @@ struct intel_crtc_state { */ bool dither; + /* + * Dither gets enabled for 18bpp which causes CRC mismatch errors for + * compliance video pattern tests. + * Disable dither only if it is a compliance test request for + * 18bpp. + */ + bool dither_force_disable; + /* Controls for the clock computation, to override various stages. */ bool clock_set; @@ -697,7 +719,7 @@ struct intel_crtc { bool active; bool lowfreq_avail; u8 plane_ids_mask; - unsigned long enabled_power_domains; + unsigned long long enabled_power_domains; struct intel_overlay *overlay; struct intel_flip_work *flip_work; @@ -890,12 +912,17 @@ struct intel_dp_desc { struct intel_dp_compliance_data { unsigned long edid; + uint8_t video_pattern; + uint16_t hdisplay, vdisplay; + uint8_t bpc; }; struct intel_dp_compliance { unsigned long test_type; struct intel_dp_compliance_data test_data; bool test_active; + int test_link_rate; + u8 test_lane_count; }; struct intel_dp { @@ -910,6 +937,7 @@ struct intel_dp { bool has_audio; bool detect_done; bool channel_eq_status; + bool reset_link_params; enum hdmi_force_audio force_audio; bool limited_color_range; bool color_range_auto; @@ -927,6 +955,7 @@ struct intel_dp { /* sink or branch descriptor */ struct intel_dp_desc desc; struct drm_dp_aux aux; + enum intel_display_power_domain aux_power_domain; uint8_t train_set[4]; int panel_power_up_delay; int panel_power_down_delay; @@ -989,7 +1018,6 @@ struct intel_dp { struct intel_lspcon { bool active; enum drm_lspcon_mode mode; - bool desc_valid; }; struct intel_digital_port { @@ -1002,6 +1030,7 @@ struct intel_digital_port { enum irqreturn (*hpd_pulse)(struct intel_digital_port *, bool); bool release_cl2_override; uint8_t max_lanes; + enum intel_display_power_domain ddi_io_power_domain; }; struct intel_dp_mst_encoder { @@ -1096,7 +1125,19 @@ intel_attached_encoder(struct drm_connector *connector) static inline struct intel_digital_port * enc_to_dig_port(struct drm_encoder *encoder) { - return container_of(encoder, struct intel_digital_port, base.base); + struct intel_encoder *intel_encoder = to_intel_encoder(encoder); + + switch (intel_encoder->type) { + case INTEL_OUTPUT_UNKNOWN: + WARN_ON(!HAS_DDI(to_i915(encoder->dev))); + case INTEL_OUTPUT_DP: + case INTEL_OUTPUT_EDP: + case INTEL_OUTPUT_HDMI: + return container_of(encoder, struct intel_digital_port, + base.base); + default: + return NULL; + } } static inline struct intel_dp_mst_encoder * @@ -1210,9 +1251,9 @@ void intel_ddi_clock_get(struct intel_encoder *encoder, struct intel_crtc_state *pipe_config); void intel_ddi_set_vc_payload_alloc(struct drm_crtc *crtc, bool state); uint32_t ddi_signal_levels(struct intel_dp *intel_dp); -struct intel_shared_dpll *intel_ddi_get_link_dpll(struct intel_dp *intel_dp, - int clock); -unsigned int intel_fb_align_height(struct drm_device *dev, +u8 intel_ddi_dp_voltage_max(struct intel_encoder *encoder); + +unsigned int intel_fb_align_height(struct drm_i915_private *dev_priv, unsigned int height, uint32_t pixel_format, uint64_t fb_format_modifier); @@ -1228,12 +1269,24 @@ void intel_audio_codec_disable(struct intel_encoder *encoder); void i915_audio_component_init(struct drm_i915_private *dev_priv); void i915_audio_component_cleanup(struct drm_i915_private *dev_priv); +/* intel_cdclk.c */ +void intel_init_cdclk_hooks(struct drm_i915_private *dev_priv); +void intel_update_max_cdclk(struct drm_i915_private *dev_priv); +void intel_update_cdclk(struct drm_i915_private *dev_priv); +void intel_update_rawclk(struct drm_i915_private *dev_priv); +bool intel_cdclk_state_compare(const struct intel_cdclk_state *a, + const struct intel_cdclk_state *b); +void intel_set_cdclk(struct drm_i915_private *dev_priv, + const struct intel_cdclk_state *cdclk_state); + /* intel_display.c */ enum transcoder intel_crtc_pch_transcoder(struct intel_crtc *crtc); -void skl_set_preferred_cdclk_vco(struct drm_i915_private *dev_priv, int vco); void intel_update_rawclk(struct drm_i915_private *dev_priv); +int vlv_get_hpll_vco(struct drm_i915_private *dev_priv); int vlv_get_cck_clock(struct drm_i915_private *dev_priv, const char *name, u32 reg, int ref_freq); +int vlv_get_cck_clock_hpll(struct drm_i915_private *dev_priv, + const char *name, u32 reg); void lpt_disable_pch_transcoder(struct drm_i915_private *dev_priv); void lpt_disable_iclkip(struct drm_i915_private *dev_priv); extern const struct drm_plane_funcs intel_plane_funcs; @@ -1308,9 +1361,8 @@ struct i915_vma * intel_pin_and_fence_fb_obj(struct drm_framebuffer *fb, unsigned int rotation); void intel_unpin_fb_vma(struct i915_vma *vma); struct drm_framebuffer * -__intel_framebuffer_create(struct drm_device *dev, - struct drm_mode_fb_cmd2 *mode_cmd, - struct drm_i915_gem_object *obj); +intel_framebuffer_create(struct drm_i915_gem_object *obj, + struct drm_mode_fb_cmd2 *mode_cmd); void intel_finish_page_flip_cs(struct drm_i915_private *dev_priv, int pipe); void intel_finish_page_flip_mmio(struct drm_i915_private *dev_priv, int pipe); void intel_check_page_flip(struct drm_i915_private *dev_priv, int pipe); @@ -1385,10 +1437,7 @@ int chv_calc_dpll_params(int refclk, struct dpll *pll_clock); bool intel_crtc_active(struct intel_crtc *crtc); void hsw_enable_ips(struct intel_crtc *crtc); void hsw_disable_ips(struct intel_crtc *crtc); -enum intel_display_power_domain -intel_display_port_power_domain(struct intel_encoder *intel_encoder); -enum intel_display_power_domain -intel_display_port_aux_power_domain(struct intel_encoder *intel_encoder); +enum intel_display_power_domain intel_port_to_power_domain(enum port port); void intel_mode_from_pipe_config(struct drm_display_mode *mode, struct intel_crtc_state *pipe_config); @@ -1661,6 +1710,7 @@ int intel_power_domains_init(struct drm_i915_private *); void intel_power_domains_fini(struct drm_i915_private *); void intel_power_domains_init_hw(struct drm_i915_private *dev_priv, bool resume); void intel_power_domains_suspend(struct drm_i915_private *dev_priv); +void intel_power_domains_verify_state(struct drm_i915_private *dev_priv); void bxt_display_core_init(struct drm_i915_private *dev_priv, bool resume); void bxt_display_core_uninit(struct drm_i915_private *dev_priv); void intel_runtime_pm_enable(struct drm_i915_private *dev_priv); @@ -1788,7 +1838,6 @@ bool skl_wm_level_equals(const struct skl_wm_level *l1, bool skl_ddb_allocation_overlaps(const struct skl_ddb_entry **entries, const struct skl_ddb_entry *ddb, int ignore); -uint32_t ilk_pipe_pixel_rate(const struct intel_crtc_state *pipe_config); bool ilk_disable_lp_wm(struct drm_device *dev); int sanitize_rc6_option(struct drm_i915_private *dev_priv, int enable_rc6); static inline int intel_enable_rc6(void) @@ -1862,9 +1911,9 @@ intel_atomic_get_existing_plane_state(struct drm_atomic_state *state, return to_intel_plane_state(plane_state); } -int intel_atomic_setup_scalers(struct drm_device *dev, - struct intel_crtc *intel_crtc, - struct intel_crtc_state *crtc_state); +int intel_atomic_setup_scalers(struct drm_i915_private *dev_priv, + struct intel_crtc *intel_crtc, + struct intel_crtc_state *crtc_state); /* intel_atomic_plane.c */ struct intel_plane_state *intel_create_plane_state(struct drm_plane *plane); diff --git a/drivers/gpu/drm/i915/intel_dsi.c b/drivers/gpu/drm/i915/intel_dsi.c index 16732e7bc08e..d7ffb9ac3c8a 100644 --- a/drivers/gpu/drm/i915/intel_dsi.c +++ b/drivers/gpu/drm/i915/intel_dsi.c @@ -366,32 +366,19 @@ static void bxt_dsi_device_ready(struct intel_encoder *encoder) DRM_DEBUG_KMS("\n"); - /* Exit Low power state in 4 steps*/ + /* Enable MIPI PHY transparent latch */ for_each_dsi_port(port, intel_dsi->ports) { - - /* 1. Enable MIPI PHY transparent latch */ val = I915_READ(BXT_MIPI_PORT_CTRL(port)); I915_WRITE(BXT_MIPI_PORT_CTRL(port), val | LP_OUTPUT_HOLD); usleep_range(2000, 2500); + } - /* 2. Enter ULPS */ - val = I915_READ(MIPI_DEVICE_READY(port)); - val &= ~ULPS_STATE_MASK; - val |= (ULPS_STATE_ENTER | DEVICE_READY); - I915_WRITE(MIPI_DEVICE_READY(port), val); - /* at least 2us - relaxed for hrtimer subsystem optimization */ - usleep_range(10, 50); - - /* 3. Exit ULPS */ + /* Clear ULPS and set device ready */ + for_each_dsi_port(port, intel_dsi->ports) { val = I915_READ(MIPI_DEVICE_READY(port)); val &= ~ULPS_STATE_MASK; - val |= (ULPS_STATE_EXIT | DEVICE_READY); I915_WRITE(MIPI_DEVICE_READY(port), val); - usleep_range(1000, 1500); - - /* Clear ULPS and set device ready */ - val = I915_READ(MIPI_DEVICE_READY(port)); - val &= ~ULPS_STATE_MASK; + usleep_range(2000, 2500); val |= DEVICE_READY; I915_WRITE(MIPI_DEVICE_READY(port), val); } @@ -456,12 +443,21 @@ static void intel_dsi_port_enable(struct intel_encoder *encoder) if (intel_dsi->dual_link == DSI_DUAL_LINK_FRONT_BACK) { u32 temp; - - temp = I915_READ(VLV_CHICKEN_3); - temp &= ~PIXEL_OVERLAP_CNT_MASK | + if (IS_GEN9_LP(dev_priv)) { + for_each_dsi_port(port, intel_dsi->ports) { + temp = I915_READ(MIPI_CTRL(port)); + temp &= ~BXT_PIXEL_OVERLAP_CNT_MASK | + intel_dsi->pixel_overlap << + BXT_PIXEL_OVERLAP_CNT_SHIFT; + I915_WRITE(MIPI_CTRL(port), temp); + } + } else { + temp = I915_READ(VLV_CHICKEN_3); + temp &= ~PIXEL_OVERLAP_CNT_MASK | intel_dsi->pixel_overlap << PIXEL_OVERLAP_CNT_SHIFT; - I915_WRITE(VLV_CHICKEN_3, temp); + I915_WRITE(VLV_CHICKEN_3, temp); + } } for_each_dsi_port(port, intel_dsi->ports) { @@ -548,6 +544,7 @@ static void intel_dsi_pre_enable(struct intel_encoder *encoder, struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); struct intel_dsi *intel_dsi = enc_to_intel_dsi(&encoder->base); enum port port; + u32 val; DRM_DEBUG_KMS("\n"); @@ -558,6 +555,17 @@ static void intel_dsi_pre_enable(struct intel_encoder *encoder, intel_disable_dsi_pll(encoder); intel_enable_dsi_pll(encoder, pipe_config); + if (IS_BROXTON(dev_priv)) { + /* Add MIPI IO reset programming for modeset */ + val = I915_READ(BXT_P_CR_GT_DISP_PWRON); + I915_WRITE(BXT_P_CR_GT_DISP_PWRON, + val | MIPIO_RST_CTRL); + + /* Power up DSI regulator */ + I915_WRITE(BXT_P_DSI_REGULATOR_CFG, STAP_SELECT); + I915_WRITE(BXT_P_DSI_REGULATOR_TX_CTRL, 0); + } + intel_dsi_prepare(encoder, pipe_config); /* Panel Enable over CRC PMIC */ @@ -604,6 +612,8 @@ static void intel_dsi_pre_disable(struct intel_encoder *encoder, struct intel_crtc_state *old_crtc_state, struct drm_connector_state *old_conn_state) { + struct drm_device *dev = encoder->base.dev; + struct drm_i915_private *dev_priv = dev->dev_private; struct intel_dsi *intel_dsi = enc_to_intel_dsi(&encoder->base); enum port port; @@ -611,6 +621,15 @@ static void intel_dsi_pre_disable(struct intel_encoder *encoder, intel_panel_disable_backlight(intel_dsi->attached_connector); + /* + * Disable Device ready before the port shutdown in order + * to avoid split screen + */ + if (IS_BROXTON(dev_priv)) { + for_each_dsi_port(port, intel_dsi->ports) + I915_WRITE(MIPI_DEVICE_READY(port), 0); + } + if (is_vid_mode(intel_dsi)) { /* Send Shutdown command to the panel in LP mode */ for_each_dsi_port(port, intel_dsi->ports) @@ -707,6 +726,7 @@ static void intel_dsi_post_disable(struct intel_encoder *encoder, { struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); struct intel_dsi *intel_dsi = enc_to_intel_dsi(&encoder->base); + u32 val; DRM_DEBUG_KMS("\n"); @@ -714,6 +734,17 @@ static void intel_dsi_post_disable(struct intel_encoder *encoder, intel_dsi_clear_device_ready(encoder); + if (IS_BROXTON(dev_priv)) { + /* Power down DSI regulator to save power */ + I915_WRITE(BXT_P_DSI_REGULATOR_CFG, STAP_SELECT); + I915_WRITE(BXT_P_DSI_REGULATOR_TX_CTRL, HS_IO_CTRL_SELECT); + + /* Add MIPI IO reset programming for modeset */ + val = I915_READ(BXT_P_CR_GT_DISP_PWRON); + I915_WRITE(BXT_P_CR_GT_DISP_PWRON, + val & ~MIPIO_RST_CTRL); + } + intel_disable_dsi_pll(encoder); if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) { @@ -744,14 +775,13 @@ static bool intel_dsi_get_hw_state(struct intel_encoder *encoder, { struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); struct intel_dsi *intel_dsi = enc_to_intel_dsi(&encoder->base); - enum intel_display_power_domain power_domain; enum port port; bool active = false; DRM_DEBUG_KMS("\n"); - power_domain = intel_display_port_power_domain(encoder); - if (!intel_display_power_get_if_enabled(dev_priv, power_domain)) + if (!intel_display_power_get_if_enabled(dev_priv, + encoder->power_domain)) return false; /* @@ -807,7 +837,7 @@ static bool intel_dsi_get_hw_state(struct intel_encoder *encoder, } out_put_power: - intel_display_power_put(dev_priv, power_domain); + intel_display_power_put(dev_priv, encoder->power_domain); return active; } @@ -1485,6 +1515,7 @@ void intel_dsi_init(struct drm_i915_private *dev_priv) intel_connector->get_hw_state = intel_connector_get_hw_state; intel_encoder->port = port; + /* * On BYT/CHV, pipe A maps to MIPI DSI port A, pipe B maps to MIPI DSI * port C. BXT isn't limited like this. @@ -1560,7 +1591,8 @@ void intel_dsi_init(struct drm_i915_private *dev_priv) * In case of BYT with CRC PMIC, we need to use GPIO for * Panel control. */ - if (dev_priv->vbt.dsi.config->pwm_blc == PPS_BLC_PMIC) { + if ((IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) && + (dev_priv->vbt.dsi.config->pwm_blc == PPS_BLC_PMIC)) { intel_dsi->gpio_panel = gpiod_get(dev->dev, "panel", GPIOD_OUT_HIGH); @@ -1571,6 +1603,7 @@ void intel_dsi_init(struct drm_i915_private *dev_priv) } intel_encoder->type = INTEL_OUTPUT_DSI; + intel_encoder->power_domain = POWER_DOMAIN_PORT_DSI; intel_encoder->cloneable = 0; drm_connector_init(dev, connector, &intel_dsi_connector_funcs, DRM_MODE_CONNECTOR_DSI); diff --git a/drivers/gpu/drm/i915/intel_dsi_panel_vbt.c b/drivers/gpu/drm/i915/intel_dsi_panel_vbt.c index 8f683b8b1816..84b3683c1f46 100644 --- a/drivers/gpu/drm/i915/intel_dsi_panel_vbt.c +++ b/drivers/gpu/drm/i915/intel_dsi_panel_vbt.c @@ -801,6 +801,19 @@ struct drm_panel *vbt_panel_init(struct intel_dsi *intel_dsi, u16 panel_id) 8); intel_dsi->clk_hs_to_lp_count += extra_byte_count; + DRM_DEBUG_KMS("Pclk %d\n", intel_dsi->pclk); + DRM_DEBUG_KMS("Pixel overlap %d\n", intel_dsi->pixel_overlap); + DRM_DEBUG_KMS("Lane count %d\n", intel_dsi->lane_count); + DRM_DEBUG_KMS("DPHY param reg 0x%x\n", intel_dsi->dphy_reg); + DRM_DEBUG_KMS("Video mode format %s\n", + intel_dsi->video_mode_format == VIDEO_MODE_NON_BURST_WITH_SYNC_PULSE ? + "non-burst with sync pulse" : + intel_dsi->video_mode_format == VIDEO_MODE_NON_BURST_WITH_SYNC_EVENTS ? + "non-burst with sync events" : + intel_dsi->video_mode_format == VIDEO_MODE_BURST ? + "burst" : "<unknown>"); + DRM_DEBUG_KMS("Burst mode ratio %d\n", intel_dsi->burst_mode_ratio); + DRM_DEBUG_KMS("Reset timer %d\n", intel_dsi->rst_timer_val); DRM_DEBUG_KMS("Eot %s\n", enableddisabled(intel_dsi->eotp_pkt)); DRM_DEBUG_KMS("Clockstop %s\n", enableddisabled(!intel_dsi->clock_stop)); DRM_DEBUG_KMS("Mode %s\n", intel_dsi->operation_mode ? "command" : "video"); diff --git a/drivers/gpu/drm/i915/intel_dsi_pll.c b/drivers/gpu/drm/i915/intel_dsi_pll.c index 61440e5c2563..3a7308681360 100644 --- a/drivers/gpu/drm/i915/intel_dsi_pll.c +++ b/drivers/gpu/drm/i915/intel_dsi_pll.c @@ -416,11 +416,7 @@ static void bxt_dsi_program_clocks(struct drm_device *dev, enum port port, rx_div_lower = rx_div & RX_DIVIDER_BIT_1_2; rx_div_upper = (rx_div & RX_DIVIDER_BIT_3_4) >> 2; - /* As per bpsec program the 8/3X clock divider to the below value */ - if (dev_priv->vbt.dsi.config->is_cmd_mode) - mipi_8by3_divider = 0x2; - else - mipi_8by3_divider = 0x3; + mipi_8by3_divider = 0x2; tmp |= BXT_MIPI_8X_BY3_DIVIDER(port, mipi_8by3_divider); tmp |= BXT_MIPI_TX_ESCLK_DIVIDER(port, tx_div); diff --git a/drivers/gpu/drm/i915/intel_dvo.c b/drivers/gpu/drm/i915/intel_dvo.c index 50da89dcb92b..6025839ed3b7 100644 --- a/drivers/gpu/drm/i915/intel_dvo.c +++ b/drivers/gpu/drm/i915/intel_dvo.c @@ -515,6 +515,7 @@ void intel_dvo_init(struct drm_i915_private *dev_priv) "DVO %c", port_name(port)); intel_encoder->type = INTEL_OUTPUT_DVO; + intel_encoder->power_domain = POWER_DOMAIN_PORT_OTHER; intel_encoder->port = port; intel_encoder->crtc_mask = (1 << 0) | (1 << 1); diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c index 371acf109e34..c4d4698f98e7 100644 --- a/drivers/gpu/drm/i915/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/intel_engine_cs.c @@ -110,21 +110,20 @@ intel_engine_setup(struct drm_i915_private *dev_priv, } /** - * intel_engines_init() - allocate, populate and init the Engine Command Streamers + * intel_engines_init_early() - allocate the Engine Command Streamers * @dev_priv: i915 device private * * Return: non-zero if the initialization failed. */ -int intel_engines_init(struct drm_i915_private *dev_priv) +int intel_engines_init_early(struct drm_i915_private *dev_priv) { struct intel_device_info *device_info = mkwrite_device_info(dev_priv); unsigned int ring_mask = INTEL_INFO(dev_priv)->ring_mask; unsigned int mask = 0; - int (*init)(struct intel_engine_cs *engine); struct intel_engine_cs *engine; enum intel_engine_id id; unsigned int i; - int ret; + int err; WARN_ON(ring_mask == 0); WARN_ON(ring_mask & @@ -134,20 +133,8 @@ int intel_engines_init(struct drm_i915_private *dev_priv) if (!HAS_ENGINE(dev_priv, i)) continue; - if (i915.enable_execlists) - init = intel_engines[i].init_execlists; - else - init = intel_engines[i].init_legacy; - - if (!init) - continue; - - ret = intel_engine_setup(dev_priv, i); - if (ret) - goto cleanup; - - ret = init(dev_priv->engine[i]); - if (ret) + err = intel_engine_setup(dev_priv, i); + if (err) goto cleanup; mask |= ENGINE_MASK(i); @@ -166,14 +153,67 @@ int intel_engines_init(struct drm_i915_private *dev_priv) return 0; cleanup: + for_each_engine(engine, dev_priv, id) + kfree(engine); + return err; +} + +/** + * intel_engines_init() - allocate, populate and init the Engine Command Streamers + * @dev_priv: i915 device private + * + * Return: non-zero if the initialization failed. + */ +int intel_engines_init(struct drm_i915_private *dev_priv) +{ + struct intel_device_info *device_info = mkwrite_device_info(dev_priv); + struct intel_engine_cs *engine; + enum intel_engine_id id, err_id; + unsigned int mask = 0; + int err = 0; + for_each_engine(engine, dev_priv, id) { + int (*init)(struct intel_engine_cs *engine); + if (i915.enable_execlists) - intel_logical_ring_cleanup(engine); + init = intel_engines[id].init_execlists; else - intel_engine_cleanup(engine); + init = intel_engines[id].init_legacy; + if (!init) { + kfree(engine); + dev_priv->engine[id] = NULL; + continue; + } + + err = init(engine); + if (err) { + err_id = id; + goto cleanup; + } + + mask |= ENGINE_MASK(id); } - return ret; + /* + * Catch failures to update intel_engines table when the new engines + * are added to the driver by a warning and disabling the forgotten + * engines. + */ + if (WARN_ON(mask != INTEL_INFO(dev_priv)->ring_mask)) + device_info->ring_mask = mask; + + device_info->num_rings = hweight32(mask); + + return 0; + +cleanup: + for_each_engine(engine, dev_priv, id) { + if (id >= err_id) + kfree(engine); + else + dev_priv->gt.cleanup_engine(engine); + } + return err; } void intel_engine_init_global_seqno(struct intel_engine_cs *engine, u32 seqno) @@ -212,8 +252,6 @@ void intel_engine_init_global_seqno(struct intel_engine_cs *engine, u32 seqno) engine->irq_seqno_barrier(engine); GEM_BUG_ON(i915_gem_active_isset(&engine->timeline->last_request)); - engine->timeline->last_submitted_seqno = seqno; - engine->hangcheck.seqno = seqno; /* After manually advancing the seqno, fake the interrupt in case @@ -482,3 +520,557 @@ void intel_engine_get_instdone(struct intel_engine_cs *engine, break; } } + +static int wa_add(struct drm_i915_private *dev_priv, + i915_reg_t addr, + const u32 mask, const u32 val) +{ + const u32 idx = dev_priv->workarounds.count; + + if (WARN_ON(idx >= I915_MAX_WA_REGS)) + return -ENOSPC; + + dev_priv->workarounds.reg[idx].addr = addr; + dev_priv->workarounds.reg[idx].value = val; + dev_priv->workarounds.reg[idx].mask = mask; + + dev_priv->workarounds.count++; + + return 0; +} + +#define WA_REG(addr, mask, val) do { \ + const int r = wa_add(dev_priv, (addr), (mask), (val)); \ + if (r) \ + return r; \ + } while (0) + +#define WA_SET_BIT_MASKED(addr, mask) \ + WA_REG(addr, (mask), _MASKED_BIT_ENABLE(mask)) + +#define WA_CLR_BIT_MASKED(addr, mask) \ + WA_REG(addr, (mask), _MASKED_BIT_DISABLE(mask)) + +#define WA_SET_FIELD_MASKED(addr, mask, value) \ + WA_REG(addr, mask, _MASKED_FIELD(mask, value)) + +#define WA_SET_BIT(addr, mask) WA_REG(addr, mask, I915_READ(addr) | (mask)) +#define WA_CLR_BIT(addr, mask) WA_REG(addr, mask, I915_READ(addr) & ~(mask)) + +#define WA_WRITE(addr, val) WA_REG(addr, 0xffffffff, val) + +static int wa_ring_whitelist_reg(struct intel_engine_cs *engine, + i915_reg_t reg) +{ + struct drm_i915_private *dev_priv = engine->i915; + struct i915_workarounds *wa = &dev_priv->workarounds; + const uint32_t index = wa->hw_whitelist_count[engine->id]; + + if (WARN_ON(index >= RING_MAX_NONPRIV_SLOTS)) + return -EINVAL; + + WA_WRITE(RING_FORCE_TO_NONPRIV(engine->mmio_base, index), + i915_mmio_reg_offset(reg)); + wa->hw_whitelist_count[engine->id]++; + + return 0; +} + +static int gen8_init_workarounds(struct intel_engine_cs *engine) +{ + struct drm_i915_private *dev_priv = engine->i915; + + WA_SET_BIT_MASKED(INSTPM, INSTPM_FORCE_ORDERING); + + /* WaDisableAsyncFlipPerfMode:bdw,chv */ + WA_SET_BIT_MASKED(MI_MODE, ASYNC_FLIP_PERF_DISABLE); + + /* WaDisablePartialInstShootdown:bdw,chv */ + WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN, + PARTIAL_INSTRUCTION_SHOOTDOWN_DISABLE); + + /* Use Force Non-Coherent whenever executing a 3D context. This is a + * workaround for for a possible hang in the unlikely event a TLB + * invalidation occurs during a PSD flush. + */ + /* WaForceEnableNonCoherent:bdw,chv */ + /* WaHdcDisableFetchWhenMasked:bdw,chv */ + WA_SET_BIT_MASKED(HDC_CHICKEN0, + HDC_DONOT_FETCH_MEM_WHEN_MASKED | + HDC_FORCE_NON_COHERENT); + + /* From the Haswell PRM, Command Reference: Registers, CACHE_MODE_0: + * "The Hierarchical Z RAW Stall Optimization allows non-overlapping + * polygons in the same 8x4 pixel/sample area to be processed without + * stalling waiting for the earlier ones to write to Hierarchical Z + * buffer." + * + * This optimization is off by default for BDW and CHV; turn it on. + */ + WA_CLR_BIT_MASKED(CACHE_MODE_0_GEN7, HIZ_RAW_STALL_OPT_DISABLE); + + /* Wa4x4STCOptimizationDisable:bdw,chv */ + WA_SET_BIT_MASKED(CACHE_MODE_1, GEN8_4x4_STC_OPTIMIZATION_DISABLE); + + /* + * BSpec recommends 8x4 when MSAA is used, + * however in practice 16x4 seems fastest. + * + * Note that PS/WM thread counts depend on the WIZ hashing + * disable bit, which we don't touch here, but it's good + * to keep in mind (see 3DSTATE_PS and 3DSTATE_WM). + */ + WA_SET_FIELD_MASKED(GEN7_GT_MODE, + GEN6_WIZ_HASHING_MASK, + GEN6_WIZ_HASHING_16x4); + + return 0; +} + +static int bdw_init_workarounds(struct intel_engine_cs *engine) +{ + struct drm_i915_private *dev_priv = engine->i915; + int ret; + + ret = gen8_init_workarounds(engine); + if (ret) + return ret; + + /* WaDisableThreadStallDopClockGating:bdw (pre-production) */ + WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN, STALL_DOP_GATING_DISABLE); + + /* WaDisableDopClockGating:bdw + * + * Also see the related UCGTCL1 write in broadwell_init_clock_gating() + * to disable EUTC clock gating. + */ + WA_SET_BIT_MASKED(GEN7_ROW_CHICKEN2, + DOP_CLOCK_GATING_DISABLE); + + WA_SET_BIT_MASKED(HALF_SLICE_CHICKEN3, + GEN8_SAMPLER_POWER_BYPASS_DIS); + + WA_SET_BIT_MASKED(HDC_CHICKEN0, + /* WaForceContextSaveRestoreNonCoherent:bdw */ + HDC_FORCE_CONTEXT_SAVE_RESTORE_NON_COHERENT | + /* WaDisableFenceDestinationToSLM:bdw (pre-prod) */ + (IS_BDW_GT3(dev_priv) ? HDC_FENCE_DEST_SLM_DISABLE : 0)); + + return 0; +} + +static int chv_init_workarounds(struct intel_engine_cs *engine) +{ + struct drm_i915_private *dev_priv = engine->i915; + int ret; + + ret = gen8_init_workarounds(engine); + if (ret) + return ret; + + /* WaDisableThreadStallDopClockGating:chv */ + WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN, STALL_DOP_GATING_DISABLE); + + /* Improve HiZ throughput on CHV. */ + WA_SET_BIT_MASKED(HIZ_CHICKEN, CHV_HZ_8X8_MODE_IN_1X); + + return 0; +} + +static int gen9_init_workarounds(struct intel_engine_cs *engine) +{ + struct drm_i915_private *dev_priv = engine->i915; + int ret; + + /* WaConextSwitchWithConcurrentTLBInvalidate:skl,bxt,kbl,glk */ + I915_WRITE(GEN9_CSFE_CHICKEN1_RCS, _MASKED_BIT_ENABLE(GEN9_PREEMPT_GPGPU_SYNC_SWITCH_DISABLE)); + + /* WaEnableLbsSlaRetryTimerDecrement:skl,bxt,kbl,glk */ + I915_WRITE(BDW_SCRATCH1, I915_READ(BDW_SCRATCH1) | + GEN9_LBS_SLA_RETRY_TIMER_DECREMENT_ENABLE); + + /* WaDisableKillLogic:bxt,skl,kbl */ + I915_WRITE(GAM_ECOCHK, I915_READ(GAM_ECOCHK) | + ECOCHK_DIS_TLB); + + /* WaClearFlowControlGpgpuContextSave:skl,bxt,kbl,glk */ + /* WaDisablePartialInstShootdown:skl,bxt,kbl,glk */ + WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN, + FLOW_CONTROL_ENABLE | + PARTIAL_INSTRUCTION_SHOOTDOWN_DISABLE); + + /* Syncing dependencies between camera and graphics:skl,bxt,kbl */ + WA_SET_BIT_MASKED(HALF_SLICE_CHICKEN3, + GEN9_DISABLE_OCL_OOB_SUPPRESS_LOGIC); + + /* WaDisableDgMirrorFixInHalfSliceChicken5:bxt */ + if (IS_BXT_REVID(dev_priv, 0, BXT_REVID_A1)) + WA_CLR_BIT_MASKED(GEN9_HALF_SLICE_CHICKEN5, + GEN9_DG_MIRROR_FIX_ENABLE); + + /* WaSetDisablePixMaskCammingAndRhwoInCommonSliceChicken:bxt */ + if (IS_BXT_REVID(dev_priv, 0, BXT_REVID_A1)) { + WA_SET_BIT_MASKED(GEN7_COMMON_SLICE_CHICKEN1, + GEN9_RHWO_OPTIMIZATION_DISABLE); + /* + * WA also requires GEN9_SLICE_COMMON_ECO_CHICKEN0[14:14] to be set + * but we do that in per ctx batchbuffer as there is an issue + * with this register not getting restored on ctx restore + */ + } + + /* WaEnableSamplerGPGPUPreemptionSupport:skl,bxt,kbl */ + WA_SET_BIT_MASKED(GEN9_HALF_SLICE_CHICKEN7, + GEN9_ENABLE_GPGPU_PREEMPTION); + + /* Wa4x4STCOptimizationDisable:skl,bxt,kbl,glk */ + /* WaDisablePartialResolveInVc:skl,bxt,kbl */ + WA_SET_BIT_MASKED(CACHE_MODE_1, (GEN8_4x4_STC_OPTIMIZATION_DISABLE | + GEN9_PARTIAL_RESOLVE_IN_VC_DISABLE)); + + /* WaCcsTlbPrefetchDisable:skl,bxt,kbl,glk */ + WA_CLR_BIT_MASKED(GEN9_HALF_SLICE_CHICKEN5, + GEN9_CCS_TLB_PREFETCH_ENABLE); + + /* WaDisableMaskBasedCammingInRCC:bxt */ + if (IS_BXT_REVID(dev_priv, 0, BXT_REVID_A1)) + WA_SET_BIT_MASKED(SLICE_ECO_CHICKEN0, + PIXEL_MASK_CAMMING_DISABLE); + + /* WaForceContextSaveRestoreNonCoherent:skl,bxt,kbl */ + WA_SET_BIT_MASKED(HDC_CHICKEN0, + HDC_FORCE_CONTEXT_SAVE_RESTORE_NON_COHERENT | + HDC_FORCE_CSR_NON_COHERENT_OVR_DISABLE); + + /* WaForceEnableNonCoherent and WaDisableHDCInvalidation are + * both tied to WaForceContextSaveRestoreNonCoherent + * in some hsds for skl. We keep the tie for all gen9. The + * documentation is a bit hazy and so we want to get common behaviour, + * even though there is no clear evidence we would need both on kbl/bxt. + * This area has been source of system hangs so we play it safe + * and mimic the skl regardless of what bspec says. + * + * Use Force Non-Coherent whenever executing a 3D context. This + * is a workaround for a possible hang in the unlikely event + * a TLB invalidation occurs during a PSD flush. + */ + + /* WaForceEnableNonCoherent:skl,bxt,kbl */ + WA_SET_BIT_MASKED(HDC_CHICKEN0, + HDC_FORCE_NON_COHERENT); + + /* WaDisableHDCInvalidation:skl,bxt,kbl */ + I915_WRITE(GAM_ECOCHK, I915_READ(GAM_ECOCHK) | + BDW_DISABLE_HDC_INVALIDATION); + + /* WaDisableSamplerPowerBypassForSOPingPong:skl,bxt,kbl */ + if (IS_SKYLAKE(dev_priv) || + IS_KABYLAKE(dev_priv) || + IS_BXT_REVID(dev_priv, 0, BXT_REVID_B0)) + WA_SET_BIT_MASKED(HALF_SLICE_CHICKEN3, + GEN8_SAMPLER_POWER_BYPASS_DIS); + + /* WaDisableSTUnitPowerOptimization:skl,bxt,kbl,glk */ + WA_SET_BIT_MASKED(HALF_SLICE_CHICKEN2, GEN8_ST_PO_DISABLE); + + /* WaOCLCoherentLineFlush:skl,bxt,kbl */ + I915_WRITE(GEN8_L3SQCREG4, (I915_READ(GEN8_L3SQCREG4) | + GEN8_LQSC_FLUSH_COHERENT_LINES)); + + /* WaVFEStateAfterPipeControlwithMediaStateClear:skl,bxt,glk */ + ret = wa_ring_whitelist_reg(engine, GEN9_CTX_PREEMPT_REG); + if (ret) + return ret; + + /* WaEnablePreemptionGranularityControlByUMD:skl,bxt,kbl */ + ret= wa_ring_whitelist_reg(engine, GEN8_CS_CHICKEN1); + if (ret) + return ret; + + /* WaAllowUMDToModifyHDCChicken1:skl,bxt,kbl,glk */ + ret = wa_ring_whitelist_reg(engine, GEN8_HDC_CHICKEN1); + if (ret) + return ret; + + return 0; +} + +static int skl_tune_iz_hashing(struct intel_engine_cs *engine) +{ + struct drm_i915_private *dev_priv = engine->i915; + u8 vals[3] = { 0, 0, 0 }; + unsigned int i; + + for (i = 0; i < 3; i++) { + u8 ss; + + /* + * Only consider slices where one, and only one, subslice has 7 + * EUs + */ + if (!is_power_of_2(INTEL_INFO(dev_priv)->sseu.subslice_7eu[i])) + continue; + + /* + * subslice_7eu[i] != 0 (because of the check above) and + * ss_max == 4 (maximum number of subslices possible per slice) + * + * -> 0 <= ss <= 3; + */ + ss = ffs(INTEL_INFO(dev_priv)->sseu.subslice_7eu[i]) - 1; + vals[i] = 3 - ss; + } + + if (vals[0] == 0 && vals[1] == 0 && vals[2] == 0) + return 0; + + /* Tune IZ hashing. See intel_device_info_runtime_init() */ + WA_SET_FIELD_MASKED(GEN7_GT_MODE, + GEN9_IZ_HASHING_MASK(2) | + GEN9_IZ_HASHING_MASK(1) | + GEN9_IZ_HASHING_MASK(0), + GEN9_IZ_HASHING(2, vals[2]) | + GEN9_IZ_HASHING(1, vals[1]) | + GEN9_IZ_HASHING(0, vals[0])); + + return 0; +} + +static int skl_init_workarounds(struct intel_engine_cs *engine) +{ + struct drm_i915_private *dev_priv = engine->i915; + int ret; + + ret = gen9_init_workarounds(engine); + if (ret) + return ret; + + /* + * Actual WA is to disable percontext preemption granularity control + * until D0 which is the default case so this is equivalent to + * !WaDisablePerCtxtPreemptionGranularityControl:skl + */ + I915_WRITE(GEN7_FF_SLICE_CS_CHICKEN1, + _MASKED_BIT_ENABLE(GEN9_FFSC_PERCTX_PREEMPT_CTRL)); + + /* WaEnableGapsTsvCreditFix:skl */ + I915_WRITE(GEN8_GARBCNTL, (I915_READ(GEN8_GARBCNTL) | + GEN9_GAPS_TSV_CREDIT_DISABLE)); + + /* WaDisableGafsUnitClkGating:skl */ + WA_SET_BIT(GEN7_UCGCTL4, GEN8_EU_GAUNIT_CLOCK_GATE_DISABLE); + + /* WaInPlaceDecompressionHang:skl */ + if (IS_SKL_REVID(dev_priv, SKL_REVID_H0, REVID_FOREVER)) + WA_SET_BIT(GEN9_GAMT_ECO_REG_RW_IA, + GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS); + + /* WaDisableLSQCROPERFforOCL:skl */ + ret = wa_ring_whitelist_reg(engine, GEN8_L3SQCREG4); + if (ret) + return ret; + + return skl_tune_iz_hashing(engine); +} + +static int bxt_init_workarounds(struct intel_engine_cs *engine) +{ + struct drm_i915_private *dev_priv = engine->i915; + int ret; + + ret = gen9_init_workarounds(engine); + if (ret) + return ret; + + /* WaStoreMultiplePTEenable:bxt */ + /* This is a requirement according to Hardware specification */ + if (IS_BXT_REVID(dev_priv, 0, BXT_REVID_A1)) + I915_WRITE(TILECTL, I915_READ(TILECTL) | TILECTL_TLBPF); + + /* WaSetClckGatingDisableMedia:bxt */ + if (IS_BXT_REVID(dev_priv, 0, BXT_REVID_A1)) { + I915_WRITE(GEN7_MISCCPCTL, (I915_READ(GEN7_MISCCPCTL) & + ~GEN8_DOP_CLOCK_GATE_MEDIA_ENABLE)); + } + + /* WaDisableThreadStallDopClockGating:bxt */ + WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN, + STALL_DOP_GATING_DISABLE); + + /* WaDisablePooledEuLoadBalancingFix:bxt */ + if (IS_BXT_REVID(dev_priv, BXT_REVID_B0, REVID_FOREVER)) { + WA_SET_BIT_MASKED(FF_SLICE_CS_CHICKEN2, + GEN9_POOLED_EU_LOAD_BALANCING_FIX_DISABLE); + } + + /* WaDisableSbeCacheDispatchPortSharing:bxt */ + if (IS_BXT_REVID(dev_priv, 0, BXT_REVID_B0)) { + WA_SET_BIT_MASKED( + GEN7_HALF_SLICE_CHICKEN1, + GEN7_SBE_SS_CACHE_DISPATCH_PORT_SHARING_DISABLE); + } + + /* WaDisableObjectLevelPreemptionForTrifanOrPolygon:bxt */ + /* WaDisableObjectLevelPreemptionForInstancedDraw:bxt */ + /* WaDisableObjectLevelPreemtionForInstanceId:bxt */ + /* WaDisableLSQCROPERFforOCL:bxt */ + if (IS_BXT_REVID(dev_priv, 0, BXT_REVID_A1)) { + ret = wa_ring_whitelist_reg(engine, GEN9_CS_DEBUG_MODE1); + if (ret) + return ret; + + ret = wa_ring_whitelist_reg(engine, GEN8_L3SQCREG4); + if (ret) + return ret; + } + + /* WaProgramL3SqcReg1DefaultForPerf:bxt */ + if (IS_BXT_REVID(dev_priv, BXT_REVID_B0, REVID_FOREVER)) + I915_WRITE(GEN8_L3SQCREG1, L3_GENERAL_PRIO_CREDITS(62) | + L3_HIGH_PRIO_CREDITS(2)); + + /* WaToEnableHwFixForPushConstHWBug:bxt */ + if (IS_BXT_REVID(dev_priv, BXT_REVID_C0, REVID_FOREVER)) + WA_SET_BIT_MASKED(COMMON_SLICE_CHICKEN2, + GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION); + + /* WaInPlaceDecompressionHang:bxt */ + if (IS_BXT_REVID(dev_priv, BXT_REVID_C0, REVID_FOREVER)) + WA_SET_BIT(GEN9_GAMT_ECO_REG_RW_IA, + GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS); + + return 0; +} + +static int kbl_init_workarounds(struct intel_engine_cs *engine) +{ + struct drm_i915_private *dev_priv = engine->i915; + int ret; + + ret = gen9_init_workarounds(engine); + if (ret) + return ret; + + /* WaEnableGapsTsvCreditFix:kbl */ + I915_WRITE(GEN8_GARBCNTL, (I915_READ(GEN8_GARBCNTL) | + GEN9_GAPS_TSV_CREDIT_DISABLE)); + + /* WaDisableDynamicCreditSharing:kbl */ + if (IS_KBL_REVID(dev_priv, 0, KBL_REVID_B0)) + WA_SET_BIT(GAMT_CHKN_BIT_REG, + GAMT_CHKN_DISABLE_DYNAMIC_CREDIT_SHARING); + + /* WaDisableFenceDestinationToSLM:kbl (pre-prod) */ + if (IS_KBL_REVID(dev_priv, KBL_REVID_A0, KBL_REVID_A0)) + WA_SET_BIT_MASKED(HDC_CHICKEN0, + HDC_FENCE_DEST_SLM_DISABLE); + + /* WaToEnableHwFixForPushConstHWBug:kbl */ + if (IS_KBL_REVID(dev_priv, KBL_REVID_C0, REVID_FOREVER)) + WA_SET_BIT_MASKED(COMMON_SLICE_CHICKEN2, + GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION); + + /* WaDisableGafsUnitClkGating:kbl */ + WA_SET_BIT(GEN7_UCGCTL4, GEN8_EU_GAUNIT_CLOCK_GATE_DISABLE); + + /* WaDisableSbeCacheDispatchPortSharing:kbl */ + WA_SET_BIT_MASKED( + GEN7_HALF_SLICE_CHICKEN1, + GEN7_SBE_SS_CACHE_DISPATCH_PORT_SHARING_DISABLE); + + /* WaInPlaceDecompressionHang:kbl */ + WA_SET_BIT(GEN9_GAMT_ECO_REG_RW_IA, + GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS); + + /* WaDisableLSQCROPERFforOCL:kbl */ + ret = wa_ring_whitelist_reg(engine, GEN8_L3SQCREG4); + if (ret) + return ret; + + return 0; +} + +static int glk_init_workarounds(struct intel_engine_cs *engine) +{ + struct drm_i915_private *dev_priv = engine->i915; + int ret; + + ret = gen9_init_workarounds(engine); + if (ret) + return ret; + + /* WaToEnableHwFixForPushConstHWBug:glk */ + WA_SET_BIT_MASKED(COMMON_SLICE_CHICKEN2, + GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION); + + return 0; +} + +int init_workarounds_ring(struct intel_engine_cs *engine) +{ + struct drm_i915_private *dev_priv = engine->i915; + + WARN_ON(engine->id != RCS); + + dev_priv->workarounds.count = 0; + dev_priv->workarounds.hw_whitelist_count[RCS] = 0; + + if (IS_BROADWELL(dev_priv)) + return bdw_init_workarounds(engine); + + if (IS_CHERRYVIEW(dev_priv)) + return chv_init_workarounds(engine); + + if (IS_SKYLAKE(dev_priv)) + return skl_init_workarounds(engine); + + if (IS_BROXTON(dev_priv)) + return bxt_init_workarounds(engine); + + if (IS_KABYLAKE(dev_priv)) + return kbl_init_workarounds(engine); + + if (IS_GEMINILAKE(dev_priv)) + return glk_init_workarounds(engine); + + return 0; +} + +int intel_ring_workarounds_emit(struct drm_i915_gem_request *req) +{ + struct i915_workarounds *w = &req->i915->workarounds; + u32 *cs; + int ret, i; + + if (w->count == 0) + return 0; + + ret = req->engine->emit_flush(req, EMIT_BARRIER); + if (ret) + return ret; + + cs = intel_ring_begin(req, (w->count * 2 + 2)); + if (IS_ERR(cs)) + return PTR_ERR(cs); + + *cs++ = MI_LOAD_REGISTER_IMM(w->count); + for (i = 0; i < w->count; i++) { + *cs++ = i915_mmio_reg_offset(w->reg[i].addr); + *cs++ = w->reg[i].value; + } + *cs++ = MI_NOOP; + + intel_ring_advance(req, cs); + + ret = req->engine->emit_flush(req, EMIT_BARRIER); + if (ret) + return ret; + + DRM_DEBUG_DRIVER("Number of Workarounds emitted: %d\n", w->count); + + return 0; +} + +#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) +#include "selftests/mock_engine.c" +#endif diff --git a/drivers/gpu/drm/i915/intel_fbc.c b/drivers/gpu/drm/i915/intel_fbc.c index 89fe5c8464df..17d418b23d77 100644 --- a/drivers/gpu/drm/i915/intel_fbc.c +++ b/drivers/gpu/drm/i915/intel_fbc.c @@ -537,8 +537,7 @@ static int find_compression_threshold(struct drm_i915_private *dev_priv, * reserved range size, so it always assumes the maximum (8mb) is used. * If we enable FBC using a CFB on that memory range we'll get FIFO * underruns, even if that range is not reserved by the BIOS. */ - if (IS_BROADWELL(dev_priv) || - IS_SKYLAKE(dev_priv) || IS_KABYLAKE(dev_priv)) + if (IS_BROADWELL(dev_priv) || IS_GEN9_BC(dev_priv)) end = ggtt->stolen_size - 8 * 1024 * 1024; else end = U64_MAX; @@ -628,7 +627,8 @@ err_fb: kfree(compressed_llb); i915_gem_stolen_remove_node(dev_priv, &fbc->compressed_fb); err_llb: - pr_info_once("drm: not enough stolen space for compressed buffer (need %d more bytes), disabling. Hint: you may be able to increase stolen memory size in the BIOS to avoid this.\n", size); + if (drm_mm_initialized(&dev_priv->mm.stolen)) + pr_info_once("drm: not enough stolen space for compressed buffer (need %d more bytes), disabling. Hint: you may be able to increase stolen memory size in the BIOS to avoid this.\n", size); return -ENOSPC; } @@ -743,8 +743,7 @@ static void intel_fbc_update_state_cache(struct intel_crtc *crtc, cache->crtc.mode_flags = crtc_state->base.adjusted_mode.flags; if (IS_HASWELL(dev_priv) || IS_BROADWELL(dev_priv)) - cache->crtc.hsw_bdw_pixel_rate = - ilk_pipe_pixel_rate(crtc_state); + cache->crtc.hsw_bdw_pixel_rate = crtc_state->pixel_rate; cache->plane.rotation = plane_state->base.rotation; cache->plane.src_w = drm_rect_width(&plane_state->base.src) >> 16; @@ -819,7 +818,7 @@ static bool intel_fbc_can_activate(struct intel_crtc *crtc) /* WaFbcExceedCdClockThreshold:hsw,bdw */ if ((IS_HASWELL(dev_priv) || IS_BROADWELL(dev_priv)) && - cache->crtc.hsw_bdw_pixel_rate >= dev_priv->cdclk_freq * 95 / 100) { + cache->crtc.hsw_bdw_pixel_rate >= dev_priv->cdclk.hw.cdclk * 95 / 100) { fbc->no_fbc_reason = "pixel rate is too big"; return false; } diff --git a/drivers/gpu/drm/i915/intel_fbdev.c b/drivers/gpu/drm/i915/intel_fbdev.c index 1b8ba2e77539..82993a87654c 100644 --- a/drivers/gpu/drm/i915/intel_fbdev.c +++ b/drivers/gpu/drm/i915/intel_fbdev.c @@ -45,6 +45,14 @@ #include <drm/i915_drm.h> #include "i915_drv.h" +static void intel_fbdev_invalidate(struct intel_fbdev *ifbdev) +{ + struct drm_i915_gem_object *obj = ifbdev->fb->obj; + unsigned int origin = ifbdev->vma->fence ? ORIGIN_GTT : ORIGIN_CPU; + + intel_fb_obj_invalidate(obj, origin); +} + static int intel_fbdev_set_par(struct fb_info *info) { struct drm_fb_helper *fb_helper = info->par; @@ -53,12 +61,8 @@ static int intel_fbdev_set_par(struct fb_info *info) int ret; ret = drm_fb_helper_set_par(info); - - if (ret == 0) { - mutex_lock(&fb_helper->dev->struct_mutex); - intel_fb_obj_invalidate(ifbdev->fb->obj, ORIGIN_GTT); - mutex_unlock(&fb_helper->dev->struct_mutex); - } + if (ret == 0) + intel_fbdev_invalidate(ifbdev); return ret; } @@ -71,12 +75,8 @@ static int intel_fbdev_blank(int blank, struct fb_info *info) int ret; ret = drm_fb_helper_blank(blank, info); - - if (ret == 0) { - mutex_lock(&fb_helper->dev->struct_mutex); - intel_fb_obj_invalidate(ifbdev->fb->obj, ORIGIN_GTT); - mutex_unlock(&fb_helper->dev->struct_mutex); - } + if (ret == 0) + intel_fbdev_invalidate(ifbdev); return ret; } @@ -87,15 +87,11 @@ static int intel_fbdev_pan_display(struct fb_var_screeninfo *var, struct drm_fb_helper *fb_helper = info->par; struct intel_fbdev *ifbdev = container_of(fb_helper, struct intel_fbdev, helper); - int ret; - ret = drm_fb_helper_pan_display(var, info); - if (ret == 0) { - mutex_lock(&fb_helper->dev->struct_mutex); - intel_fb_obj_invalidate(ifbdev->fb->obj, ORIGIN_GTT); - mutex_unlock(&fb_helper->dev->struct_mutex); - } + ret = drm_fb_helper_pan_display(var, info); + if (ret == 0) + intel_fbdev_invalidate(ifbdev); return ret; } @@ -121,7 +117,7 @@ static int intelfb_alloc(struct drm_fb_helper *helper, struct drm_i915_private *dev_priv = to_i915(dev); struct i915_ggtt *ggtt = &dev_priv->ggtt; struct drm_mode_fb_cmd2 mode_cmd = {}; - struct drm_i915_gem_object *obj = NULL; + struct drm_i915_gem_object *obj; int size, ret; /* we don't do packed 24bpp */ @@ -136,14 +132,13 @@ static int intelfb_alloc(struct drm_fb_helper *helper, mode_cmd.pixel_format = drm_mode_legacy_fb_format(sizes->surface_bpp, sizes->surface_depth); - mutex_lock(&dev->struct_mutex); - size = mode_cmd.pitches[0] * mode_cmd.height; size = PAGE_ALIGN(size); /* If the FB is too big, just don't use it since fbdev is not very * important and we should probably use that space with FBC or other * features. */ + obj = NULL; if (size * 2 < ggtt->stolen_usable_size) obj = i915_gem_object_create_stolen(dev_priv, size); if (obj == NULL) @@ -151,24 +146,22 @@ static int intelfb_alloc(struct drm_fb_helper *helper, if (IS_ERR(obj)) { DRM_ERROR("failed to allocate framebuffer\n"); ret = PTR_ERR(obj); - goto out; + goto err; } - fb = __intel_framebuffer_create(dev, &mode_cmd, obj); + fb = intel_framebuffer_create(obj, &mode_cmd); if (IS_ERR(fb)) { - i915_gem_object_put(obj); ret = PTR_ERR(fb); - goto out; + goto err_obj; } - mutex_unlock(&dev->struct_mutex); - ifbdev->fb = to_intel_framebuffer(fb); return 0; -out: - mutex_unlock(&dev->struct_mutex); +err_obj: + i915_gem_object_put(obj); +err: return ret; } @@ -357,23 +350,23 @@ static bool intel_fb_initial_config(struct drm_fb_helper *fb_helper, bool *enabled, int width, int height) { struct drm_i915_private *dev_priv = to_i915(fb_helper->dev); - unsigned long conn_configured, mask; + unsigned long conn_configured, conn_seq, mask; unsigned int count = min(fb_helper->connector_count, BITS_PER_LONG); int i, j; bool *save_enabled; bool fallback = true; int num_connectors_enabled = 0; int num_connectors_detected = 0; - int pass = 0; save_enabled = kcalloc(count, sizeof(bool), GFP_KERNEL); if (!save_enabled) return false; memcpy(save_enabled, enabled, count); - mask = BIT(count) - 1; + mask = GENMASK(count - 1, 0); conn_configured = 0; retry: + conn_seq = conn_configured; for (i = 0; i < count; i++) { struct drm_fb_helper_connector *fb_conn; struct drm_connector *connector; @@ -387,7 +380,7 @@ retry: if (conn_configured & BIT(i)) continue; - if (pass == 0 && !connector->has_tile) + if (conn_seq == 0 && !connector->has_tile) continue; if (connector->status == connector_status_connected) @@ -498,10 +491,8 @@ retry: conn_configured |= BIT(i); } - if ((conn_configured & mask) != mask) { - pass++; + if ((conn_configured & mask) != mask && conn_configured != conn_seq) goto retry; - } /* * If the BIOS didn't enable everything it could, fall back to have the @@ -631,7 +622,7 @@ static bool intel_fbdev_init_bios(struct drm_device *dev, } cur_size = intel_crtc->config->base.adjusted_mode.crtc_vdisplay; - cur_size = intel_fb_align_height(dev, cur_size, + cur_size = intel_fb_align_height(to_i915(dev), cur_size, fb->base.format->format, fb->base.modifier); cur_size *= fb->base.pitches[0]; @@ -841,11 +832,6 @@ void intel_fbdev_restore_mode(struct drm_device *dev) if (!ifbdev->fb) return; - if (drm_fb_helper_restore_fbdev_mode_unlocked(&ifbdev->helper)) { - DRM_DEBUG("failed to restore crtc mode\n"); - } else { - mutex_lock(&dev->struct_mutex); - intel_fb_obj_invalidate(ifbdev->fb->obj, ORIGIN_GTT); - mutex_unlock(&dev->struct_mutex); - } + if (drm_fb_helper_restore_fbdev_mode_unlocked(&ifbdev->helper) == 0) + intel_fbdev_invalidate(ifbdev); } diff --git a/drivers/gpu/drm/i915/intel_frontbuffer.c b/drivers/gpu/drm/i915/intel_frontbuffer.c index 966de4c7c7a2..fcfc217e754e 100644 --- a/drivers/gpu/drm/i915/intel_frontbuffer.c +++ b/drivers/gpu/drm/i915/intel_frontbuffer.c @@ -114,13 +114,12 @@ static void intel_frontbuffer_flush(struct drm_i915_private *dev_priv, } void __intel_fb_obj_flush(struct drm_i915_gem_object *obj, - bool retire, enum fb_op_origin origin, unsigned int frontbuffer_bits) { struct drm_i915_private *dev_priv = to_i915(obj->base.dev); - if (retire) { + if (origin == ORIGIN_CS) { spin_lock(&dev_priv->fb_tracking.lock); /* Filter out new bits since rendering started. */ frontbuffer_bits &= dev_priv->fb_tracking.busy_bits; diff --git a/drivers/gpu/drm/i915/intel_frontbuffer.h b/drivers/gpu/drm/i915/intel_frontbuffer.h index 7bab41218cf7..63cd9a753a72 100644 --- a/drivers/gpu/drm/i915/intel_frontbuffer.h +++ b/drivers/gpu/drm/i915/intel_frontbuffer.h @@ -38,7 +38,6 @@ void __intel_fb_obj_invalidate(struct drm_i915_gem_object *obj, enum fb_op_origin origin, unsigned int frontbuffer_bits); void __intel_fb_obj_flush(struct drm_i915_gem_object *obj, - bool retire, enum fb_op_origin origin, unsigned int frontbuffer_bits); @@ -69,15 +68,12 @@ static inline bool intel_fb_obj_invalidate(struct drm_i915_gem_object *obj, /** * intel_fb_obj_flush - flush frontbuffer object * @obj: GEM object to flush - * @retire: set when retiring asynchronous rendering * @origin: which operation caused the flush * * This function gets called every time rendering on the given object has - * completed and frontbuffer caching can be started again. If @retire is true - * then any delayed flushes will be unblocked. + * completed and frontbuffer caching can be started again. */ static inline void intel_fb_obj_flush(struct drm_i915_gem_object *obj, - bool retire, enum fb_op_origin origin) { unsigned int frontbuffer_bits; @@ -86,7 +82,7 @@ static inline void intel_fb_obj_flush(struct drm_i915_gem_object *obj, if (!frontbuffer_bits) return; - __intel_fb_obj_flush(obj, retire, origin, frontbuffer_bits); + __intel_fb_obj_flush(obj, origin, frontbuffer_bits); } #endif /* __INTEL_FRONTBUFFER_H__ */ diff --git a/drivers/gpu/drm/i915/intel_guc_loader.c b/drivers/gpu/drm/i915/intel_guc_loader.c index 2f1cf9aea04e..9885f760f2ef 100644 --- a/drivers/gpu/drm/i915/intel_guc_loader.c +++ b/drivers/gpu/drm/i915/intel_guc_loader.c @@ -520,10 +520,6 @@ int intel_guc_setup(struct drm_i915_private *dev_priv) guc_fw->load_status = INTEL_UC_FIRMWARE_SUCCESS; - DRM_DEBUG_DRIVER("GuC fw status: fetch %s, load %s\n", - intel_uc_fw_status_repr(guc_fw->fetch_status), - intel_uc_fw_status_repr(guc_fw->load_status)); - intel_guc_auth_huc(dev_priv); if (i915.enable_guc_submission) { @@ -536,6 +532,11 @@ int intel_guc_setup(struct drm_i915_private *dev_priv) guc_interrupts_capture(dev_priv); } + DRM_INFO("GuC %s (firmware %s [version %u.%u])\n", + i915.enable_guc_submission ? "submission enabled" : "loaded", + guc_fw->path, + guc_fw->major_ver_found, guc_fw->minor_ver_found); + return 0; fail: @@ -713,12 +714,9 @@ fail: DRM_DEBUG_DRIVER("uC fw fetch status FAIL; err %d, fw %p, obj %p\n", err, fw, uc_fw->obj); - mutex_lock(&dev_priv->drm.struct_mutex); - obj = uc_fw->obj; + obj = fetch_and_zero(&uc_fw->obj); if (obj) i915_gem_object_put(obj); - uc_fw->obj = NULL; - mutex_unlock(&dev_priv->drm.struct_mutex); release_firmware(fw); /* OK even if fw is NULL */ uc_fw->fetch_status = INTEL_UC_FIRMWARE_FAIL; @@ -792,16 +790,17 @@ void intel_guc_init(struct drm_i915_private *dev_priv) void intel_guc_fini(struct drm_i915_private *dev_priv) { struct intel_uc_fw *guc_fw = &dev_priv->guc.fw; + struct drm_i915_gem_object *obj; mutex_lock(&dev_priv->drm.struct_mutex); guc_interrupts_release(dev_priv); i915_guc_submission_disable(dev_priv); i915_guc_submission_fini(dev_priv); - - if (guc_fw->obj) - i915_gem_object_put(guc_fw->obj); - guc_fw->obj = NULL; mutex_unlock(&dev_priv->drm.struct_mutex); + obj = fetch_and_zero(&guc_fw->obj); + if (obj) + i915_gem_object_put(obj); + guc_fw->fetch_status = INTEL_UC_FIRMWARE_NONE; } diff --git a/drivers/gpu/drm/i915/intel_hangcheck.c b/drivers/gpu/drm/i915/intel_hangcheck.c index f05971f5586f..dce742243ba6 100644 --- a/drivers/gpu/drm/i915/intel_hangcheck.c +++ b/drivers/gpu/drm/i915/intel_hangcheck.c @@ -480,3 +480,7 @@ void intel_hangcheck_init(struct drm_i915_private *i915) INIT_DELAYED_WORK(&i915->gpu_error.hangcheck_work, i915_hangcheck_elapsed); } + +#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) +#include "selftests/intel_hangcheck.c" +#endif diff --git a/drivers/gpu/drm/i915/intel_hdmi.c b/drivers/gpu/drm/i915/intel_hdmi.c index af16b0fa6b69..048f76d329bb 100644 --- a/drivers/gpu/drm/i915/intel_hdmi.c +++ b/drivers/gpu/drm/i915/intel_hdmi.c @@ -901,12 +901,11 @@ static bool intel_hdmi_get_hw_state(struct intel_encoder *encoder, struct drm_device *dev = encoder->base.dev; struct drm_i915_private *dev_priv = to_i915(dev); struct intel_hdmi *intel_hdmi = enc_to_intel_hdmi(&encoder->base); - enum intel_display_power_domain power_domain; u32 tmp; bool ret; - power_domain = intel_display_port_power_domain(encoder); - if (!intel_display_power_get_if_enabled(dev_priv, power_domain)) + if (!intel_display_power_get_if_enabled(dev_priv, + encoder->power_domain)) return false; ret = false; @@ -926,7 +925,7 @@ static bool intel_hdmi_get_hw_state(struct intel_encoder *encoder, ret = true; out: - intel_display_power_put(dev_priv, power_domain); + intel_display_power_put(dev_priv, encoder->power_domain); return ret; } @@ -1868,14 +1867,7 @@ void intel_hdmi_init_connector(struct intel_digital_port *intel_dig_port, switch (port) { case PORT_B: - /* - * On BXT A0/A1, sw needs to activate DDIA HPD logic and - * interrupts to check the external panel connection. - */ - if (IS_BXT_REVID(dev_priv, 0, BXT_REVID_A1)) - intel_encoder->hpd_pin = HPD_PORT_A; - else - intel_encoder->hpd_pin = HPD_PORT_B; + intel_encoder->hpd_pin = HPD_PORT_B; break; case PORT_C: intel_encoder->hpd_pin = HPD_PORT_C; @@ -1987,6 +1979,7 @@ void intel_hdmi_init(struct drm_i915_private *dev_priv, } intel_encoder->type = INTEL_OUTPUT_HDMI; + intel_encoder->power_domain = intel_port_to_power_domain(port); intel_encoder->port = port; if (IS_CHERRYVIEW(dev_priv)) { if (port == PORT_D) diff --git a/drivers/gpu/drm/i915/intel_hotplug.c b/drivers/gpu/drm/i915/intel_hotplug.c index b62e3f8ad415..0756bdc672b3 100644 --- a/drivers/gpu/drm/i915/intel_hotplug.c +++ b/drivers/gpu/drm/i915/intel_hotplug.c @@ -100,7 +100,6 @@ bool intel_hpd_pin_to_port(enum hpd_pin pin, enum port *port) } #define HPD_STORM_DETECT_PERIOD 1000 -#define HPD_STORM_THRESHOLD 5 #define HPD_STORM_REENABLE_DELAY (2 * 60 * 1000) /** @@ -112,9 +111,13 @@ bool intel_hpd_pin_to_port(enum hpd_pin pin, enum port *port) * storms. Only the pin specific stats and state are changed, the caller is * responsible for further action. * - * @HPD_STORM_THRESHOLD irqs are allowed within @HPD_STORM_DETECT_PERIOD ms, - * otherwise it's considered an irq storm, and the irq state is set to - * @HPD_MARK_DISABLED. + * The number of irqs that are allowed within @HPD_STORM_DETECT_PERIOD is + * stored in @dev_priv->hotplug.hpd_storm_threshold which defaults to + * @HPD_STORM_DEFAULT_THRESHOLD. If this threshold is exceeded, it's + * considered an irq storm and the irq state is set to @HPD_MARK_DISABLED. + * + * The HPD threshold can be controlled through i915_hpd_storm_ctl in debugfs, + * and should only be adjusted for automated hotplug testing. * * Return true if an irq storm was detected on @pin. */ @@ -123,13 +126,15 @@ static bool intel_hpd_irq_storm_detect(struct drm_i915_private *dev_priv, { unsigned long start = dev_priv->hotplug.stats[pin].last_jiffies; unsigned long end = start + msecs_to_jiffies(HPD_STORM_DETECT_PERIOD); + const int threshold = dev_priv->hotplug.hpd_storm_threshold; bool storm = false; if (!time_in_range(jiffies, start, end)) { dev_priv->hotplug.stats[pin].last_jiffies = jiffies; dev_priv->hotplug.stats[pin].count = 0; DRM_DEBUG_KMS("Received HPD interrupt on PIN %d - cnt: 0\n", pin); - } else if (dev_priv->hotplug.stats[pin].count > HPD_STORM_THRESHOLD) { + } else if (dev_priv->hotplug.stats[pin].count > threshold && + threshold) { dev_priv->hotplug.stats[pin].state = HPD_MARK_DISABLED; DRM_DEBUG_KMS("HPD interrupt storm detected on PIN %d\n", pin); storm = true; @@ -219,7 +224,7 @@ static void intel_hpd_irq_storm_reenable_work(struct work_struct *work) } } } - if (dev_priv->display.hpd_irq_setup) + if (dev_priv->display_irqs_enabled && dev_priv->display.hpd_irq_setup) dev_priv->display.hpd_irq_setup(dev_priv); spin_unlock_irq(&dev_priv->irq_lock); @@ -425,7 +430,7 @@ void intel_hpd_irq_handler(struct drm_i915_private *dev_priv, } } - if (storm_detected) + if (storm_detected && dev_priv->display_irqs_enabled) dev_priv->display.hpd_irq_setup(dev_priv); spin_unlock(&dev_priv->irq_lock); @@ -471,10 +476,12 @@ void intel_hpd_init(struct drm_i915_private *dev_priv) * Interrupt setup is already guaranteed to be single-threaded, this is * just to make the assert_spin_locked checks happy. */ - spin_lock_irq(&dev_priv->irq_lock); - if (dev_priv->display.hpd_irq_setup) - dev_priv->display.hpd_irq_setup(dev_priv); - spin_unlock_irq(&dev_priv->irq_lock); + if (dev_priv->display_irqs_enabled && dev_priv->display.hpd_irq_setup) { + spin_lock_irq(&dev_priv->irq_lock); + if (dev_priv->display_irqs_enabled) + dev_priv->display.hpd_irq_setup(dev_priv); + spin_unlock_irq(&dev_priv->irq_lock); + } } static void i915_hpd_poll_init_work(struct work_struct *work) diff --git a/drivers/gpu/drm/i915/intel_huc.c b/drivers/gpu/drm/i915/intel_huc.c index c144609425f6..c28543d220a2 100644 --- a/drivers/gpu/drm/i915/intel_huc.c +++ b/drivers/gpu/drm/i915/intel_huc.c @@ -274,12 +274,11 @@ fail: void intel_huc_fini(struct drm_i915_private *dev_priv) { struct intel_uc_fw *huc_fw = &dev_priv->huc.fw; + struct drm_i915_gem_object *obj; - mutex_lock(&dev_priv->drm.struct_mutex); - if (huc_fw->obj) - i915_gem_object_put(huc_fw->obj); - huc_fw->obj = NULL; - mutex_unlock(&dev_priv->drm.struct_mutex); + obj = fetch_and_zero(&huc_fw->obj); + if (obj) + i915_gem_object_put(obj); huc_fw->fetch_status = INTEL_UC_FIRMWARE_NONE; } diff --git a/drivers/gpu/drm/i915/intel_i2c.c b/drivers/gpu/drm/i915/intel_i2c.c index bce1ba80f277..b6401e8f1bd6 100644 --- a/drivers/gpu/drm/i915/intel_i2c.c +++ b/drivers/gpu/drm/i915/intel_i2c.c @@ -74,7 +74,7 @@ static const struct gmbus_pin *get_gmbus_pin(struct drm_i915_private *dev_priv, { if (IS_GEN9_LP(dev_priv)) return &gmbus_pins_bxt[pin]; - else if (IS_SKYLAKE(dev_priv) || IS_KABYLAKE(dev_priv)) + else if (IS_GEN9_BC(dev_priv)) return &gmbus_pins_skl[pin]; else if (IS_BROADWELL(dev_priv)) return &gmbus_pins_bdw[pin]; @@ -89,7 +89,7 @@ bool intel_gmbus_is_valid_pin(struct drm_i915_private *dev_priv, if (IS_GEN9_LP(dev_priv)) size = ARRAY_SIZE(gmbus_pins_bxt); - else if (IS_SKYLAKE(dev_priv) || IS_KABYLAKE(dev_priv)) + else if (IS_GEN9_BC(dev_priv)) size = ARRAY_SIZE(gmbus_pins_skl); else if (IS_BROADWELL(dev_priv)) size = ARRAY_SIZE(gmbus_pins_bdw); diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index ebf8023d21e6..1c6c71673bfa 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -190,13 +190,7 @@ #define CTX_R_PWR_CLK_STATE 0x42 #define CTX_GPGPU_CSR_BASE_ADDRESS 0x44 -#define GEN8_CTX_VALID (1<<0) -#define GEN8_CTX_FORCE_PD_RESTORE (1<<1) -#define GEN8_CTX_FORCE_RESTORE (1<<2) -#define GEN8_CTX_L3LLC_COHERENT (1<<5) -#define GEN8_CTX_PRIVILEGE (1<<8) - -#define ASSIGN_CTX_REG(reg_state, pos, reg, val) do { \ +#define CTX_REG(reg_state, pos, reg, val) do { \ (reg_state)[(pos)+0] = i915_mmio_reg_offset(reg); \ (reg_state)[(pos)+1] = (val); \ } while (0) @@ -212,14 +206,6 @@ reg_state[CTX_PDP0_LDW + 1] = lower_32_bits(px_dma(&ppgtt->pml4)); \ } while (0) -enum { - FAULT_AND_HANG = 0, - FAULT_AND_HALT, /* Debug only */ - FAULT_AND_STREAM, - FAULT_AND_CONTINUE /* Unsupported */ -}; -#define GEN8_CTX_ID_SHIFT 32 -#define GEN8_CTX_ID_WIDTH 21 #define GEN8_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT 0x17 #define GEN9_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT 0x26 @@ -267,30 +253,6 @@ int intel_sanitize_enable_execlists(struct drm_i915_private *dev_priv, int enabl return 0; } -static void -logical_ring_init_platform_invariants(struct intel_engine_cs *engine) -{ - struct drm_i915_private *dev_priv = engine->i915; - - engine->disable_lite_restore_wa = - IS_BXT_REVID(dev_priv, 0, BXT_REVID_A1) && - (engine->id == VCS || engine->id == VCS2); - - engine->ctx_desc_template = GEN8_CTX_VALID; - if (IS_GEN8(dev_priv)) - engine->ctx_desc_template |= GEN8_CTX_L3LLC_COHERENT; - engine->ctx_desc_template |= GEN8_CTX_PRIVILEGE; - - /* TODO: WaDisableLiteRestore when we start using semaphore - * signalling between Command Streamers */ - /* ring->ctx_desc_template |= GEN8_CTX_FORCE_RESTORE; */ - - /* WaEnableForceRestoreInCtxtDescForVCS:skl */ - /* WaEnableForceRestoreInCtxtDescForVCS:bxt */ - if (engine->disable_lite_restore_wa) - engine->ctx_desc_template |= GEN8_CTX_FORCE_RESTORE; -} - /** * intel_lr_context_descriptor_update() - calculate & cache the descriptor * descriptor for a pinned context @@ -304,7 +266,7 @@ logical_ring_init_platform_invariants(struct intel_engine_cs *engine) * * This is what a descriptor looks like, from LSB to MSB:: * - * bits 0-11: flags, GEN8_CTX_* (cached in ctx_desc_template) + * bits 0-11: flags, GEN8_CTX_* (cached in ctx->desc_template) * bits 12-31: LRCA, GTT address of (the HWSP of) this context * bits 32-52: ctx ID, a globally unique tag * bits 53-54: mbz, reserved for use by hardware @@ -319,8 +281,7 @@ intel_lr_context_descriptor_update(struct i915_gem_context *ctx, BUILD_BUG_ON(MAX_CONTEXT_HW_ID > (1<<GEN8_CTX_ID_WIDTH)); - desc = ctx->desc_template; /* bits 3-4 */ - desc |= engine->ctx_desc_template; /* bits 0-11 */ + desc = ctx->desc_template; /* bits 0-11 */ desc |= i915_ggtt_offset(ce->state) + LRC_PPHWSP_PN * PAGE_SIZE; /* bits 12-31 */ desc |= (u64)ctx->hw_id << GEN8_CTX_ID_SHIFT; /* bits 32-52 */ @@ -364,6 +325,7 @@ static u64 execlists_update_context(struct drm_i915_gem_request *rq) rq->ctx->ppgtt ?: rq->i915->mm.aliasing_ppgtt; u32 *reg_state = ce->lrc_reg_state; + GEM_BUG_ON(!IS_ALIGNED(rq->tail, 8)); reg_state[CTX_RING_TAIL+1] = rq->tail; /* True 32b PPGTT with dynamic page allocation: update PDP @@ -371,7 +333,7 @@ static u64 execlists_update_context(struct drm_i915_gem_request *rq) * PML4 is allocated during ppgtt init, so this is not needed * in 48-bit mode. */ - if (ppgtt && !USES_FULL_48BIT_PPGTT(ppgtt->base.dev)) + if (ppgtt && !i915_vm_is_48bit(&ppgtt->base)) execlists_update_context_pdps(ppgtt, reg_state); return ce->lrc_desc; @@ -385,17 +347,20 @@ static void execlists_submit_ports(struct intel_engine_cs *engine) dev_priv->regs + i915_mmio_reg_offset(RING_ELSP(engine)); u64 desc[2]; + GEM_BUG_ON(port[0].count > 1); if (!port[0].count) execlists_context_status_change(port[0].request, INTEL_CONTEXT_SCHEDULE_IN); desc[0] = execlists_update_context(port[0].request); - engine->preempt_wa = port[0].count++; /* bdw only? fixed on skl? */ + GEM_DEBUG_EXEC(port[0].context_id = upper_32_bits(desc[0])); + port[0].count++; if (port[1].request) { GEM_BUG_ON(port[1].count); execlists_context_status_change(port[1].request, INTEL_CONTEXT_SCHEDULE_IN); desc[1] = execlists_update_context(port[1].request); + GEM_DEBUG_EXEC(port[1].context_id = upper_32_bits(desc[1])); port[1].count = 1; } else { desc[1] = 0; @@ -514,6 +479,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine) cursor->priotree.priority = INT_MAX; __i915_gem_request_submit(cursor); + trace_i915_gem_request_in(cursor, port - engine->execlist_port); last = cursor; submit = true; } @@ -547,22 +513,24 @@ bool intel_execlists_idle(struct drm_i915_private *dev_priv) if (!i915.enable_execlists) return true; - for_each_engine(engine, dev_priv, id) + for_each_engine(engine, dev_priv, id) { + /* Interrupt/tasklet pending? */ + if (test_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted)) + return false; + + /* Both ports drained, no more ELSP submission? */ if (!execlists_elsp_idle(engine)) return false; + } return true; } -static bool execlists_elsp_ready(struct intel_engine_cs *engine) +static bool execlists_elsp_ready(const struct intel_engine_cs *engine) { - int port; + const struct execlist_port *port = engine->execlist_port; - port = 1; /* wait for a free slot */ - if (engine->disable_lite_restore_wa || engine->preempt_wa) - port = 0; /* wait for GPU to be idle before continuing */ - - return !engine->execlist_port[port].request; + return port[0].count + port[1].count < 2; } /* @@ -577,7 +545,7 @@ static void intel_lrc_irq_handler(unsigned long data) intel_uncore_forcewake_get(dev_priv, engine->fw_domains); - if (!execlists_elsp_idle(engine)) { + while (test_and_clear_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted)) { u32 __iomem *csb_mmio = dev_priv->regs + i915_mmio_reg_offset(RING_CONTEXT_STATUS_PTR(engine)); u32 __iomem *buf = @@ -587,31 +555,55 @@ static void intel_lrc_irq_handler(unsigned long data) csb = readl(csb_mmio); head = GEN8_CSB_READ_PTR(csb); tail = GEN8_CSB_WRITE_PTR(csb); + if (head == tail) + break; + if (tail < head) tail += GEN8_CSB_ENTRIES; - while (head < tail) { + do { unsigned int idx = ++head % GEN8_CSB_ENTRIES; unsigned int status = readl(buf + 2 * idx); + /* We are flying near dragons again. + * + * We hold a reference to the request in execlist_port[] + * but no more than that. We are operating in softirq + * context and so cannot hold any mutex or sleep. That + * prevents us stopping the requests we are processing + * in port[] from being retired simultaneously (the + * breadcrumb will be complete before we see the + * context-switch). As we only hold the reference to the + * request, any pointer chasing underneath the request + * is subject to a potential use-after-free. Thus we + * store all of the bookkeeping within port[] as + * required, and avoid using unguarded pointers beneath + * request itself. The same applies to the atomic + * status notifier. + */ + if (!(status & GEN8_CTX_STATUS_COMPLETED_MASK)) continue; + /* Check the context/desc id for this event matches */ + GEM_DEBUG_BUG_ON(readl(buf + 2 * idx + 1) != + port[0].context_id); + GEM_BUG_ON(port[0].count == 0); if (--port[0].count == 0) { GEM_BUG_ON(status & GEN8_CTX_STATUS_PREEMPTED); + GEM_BUG_ON(!i915_gem_request_completed(port[0].request)); execlists_context_status_change(port[0].request, INTEL_CONTEXT_SCHEDULE_OUT); + trace_i915_gem_request_out(port[0].request); i915_gem_request_put(port[0].request); port[0] = port[1]; memset(&port[1], 0, sizeof(port[1])); - - engine->preempt_wa = false; } GEM_BUG_ON(port[0].count == 0 && !(status & GEN8_CTX_STATUS_ACTIVE_IDLE)); - } + } while (head < tail); writel(_MASKED_FIELD(GEN8_CSB_READ_PTR_MASK, GEN8_CSB_WRITE_PTR(csb) << 8), @@ -658,10 +650,11 @@ static void execlists_submit_request(struct drm_i915_gem_request *request) /* Will be called from irq-context when using foreign fences. */ spin_lock_irqsave(&engine->timeline->lock, flags); - if (insert_request(&request->priotree, &engine->execlist_queue)) + if (insert_request(&request->priotree, &engine->execlist_queue)) { engine->execlist_first = &request->priotree.node; - if (execlists_elsp_idle(engine)) - tasklet_hi_schedule(&engine->irq_tasklet); + if (execlists_elsp_ready(engine)) + tasklet_hi_schedule(&engine->irq_tasklet); + } spin_unlock_irqrestore(&engine->timeline->lock, flags); } @@ -784,11 +777,9 @@ static int execlists_context_pin(struct intel_engine_cs *engine, } GEM_BUG_ON(!ce->state); - flags = PIN_GLOBAL; + flags = PIN_GLOBAL | PIN_HIGH; if (ctx->ggtt_offset_bias) flags |= PIN_OFFSET_BIAS | ctx->ggtt_offset_bias; - if (i915_gem_context_is_kernel(ctx)) - flags |= PIN_HIGH; ret = i915_vma_pin(ce->state, 0, GEN8_LR_CONTEXT_ALIGN, flags); if (ret) @@ -847,6 +838,7 @@ static int execlists_request_alloc(struct drm_i915_gem_request *request) { struct intel_engine_cs *engine = request->engine; struct intel_context *ce = &request->ctx->engine[engine->id]; + u32 *cs; int ret; GEM_BUG_ON(!ce->pin_count); @@ -871,9 +863,11 @@ static int execlists_request_alloc(struct drm_i915_gem_request *request) goto err; } - ret = intel_ring_begin(request, 0); - if (ret) + cs = intel_ring_begin(request, 0); + if (IS_ERR(cs)) { + ret = PTR_ERR(cs); goto err_unreserve; + } if (!ce->initialised) { ret = engine->init_context(request); @@ -900,51 +894,6 @@ err: return ret; } -static int intel_logical_ring_workarounds_emit(struct drm_i915_gem_request *req) -{ - int ret, i; - struct intel_ring *ring = req->ring; - struct i915_workarounds *w = &req->i915->workarounds; - - if (w->count == 0) - return 0; - - ret = req->engine->emit_flush(req, EMIT_BARRIER); - if (ret) - return ret; - - ret = intel_ring_begin(req, w->count * 2 + 2); - if (ret) - return ret; - - intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(w->count)); - for (i = 0; i < w->count; i++) { - intel_ring_emit_reg(ring, w->reg[i].addr); - intel_ring_emit(ring, w->reg[i].value); - } - intel_ring_emit(ring, MI_NOOP); - - intel_ring_advance(ring); - - ret = req->engine->emit_flush(req, EMIT_BARRIER); - if (ret) - return ret; - - return 0; -} - -#define wa_ctx_emit(batch, index, cmd) \ - do { \ - int __index = (index)++; \ - if (WARN_ON(__index >= (PAGE_SIZE / sizeof(uint32_t)))) { \ - return -ENOSPC; \ - } \ - batch[__index] = (cmd); \ - } while (0) - -#define wa_ctx_emit_reg(batch, index, reg) \ - wa_ctx_emit((batch), (index), i915_mmio_reg_offset(reg)) - /* * In this WA we need to set GEN8_L3SQCREG4[21:21] and reset it after * PIPE_CONTROL instruction. This is required for the flush to happen correctly @@ -961,56 +910,29 @@ static int intel_logical_ring_workarounds_emit(struct drm_i915_gem_request *req) * This WA is also required for Gen9 so extracting as a function avoids * code duplication. */ -static inline int gen8_emit_flush_coherentl3_wa(struct intel_engine_cs *engine, - uint32_t *batch, - uint32_t index) +static u32 * +gen8_emit_flush_coherentl3_wa(struct intel_engine_cs *engine, u32 *batch) { - uint32_t l3sqc4_flush = (0x40400000 | GEN8_LQSC_FLUSH_COHERENT_LINES); - - wa_ctx_emit(batch, index, (MI_STORE_REGISTER_MEM_GEN8 | - MI_SRM_LRM_GLOBAL_GTT)); - wa_ctx_emit_reg(batch, index, GEN8_L3SQCREG4); - wa_ctx_emit(batch, index, i915_ggtt_offset(engine->scratch) + 256); - wa_ctx_emit(batch, index, 0); - - wa_ctx_emit(batch, index, MI_LOAD_REGISTER_IMM(1)); - wa_ctx_emit_reg(batch, index, GEN8_L3SQCREG4); - wa_ctx_emit(batch, index, l3sqc4_flush); - - wa_ctx_emit(batch, index, GFX_OP_PIPE_CONTROL(6)); - wa_ctx_emit(batch, index, (PIPE_CONTROL_CS_STALL | - PIPE_CONTROL_DC_FLUSH_ENABLE)); - wa_ctx_emit(batch, index, 0); - wa_ctx_emit(batch, index, 0); - wa_ctx_emit(batch, index, 0); - wa_ctx_emit(batch, index, 0); - - wa_ctx_emit(batch, index, (MI_LOAD_REGISTER_MEM_GEN8 | - MI_SRM_LRM_GLOBAL_GTT)); - wa_ctx_emit_reg(batch, index, GEN8_L3SQCREG4); - wa_ctx_emit(batch, index, i915_ggtt_offset(engine->scratch) + 256); - wa_ctx_emit(batch, index, 0); - - return index; -} + *batch++ = MI_STORE_REGISTER_MEM_GEN8 | MI_SRM_LRM_GLOBAL_GTT; + *batch++ = i915_mmio_reg_offset(GEN8_L3SQCREG4); + *batch++ = i915_ggtt_offset(engine->scratch) + 256; + *batch++ = 0; -static inline uint32_t wa_ctx_start(struct i915_wa_ctx_bb *wa_ctx, - uint32_t offset, - uint32_t start_alignment) -{ - return wa_ctx->offset = ALIGN(offset, start_alignment); -} + *batch++ = MI_LOAD_REGISTER_IMM(1); + *batch++ = i915_mmio_reg_offset(GEN8_L3SQCREG4); + *batch++ = 0x40400000 | GEN8_LQSC_FLUSH_COHERENT_LINES; -static inline int wa_ctx_end(struct i915_wa_ctx_bb *wa_ctx, - uint32_t offset, - uint32_t size_alignment) -{ - wa_ctx->size = offset - wa_ctx->offset; + batch = gen8_emit_pipe_control(batch, + PIPE_CONTROL_CS_STALL | + PIPE_CONTROL_DC_FLUSH_ENABLE, + 0); - WARN(wa_ctx->size % size_alignment, - "wa_ctx_bb failed sanity checks: size %d is not aligned to %d\n", - wa_ctx->size, size_alignment); - return 0; + *batch++ = MI_LOAD_REGISTER_MEM_GEN8 | MI_SRM_LRM_GLOBAL_GTT; + *batch++ = i915_mmio_reg_offset(GEN8_L3SQCREG4); + *batch++ = i915_ggtt_offset(engine->scratch) + 256; + *batch++ = 0; + + return batch; } /* @@ -1028,42 +950,28 @@ static inline int wa_ctx_end(struct i915_wa_ctx_bb *wa_ctx, * MI_BATCH_BUFFER_END will be added to perctx batch and both of them together * makes a complete batch buffer. */ -static int gen8_init_indirectctx_bb(struct intel_engine_cs *engine, - struct i915_wa_ctx_bb *wa_ctx, - uint32_t *batch, - uint32_t *offset) +static u32 *gen8_init_indirectctx_bb(struct intel_engine_cs *engine, u32 *batch) { - uint32_t scratch_addr; - uint32_t index = wa_ctx_start(wa_ctx, *offset, CACHELINE_DWORDS); - /* WaDisableCtxRestoreArbitration:bdw,chv */ - wa_ctx_emit(batch, index, MI_ARB_ON_OFF | MI_ARB_DISABLE); + *batch++ = MI_ARB_ON_OFF | MI_ARB_DISABLE; /* WaFlushCoherentL3CacheLinesAtContextSwitch:bdw */ - if (IS_BROADWELL(engine->i915)) { - int rc = gen8_emit_flush_coherentl3_wa(engine, batch, index); - if (rc < 0) - return rc; - index = rc; - } + if (IS_BROADWELL(engine->i915)) + batch = gen8_emit_flush_coherentl3_wa(engine, batch); /* WaClearSlmSpaceAtContextSwitch:bdw,chv */ /* Actual scratch location is at 128 bytes offset */ - scratch_addr = i915_ggtt_offset(engine->scratch) + 2 * CACHELINE_BYTES; - - wa_ctx_emit(batch, index, GFX_OP_PIPE_CONTROL(6)); - wa_ctx_emit(batch, index, (PIPE_CONTROL_FLUSH_L3 | - PIPE_CONTROL_GLOBAL_GTT_IVB | - PIPE_CONTROL_CS_STALL | - PIPE_CONTROL_QW_WRITE)); - wa_ctx_emit(batch, index, scratch_addr); - wa_ctx_emit(batch, index, 0); - wa_ctx_emit(batch, index, 0); - wa_ctx_emit(batch, index, 0); + batch = gen8_emit_pipe_control(batch, + PIPE_CONTROL_FLUSH_L3 | + PIPE_CONTROL_GLOBAL_GTT_IVB | + PIPE_CONTROL_CS_STALL | + PIPE_CONTROL_QW_WRITE, + i915_ggtt_offset(engine->scratch) + + 2 * CACHELINE_BYTES); /* Pad to end of cacheline */ - while (index % CACHELINE_DWORDS) - wa_ctx_emit(batch, index, MI_NOOP); + while ((unsigned long)batch % CACHELINE_BYTES) + *batch++ = MI_NOOP; /* * MI_BATCH_BUFFER_END is not required in Indirect ctx BB because @@ -1071,7 +979,7 @@ static int gen8_init_indirectctx_bb(struct intel_engine_cs *engine, * in the register CTX_RCS_INDIRECT_CTX */ - return wa_ctx_end(wa_ctx, *offset = index, CACHELINE_DWORDS); + return batch; } /* @@ -1083,65 +991,40 @@ static int gen8_init_indirectctx_bb(struct intel_engine_cs *engine, * This batch is terminated with MI_BATCH_BUFFER_END and so we need not add padding * to align it with cacheline as padding after MI_BATCH_BUFFER_END is redundant. */ -static int gen8_init_perctx_bb(struct intel_engine_cs *engine, - struct i915_wa_ctx_bb *wa_ctx, - uint32_t *batch, - uint32_t *offset) +static u32 *gen8_init_perctx_bb(struct intel_engine_cs *engine, u32 *batch) { - uint32_t index = wa_ctx_start(wa_ctx, *offset, CACHELINE_DWORDS); - /* WaDisableCtxRestoreArbitration:bdw,chv */ - wa_ctx_emit(batch, index, MI_ARB_ON_OFF | MI_ARB_ENABLE); - - wa_ctx_emit(batch, index, MI_BATCH_BUFFER_END); + *batch++ = MI_ARB_ON_OFF | MI_ARB_ENABLE; + *batch++ = MI_BATCH_BUFFER_END; - return wa_ctx_end(wa_ctx, *offset = index, 1); + return batch; } -static int gen9_init_indirectctx_bb(struct intel_engine_cs *engine, - struct i915_wa_ctx_bb *wa_ctx, - uint32_t *batch, - uint32_t *offset) +static u32 *gen9_init_indirectctx_bb(struct intel_engine_cs *engine, u32 *batch) { - int ret; - struct drm_i915_private *dev_priv = engine->i915; - uint32_t index = wa_ctx_start(wa_ctx, *offset, CACHELINE_DWORDS); - - /* WaDisableCtxRestoreArbitration:bxt */ - if (IS_BXT_REVID(dev_priv, 0, BXT_REVID_A1)) - wa_ctx_emit(batch, index, MI_ARB_ON_OFF | MI_ARB_DISABLE); - - /* WaFlushCoherentL3CacheLinesAtContextSwitch:skl,bxt */ - ret = gen8_emit_flush_coherentl3_wa(engine, batch, index); - if (ret < 0) - return ret; - index = ret; + /* WaFlushCoherentL3CacheLinesAtContextSwitch:skl,bxt,glk */ + batch = gen8_emit_flush_coherentl3_wa(engine, batch); - /* WaDisableGatherAtSetShaderCommonSlice:skl,bxt,kbl */ - wa_ctx_emit(batch, index, MI_LOAD_REGISTER_IMM(1)); - wa_ctx_emit_reg(batch, index, COMMON_SLICE_CHICKEN2); - wa_ctx_emit(batch, index, _MASKED_BIT_DISABLE( - GEN9_DISABLE_GATHER_AT_SET_SHADER_COMMON_SLICE)); - wa_ctx_emit(batch, index, MI_NOOP); + /* WaDisableGatherAtSetShaderCommonSlice:skl,bxt,kbl,glk */ + *batch++ = MI_LOAD_REGISTER_IMM(1); + *batch++ = i915_mmio_reg_offset(COMMON_SLICE_CHICKEN2); + *batch++ = _MASKED_BIT_DISABLE( + GEN9_DISABLE_GATHER_AT_SET_SHADER_COMMON_SLICE); + *batch++ = MI_NOOP; /* WaClearSlmSpaceAtContextSwitch:kbl */ /* Actual scratch location is at 128 bytes offset */ - if (IS_KBL_REVID(dev_priv, 0, KBL_REVID_A0)) { - u32 scratch_addr = - i915_ggtt_offset(engine->scratch) + 2 * CACHELINE_BYTES; - - wa_ctx_emit(batch, index, GFX_OP_PIPE_CONTROL(6)); - wa_ctx_emit(batch, index, (PIPE_CONTROL_FLUSH_L3 | - PIPE_CONTROL_GLOBAL_GTT_IVB | - PIPE_CONTROL_CS_STALL | - PIPE_CONTROL_QW_WRITE)); - wa_ctx_emit(batch, index, scratch_addr); - wa_ctx_emit(batch, index, 0); - wa_ctx_emit(batch, index, 0); - wa_ctx_emit(batch, index, 0); + if (IS_KBL_REVID(engine->i915, 0, KBL_REVID_A0)) { + batch = gen8_emit_pipe_control(batch, + PIPE_CONTROL_FLUSH_L3 | + PIPE_CONTROL_GLOBAL_GTT_IVB | + PIPE_CONTROL_CS_STALL | + PIPE_CONTROL_QW_WRITE, + i915_ggtt_offset(engine->scratch) + + 2 * CACHELINE_BYTES); } - /* WaMediaPoolStateCmdInWABB:bxt */ + /* WaMediaPoolStateCmdInWABB:bxt,glk */ if (HAS_POOLED_EU(engine->i915)) { /* * EU pool configuration is setup along with golden context @@ -1156,73 +1039,37 @@ static int gen9_init_indirectctx_bb(struct intel_engine_cs *engine, * possible configurations, to avoid duplication they are * not shown here again. */ - u32 eu_pool_config = 0x00777000; - wa_ctx_emit(batch, index, GEN9_MEDIA_POOL_STATE); - wa_ctx_emit(batch, index, GEN9_MEDIA_POOL_ENABLE); - wa_ctx_emit(batch, index, eu_pool_config); - wa_ctx_emit(batch, index, 0); - wa_ctx_emit(batch, index, 0); - wa_ctx_emit(batch, index, 0); + *batch++ = GEN9_MEDIA_POOL_STATE; + *batch++ = GEN9_MEDIA_POOL_ENABLE; + *batch++ = 0x00777000; + *batch++ = 0; + *batch++ = 0; + *batch++ = 0; } /* Pad to end of cacheline */ - while (index % CACHELINE_DWORDS) - wa_ctx_emit(batch, index, MI_NOOP); + while ((unsigned long)batch % CACHELINE_BYTES) + *batch++ = MI_NOOP; - return wa_ctx_end(wa_ctx, *offset = index, CACHELINE_DWORDS); + return batch; } -static int gen9_init_perctx_bb(struct intel_engine_cs *engine, - struct i915_wa_ctx_bb *wa_ctx, - uint32_t *batch, - uint32_t *offset) +static u32 *gen9_init_perctx_bb(struct intel_engine_cs *engine, u32 *batch) { - uint32_t index = wa_ctx_start(wa_ctx, *offset, CACHELINE_DWORDS); - - /* WaSetDisablePixMaskCammingAndRhwoInCommonSliceChicken:bxt */ - if (IS_BXT_REVID(engine->i915, 0, BXT_REVID_A1)) { - wa_ctx_emit(batch, index, MI_LOAD_REGISTER_IMM(1)); - wa_ctx_emit_reg(batch, index, GEN9_SLICE_COMMON_ECO_CHICKEN0); - wa_ctx_emit(batch, index, - _MASKED_BIT_ENABLE(DISABLE_PIXEL_MASK_CAMMING)); - wa_ctx_emit(batch, index, MI_NOOP); - } - - /* WaClearTdlStateAckDirtyBits:bxt */ - if (IS_BXT_REVID(engine->i915, 0, BXT_REVID_B0)) { - wa_ctx_emit(batch, index, MI_LOAD_REGISTER_IMM(4)); - - wa_ctx_emit_reg(batch, index, GEN8_STATE_ACK); - wa_ctx_emit(batch, index, _MASKED_BIT_DISABLE(GEN9_SUBSLICE_TDL_ACK_BITS)); - - wa_ctx_emit_reg(batch, index, GEN9_STATE_ACK_SLICE1); - wa_ctx_emit(batch, index, _MASKED_BIT_DISABLE(GEN9_SUBSLICE_TDL_ACK_BITS)); - - wa_ctx_emit_reg(batch, index, GEN9_STATE_ACK_SLICE2); - wa_ctx_emit(batch, index, _MASKED_BIT_DISABLE(GEN9_SUBSLICE_TDL_ACK_BITS)); - - wa_ctx_emit_reg(batch, index, GEN7_ROW_CHICKEN2); - /* dummy write to CS, mask bits are 0 to ensure the register is not modified */ - wa_ctx_emit(batch, index, 0x0); - wa_ctx_emit(batch, index, MI_NOOP); - } - - /* WaDisableCtxRestoreArbitration:bxt */ - if (IS_BXT_REVID(engine->i915, 0, BXT_REVID_A1)) - wa_ctx_emit(batch, index, MI_ARB_ON_OFF | MI_ARB_ENABLE); + *batch++ = MI_BATCH_BUFFER_END; - wa_ctx_emit(batch, index, MI_BATCH_BUFFER_END); - - return wa_ctx_end(wa_ctx, *offset = index, 1); + return batch; } -static int lrc_setup_wa_ctx_obj(struct intel_engine_cs *engine, u32 size) +#define CTX_WA_BB_OBJ_SIZE (PAGE_SIZE) + +static int lrc_setup_wa_ctx(struct intel_engine_cs *engine) { struct drm_i915_gem_object *obj; struct i915_vma *vma; int err; - obj = i915_gem_object_create(engine->i915, PAGE_ALIGN(size)); + obj = i915_gem_object_create(engine->i915, CTX_WA_BB_OBJ_SIZE); if (IS_ERR(obj)) return PTR_ERR(obj); @@ -1244,82 +1091,79 @@ err: return err; } -static void lrc_destroy_wa_ctx_obj(struct intel_engine_cs *engine) +static void lrc_destroy_wa_ctx(struct intel_engine_cs *engine) { i915_vma_unpin_and_release(&engine->wa_ctx.vma); } +typedef u32 *(*wa_bb_func_t)(struct intel_engine_cs *engine, u32 *batch); + static int intel_init_workaround_bb(struct intel_engine_cs *engine) { struct i915_ctx_workarounds *wa_ctx = &engine->wa_ctx; - uint32_t *batch; - uint32_t offset; + struct i915_wa_ctx_bb *wa_bb[2] = { &wa_ctx->indirect_ctx, + &wa_ctx->per_ctx }; + wa_bb_func_t wa_bb_fn[2]; struct page *page; + void *batch, *batch_ptr; + unsigned int i; int ret; - WARN_ON(engine->id != RCS); + if (WARN_ON(engine->id != RCS || !engine->scratch)) + return -EINVAL; - /* update this when WA for higher Gen are added */ - if (INTEL_GEN(engine->i915) > 9) { - DRM_ERROR("WA batch buffer is not initialized for Gen%d\n", - INTEL_GEN(engine->i915)); + switch (INTEL_GEN(engine->i915)) { + case 9: + wa_bb_fn[0] = gen9_init_indirectctx_bb; + wa_bb_fn[1] = gen9_init_perctx_bb; + break; + case 8: + wa_bb_fn[0] = gen8_init_indirectctx_bb; + wa_bb_fn[1] = gen8_init_perctx_bb; + break; + default: + MISSING_CASE(INTEL_GEN(engine->i915)); return 0; } - /* some WA perform writes to scratch page, ensure it is valid */ - if (!engine->scratch) { - DRM_ERROR("scratch page not allocated for %s\n", engine->name); - return -EINVAL; - } - - ret = lrc_setup_wa_ctx_obj(engine, PAGE_SIZE); + ret = lrc_setup_wa_ctx(engine); if (ret) { DRM_DEBUG_DRIVER("Failed to setup context WA page: %d\n", ret); return ret; } page = i915_gem_object_get_dirty_page(wa_ctx->vma->obj, 0); - batch = kmap_atomic(page); - offset = 0; - - if (IS_GEN8(engine->i915)) { - ret = gen8_init_indirectctx_bb(engine, - &wa_ctx->indirect_ctx, - batch, - &offset); - if (ret) - goto out; + batch = batch_ptr = kmap_atomic(page); - ret = gen8_init_perctx_bb(engine, - &wa_ctx->per_ctx, - batch, - &offset); - if (ret) - goto out; - } else if (IS_GEN9(engine->i915)) { - ret = gen9_init_indirectctx_bb(engine, - &wa_ctx->indirect_ctx, - batch, - &offset); - if (ret) - goto out; - - ret = gen9_init_perctx_bb(engine, - &wa_ctx->per_ctx, - batch, - &offset); - if (ret) - goto out; + /* + * Emit the two workaround batch buffers, recording the offset from the + * start of the workaround batch buffer object for each and their + * respective sizes. + */ + for (i = 0; i < ARRAY_SIZE(wa_bb_fn); i++) { + wa_bb[i]->offset = batch_ptr - batch; + if (WARN_ON(!IS_ALIGNED(wa_bb[i]->offset, CACHELINE_BYTES))) { + ret = -EINVAL; + break; + } + batch_ptr = wa_bb_fn[i](engine, batch_ptr); + wa_bb[i]->size = batch_ptr - (batch + wa_bb[i]->offset); } -out: + BUG_ON(batch_ptr - batch > CTX_WA_BB_OBJ_SIZE); + kunmap_atomic(batch); if (ret) - lrc_destroy_wa_ctx_obj(engine); + lrc_destroy_wa_ctx(engine); return ret; } +static u32 port_seqno(struct execlist_port *port) +{ + return port->request ? port->request->global_seqno : 0; +} + static int gen8_init_common_ring(struct intel_engine_cs *engine) { struct drm_i915_private *dev_priv = engine->i915; @@ -1343,7 +1187,12 @@ static int gen8_init_common_ring(struct intel_engine_cs *engine) DRM_DEBUG_DRIVER("Execlists enabled for %s\n", engine->name); /* After a GPU reset, we may have requests to replay */ + clear_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted); if (!execlists_elsp_idle(engine)) { + DRM_DEBUG_DRIVER("Restarting %s from requests [0x%x, 0x%x]\n", + engine->name, + port_seqno(&engine->execlist_port[0]), + port_seqno(&engine->execlist_port[1])); engine->execlist_port[0].count = 0; engine->execlist_port[1].count = 0; execlists_submit_ports(engine); @@ -1388,7 +1237,6 @@ static int gen9_init_render_ring(struct intel_engine_cs *engine) static void reset_common_ring(struct intel_engine_cs *engine, struct drm_i915_gem_request *request) { - struct drm_i915_private *dev_priv = engine->i915; struct execlist_port *port = engine->execlist_port; struct intel_context *ce; @@ -1429,7 +1277,6 @@ static void reset_common_ring(struct intel_engine_cs *engine, return; /* Catch up with any missed context-switch interrupts */ - I915_WRITE(RING_CONTEXT_STATUS_PTR(engine), _MASKED_FIELD(0xffff, 0)); if (request->ctx != port[0].request->ctx) { i915_gem_request_put(port[0].request); port[0] = port[1]; @@ -1440,32 +1287,33 @@ static void reset_common_ring(struct intel_engine_cs *engine, /* Reset WaIdleLiteRestore:bdw,skl as well */ request->tail = request->wa_tail - WA_TAIL_DWORDS * sizeof(u32); + GEM_BUG_ON(!IS_ALIGNED(request->tail, 8)); } static int intel_logical_ring_emit_pdps(struct drm_i915_gem_request *req) { struct i915_hw_ppgtt *ppgtt = req->ctx->ppgtt; - struct intel_ring *ring = req->ring; struct intel_engine_cs *engine = req->engine; const int num_lri_cmds = GEN8_LEGACY_PDPES * 2; - int i, ret; + u32 *cs; + int i; - ret = intel_ring_begin(req, num_lri_cmds * 2 + 2); - if (ret) - return ret; + cs = intel_ring_begin(req, num_lri_cmds * 2 + 2); + if (IS_ERR(cs)) + return PTR_ERR(cs); - intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(num_lri_cmds)); + *cs++ = MI_LOAD_REGISTER_IMM(num_lri_cmds); for (i = GEN8_LEGACY_PDPES - 1; i >= 0; i--) { const dma_addr_t pd_daddr = i915_page_dir_dma_addr(ppgtt, i); - intel_ring_emit_reg(ring, GEN8_RING_PDP_UDW(engine, i)); - intel_ring_emit(ring, upper_32_bits(pd_daddr)); - intel_ring_emit_reg(ring, GEN8_RING_PDP_LDW(engine, i)); - intel_ring_emit(ring, lower_32_bits(pd_daddr)); + *cs++ = i915_mmio_reg_offset(GEN8_RING_PDP_UDW(engine, i)); + *cs++ = upper_32_bits(pd_daddr); + *cs++ = i915_mmio_reg_offset(GEN8_RING_PDP_LDW(engine, i)); + *cs++ = lower_32_bits(pd_daddr); } - intel_ring_emit(ring, MI_NOOP); - intel_ring_advance(ring); + *cs++ = MI_NOOP; + intel_ring_advance(req, cs); return 0; } @@ -1474,8 +1322,8 @@ static int gen8_emit_bb_start(struct drm_i915_gem_request *req, u64 offset, u32 len, unsigned int dispatch_flags) { - struct intel_ring *ring = req->ring; bool ppgtt = !(dispatch_flags & I915_DISPATCH_SECURE); + u32 *cs; int ret; /* Don't rely in hw updating PDPs, specially in lite-restore. @@ -1486,7 +1334,7 @@ static int gen8_emit_bb_start(struct drm_i915_gem_request *req, * not needed in 48-bit.*/ if (req->ctx->ppgtt && (intel_engine_flag(req->engine) & req->ctx->ppgtt->pd_dirty_rings)) { - if (!USES_FULL_48BIT_PPGTT(req->i915) && + if (!i915_vm_is_48bit(&req->ctx->ppgtt->base) && !intel_vgpu_active(req->i915)) { ret = intel_logical_ring_emit_pdps(req); if (ret) @@ -1496,19 +1344,17 @@ static int gen8_emit_bb_start(struct drm_i915_gem_request *req, req->ctx->ppgtt->pd_dirty_rings &= ~intel_engine_flag(req->engine); } - ret = intel_ring_begin(req, 4); - if (ret) - return ret; + cs = intel_ring_begin(req, 4); + if (IS_ERR(cs)) + return PTR_ERR(cs); /* FIXME(BDW): Address space and security selectors. */ - intel_ring_emit(ring, MI_BATCH_BUFFER_START_GEN8 | - (ppgtt<<8) | - (dispatch_flags & I915_DISPATCH_RS ? - MI_BATCH_RESOURCE_STREAMER : 0)); - intel_ring_emit(ring, lower_32_bits(offset)); - intel_ring_emit(ring, upper_32_bits(offset)); - intel_ring_emit(ring, MI_NOOP); - intel_ring_advance(ring); + *cs++ = MI_BATCH_BUFFER_START_GEN8 | (ppgtt << 8) | (dispatch_flags & + I915_DISPATCH_RS ? MI_BATCH_RESOURCE_STREAMER : 0); + *cs++ = lower_32_bits(offset); + *cs++ = upper_32_bits(offset); + *cs++ = MI_NOOP; + intel_ring_advance(req, cs); return 0; } @@ -1529,13 +1375,11 @@ static void gen8_logical_ring_disable_irq(struct intel_engine_cs *engine) static int gen8_emit_flush(struct drm_i915_gem_request *request, u32 mode) { - struct intel_ring *ring = request->ring; - u32 cmd; - int ret; + u32 cmd, *cs; - ret = intel_ring_begin(request, 4); - if (ret) - return ret; + cs = intel_ring_begin(request, 4); + if (IS_ERR(cs)) + return PTR_ERR(cs); cmd = MI_FLUSH_DW + 1; @@ -1552,13 +1396,11 @@ static int gen8_emit_flush(struct drm_i915_gem_request *request, u32 mode) cmd |= MI_INVALIDATE_BSD; } - intel_ring_emit(ring, cmd); - intel_ring_emit(ring, - I915_GEM_HWS_SCRATCH_ADDR | - MI_FLUSH_DW_USE_GTT); - intel_ring_emit(ring, 0); /* upper addr */ - intel_ring_emit(ring, 0); /* value */ - intel_ring_advance(ring); + *cs++ = cmd; + *cs++ = I915_GEM_HWS_SCRATCH_ADDR | MI_FLUSH_DW_USE_GTT; + *cs++ = 0; /* upper addr */ + *cs++ = 0; /* value */ + intel_ring_advance(request, cs); return 0; } @@ -1566,13 +1408,11 @@ static int gen8_emit_flush(struct drm_i915_gem_request *request, u32 mode) static int gen8_emit_flush_render(struct drm_i915_gem_request *request, u32 mode) { - struct intel_ring *ring = request->ring; struct intel_engine_cs *engine = request->engine; u32 scratch_addr = i915_ggtt_offset(engine->scratch) + 2 * CACHELINE_BYTES; bool vf_flush_wa = false, dc_flush_wa = false; - u32 flags = 0; - int ret; + u32 *cs, flags = 0; int len; flags |= PIPE_CONTROL_CS_STALL; @@ -1614,62 +1454,25 @@ static int gen8_emit_flush_render(struct drm_i915_gem_request *request, if (dc_flush_wa) len += 12; - ret = intel_ring_begin(request, len); - if (ret) - return ret; + cs = intel_ring_begin(request, len); + if (IS_ERR(cs)) + return PTR_ERR(cs); - if (vf_flush_wa) { - intel_ring_emit(ring, GFX_OP_PIPE_CONTROL(6)); - intel_ring_emit(ring, 0); - intel_ring_emit(ring, 0); - intel_ring_emit(ring, 0); - intel_ring_emit(ring, 0); - intel_ring_emit(ring, 0); - } + if (vf_flush_wa) + cs = gen8_emit_pipe_control(cs, 0, 0); - if (dc_flush_wa) { - intel_ring_emit(ring, GFX_OP_PIPE_CONTROL(6)); - intel_ring_emit(ring, PIPE_CONTROL_DC_FLUSH_ENABLE); - intel_ring_emit(ring, 0); - intel_ring_emit(ring, 0); - intel_ring_emit(ring, 0); - intel_ring_emit(ring, 0); - } + if (dc_flush_wa) + cs = gen8_emit_pipe_control(cs, PIPE_CONTROL_DC_FLUSH_ENABLE, + 0); - intel_ring_emit(ring, GFX_OP_PIPE_CONTROL(6)); - intel_ring_emit(ring, flags); - intel_ring_emit(ring, scratch_addr); - intel_ring_emit(ring, 0); - intel_ring_emit(ring, 0); - intel_ring_emit(ring, 0); - - if (dc_flush_wa) { - intel_ring_emit(ring, GFX_OP_PIPE_CONTROL(6)); - intel_ring_emit(ring, PIPE_CONTROL_CS_STALL); - intel_ring_emit(ring, 0); - intel_ring_emit(ring, 0); - intel_ring_emit(ring, 0); - intel_ring_emit(ring, 0); - } + cs = gen8_emit_pipe_control(cs, flags, scratch_addr); - intel_ring_advance(ring); + if (dc_flush_wa) + cs = gen8_emit_pipe_control(cs, PIPE_CONTROL_CS_STALL, 0); - return 0; -} + intel_ring_advance(request, cs); -static void bxt_a_seqno_barrier(struct intel_engine_cs *engine) -{ - /* - * On BXT A steppings there is a HW coherency issue whereby the - * MI_STORE_DATA_IMM storing the completed request's seqno - * occasionally doesn't invalidate the CPU cache. Work around this by - * clflushing the corresponding cacheline whenever the caller wants - * the coherency to be guaranteed. Note that this cacheline is known - * to be clean at this point, since we only write it in - * bxt_a_set_seqno(), where we also do a clflush after the write. So - * this clflush in practice becomes an invalidate operation. - */ - intel_flush_status_page(engine, I915_GEM_HWS_INDEX); + return 0; } /* @@ -1677,34 +1480,34 @@ static void bxt_a_seqno_barrier(struct intel_engine_cs *engine) * used as a workaround for not being allowed to do lite * restore with HEAD==TAIL (WaIdleLiteRestore). */ -static void gen8_emit_wa_tail(struct drm_i915_gem_request *request, u32 *out) +static void gen8_emit_wa_tail(struct drm_i915_gem_request *request, u32 *cs) { - *out++ = MI_NOOP; - *out++ = MI_NOOP; - request->wa_tail = intel_ring_offset(request->ring, out); + *cs++ = MI_NOOP; + *cs++ = MI_NOOP; + request->wa_tail = intel_ring_offset(request, cs); } -static void gen8_emit_breadcrumb(struct drm_i915_gem_request *request, - u32 *out) +static void gen8_emit_breadcrumb(struct drm_i915_gem_request *request, u32 *cs) { /* w/a: bit 5 needs to be zero for MI_FLUSH_DW address. */ BUILD_BUG_ON(I915_GEM_HWS_INDEX_ADDR & (1 << 5)); - *out++ = (MI_FLUSH_DW + 1) | MI_FLUSH_DW_OP_STOREDW; - *out++ = intel_hws_seqno_address(request->engine) | MI_FLUSH_DW_USE_GTT; - *out++ = 0; - *out++ = request->global_seqno; - *out++ = MI_USER_INTERRUPT; - *out++ = MI_NOOP; - request->tail = intel_ring_offset(request->ring, out); + *cs++ = (MI_FLUSH_DW + 1) | MI_FLUSH_DW_OP_STOREDW; + *cs++ = intel_hws_seqno_address(request->engine) | MI_FLUSH_DW_USE_GTT; + *cs++ = 0; + *cs++ = request->global_seqno; + *cs++ = MI_USER_INTERRUPT; + *cs++ = MI_NOOP; + request->tail = intel_ring_offset(request, cs); + GEM_BUG_ON(!IS_ALIGNED(request->tail, 8)); - gen8_emit_wa_tail(request, out); + gen8_emit_wa_tail(request, cs); } static const int gen8_emit_breadcrumb_sz = 6 + WA_TAIL_DWORDS; static void gen8_emit_breadcrumb_render(struct drm_i915_gem_request *request, - u32 *out) + u32 *cs) { /* We're using qword write, seqno should be aligned to 8 bytes. */ BUILD_BUG_ON(I915_GEM_HWS_INDEX & 1); @@ -1713,20 +1516,20 @@ static void gen8_emit_breadcrumb_render(struct drm_i915_gem_request *request, * need a prior CS_STALL, which is emitted by the flush * following the batch. */ - *out++ = GFX_OP_PIPE_CONTROL(6); - *out++ = (PIPE_CONTROL_GLOBAL_GTT_IVB | - PIPE_CONTROL_CS_STALL | - PIPE_CONTROL_QW_WRITE); - *out++ = intel_hws_seqno_address(request->engine); - *out++ = 0; - *out++ = request->global_seqno; + *cs++ = GFX_OP_PIPE_CONTROL(6); + *cs++ = PIPE_CONTROL_GLOBAL_GTT_IVB | PIPE_CONTROL_CS_STALL | + PIPE_CONTROL_QW_WRITE; + *cs++ = intel_hws_seqno_address(request->engine); + *cs++ = 0; + *cs++ = request->global_seqno; /* We're thrashing one dword of HWS. */ - *out++ = 0; - *out++ = MI_USER_INTERRUPT; - *out++ = MI_NOOP; - request->tail = intel_ring_offset(request->ring, out); + *cs++ = 0; + *cs++ = MI_USER_INTERRUPT; + *cs++ = MI_NOOP; + request->tail = intel_ring_offset(request, cs); + GEM_BUG_ON(!IS_ALIGNED(request->tail, 8)); - gen8_emit_wa_tail(request, out); + gen8_emit_wa_tail(request, cs); } static const int gen8_emit_breadcrumb_render_sz = 8 + WA_TAIL_DWORDS; @@ -1735,7 +1538,7 @@ static int gen8_init_rcs_context(struct drm_i915_gem_request *req) { int ret; - ret = intel_logical_ring_workarounds_emit(req); + ret = intel_ring_workarounds_emit(req); if (ret) return ret; @@ -1781,7 +1584,7 @@ void intel_logical_ring_cleanup(struct intel_engine_cs *engine) intel_engine_cleanup_common(engine); - lrc_destroy_wa_ctx_obj(engine); + lrc_destroy_wa_ctx(engine); engine->i915 = NULL; dev_priv->engine[engine->id] = NULL; kfree(engine); @@ -1819,8 +1622,6 @@ logical_ring_default_vfuncs(struct intel_engine_cs *engine) engine->irq_enable = gen8_logical_ring_enable_irq; engine->irq_disable = gen8_logical_ring_disable_irq; engine->emit_bb_start = gen8_emit_bb_start; - if (IS_BXT_REVID(engine->i915, 0, BXT_REVID_A1)) - engine->irq_seqno_barrier = bxt_a_seqno_barrier; } static inline void @@ -1877,7 +1678,6 @@ logical_ring_setup(struct intel_engine_cs *engine) tasklet_init(&engine->irq_tasklet, intel_lrc_irq_handler, (unsigned long)engine); - logical_ring_init_platform_invariants(engine); logical_ring_default_vfuncs(engine); logical_ring_default_irqs(engine); } @@ -2015,105 +1815,89 @@ static u32 intel_lr_indirect_ctx_offset(struct intel_engine_cs *engine) return indirect_ctx_offset; } -static void execlists_init_reg_state(u32 *reg_state, +static void execlists_init_reg_state(u32 *regs, struct i915_gem_context *ctx, struct intel_engine_cs *engine, struct intel_ring *ring) { struct drm_i915_private *dev_priv = engine->i915; struct i915_hw_ppgtt *ppgtt = ctx->ppgtt ?: dev_priv->mm.aliasing_ppgtt; + u32 base = engine->mmio_base; + bool rcs = engine->id == RCS; + + /* A context is actually a big batch buffer with several + * MI_LOAD_REGISTER_IMM commands followed by (reg, value) pairs. The + * values we are setting here are only for the first context restore: + * on a subsequent save, the GPU will recreate this batchbuffer with new + * values (including all the missing MI_LOAD_REGISTER_IMM commands that + * we are not initializing here). + */ + regs[CTX_LRI_HEADER_0] = MI_LOAD_REGISTER_IMM(rcs ? 14 : 11) | + MI_LRI_FORCE_POSTED; + + CTX_REG(regs, CTX_CONTEXT_CONTROL, RING_CONTEXT_CONTROL(engine), + _MASKED_BIT_ENABLE(CTX_CTRL_INHIBIT_SYN_CTX_SWITCH | + CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT | + (HAS_RESOURCE_STREAMER(dev_priv) ? + CTX_CTRL_RS_CTX_ENABLE : 0))); + CTX_REG(regs, CTX_RING_HEAD, RING_HEAD(base), 0); + CTX_REG(regs, CTX_RING_TAIL, RING_TAIL(base), 0); + CTX_REG(regs, CTX_RING_BUFFER_START, RING_START(base), 0); + CTX_REG(regs, CTX_RING_BUFFER_CONTROL, RING_CTL(base), + RING_CTL_SIZE(ring->size) | RING_VALID); + CTX_REG(regs, CTX_BB_HEAD_U, RING_BBADDR_UDW(base), 0); + CTX_REG(regs, CTX_BB_HEAD_L, RING_BBADDR(base), 0); + CTX_REG(regs, CTX_BB_STATE, RING_BBSTATE(base), RING_BB_PPGTT); + CTX_REG(regs, CTX_SECOND_BB_HEAD_U, RING_SBBADDR_UDW(base), 0); + CTX_REG(regs, CTX_SECOND_BB_HEAD_L, RING_SBBADDR(base), 0); + CTX_REG(regs, CTX_SECOND_BB_STATE, RING_SBBSTATE(base), 0); + if (rcs) { + CTX_REG(regs, CTX_BB_PER_CTX_PTR, RING_BB_PER_CTX_PTR(base), 0); + CTX_REG(regs, CTX_RCS_INDIRECT_CTX, RING_INDIRECT_CTX(base), 0); + CTX_REG(regs, CTX_RCS_INDIRECT_CTX_OFFSET, + RING_INDIRECT_CTX_OFFSET(base), 0); - /* A context is actually a big batch buffer with several MI_LOAD_REGISTER_IMM - * commands followed by (reg, value) pairs. The values we are setting here are - * only for the first context restore: on a subsequent save, the GPU will - * recreate this batchbuffer with new values (including all the missing - * MI_LOAD_REGISTER_IMM commands that we are not initializing here). */ - reg_state[CTX_LRI_HEADER_0] = - MI_LOAD_REGISTER_IMM(engine->id == RCS ? 14 : 11) | MI_LRI_FORCE_POSTED; - ASSIGN_CTX_REG(reg_state, CTX_CONTEXT_CONTROL, - RING_CONTEXT_CONTROL(engine), - _MASKED_BIT_ENABLE(CTX_CTRL_INHIBIT_SYN_CTX_SWITCH | - CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT | - (HAS_RESOURCE_STREAMER(dev_priv) ? - CTX_CTRL_RS_CTX_ENABLE : 0))); - ASSIGN_CTX_REG(reg_state, CTX_RING_HEAD, RING_HEAD(engine->mmio_base), - 0); - ASSIGN_CTX_REG(reg_state, CTX_RING_TAIL, RING_TAIL(engine->mmio_base), - 0); - ASSIGN_CTX_REG(reg_state, CTX_RING_BUFFER_START, - RING_START(engine->mmio_base), 0); - ASSIGN_CTX_REG(reg_state, CTX_RING_BUFFER_CONTROL, - RING_CTL(engine->mmio_base), - RING_CTL_SIZE(ring->size) | RING_VALID); - ASSIGN_CTX_REG(reg_state, CTX_BB_HEAD_U, - RING_BBADDR_UDW(engine->mmio_base), 0); - ASSIGN_CTX_REG(reg_state, CTX_BB_HEAD_L, - RING_BBADDR(engine->mmio_base), 0); - ASSIGN_CTX_REG(reg_state, CTX_BB_STATE, - RING_BBSTATE(engine->mmio_base), - RING_BB_PPGTT); - ASSIGN_CTX_REG(reg_state, CTX_SECOND_BB_HEAD_U, - RING_SBBADDR_UDW(engine->mmio_base), 0); - ASSIGN_CTX_REG(reg_state, CTX_SECOND_BB_HEAD_L, - RING_SBBADDR(engine->mmio_base), 0); - ASSIGN_CTX_REG(reg_state, CTX_SECOND_BB_STATE, - RING_SBBSTATE(engine->mmio_base), 0); - if (engine->id == RCS) { - ASSIGN_CTX_REG(reg_state, CTX_BB_PER_CTX_PTR, - RING_BB_PER_CTX_PTR(engine->mmio_base), 0); - ASSIGN_CTX_REG(reg_state, CTX_RCS_INDIRECT_CTX, - RING_INDIRECT_CTX(engine->mmio_base), 0); - ASSIGN_CTX_REG(reg_state, CTX_RCS_INDIRECT_CTX_OFFSET, - RING_INDIRECT_CTX_OFFSET(engine->mmio_base), 0); if (engine->wa_ctx.vma) { struct i915_ctx_workarounds *wa_ctx = &engine->wa_ctx; u32 ggtt_offset = i915_ggtt_offset(wa_ctx->vma); - reg_state[CTX_RCS_INDIRECT_CTX+1] = - (ggtt_offset + wa_ctx->indirect_ctx.offset * sizeof(uint32_t)) | - (wa_ctx->indirect_ctx.size / CACHELINE_DWORDS); + regs[CTX_RCS_INDIRECT_CTX + 1] = + (ggtt_offset + wa_ctx->indirect_ctx.offset) | + (wa_ctx->indirect_ctx.size / CACHELINE_BYTES); - reg_state[CTX_RCS_INDIRECT_CTX_OFFSET+1] = + regs[CTX_RCS_INDIRECT_CTX_OFFSET + 1] = intel_lr_indirect_ctx_offset(engine) << 6; - reg_state[CTX_BB_PER_CTX_PTR+1] = - (ggtt_offset + wa_ctx->per_ctx.offset * sizeof(uint32_t)) | - 0x01; + regs[CTX_BB_PER_CTX_PTR + 1] = + (ggtt_offset + wa_ctx->per_ctx.offset) | 0x01; } } - reg_state[CTX_LRI_HEADER_1] = MI_LOAD_REGISTER_IMM(9) | MI_LRI_FORCE_POSTED; - ASSIGN_CTX_REG(reg_state, CTX_CTX_TIMESTAMP, - RING_CTX_TIMESTAMP(engine->mmio_base), 0); + + regs[CTX_LRI_HEADER_1] = MI_LOAD_REGISTER_IMM(9) | MI_LRI_FORCE_POSTED; + + CTX_REG(regs, CTX_CTX_TIMESTAMP, RING_CTX_TIMESTAMP(base), 0); /* PDP values well be assigned later if needed */ - ASSIGN_CTX_REG(reg_state, CTX_PDP3_UDW, GEN8_RING_PDP_UDW(engine, 3), - 0); - ASSIGN_CTX_REG(reg_state, CTX_PDP3_LDW, GEN8_RING_PDP_LDW(engine, 3), - 0); - ASSIGN_CTX_REG(reg_state, CTX_PDP2_UDW, GEN8_RING_PDP_UDW(engine, 2), - 0); - ASSIGN_CTX_REG(reg_state, CTX_PDP2_LDW, GEN8_RING_PDP_LDW(engine, 2), - 0); - ASSIGN_CTX_REG(reg_state, CTX_PDP1_UDW, GEN8_RING_PDP_UDW(engine, 1), - 0); - ASSIGN_CTX_REG(reg_state, CTX_PDP1_LDW, GEN8_RING_PDP_LDW(engine, 1), - 0); - ASSIGN_CTX_REG(reg_state, CTX_PDP0_UDW, GEN8_RING_PDP_UDW(engine, 0), - 0); - ASSIGN_CTX_REG(reg_state, CTX_PDP0_LDW, GEN8_RING_PDP_LDW(engine, 0), - 0); - - if (ppgtt && USES_FULL_48BIT_PPGTT(ppgtt->base.dev)) { + CTX_REG(regs, CTX_PDP3_UDW, GEN8_RING_PDP_UDW(engine, 3), 0); + CTX_REG(regs, CTX_PDP3_LDW, GEN8_RING_PDP_LDW(engine, 3), 0); + CTX_REG(regs, CTX_PDP2_UDW, GEN8_RING_PDP_UDW(engine, 2), 0); + CTX_REG(regs, CTX_PDP2_LDW, GEN8_RING_PDP_LDW(engine, 2), 0); + CTX_REG(regs, CTX_PDP1_UDW, GEN8_RING_PDP_UDW(engine, 1), 0); + CTX_REG(regs, CTX_PDP1_LDW, GEN8_RING_PDP_LDW(engine, 1), 0); + CTX_REG(regs, CTX_PDP0_UDW, GEN8_RING_PDP_UDW(engine, 0), 0); + CTX_REG(regs, CTX_PDP0_LDW, GEN8_RING_PDP_LDW(engine, 0), 0); + + if (ppgtt && i915_vm_is_48bit(&ppgtt->base)) { /* 64b PPGTT (48bit canonical) * PDP0_DESCRIPTOR contains the base address to PML4 and * other PDP Descriptors are ignored. */ - ASSIGN_CTX_PML4(ppgtt, reg_state); + ASSIGN_CTX_PML4(ppgtt, regs); } - if (engine->id == RCS) { - reg_state[CTX_LRI_HEADER_2] = MI_LOAD_REGISTER_IMM(1); - ASSIGN_CTX_REG(reg_state, CTX_R_PWR_CLK_STATE, GEN8_R_PWR_CLK_STATE, - make_rpcs(dev_priv)); + if (rcs) { + regs[CTX_LRI_HEADER_2] = MI_LOAD_REGISTER_IMM(1); + CTX_REG(regs, CTX_R_PWR_CLK_STATE, GEN8_R_PWR_CLK_STATE, + make_rpcs(dev_priv)); } } diff --git a/drivers/gpu/drm/i915/intel_lrc.h b/drivers/gpu/drm/i915/intel_lrc.h index 0c852c024227..c8009c7bfbdd 100644 --- a/drivers/gpu/drm/i915/intel_lrc.h +++ b/drivers/gpu/drm/i915/intel_lrc.h @@ -68,8 +68,6 @@ void intel_logical_ring_cleanup(struct intel_engine_cs *engine); int logical_render_ring_init(struct intel_engine_cs *engine); int logical_xcs_ring_init(struct intel_engine_cs *engine); -int intel_engines_init(struct drm_i915_private *dev_priv); - /* Logical Ring Contexts */ /* One extra page is added before LRC for GuC as shared data */ diff --git a/drivers/gpu/drm/i915/intel_lspcon.c b/drivers/gpu/drm/i915/intel_lspcon.c index c300647ef604..71cbe9c08932 100644 --- a/drivers/gpu/drm/i915/intel_lspcon.c +++ b/drivers/gpu/drm/i915/intel_lspcon.c @@ -162,21 +162,8 @@ static void lspcon_resume_in_pcon_wa(struct intel_lspcon *lspcon) struct drm_i915_private *dev_priv = to_i915(dig_port->base.base.dev); unsigned long start = jiffies; - if (!lspcon->desc_valid) - return; - while (1) { - struct intel_dp_desc desc; - - /* - * The w/a only applies in PCON mode and we don't expect any - * AUX errors. - */ - if (!__intel_dp_read_desc(intel_dp, &desc)) - return; - - if (intel_digital_port_connected(dev_priv, dig_port) && - !memcmp(&intel_dp->desc, &desc, sizeof(desc))) { + if (intel_digital_port_connected(dev_priv, dig_port)) { DRM_DEBUG_KMS("LSPCON recovering in PCON mode after %u ms\n", jiffies_to_msecs(jiffies - start)); return; @@ -253,7 +240,7 @@ bool lspcon_init(struct intel_digital_port *intel_dig_port) return false; } - lspcon->desc_valid = intel_dp_read_desc(dp); + intel_dp_read_desc(dp); DRM_DEBUG_KMS("Success: LSPCON init\n"); return true; diff --git a/drivers/gpu/drm/i915/intel_lvds.c b/drivers/gpu/drm/i915/intel_lvds.c index 9ca4dc4d2378..8b942ef2b3ec 100644 --- a/drivers/gpu/drm/i915/intel_lvds.c +++ b/drivers/gpu/drm/i915/intel_lvds.c @@ -91,12 +91,11 @@ static bool intel_lvds_get_hw_state(struct intel_encoder *encoder, struct drm_device *dev = encoder->base.dev; struct drm_i915_private *dev_priv = to_i915(dev); struct intel_lvds_encoder *lvds_encoder = to_lvds_encoder(&encoder->base); - enum intel_display_power_domain power_domain; u32 tmp; bool ret; - power_domain = intel_display_port_power_domain(encoder); - if (!intel_display_power_get_if_enabled(dev_priv, power_domain)) + if (!intel_display_power_get_if_enabled(dev_priv, + encoder->power_domain)) return false; ret = false; @@ -114,7 +113,7 @@ static bool intel_lvds_get_hw_state(struct intel_encoder *encoder, ret = true; out: - intel_display_power_put(dev_priv, power_domain); + intel_display_power_put(dev_priv, encoder->power_domain); return ret; } @@ -1066,6 +1065,7 @@ void intel_lvds_init(struct drm_i915_private *dev_priv) intel_connector_attach_encoder(intel_connector, intel_encoder); intel_encoder->type = INTEL_OUTPUT_LVDS; + intel_encoder->power_domain = POWER_DOMAIN_PORT_OTHER; intel_encoder->port = PORT_NONE; intel_encoder->cloneable = 0; if (HAS_PCH_SPLIT(dev_priv)) diff --git a/drivers/gpu/drm/i915/intel_mocs.c b/drivers/gpu/drm/i915/intel_mocs.c index c787fc4e6eb9..92e461c68385 100644 --- a/drivers/gpu/drm/i915/intel_mocs.c +++ b/drivers/gpu/drm/i915/intel_mocs.c @@ -178,7 +178,7 @@ static bool get_mocs_settings(struct drm_i915_private *dev_priv, { bool result = false; - if (IS_SKYLAKE(dev_priv) || IS_KABYLAKE(dev_priv)) { + if (IS_GEN9_BC(dev_priv)) { table->size = ARRAY_SIZE(skylake_mocs_table); table->table = skylake_mocs_table; result = true; @@ -191,7 +191,7 @@ static bool get_mocs_settings(struct drm_i915_private *dev_priv, "Platform that should have a MOCS table does not.\n"); } - /* WaDisableSkipCaching:skl,bxt,kbl */ + /* WaDisableSkipCaching:skl,bxt,kbl,glk */ if (IS_GEN9(dev_priv)) { int i; @@ -276,23 +276,22 @@ int intel_mocs_init_engine(struct intel_engine_cs *engine) static int emit_mocs_control_table(struct drm_i915_gem_request *req, const struct drm_i915_mocs_table *table) { - struct intel_ring *ring = req->ring; enum intel_engine_id engine = req->engine->id; unsigned int index; - int ret; + u32 *cs; if (WARN_ON(table->size > GEN9_NUM_MOCS_ENTRIES)) return -ENODEV; - ret = intel_ring_begin(req, 2 + 2 * GEN9_NUM_MOCS_ENTRIES); - if (ret) - return ret; + cs = intel_ring_begin(req, 2 + 2 * GEN9_NUM_MOCS_ENTRIES); + if (IS_ERR(cs)) + return PTR_ERR(cs); - intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(GEN9_NUM_MOCS_ENTRIES)); + *cs++ = MI_LOAD_REGISTER_IMM(GEN9_NUM_MOCS_ENTRIES); for (index = 0; index < table->size; index++) { - intel_ring_emit_reg(ring, mocs_register(engine, index)); - intel_ring_emit(ring, table->table[index].control_value); + *cs++ = i915_mmio_reg_offset(mocs_register(engine, index)); + *cs++ = table->table[index].control_value; } /* @@ -304,12 +303,12 @@ static int emit_mocs_control_table(struct drm_i915_gem_request *req, * that value to all the used entries. */ for (; index < GEN9_NUM_MOCS_ENTRIES; index++) { - intel_ring_emit_reg(ring, mocs_register(engine, index)); - intel_ring_emit(ring, table->table[0].control_value); + *cs++ = i915_mmio_reg_offset(mocs_register(engine, index)); + *cs++ = table->table[0].control_value; } - intel_ring_emit(ring, MI_NOOP); - intel_ring_advance(ring); + *cs++ = MI_NOOP; + intel_ring_advance(req, cs); return 0; } @@ -336,29 +335,27 @@ static inline u32 l3cc_combine(const struct drm_i915_mocs_table *table, static int emit_mocs_l3cc_table(struct drm_i915_gem_request *req, const struct drm_i915_mocs_table *table) { - struct intel_ring *ring = req->ring; unsigned int i; - int ret; + u32 *cs; if (WARN_ON(table->size > GEN9_NUM_MOCS_ENTRIES)) return -ENODEV; - ret = intel_ring_begin(req, 2 + GEN9_NUM_MOCS_ENTRIES); - if (ret) - return ret; + cs = intel_ring_begin(req, 2 + GEN9_NUM_MOCS_ENTRIES); + if (IS_ERR(cs)) + return PTR_ERR(cs); - intel_ring_emit(ring, - MI_LOAD_REGISTER_IMM(GEN9_NUM_MOCS_ENTRIES / 2)); + *cs++ = MI_LOAD_REGISTER_IMM(GEN9_NUM_MOCS_ENTRIES / 2); for (i = 0; i < table->size/2; i++) { - intel_ring_emit_reg(ring, GEN9_LNCFCMOCS(i)); - intel_ring_emit(ring, l3cc_combine(table, 2*i, 2*i+1)); + *cs++ = i915_mmio_reg_offset(GEN9_LNCFCMOCS(i)); + *cs++ = l3cc_combine(table, 2 * i, 2 * i + 1); } if (table->size & 0x01) { /* Odd table size - 1 left over */ - intel_ring_emit_reg(ring, GEN9_LNCFCMOCS(i)); - intel_ring_emit(ring, l3cc_combine(table, 2*i, 0)); + *cs++ = i915_mmio_reg_offset(GEN9_LNCFCMOCS(i)); + *cs++ = l3cc_combine(table, 2 * i, 0); i++; } @@ -368,12 +365,12 @@ static int emit_mocs_l3cc_table(struct drm_i915_gem_request *req, * they are reserved by the hardware. */ for (; i < GEN9_NUM_MOCS_ENTRIES / 2; i++) { - intel_ring_emit_reg(ring, GEN9_LNCFCMOCS(i)); - intel_ring_emit(ring, l3cc_combine(table, 0, 0)); + *cs++ = i915_mmio_reg_offset(GEN9_LNCFCMOCS(i)); + *cs++ = l3cc_combine(table, 0, 0); } - intel_ring_emit(ring, MI_NOOP); - intel_ring_advance(ring); + *cs++ = MI_NOOP; + intel_ring_advance(req, cs); return 0; } diff --git a/drivers/gpu/drm/i915/intel_overlay.c b/drivers/gpu/drm/i915/intel_overlay.c index 0608fad7f593..5ef9f5bfb92c 100644 --- a/drivers/gpu/drm/i915/intel_overlay.c +++ b/drivers/gpu/drm/i915/intel_overlay.c @@ -267,8 +267,7 @@ static int intel_overlay_on(struct intel_overlay *overlay) { struct drm_i915_private *dev_priv = overlay->i915; struct drm_i915_gem_request *req; - struct intel_ring *ring; - int ret; + u32 *cs; WARN_ON(overlay->active); WARN_ON(IS_I830(dev_priv) && !(dev_priv->quirks & QUIRK_PIPEA_FORCE)); @@ -277,10 +276,10 @@ static int intel_overlay_on(struct intel_overlay *overlay) if (IS_ERR(req)) return PTR_ERR(req); - ret = intel_ring_begin(req, 4); - if (ret) { + cs = intel_ring_begin(req, 4); + if (IS_ERR(cs)) { i915_add_request_no_flush(req); - return ret; + return PTR_ERR(cs); } overlay->active = true; @@ -288,12 +287,11 @@ static int intel_overlay_on(struct intel_overlay *overlay) if (IS_I830(dev_priv)) i830_overlay_clock_gating(dev_priv, false); - ring = req->ring; - intel_ring_emit(ring, MI_OVERLAY_FLIP | MI_OVERLAY_ON); - intel_ring_emit(ring, overlay->flip_addr | OFC_UPDATE); - intel_ring_emit(ring, MI_WAIT_FOR_EVENT | MI_WAIT_FOR_OVERLAY_FLIP); - intel_ring_emit(ring, MI_NOOP); - intel_ring_advance(ring); + *cs++ = MI_OVERLAY_FLIP | MI_OVERLAY_ON; + *cs++ = overlay->flip_addr | OFC_UPDATE; + *cs++ = MI_WAIT_FOR_EVENT | MI_WAIT_FOR_OVERLAY_FLIP; + *cs++ = MI_NOOP; + intel_ring_advance(req, cs); return intel_overlay_do_wait_request(overlay, req, NULL); } @@ -326,10 +324,8 @@ static int intel_overlay_continue(struct intel_overlay *overlay, { struct drm_i915_private *dev_priv = overlay->i915; struct drm_i915_gem_request *req; - struct intel_ring *ring; u32 flip_addr = overlay->flip_addr; - u32 tmp; - int ret; + u32 tmp, *cs; WARN_ON(!overlay->active); @@ -345,16 +341,15 @@ static int intel_overlay_continue(struct intel_overlay *overlay, if (IS_ERR(req)) return PTR_ERR(req); - ret = intel_ring_begin(req, 2); - if (ret) { + cs = intel_ring_begin(req, 2); + if (IS_ERR(cs)) { i915_add_request_no_flush(req); - return ret; + return PTR_ERR(cs); } - ring = req->ring; - intel_ring_emit(ring, MI_OVERLAY_FLIP | MI_OVERLAY_CONTINUE); - intel_ring_emit(ring, flip_addr); - intel_ring_advance(ring); + *cs++ = MI_OVERLAY_FLIP | MI_OVERLAY_CONTINUE; + *cs++ = flip_addr; + intel_ring_advance(req, cs); intel_overlay_flip_prepare(overlay, vma); @@ -408,9 +403,7 @@ static void intel_overlay_off_tail(struct i915_gem_active *active, static int intel_overlay_off(struct intel_overlay *overlay) { struct drm_i915_gem_request *req; - struct intel_ring *ring; - u32 flip_addr = overlay->flip_addr; - int ret; + u32 *cs, flip_addr = overlay->flip_addr; WARN_ON(!overlay->active); @@ -424,25 +417,23 @@ static int intel_overlay_off(struct intel_overlay *overlay) if (IS_ERR(req)) return PTR_ERR(req); - ret = intel_ring_begin(req, 6); - if (ret) { + cs = intel_ring_begin(req, 6); + if (IS_ERR(cs)) { i915_add_request_no_flush(req); - return ret; + return PTR_ERR(cs); } - ring = req->ring; - /* wait for overlay to go idle */ - intel_ring_emit(ring, MI_OVERLAY_FLIP | MI_OVERLAY_CONTINUE); - intel_ring_emit(ring, flip_addr); - intel_ring_emit(ring, MI_WAIT_FOR_EVENT | MI_WAIT_FOR_OVERLAY_FLIP); + *cs++ = MI_OVERLAY_FLIP | MI_OVERLAY_CONTINUE; + *cs++ = flip_addr; + *cs++ = MI_WAIT_FOR_EVENT | MI_WAIT_FOR_OVERLAY_FLIP; /* turn overlay off */ - intel_ring_emit(ring, MI_OVERLAY_FLIP | MI_OVERLAY_OFF); - intel_ring_emit(ring, flip_addr); - intel_ring_emit(ring, MI_WAIT_FOR_EVENT | MI_WAIT_FOR_OVERLAY_FLIP); + *cs++ = MI_OVERLAY_FLIP | MI_OVERLAY_OFF; + *cs++ = flip_addr; + *cs++ = MI_WAIT_FOR_EVENT | MI_WAIT_FOR_OVERLAY_FLIP; - intel_ring_advance(ring); + intel_ring_advance(req, cs); intel_overlay_flip_prepare(overlay, NULL); @@ -465,6 +456,7 @@ static int intel_overlay_recover_from_interrupt(struct intel_overlay *overlay) static int intel_overlay_release_old_vid(struct intel_overlay *overlay) { struct drm_i915_private *dev_priv = overlay->i915; + u32 *cs; int ret; lockdep_assert_held(&dev_priv->drm.struct_mutex); @@ -478,23 +470,20 @@ static int intel_overlay_release_old_vid(struct intel_overlay *overlay) if (I915_READ(ISR) & I915_OVERLAY_PLANE_FLIP_PENDING_INTERRUPT) { /* synchronous slowpath */ struct drm_i915_gem_request *req; - struct intel_ring *ring; req = alloc_request(overlay); if (IS_ERR(req)) return PTR_ERR(req); - ret = intel_ring_begin(req, 2); - if (ret) { + cs = intel_ring_begin(req, 2); + if (IS_ERR(cs)) { i915_add_request_no_flush(req); - return ret; + return PTR_ERR(cs); } - ring = req->ring; - intel_ring_emit(ring, - MI_WAIT_FOR_EVENT | MI_WAIT_FOR_OVERLAY_FLIP); - intel_ring_emit(ring, MI_NOOP); - intel_ring_advance(ring); + *cs++ = MI_WAIT_FOR_EVENT | MI_WAIT_FOR_OVERLAY_FLIP; + *cs++ = MI_NOOP; + intel_ring_advance(req, cs); ret = intel_overlay_do_wait_request(overlay, req, intel_overlay_release_old_vid_tail); diff --git a/drivers/gpu/drm/i915/intel_panel.c b/drivers/gpu/drm/i915/intel_panel.c index 1a6ff26dea20..cb50c527401f 100644 --- a/drivers/gpu/drm/i915/intel_panel.c +++ b/drivers/gpu/drm/i915/intel_panel.c @@ -1315,7 +1315,7 @@ static u32 i9xx_hz_to_pwm(struct intel_connector *connector, u32 pwm_freq_hz) if (IS_PINEVIEW(dev_priv)) clock = KHz(dev_priv->rawclk_freq); else - clock = KHz(dev_priv->cdclk_freq); + clock = KHz(dev_priv->cdclk.hw.cdclk); return DIV_ROUND_CLOSEST(clock, pwm_freq_hz * 32); } @@ -1333,7 +1333,7 @@ static u32 i965_hz_to_pwm(struct intel_connector *connector, u32 pwm_freq_hz) if (IS_G4X(dev_priv)) clock = KHz(dev_priv->rawclk_freq); else - clock = KHz(dev_priv->cdclk_freq); + clock = KHz(dev_priv->cdclk.hw.cdclk); return DIV_ROUND_CLOSEST(clock, pwm_freq_hz * 128); } diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index 249623d45be0..b38707efb620 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -65,12 +65,12 @@ static void gen9_init_clock_gating(struct drm_i915_private *dev_priv) I915_WRITE(GEN8_CONFIG0, I915_READ(GEN8_CONFIG0) | GEN9_DEFAULT_FIXES); - /* WaEnableChickenDCPR:skl,bxt,kbl */ + /* WaEnableChickenDCPR:skl,bxt,kbl,glk */ I915_WRITE(GEN8_CHICKEN_DCPR_1, I915_READ(GEN8_CHICKEN_DCPR_1) | MASK_WAKEMEM); /* WaFbcTurnOffFbcWatermark:skl,bxt,kbl */ - /* WaFbcWakeMemOn:skl,bxt,kbl */ + /* WaFbcWakeMemOn:skl,bxt,kbl,glk */ I915_WRITE(DISP_ARB_CTL, I915_READ(DISP_ARB_CTL) | DISP_FBC_WM_DIS | DISP_FBC_MEMORY_WAKE); @@ -99,9 +99,31 @@ static void bxt_init_clock_gating(struct drm_i915_private *dev_priv) * Wa: Backlight PWM may stop in the asserted state, causing backlight * to stay fully on. */ - if (IS_BXT_REVID(dev_priv, BXT_REVID_B0, REVID_FOREVER)) - I915_WRITE(GEN9_CLKGATE_DIS_0, I915_READ(GEN9_CLKGATE_DIS_0) | - PWM1_GATING_DIS | PWM2_GATING_DIS); + I915_WRITE(GEN9_CLKGATE_DIS_0, I915_READ(GEN9_CLKGATE_DIS_0) | + PWM1_GATING_DIS | PWM2_GATING_DIS); +} + +static void glk_init_clock_gating(struct drm_i915_private *dev_priv) +{ + gen9_init_clock_gating(dev_priv); + + /* + * WaDisablePWMClockGating:glk + * Backlight PWM may stop in the asserted state, causing backlight + * to stay fully on. + */ + I915_WRITE(GEN9_CLKGATE_DIS_0, I915_READ(GEN9_CLKGATE_DIS_0) | + PWM1_GATING_DIS | PWM2_GATING_DIS); + + /* WaDDIIOTimeout:glk */ + if (IS_GLK_REVID(dev_priv, 0, GLK_REVID_A1)) { + u32 val = I915_READ(CHICKEN_MISC_2); + val &= ~(GLK_CL0_PWR_DOWN | + GLK_CL1_PWR_DOWN | + GLK_CL2_PWR_DOWN); + I915_WRITE(CHICKEN_MISC_2, val); + } + } static void i915_pineview_get_mem_freq(struct drm_i915_private *dev_priv) @@ -1701,39 +1723,6 @@ static void i845_update_wm(struct intel_crtc *unused_crtc) I915_WRITE(FW_BLC, fwater_lo); } -uint32_t ilk_pipe_pixel_rate(const struct intel_crtc_state *pipe_config) -{ - uint32_t pixel_rate; - - pixel_rate = pipe_config->base.adjusted_mode.crtc_clock; - - /* We only use IF-ID interlacing. If we ever use PF-ID we'll need to - * adjust the pixel_rate here. */ - - if (pipe_config->pch_pfit.enabled) { - uint64_t pipe_w, pipe_h, pfit_w, pfit_h; - uint32_t pfit_size = pipe_config->pch_pfit.size; - - pipe_w = pipe_config->pipe_src_w; - pipe_h = pipe_config->pipe_src_h; - - pfit_w = (pfit_size >> 16) & 0xFFFF; - pfit_h = pfit_size & 0xFFFF; - if (pipe_w < pfit_w) - pipe_w = pfit_w; - if (pipe_h < pfit_h) - pipe_h = pfit_h; - - if (WARN_ON(!pfit_w || !pfit_h)) - return pixel_rate; - - pixel_rate = div_u64((uint64_t) pixel_rate * pipe_w * pipe_h, - pfit_w * pfit_h); - } - - return pixel_rate; -} - /* latency must be in 0.1us units. */ static uint32_t ilk_wm_method1(uint32_t pixel_rate, uint8_t cpp, uint32_t latency) { @@ -1807,12 +1796,12 @@ static uint32_t ilk_compute_pri_wm(const struct intel_crtc_state *cstate, cpp = pstate->base.fb->format->cpp[0]; - method1 = ilk_wm_method1(ilk_pipe_pixel_rate(cstate), cpp, mem_value); + method1 = ilk_wm_method1(cstate->pixel_rate, cpp, mem_value); if (!is_lp) return method1; - method2 = ilk_wm_method2(ilk_pipe_pixel_rate(cstate), + method2 = ilk_wm_method2(cstate->pixel_rate, cstate->base.adjusted_mode.crtc_htotal, drm_rect_width(&pstate->base.dst), cpp, mem_value); @@ -1836,8 +1825,8 @@ static uint32_t ilk_compute_spr_wm(const struct intel_crtc_state *cstate, cpp = pstate->base.fb->format->cpp[0]; - method1 = ilk_wm_method1(ilk_pipe_pixel_rate(cstate), cpp, mem_value); - method2 = ilk_wm_method2(ilk_pipe_pixel_rate(cstate), + method1 = ilk_wm_method1(cstate->pixel_rate, cpp, mem_value); + method2 = ilk_wm_method2(cstate->pixel_rate, cstate->base.adjusted_mode.crtc_htotal, drm_rect_width(&pstate->base.dst), cpp, mem_value); @@ -1863,7 +1852,7 @@ static uint32_t ilk_compute_cur_wm(const struct intel_crtc_state *cstate, if (!cstate->base.active) return 0; - return ilk_wm_method2(ilk_pipe_pixel_rate(cstate), + return ilk_wm_method2(cstate->pixel_rate, cstate->base.adjusted_mode.crtc_htotal, width, cpp, mem_value); } @@ -2095,7 +2084,7 @@ hsw_compute_linetime_wm(const struct intel_crtc_state *cstate) return 0; if (WARN_ON(adjusted_mode->crtc_clock == 0)) return 0; - if (WARN_ON(intel_state->cdclk == 0)) + if (WARN_ON(intel_state->cdclk.logical.cdclk == 0)) return 0; /* The WM are computed with base on how long it takes to fill a single @@ -2104,7 +2093,7 @@ hsw_compute_linetime_wm(const struct intel_crtc_state *cstate) linetime = DIV_ROUND_CLOSEST(adjusted_mode->crtc_htotal * 1000 * 8, adjusted_mode->crtc_clock); ips_linetime = DIV_ROUND_CLOSEST(adjusted_mode->crtc_htotal * 1000 * 8, - intel_state->cdclk); + intel_state->cdclk.logical.cdclk); return PIPE_WM_LINETIME_IPS_LINETIME(ips_linetime) | PIPE_WM_LINETIME_TIME(linetime); @@ -2173,7 +2162,7 @@ static void intel_read_wm_latency(struct drm_i915_private *dev_priv, } /* - * WaWmMemoryReadLatency:skl + * WaWmMemoryReadLatency:skl,glk * * punit doesn't take into account the read latency so we need * to add 2us to the various latency levels we retrieve from the @@ -2895,8 +2884,7 @@ static bool skl_needs_memory_bw_wa(struct intel_atomic_state *state) { struct drm_i915_private *dev_priv = to_i915(state->base.dev); - if (IS_SKYLAKE(dev_priv) || IS_BROXTON(dev_priv) || - IS_KABYLAKE(dev_priv)) + if (IS_GEN9_BC(dev_priv) || IS_BROXTON(dev_priv)) return true; return false; @@ -3547,7 +3535,7 @@ static uint32_t skl_adjusted_plane_pixel_rate(const struct intel_crtc_state *cst * Adjusted plane pixel rate is just the pipe's adjusted pixel rate * with additional adjustments for plane-specific scaling. */ - adjusted_pixel_rate = ilk_pipe_pixel_rate(cstate); + adjusted_pixel_rate = cstate->pixel_rate; downscale_amount = skl_plane_downscale_amount(pstate); pixel_rate = adjusted_pixel_rate * downscale_amount >> 16; @@ -3775,7 +3763,7 @@ skl_compute_linetime_wm(struct intel_crtc_state *cstate) if (!cstate->base.active) return 0; - pixel_rate = ilk_pipe_pixel_rate(cstate); + pixel_rate = cstate->pixel_rate; if (WARN_ON(pixel_rate == 0)) return 0; @@ -4934,16 +4922,8 @@ static u32 gen6_rps_pm_mask(struct drm_i915_private *dev_priv, u8 val) /* gen6_set_rps is called to update the frequency request, but should also be * called when the range (min_delay and max_delay) is modified so that we can * update the GEN6_RP_INTERRUPT_LIMITS register accordingly. */ -static void gen6_set_rps(struct drm_i915_private *dev_priv, u8 val) +static int gen6_set_rps(struct drm_i915_private *dev_priv, u8 val) { - /* WaGsvDisableTurbo: Workaround to disable turbo on BXT A* */ - if (IS_BXT_REVID(dev_priv, 0, BXT_REVID_A1)) - return; - - WARN_ON(!mutex_is_locked(&dev_priv->rps.hw_lock)); - WARN_ON(val > dev_priv->rps.max_freq); - WARN_ON(val < dev_priv->rps.min_freq); - /* min/max delay may still have been modified so be sure to * write the limits value. */ @@ -4969,17 +4949,15 @@ static void gen6_set_rps(struct drm_i915_private *dev_priv, u8 val) I915_WRITE(GEN6_RP_INTERRUPT_LIMITS, intel_rps_limits(dev_priv, val)); I915_WRITE(GEN6_PMINTRMSK, gen6_rps_pm_mask(dev_priv, val)); - POSTING_READ(GEN6_RPNSWREQ); - dev_priv->rps.cur_freq = val; trace_intel_gpu_freq_change(intel_gpu_freq(dev_priv, val)); + + return 0; } -static void valleyview_set_rps(struct drm_i915_private *dev_priv, u8 val) +static int valleyview_set_rps(struct drm_i915_private *dev_priv, u8 val) { - WARN_ON(!mutex_is_locked(&dev_priv->rps.hw_lock)); - WARN_ON(val > dev_priv->rps.max_freq); - WARN_ON(val < dev_priv->rps.min_freq); + int err; if (WARN_ONCE(IS_CHERRYVIEW(dev_priv) && (val & 1), "Odd GPU freq value\n")) @@ -4988,13 +4966,17 @@ static void valleyview_set_rps(struct drm_i915_private *dev_priv, u8 val) I915_WRITE(GEN6_PMINTRMSK, gen6_rps_pm_mask(dev_priv, val)); if (val != dev_priv->rps.cur_freq) { - vlv_punit_write(dev_priv, PUNIT_REG_GPU_FREQ_REQ, val); - if (!IS_CHERRYVIEW(dev_priv)) - gen6_set_rps_thresholds(dev_priv, val); + err = vlv_punit_write(dev_priv, PUNIT_REG_GPU_FREQ_REQ, val); + if (err) + return err; + + gen6_set_rps_thresholds(dev_priv, val); } dev_priv->rps.cur_freq = val; trace_intel_gpu_freq_change(intel_gpu_freq(dev_priv, val)); + + return 0; } /* vlv_set_rps_idle: Set the frequency to idle, if Gfx clocks are down @@ -5007,6 +4989,7 @@ static void valleyview_set_rps(struct drm_i915_private *dev_priv, u8 val) static void vlv_set_rps_idle(struct drm_i915_private *dev_priv) { u32 val = dev_priv->rps.idle_freq; + int err; if (dev_priv->rps.cur_freq <= val) return; @@ -5024,14 +5007,19 @@ static void vlv_set_rps_idle(struct drm_i915_private *dev_priv) * power than the render powerwell. */ intel_uncore_forcewake_get(dev_priv, FORCEWAKE_MEDIA); - valleyview_set_rps(dev_priv, val); + err = valleyview_set_rps(dev_priv, val); intel_uncore_forcewake_put(dev_priv, FORCEWAKE_MEDIA); + + if (err) + DRM_ERROR("Failed to set RPS for idle\n"); } void gen6_rps_busy(struct drm_i915_private *dev_priv) { mutex_lock(&dev_priv->rps.hw_lock); if (dev_priv->rps.enabled) { + u8 freq; + if (dev_priv->pm_rps_events & (GEN6_PM_RP_DOWN_EI_EXPIRED | GEN6_PM_RP_UP_EI_EXPIRED)) gen6_rps_reset_ei(dev_priv); I915_WRITE(GEN6_PMINTRMSK, @@ -5039,11 +5027,17 @@ void gen6_rps_busy(struct drm_i915_private *dev_priv) gen6_enable_rps_interrupts(dev_priv); - /* Ensure we start at the user's desired frequency */ - intel_set_rps(dev_priv, - clamp(dev_priv->rps.cur_freq, - dev_priv->rps.min_freq_softlimit, - dev_priv->rps.max_freq_softlimit)); + /* Use the user's desired frequency as a guide, but for better + * performance, jump directly to RPe as our starting frequency. + */ + freq = max(dev_priv->rps.cur_freq, + dev_priv->rps.efficient_freq); + + if (intel_set_rps(dev_priv, + clamp(freq, + dev_priv->rps.min_freq_softlimit, + dev_priv->rps.max_freq_softlimit))) + DRM_DEBUG_DRIVER("Failed to set idle frequency\n"); } mutex_unlock(&dev_priv->rps.hw_lock); } @@ -5111,12 +5105,25 @@ void gen6_rps_boost(struct drm_i915_private *dev_priv, spin_unlock(&dev_priv->rps.client_lock); } -void intel_set_rps(struct drm_i915_private *dev_priv, u8 val) +int intel_set_rps(struct drm_i915_private *dev_priv, u8 val) { + int err; + + lockdep_assert_held(&dev_priv->rps.hw_lock); + GEM_BUG_ON(val > dev_priv->rps.max_freq); + GEM_BUG_ON(val < dev_priv->rps.min_freq); + + if (!dev_priv->rps.enabled) { + dev_priv->rps.cur_freq = val; + return 0; + } + if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) - valleyview_set_rps(dev_priv, val); + err = valleyview_set_rps(dev_priv, val); else - gen6_set_rps(dev_priv, val); + err = gen6_set_rps(dev_priv, val); + + return err; } static void gen9_disable_rc6(struct drm_i915_private *dev_priv) @@ -5294,7 +5301,7 @@ static void gen6_init_rps_frequencies(struct drm_i915_private *dev_priv) dev_priv->rps.efficient_freq = dev_priv->rps.rp1_freq; if (IS_HASWELL(dev_priv) || IS_BROADWELL(dev_priv) || - IS_SKYLAKE(dev_priv) || IS_KABYLAKE(dev_priv)) { + IS_GEN9_BC(dev_priv)) { u32 ddcc_status = 0; if (sandybridge_pcode_read(dev_priv, @@ -5307,7 +5314,7 @@ static void gen6_init_rps_frequencies(struct drm_i915_private *dev_priv) dev_priv->rps.max_freq); } - if (IS_SKYLAKE(dev_priv) || IS_KABYLAKE(dev_priv)) { + if (IS_GEN9_BC(dev_priv)) { /* Store the frequency values in 16.66 MHZ units, which is * the natural hardware unit for SKL */ @@ -5320,7 +5327,7 @@ static void gen6_init_rps_frequencies(struct drm_i915_private *dev_priv) } static void reset_rps(struct drm_i915_private *dev_priv, - void (*set)(struct drm_i915_private *, u8)) + int (*set)(struct drm_i915_private *, u8)) { u8 freq = dev_priv->rps.cur_freq; @@ -5328,7 +5335,8 @@ static void reset_rps(struct drm_i915_private *dev_priv, dev_priv->rps.power = -1; dev_priv->rps.cur_freq = -1; - set(dev_priv, freq); + if (set(dev_priv, freq)) + DRM_ERROR("Failed to reset RPS to initial values\n"); } /* See the Gen9_GT_PM_Programming_Guide doc for the below */ @@ -5336,22 +5344,6 @@ static void gen9_enable_rps(struct drm_i915_private *dev_priv) { intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL); - /* WaGsvDisableTurbo: Workaround to disable turbo on BXT A* */ - if (IS_BXT_REVID(dev_priv, 0, BXT_REVID_A1)) { - /* - * BIOS could leave the Hw Turbo enabled, so need to explicitly - * clear out the Control register just to avoid inconsitency - * with debugfs interface, which will show Turbo as enabled - * only and that is not expected by the User after adding the - * WaGsvDisableTurbo. Apart from this there is no problem even - * if the Turbo is left enabled in the Control register, as the - * Up/Down interrupts would remain masked. - */ - gen9_disable_rps(dev_priv); - intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL); - return; - } - /* Program defaults and thresholds for RPS*/ I915_WRITE(GEN6_RC_VIDEO_FREQ, GEN9_FREQUENCY(dev_priv->rps.rp1_freq)); @@ -5411,18 +5403,9 @@ static void gen9_enable_rc6(struct drm_i915_private *dev_priv) if (intel_enable_rc6() & INTEL_RC6_ENABLE) rc6_mask = GEN6_RC_CTL_RC6_ENABLE; DRM_INFO("RC6 %s\n", onoff(rc6_mask & GEN6_RC_CTL_RC6_ENABLE)); - /* WaRsUseTimeoutMode:bxt */ - if (IS_BXT_REVID(dev_priv, 0, BXT_REVID_A1)) { - I915_WRITE(GEN6_RC6_THRESHOLD, 625); /* 800us */ - I915_WRITE(GEN6_RC_CONTROL, GEN6_RC_CTL_HW_ENABLE | - GEN7_RC_CTL_TO_MODE | - rc6_mask); - } else { - I915_WRITE(GEN6_RC6_THRESHOLD, 37500); /* 37.5/125ms per EI */ - I915_WRITE(GEN6_RC_CONTROL, GEN6_RC_CTL_HW_ENABLE | - GEN6_RC_CTL_EI_MODE(1) | - rc6_mask); - } + I915_WRITE(GEN6_RC6_THRESHOLD, 37500); /* 37.5/125ms per EI */ + I915_WRITE(GEN6_RC_CONTROL, + GEN6_RC_CTL_HW_ENABLE | GEN6_RC_CTL_EI_MODE(1) | rc6_mask); /* * 3b: Enable Coarse Power Gating only when RC6 is enabled. @@ -5637,7 +5620,7 @@ static void gen6_update_ring_freq(struct drm_i915_private *dev_priv) /* convert DDR frequency from units of 266.6MHz to bandwidth */ min_ring_freq = mult_frac(min_ring_freq, 8, 3); - if (IS_SKYLAKE(dev_priv) || IS_KABYLAKE(dev_priv)) { + if (IS_GEN9_BC(dev_priv)) { /* Convert GT frequency to 50 HZ units */ min_gpu_freq = dev_priv->rps.min_freq / GEN9_FREQ_SCALER; max_gpu_freq = dev_priv->rps.max_freq / GEN9_FREQ_SCALER; @@ -5655,7 +5638,7 @@ static void gen6_update_ring_freq(struct drm_i915_private *dev_priv) int diff = max_gpu_freq - gpu_freq; unsigned int ia_freq = 0, ring_freq = 0; - if (IS_SKYLAKE(dev_priv) || IS_KABYLAKE(dev_priv)) { + if (IS_GEN9_BC(dev_priv)) { /* * ring_freq = 2 * GT. ring_freq is in 100MHz units * No floor required for ring frequency on SKL. @@ -5739,6 +5722,17 @@ static int cherryview_rps_guar_freq(struct drm_i915_private *dev_priv) return rp1; } +static u32 cherryview_rps_min_freq(struct drm_i915_private *dev_priv) +{ + u32 val, rpn; + + val = vlv_punit_read(dev_priv, FB_GFX_FMIN_AT_VMIN_FUSE); + rpn = ((val >> FB_GFX_FMIN_AT_VMIN_FUSE_SHIFT) & + FB_GFX_FREQ_FUSE_MASK); + + return rpn; +} + static int valleyview_rps_guar_freq(struct drm_i915_private *dev_priv) { u32 val, rp1; @@ -5975,8 +5969,7 @@ static void cherryview_init_gt_powersave(struct drm_i915_private *dev_priv) intel_gpu_freq(dev_priv, dev_priv->rps.rp1_freq), dev_priv->rps.rp1_freq); - /* PUnit validated range is only [RPe, RP0] */ - dev_priv->rps.min_freq = dev_priv->rps.efficient_freq; + dev_priv->rps.min_freq = cherryview_rps_min_freq(dev_priv); DRM_DEBUG_DRIVER("min GPU freq: %d MHz (%u)\n", intel_gpu_freq(dev_priv, dev_priv->rps.min_freq), dev_priv->rps.min_freq); @@ -6775,7 +6768,7 @@ void intel_enable_gt_powersave(struct drm_i915_private *dev_priv) } else if (INTEL_GEN(dev_priv) >= 9) { gen9_enable_rc6(dev_priv); gen9_enable_rps(dev_priv); - if (IS_SKYLAKE(dev_priv) || IS_KABYLAKE(dev_priv)) + if (IS_GEN9_BC(dev_priv)) gen6_update_ring_freq(dev_priv); } else if (IS_BROADWELL(dev_priv)) { gen8_enable_rps(dev_priv); @@ -7260,6 +7253,14 @@ static void broadwell_init_clock_gating(struct drm_i915_private *dev_priv) | KVM_CONFIG_CHANGE_NOTIFICATION_SELECT); lpt_init_clock_gating(dev_priv); + + /* WaDisableDopClockGating:bdw + * + * Also see the CHICKEN2 write in bdw_init_workarounds() to disable DOP + * clock gating. + */ + I915_WRITE(GEN6_UCGCTL1, + I915_READ(GEN6_UCGCTL1) | GEN6_EU_TCUNIT_CLOCK_GATE_DISABLE); } static void haswell_init_clock_gating(struct drm_i915_private *dev_priv) @@ -7656,8 +7657,10 @@ void intel_init_clock_gating_hooks(struct drm_i915_private *dev_priv) dev_priv->display.init_clock_gating = skylake_init_clock_gating; else if (IS_KABYLAKE(dev_priv)) dev_priv->display.init_clock_gating = kabylake_init_clock_gating; - else if (IS_GEN9_LP(dev_priv)) + else if (IS_BROXTON(dev_priv)) dev_priv->display.init_clock_gating = bxt_init_clock_gating; + else if (IS_GEMINILAKE(dev_priv)) + dev_priv->display.init_clock_gating = glk_init_clock_gating; else if (IS_BROADWELL(dev_priv)) dev_priv->display.init_clock_gating = broadwell_init_clock_gating; else if (IS_CHERRYVIEW(dev_priv)) diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index 91bc4abf5d3e..f62afffef682 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -39,7 +39,7 @@ */ #define LEGACY_REQUEST_SIZE 200 -int __intel_ring_space(int head, int tail, int size) +static int __intel_ring_space(int head, int tail, int size) { int space = head - tail; if (space <= 0) @@ -61,22 +61,20 @@ void intel_ring_update_space(struct intel_ring *ring) static int gen2_render_ring_flush(struct drm_i915_gem_request *req, u32 mode) { - struct intel_ring *ring = req->ring; - u32 cmd; - int ret; + u32 cmd, *cs; cmd = MI_FLUSH; if (mode & EMIT_INVALIDATE) cmd |= MI_READ_FLUSH; - ret = intel_ring_begin(req, 2); - if (ret) - return ret; + cs = intel_ring_begin(req, 2); + if (IS_ERR(cs)) + return PTR_ERR(cs); - intel_ring_emit(ring, cmd); - intel_ring_emit(ring, MI_NOOP); - intel_ring_advance(ring); + *cs++ = cmd; + *cs++ = MI_NOOP; + intel_ring_advance(req, cs); return 0; } @@ -84,9 +82,7 @@ gen2_render_ring_flush(struct drm_i915_gem_request *req, u32 mode) static int gen4_render_ring_flush(struct drm_i915_gem_request *req, u32 mode) { - struct intel_ring *ring = req->ring; - u32 cmd; - int ret; + u32 cmd, *cs; /* * read/write caches: @@ -123,13 +119,13 @@ gen4_render_ring_flush(struct drm_i915_gem_request *req, u32 mode) cmd |= MI_INVALIDATE_ISP; } - ret = intel_ring_begin(req, 2); - if (ret) - return ret; + cs = intel_ring_begin(req, 2); + if (IS_ERR(cs)) + return PTR_ERR(cs); - intel_ring_emit(ring, cmd); - intel_ring_emit(ring, MI_NOOP); - intel_ring_advance(ring); + *cs++ = cmd; + *cs++ = MI_NOOP; + intel_ring_advance(req, cs); return 0; } @@ -174,35 +170,33 @@ gen4_render_ring_flush(struct drm_i915_gem_request *req, u32 mode) static int intel_emit_post_sync_nonzero_flush(struct drm_i915_gem_request *req) { - struct intel_ring *ring = req->ring; u32 scratch_addr = i915_ggtt_offset(req->engine->scratch) + 2 * CACHELINE_BYTES; - int ret; - - ret = intel_ring_begin(req, 6); - if (ret) - return ret; - - intel_ring_emit(ring, GFX_OP_PIPE_CONTROL(5)); - intel_ring_emit(ring, PIPE_CONTROL_CS_STALL | - PIPE_CONTROL_STALL_AT_SCOREBOARD); - intel_ring_emit(ring, scratch_addr | PIPE_CONTROL_GLOBAL_GTT); - intel_ring_emit(ring, 0); /* low dword */ - intel_ring_emit(ring, 0); /* high dword */ - intel_ring_emit(ring, MI_NOOP); - intel_ring_advance(ring); - - ret = intel_ring_begin(req, 6); - if (ret) - return ret; - - intel_ring_emit(ring, GFX_OP_PIPE_CONTROL(5)); - intel_ring_emit(ring, PIPE_CONTROL_QW_WRITE); - intel_ring_emit(ring, scratch_addr | PIPE_CONTROL_GLOBAL_GTT); - intel_ring_emit(ring, 0); - intel_ring_emit(ring, 0); - intel_ring_emit(ring, MI_NOOP); - intel_ring_advance(ring); + u32 *cs; + + cs = intel_ring_begin(req, 6); + if (IS_ERR(cs)) + return PTR_ERR(cs); + + *cs++ = GFX_OP_PIPE_CONTROL(5); + *cs++ = PIPE_CONTROL_CS_STALL | PIPE_CONTROL_STALL_AT_SCOREBOARD; + *cs++ = scratch_addr | PIPE_CONTROL_GLOBAL_GTT; + *cs++ = 0; /* low dword */ + *cs++ = 0; /* high dword */ + *cs++ = MI_NOOP; + intel_ring_advance(req, cs); + + cs = intel_ring_begin(req, 6); + if (IS_ERR(cs)) + return PTR_ERR(cs); + + *cs++ = GFX_OP_PIPE_CONTROL(5); + *cs++ = PIPE_CONTROL_QW_WRITE; + *cs++ = scratch_addr | PIPE_CONTROL_GLOBAL_GTT; + *cs++ = 0; + *cs++ = 0; + *cs++ = MI_NOOP; + intel_ring_advance(req, cs); return 0; } @@ -210,10 +204,9 @@ intel_emit_post_sync_nonzero_flush(struct drm_i915_gem_request *req) static int gen6_render_ring_flush(struct drm_i915_gem_request *req, u32 mode) { - struct intel_ring *ring = req->ring; u32 scratch_addr = i915_ggtt_offset(req->engine->scratch) + 2 * CACHELINE_BYTES; - u32 flags = 0; + u32 *cs, flags = 0; int ret; /* Force SNB workarounds for PIPE_CONTROL flushes */ @@ -247,15 +240,15 @@ gen6_render_ring_flush(struct drm_i915_gem_request *req, u32 mode) flags |= PIPE_CONTROL_QW_WRITE | PIPE_CONTROL_CS_STALL; } - ret = intel_ring_begin(req, 4); - if (ret) - return ret; + cs = intel_ring_begin(req, 4); + if (IS_ERR(cs)) + return PTR_ERR(cs); - intel_ring_emit(ring, GFX_OP_PIPE_CONTROL(4)); - intel_ring_emit(ring, flags); - intel_ring_emit(ring, scratch_addr | PIPE_CONTROL_GLOBAL_GTT); - intel_ring_emit(ring, 0); - intel_ring_advance(ring); + *cs++ = GFX_OP_PIPE_CONTROL(4); + *cs++ = flags; + *cs++ = scratch_addr | PIPE_CONTROL_GLOBAL_GTT; + *cs++ = 0; + intel_ring_advance(req, cs); return 0; } @@ -263,20 +256,17 @@ gen6_render_ring_flush(struct drm_i915_gem_request *req, u32 mode) static int gen7_render_ring_cs_stall_wa(struct drm_i915_gem_request *req) { - struct intel_ring *ring = req->ring; - int ret; + u32 *cs; - ret = intel_ring_begin(req, 4); - if (ret) - return ret; + cs = intel_ring_begin(req, 4); + if (IS_ERR(cs)) + return PTR_ERR(cs); - intel_ring_emit(ring, GFX_OP_PIPE_CONTROL(4)); - intel_ring_emit(ring, - PIPE_CONTROL_CS_STALL | - PIPE_CONTROL_STALL_AT_SCOREBOARD); - intel_ring_emit(ring, 0); - intel_ring_emit(ring, 0); - intel_ring_advance(ring); + *cs++ = GFX_OP_PIPE_CONTROL(4); + *cs++ = PIPE_CONTROL_CS_STALL | PIPE_CONTROL_STALL_AT_SCOREBOARD; + *cs++ = 0; + *cs++ = 0; + intel_ring_advance(req, cs); return 0; } @@ -284,11 +274,9 @@ gen7_render_ring_cs_stall_wa(struct drm_i915_gem_request *req) static int gen7_render_ring_flush(struct drm_i915_gem_request *req, u32 mode) { - struct intel_ring *ring = req->ring; u32 scratch_addr = i915_ggtt_offset(req->engine->scratch) + 2 * CACHELINE_BYTES; - u32 flags = 0; - int ret; + u32 *cs, flags = 0; /* * Ensure that any following seqno writes only happen when the render @@ -332,37 +320,15 @@ gen7_render_ring_flush(struct drm_i915_gem_request *req, u32 mode) gen7_render_ring_cs_stall_wa(req); } - ret = intel_ring_begin(req, 4); - if (ret) - return ret; + cs = intel_ring_begin(req, 4); + if (IS_ERR(cs)) + return PTR_ERR(cs); - intel_ring_emit(ring, GFX_OP_PIPE_CONTROL(4)); - intel_ring_emit(ring, flags); - intel_ring_emit(ring, scratch_addr); - intel_ring_emit(ring, 0); - intel_ring_advance(ring); - - return 0; -} - -static int -gen8_emit_pipe_control(struct drm_i915_gem_request *req, - u32 flags, u32 scratch_addr) -{ - struct intel_ring *ring = req->ring; - int ret; - - ret = intel_ring_begin(req, 6); - if (ret) - return ret; - - intel_ring_emit(ring, GFX_OP_PIPE_CONTROL(6)); - intel_ring_emit(ring, flags); - intel_ring_emit(ring, scratch_addr); - intel_ring_emit(ring, 0); - intel_ring_emit(ring, 0); - intel_ring_emit(ring, 0); - intel_ring_advance(ring); + *cs++ = GFX_OP_PIPE_CONTROL(4); + *cs++ = flags; + *cs++ = scratch_addr; + *cs++ = 0; + intel_ring_advance(req, cs); return 0; } @@ -370,12 +336,14 @@ gen8_emit_pipe_control(struct drm_i915_gem_request *req, static int gen8_render_ring_flush(struct drm_i915_gem_request *req, u32 mode) { - u32 scratch_addr = - i915_ggtt_offset(req->engine->scratch) + 2 * CACHELINE_BYTES; - u32 flags = 0; - int ret; + u32 flags; + u32 *cs; - flags |= PIPE_CONTROL_CS_STALL; + cs = intel_ring_begin(req, mode & EMIT_INVALIDATE ? 12 : 6); + if (IS_ERR(cs)) + return PTR_ERR(cs); + + flags = PIPE_CONTROL_CS_STALL; if (mode & EMIT_FLUSH) { flags |= PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH; @@ -394,15 +362,19 @@ gen8_render_ring_flush(struct drm_i915_gem_request *req, u32 mode) flags |= PIPE_CONTROL_GLOBAL_GTT_IVB; /* WaCsStallBeforeStateCacheInvalidate:bdw,chv */ - ret = gen8_emit_pipe_control(req, - PIPE_CONTROL_CS_STALL | - PIPE_CONTROL_STALL_AT_SCOREBOARD, - 0); - if (ret) - return ret; + cs = gen8_emit_pipe_control(cs, + PIPE_CONTROL_CS_STALL | + PIPE_CONTROL_STALL_AT_SCOREBOARD, + 0); } - return gen8_emit_pipe_control(req, flags, scratch_addr); + cs = gen8_emit_pipe_control(cs, flags, + i915_ggtt_offset(req->engine->scratch) + + 2 * CACHELINE_BYTES); + + intel_ring_advance(req, cs); + + return 0; } static void ring_setup_phys_status_page(struct intel_engine_cs *engine) @@ -657,41 +629,6 @@ static void reset_ring_common(struct intel_engine_cs *engine, } } -static int intel_ring_workarounds_emit(struct drm_i915_gem_request *req) -{ - struct intel_ring *ring = req->ring; - struct i915_workarounds *w = &req->i915->workarounds; - int ret, i; - - if (w->count == 0) - return 0; - - ret = req->engine->emit_flush(req, EMIT_BARRIER); - if (ret) - return ret; - - ret = intel_ring_begin(req, (w->count * 2 + 2)); - if (ret) - return ret; - - intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(w->count)); - for (i = 0; i < w->count; i++) { - intel_ring_emit_reg(ring, w->reg[i].addr); - intel_ring_emit(ring, w->reg[i].value); - } - intel_ring_emit(ring, MI_NOOP); - - intel_ring_advance(ring); - - ret = req->engine->emit_flush(req, EMIT_BARRIER); - if (ret) - return ret; - - DRM_DEBUG_DRIVER("Number of Workarounds emitted: %d\n", w->count); - - return 0; -} - static int intel_rcs_ctx_init(struct drm_i915_gem_request *req) { int ret; @@ -707,498 +644,6 @@ static int intel_rcs_ctx_init(struct drm_i915_gem_request *req) return 0; } -static int wa_add(struct drm_i915_private *dev_priv, - i915_reg_t addr, - const u32 mask, const u32 val) -{ - const u32 idx = dev_priv->workarounds.count; - - if (WARN_ON(idx >= I915_MAX_WA_REGS)) - return -ENOSPC; - - dev_priv->workarounds.reg[idx].addr = addr; - dev_priv->workarounds.reg[idx].value = val; - dev_priv->workarounds.reg[idx].mask = mask; - - dev_priv->workarounds.count++; - - return 0; -} - -#define WA_REG(addr, mask, val) do { \ - const int r = wa_add(dev_priv, (addr), (mask), (val)); \ - if (r) \ - return r; \ - } while (0) - -#define WA_SET_BIT_MASKED(addr, mask) \ - WA_REG(addr, (mask), _MASKED_BIT_ENABLE(mask)) - -#define WA_CLR_BIT_MASKED(addr, mask) \ - WA_REG(addr, (mask), _MASKED_BIT_DISABLE(mask)) - -#define WA_SET_FIELD_MASKED(addr, mask, value) \ - WA_REG(addr, mask, _MASKED_FIELD(mask, value)) - -#define WA_SET_BIT(addr, mask) WA_REG(addr, mask, I915_READ(addr) | (mask)) -#define WA_CLR_BIT(addr, mask) WA_REG(addr, mask, I915_READ(addr) & ~(mask)) - -#define WA_WRITE(addr, val) WA_REG(addr, 0xffffffff, val) - -static int wa_ring_whitelist_reg(struct intel_engine_cs *engine, - i915_reg_t reg) -{ - struct drm_i915_private *dev_priv = engine->i915; - struct i915_workarounds *wa = &dev_priv->workarounds; - const uint32_t index = wa->hw_whitelist_count[engine->id]; - - if (WARN_ON(index >= RING_MAX_NONPRIV_SLOTS)) - return -EINVAL; - - WA_WRITE(RING_FORCE_TO_NONPRIV(engine->mmio_base, index), - i915_mmio_reg_offset(reg)); - wa->hw_whitelist_count[engine->id]++; - - return 0; -} - -static int gen8_init_workarounds(struct intel_engine_cs *engine) -{ - struct drm_i915_private *dev_priv = engine->i915; - - WA_SET_BIT_MASKED(INSTPM, INSTPM_FORCE_ORDERING); - - /* WaDisableAsyncFlipPerfMode:bdw,chv */ - WA_SET_BIT_MASKED(MI_MODE, ASYNC_FLIP_PERF_DISABLE); - - /* WaDisablePartialInstShootdown:bdw,chv */ - WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN, - PARTIAL_INSTRUCTION_SHOOTDOWN_DISABLE); - - /* Use Force Non-Coherent whenever executing a 3D context. This is a - * workaround for for a possible hang in the unlikely event a TLB - * invalidation occurs during a PSD flush. - */ - /* WaForceEnableNonCoherent:bdw,chv */ - /* WaHdcDisableFetchWhenMasked:bdw,chv */ - WA_SET_BIT_MASKED(HDC_CHICKEN0, - HDC_DONOT_FETCH_MEM_WHEN_MASKED | - HDC_FORCE_NON_COHERENT); - - /* From the Haswell PRM, Command Reference: Registers, CACHE_MODE_0: - * "The Hierarchical Z RAW Stall Optimization allows non-overlapping - * polygons in the same 8x4 pixel/sample area to be processed without - * stalling waiting for the earlier ones to write to Hierarchical Z - * buffer." - * - * This optimization is off by default for BDW and CHV; turn it on. - */ - WA_CLR_BIT_MASKED(CACHE_MODE_0_GEN7, HIZ_RAW_STALL_OPT_DISABLE); - - /* Wa4x4STCOptimizationDisable:bdw,chv */ - WA_SET_BIT_MASKED(CACHE_MODE_1, GEN8_4x4_STC_OPTIMIZATION_DISABLE); - - /* - * BSpec recommends 8x4 when MSAA is used, - * however in practice 16x4 seems fastest. - * - * Note that PS/WM thread counts depend on the WIZ hashing - * disable bit, which we don't touch here, but it's good - * to keep in mind (see 3DSTATE_PS and 3DSTATE_WM). - */ - WA_SET_FIELD_MASKED(GEN7_GT_MODE, - GEN6_WIZ_HASHING_MASK, - GEN6_WIZ_HASHING_16x4); - - return 0; -} - -static int bdw_init_workarounds(struct intel_engine_cs *engine) -{ - struct drm_i915_private *dev_priv = engine->i915; - int ret; - - ret = gen8_init_workarounds(engine); - if (ret) - return ret; - - /* WaDisableThreadStallDopClockGating:bdw (pre-production) */ - WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN, STALL_DOP_GATING_DISABLE); - - /* WaDisableDopClockGating:bdw */ - WA_SET_BIT_MASKED(GEN7_ROW_CHICKEN2, - DOP_CLOCK_GATING_DISABLE); - - WA_SET_BIT_MASKED(HALF_SLICE_CHICKEN3, - GEN8_SAMPLER_POWER_BYPASS_DIS); - - WA_SET_BIT_MASKED(HDC_CHICKEN0, - /* WaForceContextSaveRestoreNonCoherent:bdw */ - HDC_FORCE_CONTEXT_SAVE_RESTORE_NON_COHERENT | - /* WaDisableFenceDestinationToSLM:bdw (pre-prod) */ - (IS_BDW_GT3(dev_priv) ? HDC_FENCE_DEST_SLM_DISABLE : 0)); - - return 0; -} - -static int chv_init_workarounds(struct intel_engine_cs *engine) -{ - struct drm_i915_private *dev_priv = engine->i915; - int ret; - - ret = gen8_init_workarounds(engine); - if (ret) - return ret; - - /* WaDisableThreadStallDopClockGating:chv */ - WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN, STALL_DOP_GATING_DISABLE); - - /* Improve HiZ throughput on CHV. */ - WA_SET_BIT_MASKED(HIZ_CHICKEN, CHV_HZ_8X8_MODE_IN_1X); - - return 0; -} - -static int gen9_init_workarounds(struct intel_engine_cs *engine) -{ - struct drm_i915_private *dev_priv = engine->i915; - int ret; - - /* WaConextSwitchWithConcurrentTLBInvalidate:skl,bxt,kbl */ - I915_WRITE(GEN9_CSFE_CHICKEN1_RCS, _MASKED_BIT_ENABLE(GEN9_PREEMPT_GPGPU_SYNC_SWITCH_DISABLE)); - - /* WaEnableLbsSlaRetryTimerDecrement:skl,bxt,kbl */ - I915_WRITE(BDW_SCRATCH1, I915_READ(BDW_SCRATCH1) | - GEN9_LBS_SLA_RETRY_TIMER_DECREMENT_ENABLE); - - /* WaDisableKillLogic:bxt,skl,kbl */ - I915_WRITE(GAM_ECOCHK, I915_READ(GAM_ECOCHK) | - ECOCHK_DIS_TLB); - - /* WaClearFlowControlGpgpuContextSave:skl,bxt,kbl */ - /* WaDisablePartialInstShootdown:skl,bxt,kbl */ - WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN, - FLOW_CONTROL_ENABLE | - PARTIAL_INSTRUCTION_SHOOTDOWN_DISABLE); - - /* Syncing dependencies between camera and graphics:skl,bxt,kbl */ - WA_SET_BIT_MASKED(HALF_SLICE_CHICKEN3, - GEN9_DISABLE_OCL_OOB_SUPPRESS_LOGIC); - - /* WaDisableDgMirrorFixInHalfSliceChicken5:bxt */ - if (IS_BXT_REVID(dev_priv, 0, BXT_REVID_A1)) - WA_CLR_BIT_MASKED(GEN9_HALF_SLICE_CHICKEN5, - GEN9_DG_MIRROR_FIX_ENABLE); - - /* WaSetDisablePixMaskCammingAndRhwoInCommonSliceChicken:bxt */ - if (IS_BXT_REVID(dev_priv, 0, BXT_REVID_A1)) { - WA_SET_BIT_MASKED(GEN7_COMMON_SLICE_CHICKEN1, - GEN9_RHWO_OPTIMIZATION_DISABLE); - /* - * WA also requires GEN9_SLICE_COMMON_ECO_CHICKEN0[14:14] to be set - * but we do that in per ctx batchbuffer as there is an issue - * with this register not getting restored on ctx restore - */ - } - - /* WaEnableSamplerGPGPUPreemptionSupport:skl,bxt,kbl */ - WA_SET_BIT_MASKED(GEN9_HALF_SLICE_CHICKEN7, - GEN9_ENABLE_GPGPU_PREEMPTION); - - /* Wa4x4STCOptimizationDisable:skl,bxt,kbl */ - /* WaDisablePartialResolveInVc:skl,bxt,kbl */ - WA_SET_BIT_MASKED(CACHE_MODE_1, (GEN8_4x4_STC_OPTIMIZATION_DISABLE | - GEN9_PARTIAL_RESOLVE_IN_VC_DISABLE)); - - /* WaCcsTlbPrefetchDisable:skl,bxt,kbl */ - WA_CLR_BIT_MASKED(GEN9_HALF_SLICE_CHICKEN5, - GEN9_CCS_TLB_PREFETCH_ENABLE); - - /* WaDisableMaskBasedCammingInRCC:bxt */ - if (IS_BXT_REVID(dev_priv, 0, BXT_REVID_A1)) - WA_SET_BIT_MASKED(SLICE_ECO_CHICKEN0, - PIXEL_MASK_CAMMING_DISABLE); - - /* WaForceContextSaveRestoreNonCoherent:skl,bxt,kbl */ - WA_SET_BIT_MASKED(HDC_CHICKEN0, - HDC_FORCE_CONTEXT_SAVE_RESTORE_NON_COHERENT | - HDC_FORCE_CSR_NON_COHERENT_OVR_DISABLE); - - /* WaForceEnableNonCoherent and WaDisableHDCInvalidation are - * both tied to WaForceContextSaveRestoreNonCoherent - * in some hsds for skl. We keep the tie for all gen9. The - * documentation is a bit hazy and so we want to get common behaviour, - * even though there is no clear evidence we would need both on kbl/bxt. - * This area has been source of system hangs so we play it safe - * and mimic the skl regardless of what bspec says. - * - * Use Force Non-Coherent whenever executing a 3D context. This - * is a workaround for a possible hang in the unlikely event - * a TLB invalidation occurs during a PSD flush. - */ - - /* WaForceEnableNonCoherent:skl,bxt,kbl */ - WA_SET_BIT_MASKED(HDC_CHICKEN0, - HDC_FORCE_NON_COHERENT); - - /* WaDisableHDCInvalidation:skl,bxt,kbl */ - I915_WRITE(GAM_ECOCHK, I915_READ(GAM_ECOCHK) | - BDW_DISABLE_HDC_INVALIDATION); - - /* WaDisableSamplerPowerBypassForSOPingPong:skl,bxt,kbl */ - if (IS_SKYLAKE(dev_priv) || - IS_KABYLAKE(dev_priv) || - IS_BXT_REVID(dev_priv, 0, BXT_REVID_B0)) - WA_SET_BIT_MASKED(HALF_SLICE_CHICKEN3, - GEN8_SAMPLER_POWER_BYPASS_DIS); - - /* WaDisableSTUnitPowerOptimization:skl,bxt,kbl */ - WA_SET_BIT_MASKED(HALF_SLICE_CHICKEN2, GEN8_ST_PO_DISABLE); - - /* WaOCLCoherentLineFlush:skl,bxt,kbl */ - I915_WRITE(GEN8_L3SQCREG4, (I915_READ(GEN8_L3SQCREG4) | - GEN8_LQSC_FLUSH_COHERENT_LINES)); - - /* WaVFEStateAfterPipeControlwithMediaStateClear:skl,bxt */ - ret = wa_ring_whitelist_reg(engine, GEN9_CTX_PREEMPT_REG); - if (ret) - return ret; - - /* WaEnablePreemptionGranularityControlByUMD:skl,bxt,kbl */ - ret= wa_ring_whitelist_reg(engine, GEN8_CS_CHICKEN1); - if (ret) - return ret; - - /* WaAllowUMDToModifyHDCChicken1:skl,bxt,kbl */ - ret = wa_ring_whitelist_reg(engine, GEN8_HDC_CHICKEN1); - if (ret) - return ret; - - return 0; -} - -static int skl_tune_iz_hashing(struct intel_engine_cs *engine) -{ - struct drm_i915_private *dev_priv = engine->i915; - u8 vals[3] = { 0, 0, 0 }; - unsigned int i; - - for (i = 0; i < 3; i++) { - u8 ss; - - /* - * Only consider slices where one, and only one, subslice has 7 - * EUs - */ - if (!is_power_of_2(INTEL_INFO(dev_priv)->sseu.subslice_7eu[i])) - continue; - - /* - * subslice_7eu[i] != 0 (because of the check above) and - * ss_max == 4 (maximum number of subslices possible per slice) - * - * -> 0 <= ss <= 3; - */ - ss = ffs(INTEL_INFO(dev_priv)->sseu.subslice_7eu[i]) - 1; - vals[i] = 3 - ss; - } - - if (vals[0] == 0 && vals[1] == 0 && vals[2] == 0) - return 0; - - /* Tune IZ hashing. See intel_device_info_runtime_init() */ - WA_SET_FIELD_MASKED(GEN7_GT_MODE, - GEN9_IZ_HASHING_MASK(2) | - GEN9_IZ_HASHING_MASK(1) | - GEN9_IZ_HASHING_MASK(0), - GEN9_IZ_HASHING(2, vals[2]) | - GEN9_IZ_HASHING(1, vals[1]) | - GEN9_IZ_HASHING(0, vals[0])); - - return 0; -} - -static int skl_init_workarounds(struct intel_engine_cs *engine) -{ - struct drm_i915_private *dev_priv = engine->i915; - int ret; - - ret = gen9_init_workarounds(engine); - if (ret) - return ret; - - /* - * Actual WA is to disable percontext preemption granularity control - * until D0 which is the default case so this is equivalent to - * !WaDisablePerCtxtPreemptionGranularityControl:skl - */ - I915_WRITE(GEN7_FF_SLICE_CS_CHICKEN1, - _MASKED_BIT_ENABLE(GEN9_FFSC_PERCTX_PREEMPT_CTRL)); - - /* WaEnableGapsTsvCreditFix:skl */ - I915_WRITE(GEN8_GARBCNTL, (I915_READ(GEN8_GARBCNTL) | - GEN9_GAPS_TSV_CREDIT_DISABLE)); - - /* WaDisableGafsUnitClkGating:skl */ - WA_SET_BIT(GEN7_UCGCTL4, GEN8_EU_GAUNIT_CLOCK_GATE_DISABLE); - - /* WaInPlaceDecompressionHang:skl */ - if (IS_SKL_REVID(dev_priv, SKL_REVID_H0, REVID_FOREVER)) - WA_SET_BIT(GEN9_GAMT_ECO_REG_RW_IA, - GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS); - - /* WaDisableLSQCROPERFforOCL:skl */ - ret = wa_ring_whitelist_reg(engine, GEN8_L3SQCREG4); - if (ret) - return ret; - - return skl_tune_iz_hashing(engine); -} - -static int bxt_init_workarounds(struct intel_engine_cs *engine) -{ - struct drm_i915_private *dev_priv = engine->i915; - int ret; - - ret = gen9_init_workarounds(engine); - if (ret) - return ret; - - /* WaStoreMultiplePTEenable:bxt */ - /* This is a requirement according to Hardware specification */ - if (IS_BXT_REVID(dev_priv, 0, BXT_REVID_A1)) - I915_WRITE(TILECTL, I915_READ(TILECTL) | TILECTL_TLBPF); - - /* WaSetClckGatingDisableMedia:bxt */ - if (IS_BXT_REVID(dev_priv, 0, BXT_REVID_A1)) { - I915_WRITE(GEN7_MISCCPCTL, (I915_READ(GEN7_MISCCPCTL) & - ~GEN8_DOP_CLOCK_GATE_MEDIA_ENABLE)); - } - - /* WaDisableThreadStallDopClockGating:bxt */ - WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN, - STALL_DOP_GATING_DISABLE); - - /* WaDisablePooledEuLoadBalancingFix:bxt */ - if (IS_BXT_REVID(dev_priv, BXT_REVID_B0, REVID_FOREVER)) { - WA_SET_BIT_MASKED(FF_SLICE_CS_CHICKEN2, - GEN9_POOLED_EU_LOAD_BALANCING_FIX_DISABLE); - } - - /* WaDisableSbeCacheDispatchPortSharing:bxt */ - if (IS_BXT_REVID(dev_priv, 0, BXT_REVID_B0)) { - WA_SET_BIT_MASKED( - GEN7_HALF_SLICE_CHICKEN1, - GEN7_SBE_SS_CACHE_DISPATCH_PORT_SHARING_DISABLE); - } - - /* WaDisableObjectLevelPreemptionForTrifanOrPolygon:bxt */ - /* WaDisableObjectLevelPreemptionForInstancedDraw:bxt */ - /* WaDisableObjectLevelPreemtionForInstanceId:bxt */ - /* WaDisableLSQCROPERFforOCL:bxt */ - if (IS_BXT_REVID(dev_priv, 0, BXT_REVID_A1)) { - ret = wa_ring_whitelist_reg(engine, GEN9_CS_DEBUG_MODE1); - if (ret) - return ret; - - ret = wa_ring_whitelist_reg(engine, GEN8_L3SQCREG4); - if (ret) - return ret; - } - - /* WaProgramL3SqcReg1DefaultForPerf:bxt */ - if (IS_BXT_REVID(dev_priv, BXT_REVID_B0, REVID_FOREVER)) - I915_WRITE(GEN8_L3SQCREG1, L3_GENERAL_PRIO_CREDITS(62) | - L3_HIGH_PRIO_CREDITS(2)); - - /* WaToEnableHwFixForPushConstHWBug:bxt */ - if (IS_BXT_REVID(dev_priv, BXT_REVID_C0, REVID_FOREVER)) - WA_SET_BIT_MASKED(COMMON_SLICE_CHICKEN2, - GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION); - - /* WaInPlaceDecompressionHang:bxt */ - if (IS_BXT_REVID(dev_priv, BXT_REVID_C0, REVID_FOREVER)) - WA_SET_BIT(GEN9_GAMT_ECO_REG_RW_IA, - GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS); - - return 0; -} - -static int kbl_init_workarounds(struct intel_engine_cs *engine) -{ - struct drm_i915_private *dev_priv = engine->i915; - int ret; - - ret = gen9_init_workarounds(engine); - if (ret) - return ret; - - /* WaEnableGapsTsvCreditFix:kbl */ - I915_WRITE(GEN8_GARBCNTL, (I915_READ(GEN8_GARBCNTL) | - GEN9_GAPS_TSV_CREDIT_DISABLE)); - - /* WaDisableDynamicCreditSharing:kbl */ - if (IS_KBL_REVID(dev_priv, 0, KBL_REVID_B0)) - WA_SET_BIT(GAMT_CHKN_BIT_REG, - GAMT_CHKN_DISABLE_DYNAMIC_CREDIT_SHARING); - - /* WaDisableFenceDestinationToSLM:kbl (pre-prod) */ - if (IS_KBL_REVID(dev_priv, KBL_REVID_A0, KBL_REVID_A0)) - WA_SET_BIT_MASKED(HDC_CHICKEN0, - HDC_FENCE_DEST_SLM_DISABLE); - - /* WaToEnableHwFixForPushConstHWBug:kbl */ - if (IS_KBL_REVID(dev_priv, KBL_REVID_C0, REVID_FOREVER)) - WA_SET_BIT_MASKED(COMMON_SLICE_CHICKEN2, - GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION); - - /* WaDisableGafsUnitClkGating:kbl */ - WA_SET_BIT(GEN7_UCGCTL4, GEN8_EU_GAUNIT_CLOCK_GATE_DISABLE); - - /* WaDisableSbeCacheDispatchPortSharing:kbl */ - WA_SET_BIT_MASKED( - GEN7_HALF_SLICE_CHICKEN1, - GEN7_SBE_SS_CACHE_DISPATCH_PORT_SHARING_DISABLE); - - /* WaInPlaceDecompressionHang:kbl */ - WA_SET_BIT(GEN9_GAMT_ECO_REG_RW_IA, - GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS); - - /* WaDisableLSQCROPERFforOCL:kbl */ - ret = wa_ring_whitelist_reg(engine, GEN8_L3SQCREG4); - if (ret) - return ret; - - return 0; -} - -int init_workarounds_ring(struct intel_engine_cs *engine) -{ - struct drm_i915_private *dev_priv = engine->i915; - - WARN_ON(engine->id != RCS); - - dev_priv->workarounds.count = 0; - dev_priv->workarounds.hw_whitelist_count[RCS] = 0; - - if (IS_BROADWELL(dev_priv)) - return bdw_init_workarounds(engine); - - if (IS_CHERRYVIEW(dev_priv)) - return chv_init_workarounds(engine); - - if (IS_SKYLAKE(dev_priv)) - return skl_init_workarounds(engine); - - if (IS_BROXTON(dev_priv)) - return bxt_init_workarounds(engine); - - if (IS_KABYLAKE(dev_priv)) - return kbl_init_workarounds(engine); - - return 0; -} - static int init_render_ring(struct intel_engine_cs *engine) { struct drm_i915_private *dev_priv = engine->i915; @@ -1257,7 +702,7 @@ static void render_ring_cleanup(struct intel_engine_cs *engine) i915_vma_unpin_and_release(&dev_priv->semaphore); } -static u32 *gen8_rcs_signal(struct drm_i915_gem_request *req, u32 *out) +static u32 *gen8_rcs_signal(struct drm_i915_gem_request *req, u32 *cs) { struct drm_i915_private *dev_priv = req->i915; struct intel_engine_cs *waiter; @@ -1268,23 +713,22 @@ static u32 *gen8_rcs_signal(struct drm_i915_gem_request *req, u32 *out) if (gtt_offset == MI_SEMAPHORE_SYNC_INVALID) continue; - *out++ = GFX_OP_PIPE_CONTROL(6); - *out++ = (PIPE_CONTROL_GLOBAL_GTT_IVB | - PIPE_CONTROL_QW_WRITE | - PIPE_CONTROL_CS_STALL); - *out++ = lower_32_bits(gtt_offset); - *out++ = upper_32_bits(gtt_offset); - *out++ = req->global_seqno; - *out++ = 0; - *out++ = (MI_SEMAPHORE_SIGNAL | - MI_SEMAPHORE_TARGET(waiter->hw_id)); - *out++ = 0; + *cs++ = GFX_OP_PIPE_CONTROL(6); + *cs++ = PIPE_CONTROL_GLOBAL_GTT_IVB | PIPE_CONTROL_QW_WRITE | + PIPE_CONTROL_CS_STALL; + *cs++ = lower_32_bits(gtt_offset); + *cs++ = upper_32_bits(gtt_offset); + *cs++ = req->global_seqno; + *cs++ = 0; + *cs++ = MI_SEMAPHORE_SIGNAL | + MI_SEMAPHORE_TARGET(waiter->hw_id); + *cs++ = 0; } - return out; + return cs; } -static u32 *gen8_xcs_signal(struct drm_i915_gem_request *req, u32 *out) +static u32 *gen8_xcs_signal(struct drm_i915_gem_request *req, u32 *cs) { struct drm_i915_private *dev_priv = req->i915; struct intel_engine_cs *waiter; @@ -1295,19 +739,19 @@ static u32 *gen8_xcs_signal(struct drm_i915_gem_request *req, u32 *out) if (gtt_offset == MI_SEMAPHORE_SYNC_INVALID) continue; - *out++ = (MI_FLUSH_DW + 1) | MI_FLUSH_DW_OP_STOREDW; - *out++ = lower_32_bits(gtt_offset) | MI_FLUSH_DW_USE_GTT; - *out++ = upper_32_bits(gtt_offset); - *out++ = req->global_seqno; - *out++ = (MI_SEMAPHORE_SIGNAL | - MI_SEMAPHORE_TARGET(waiter->hw_id)); - *out++ = 0; + *cs++ = (MI_FLUSH_DW + 1) | MI_FLUSH_DW_OP_STOREDW; + *cs++ = lower_32_bits(gtt_offset) | MI_FLUSH_DW_USE_GTT; + *cs++ = upper_32_bits(gtt_offset); + *cs++ = req->global_seqno; + *cs++ = MI_SEMAPHORE_SIGNAL | + MI_SEMAPHORE_TARGET(waiter->hw_id); + *cs++ = 0; } - return out; + return cs; } -static u32 *gen6_signal(struct drm_i915_gem_request *req, u32 *out) +static u32 *gen6_signal(struct drm_i915_gem_request *req, u32 *cs) { struct drm_i915_private *dev_priv = req->i915; struct intel_engine_cs *engine; @@ -1322,16 +766,16 @@ static u32 *gen6_signal(struct drm_i915_gem_request *req, u32 *out) mbox_reg = req->engine->semaphore.mbox.signal[engine->hw_id]; if (i915_mmio_reg_valid(mbox_reg)) { - *out++ = MI_LOAD_REGISTER_IMM(1); - *out++ = i915_mmio_reg_offset(mbox_reg); - *out++ = req->global_seqno; + *cs++ = MI_LOAD_REGISTER_IMM(1); + *cs++ = i915_mmio_reg_offset(mbox_reg); + *cs++ = req->global_seqno; num_rings++; } } if (num_rings & 1) - *out++ = MI_NOOP; + *cs++ = MI_NOOP; - return out; + return cs; } static void i9xx_submit_request(struct drm_i915_gem_request *request) @@ -1340,18 +784,19 @@ static void i9xx_submit_request(struct drm_i915_gem_request *request) i915_gem_request_submit(request); + GEM_BUG_ON(!IS_ALIGNED(request->tail, 8)); I915_WRITE_TAIL(request->engine, request->tail); } -static void i9xx_emit_breadcrumb(struct drm_i915_gem_request *req, - u32 *out) +static void i9xx_emit_breadcrumb(struct drm_i915_gem_request *req, u32 *cs) { - *out++ = MI_STORE_DWORD_INDEX; - *out++ = I915_GEM_HWS_INDEX << MI_STORE_DWORD_INDEX_SHIFT; - *out++ = req->global_seqno; - *out++ = MI_USER_INTERRUPT; + *cs++ = MI_STORE_DWORD_INDEX; + *cs++ = I915_GEM_HWS_INDEX << MI_STORE_DWORD_INDEX_SHIFT; + *cs++ = req->global_seqno; + *cs++ = MI_USER_INTERRUPT; - req->tail = intel_ring_offset(req->ring, out); + req->tail = intel_ring_offset(req, cs); + GEM_BUG_ON(!IS_ALIGNED(req->tail, 8)); } static const int i9xx_emit_breadcrumb_sz = 4; @@ -1364,34 +809,33 @@ static const int i9xx_emit_breadcrumb_sz = 4; * Update the mailbox registers in the *other* rings with the current seqno. * This acts like a signal in the canonical semaphore. */ -static void gen6_sema_emit_breadcrumb(struct drm_i915_gem_request *req, - u32 *out) +static void gen6_sema_emit_breadcrumb(struct drm_i915_gem_request *req, u32 *cs) { return i9xx_emit_breadcrumb(req, - req->engine->semaphore.signal(req, out)); + req->engine->semaphore.signal(req, cs)); } static void gen8_render_emit_breadcrumb(struct drm_i915_gem_request *req, - u32 *out) + u32 *cs) { struct intel_engine_cs *engine = req->engine; if (engine->semaphore.signal) - out = engine->semaphore.signal(req, out); - - *out++ = GFX_OP_PIPE_CONTROL(6); - *out++ = (PIPE_CONTROL_GLOBAL_GTT_IVB | - PIPE_CONTROL_CS_STALL | - PIPE_CONTROL_QW_WRITE); - *out++ = intel_hws_seqno_address(engine); - *out++ = 0; - *out++ = req->global_seqno; + cs = engine->semaphore.signal(req, cs); + + *cs++ = GFX_OP_PIPE_CONTROL(6); + *cs++ = PIPE_CONTROL_GLOBAL_GTT_IVB | PIPE_CONTROL_CS_STALL | + PIPE_CONTROL_QW_WRITE; + *cs++ = intel_hws_seqno_address(engine); + *cs++ = 0; + *cs++ = req->global_seqno; /* We're thrashing one dword of HWS. */ - *out++ = 0; - *out++ = MI_USER_INTERRUPT; - *out++ = MI_NOOP; + *cs++ = 0; + *cs++ = MI_USER_INTERRUPT; + *cs++ = MI_NOOP; - req->tail = intel_ring_offset(req->ring, out); + req->tail = intel_ring_offset(req, cs); + GEM_BUG_ON(!IS_ALIGNED(req->tail, 8)); } static const int gen8_render_emit_breadcrumb_sz = 8; @@ -1408,24 +852,21 @@ static int gen8_ring_sync_to(struct drm_i915_gem_request *req, struct drm_i915_gem_request *signal) { - struct intel_ring *ring = req->ring; struct drm_i915_private *dev_priv = req->i915; u64 offset = GEN8_WAIT_OFFSET(req->engine, signal->engine->id); struct i915_hw_ppgtt *ppgtt; - int ret; + u32 *cs; - ret = intel_ring_begin(req, 4); - if (ret) - return ret; + cs = intel_ring_begin(req, 4); + if (IS_ERR(cs)) + return PTR_ERR(cs); - intel_ring_emit(ring, - MI_SEMAPHORE_WAIT | - MI_SEMAPHORE_GLOBAL_GTT | - MI_SEMAPHORE_SAD_GTE_SDD); - intel_ring_emit(ring, signal->global_seqno); - intel_ring_emit(ring, lower_32_bits(offset)); - intel_ring_emit(ring, upper_32_bits(offset)); - intel_ring_advance(ring); + *cs++ = MI_SEMAPHORE_WAIT | MI_SEMAPHORE_GLOBAL_GTT | + MI_SEMAPHORE_SAD_GTE_SDD; + *cs++ = signal->global_seqno; + *cs++ = lower_32_bits(offset); + *cs++ = upper_32_bits(offset); + intel_ring_advance(req, cs); /* When the !RCS engines idle waiting upon a semaphore, they lose their * pagetables and we must reload them before executing the batch. @@ -1442,28 +883,27 @@ static int gen6_ring_sync_to(struct drm_i915_gem_request *req, struct drm_i915_gem_request *signal) { - struct intel_ring *ring = req->ring; u32 dw1 = MI_SEMAPHORE_MBOX | MI_SEMAPHORE_COMPARE | MI_SEMAPHORE_REGISTER; u32 wait_mbox = signal->engine->semaphore.mbox.wait[req->engine->hw_id]; - int ret; + u32 *cs; WARN_ON(wait_mbox == MI_SEMAPHORE_SYNC_INVALID); - ret = intel_ring_begin(req, 4); - if (ret) - return ret; + cs = intel_ring_begin(req, 4); + if (IS_ERR(cs)) + return PTR_ERR(cs); - intel_ring_emit(ring, dw1 | wait_mbox); + *cs++ = dw1 | wait_mbox; /* Throughout all of the GEM code, seqno passed implies our current * seqno is >= the last seqno executed. However for hardware the * comparison is strictly greater than. */ - intel_ring_emit(ring, signal->global_seqno - 1); - intel_ring_emit(ring, 0); - intel_ring_emit(ring, MI_NOOP); - intel_ring_advance(ring); + *cs++ = signal->global_seqno - 1; + *cs++ = 0; + *cs++ = MI_NOOP; + intel_ring_advance(req, cs); return 0; } @@ -1564,16 +1004,15 @@ i8xx_irq_disable(struct intel_engine_cs *engine) static int bsd_ring_flush(struct drm_i915_gem_request *req, u32 mode) { - struct intel_ring *ring = req->ring; - int ret; + u32 *cs; - ret = intel_ring_begin(req, 2); - if (ret) - return ret; + cs = intel_ring_begin(req, 2); + if (IS_ERR(cs)) + return PTR_ERR(cs); - intel_ring_emit(ring, MI_FLUSH); - intel_ring_emit(ring, MI_NOOP); - intel_ring_advance(ring); + *cs++ = MI_FLUSH; + *cs++ = MI_NOOP; + intel_ring_advance(req, cs); return 0; } @@ -1639,20 +1078,16 @@ i965_emit_bb_start(struct drm_i915_gem_request *req, u64 offset, u32 length, unsigned int dispatch_flags) { - struct intel_ring *ring = req->ring; - int ret; + u32 *cs; - ret = intel_ring_begin(req, 2); - if (ret) - return ret; + cs = intel_ring_begin(req, 2); + if (IS_ERR(cs)) + return PTR_ERR(cs); - intel_ring_emit(ring, - MI_BATCH_BUFFER_START | - MI_BATCH_GTT | - (dispatch_flags & I915_DISPATCH_SECURE ? - 0 : MI_BATCH_NON_SECURE_I965)); - intel_ring_emit(ring, offset); - intel_ring_advance(ring); + *cs++ = MI_BATCH_BUFFER_START | MI_BATCH_GTT | (dispatch_flags & + I915_DISPATCH_SECURE ? 0 : MI_BATCH_NON_SECURE_I965); + *cs++ = offset; + intel_ring_advance(req, cs); return 0; } @@ -1666,59 +1101,56 @@ i830_emit_bb_start(struct drm_i915_gem_request *req, u64 offset, u32 len, unsigned int dispatch_flags) { - struct intel_ring *ring = req->ring; - u32 cs_offset = i915_ggtt_offset(req->engine->scratch); - int ret; + u32 *cs, cs_offset = i915_ggtt_offset(req->engine->scratch); - ret = intel_ring_begin(req, 6); - if (ret) - return ret; + cs = intel_ring_begin(req, 6); + if (IS_ERR(cs)) + return PTR_ERR(cs); /* Evict the invalid PTE TLBs */ - intel_ring_emit(ring, COLOR_BLT_CMD | BLT_WRITE_RGBA); - intel_ring_emit(ring, BLT_DEPTH_32 | BLT_ROP_COLOR_COPY | 4096); - intel_ring_emit(ring, I830_TLB_ENTRIES << 16 | 4); /* load each page */ - intel_ring_emit(ring, cs_offset); - intel_ring_emit(ring, 0xdeadbeef); - intel_ring_emit(ring, MI_NOOP); - intel_ring_advance(ring); + *cs++ = COLOR_BLT_CMD | BLT_WRITE_RGBA; + *cs++ = BLT_DEPTH_32 | BLT_ROP_COLOR_COPY | 4096; + *cs++ = I830_TLB_ENTRIES << 16 | 4; /* load each page */ + *cs++ = cs_offset; + *cs++ = 0xdeadbeef; + *cs++ = MI_NOOP; + intel_ring_advance(req, cs); if ((dispatch_flags & I915_DISPATCH_PINNED) == 0) { if (len > I830_BATCH_LIMIT) return -ENOSPC; - ret = intel_ring_begin(req, 6 + 2); - if (ret) - return ret; + cs = intel_ring_begin(req, 6 + 2); + if (IS_ERR(cs)) + return PTR_ERR(cs); /* Blit the batch (which has now all relocs applied) to the * stable batch scratch bo area (so that the CS never * stumbles over its tlb invalidation bug) ... */ - intel_ring_emit(ring, SRC_COPY_BLT_CMD | BLT_WRITE_RGBA); - intel_ring_emit(ring, - BLT_DEPTH_32 | BLT_ROP_SRC_COPY | 4096); - intel_ring_emit(ring, DIV_ROUND_UP(len, 4096) << 16 | 4096); - intel_ring_emit(ring, cs_offset); - intel_ring_emit(ring, 4096); - intel_ring_emit(ring, offset); - - intel_ring_emit(ring, MI_FLUSH); - intel_ring_emit(ring, MI_NOOP); - intel_ring_advance(ring); + *cs++ = SRC_COPY_BLT_CMD | BLT_WRITE_RGBA; + *cs++ = BLT_DEPTH_32 | BLT_ROP_SRC_COPY | 4096; + *cs++ = DIV_ROUND_UP(len, 4096) << 16 | 4096; + *cs++ = cs_offset; + *cs++ = 4096; + *cs++ = offset; + + *cs++ = MI_FLUSH; + *cs++ = MI_NOOP; + intel_ring_advance(req, cs); /* ... and execute it. */ offset = cs_offset; } - ret = intel_ring_begin(req, 2); - if (ret) - return ret; + cs = intel_ring_begin(req, 2); + if (IS_ERR(cs)) + return PTR_ERR(cs); - intel_ring_emit(ring, MI_BATCH_BUFFER_START | MI_BATCH_GTT); - intel_ring_emit(ring, offset | (dispatch_flags & I915_DISPATCH_SECURE ? - 0 : MI_BATCH_NON_SECURE)); - intel_ring_advance(ring); + *cs++ = MI_BATCH_BUFFER_START | MI_BATCH_GTT; + *cs++ = offset | (dispatch_flags & I915_DISPATCH_SECURE ? 0 : + MI_BATCH_NON_SECURE); + intel_ring_advance(req, cs); return 0; } @@ -1728,17 +1160,16 @@ i915_emit_bb_start(struct drm_i915_gem_request *req, u64 offset, u32 len, unsigned int dispatch_flags) { - struct intel_ring *ring = req->ring; - int ret; + u32 *cs; - ret = intel_ring_begin(req, 2); - if (ret) - return ret; + cs = intel_ring_begin(req, 2); + if (IS_ERR(cs)) + return PTR_ERR(cs); - intel_ring_emit(ring, MI_BATCH_BUFFER_START | MI_BATCH_GTT); - intel_ring_emit(ring, offset | (dispatch_flags & I915_DISPATCH_SECURE ? - 0 : MI_BATCH_NON_SECURE)); - intel_ring_advance(ring); + *cs++ = MI_BATCH_BUFFER_START | MI_BATCH_GTT; + *cs++ = offset | (dispatch_flags & I915_DISPATCH_SECURE ? 0 : + MI_BATCH_NON_SECURE); + intel_ring_advance(req, cs); return 0; } @@ -1985,7 +1416,7 @@ intel_ring_free(struct intel_ring *ring) kfree(ring); } -static int context_pin(struct i915_gem_context *ctx, unsigned int flags) +static int context_pin(struct i915_gem_context *ctx) { struct i915_vma *vma = ctx->engine[RCS].state; int ret; @@ -2000,7 +1431,7 @@ static int context_pin(struct i915_gem_context *ctx, unsigned int flags) return ret; } - return i915_vma_pin(vma, 0, ctx->ggtt_alignment, PIN_GLOBAL | flags); + return i915_vma_pin(vma, 0, ctx->ggtt_alignment, PIN_GLOBAL | PIN_HIGH); } static int intel_ring_context_pin(struct intel_engine_cs *engine, @@ -2015,13 +1446,7 @@ static int intel_ring_context_pin(struct intel_engine_cs *engine, return 0; if (ce->state) { - unsigned int flags; - - flags = 0; - if (i915_gem_context_is_kernel(ctx)) - flags = PIN_HIGH; - - ret = context_pin(ctx, flags); + ret = context_pin(ctx); if (ret) goto error; } @@ -2152,7 +1577,7 @@ void intel_legacy_submission_resume(struct drm_i915_private *dev_priv) static int ring_request_alloc(struct drm_i915_gem_request *request) { - int ret; + u32 *cs; GEM_BUG_ON(!request->ctx->engine[request->engine->id].pin_count); @@ -2165,9 +1590,9 @@ static int ring_request_alloc(struct drm_i915_gem_request *request) GEM_BUG_ON(!request->engine->buffer); request->ring = request->engine->buffer; - ret = intel_ring_begin(request, 0); - if (ret) - return ret; + cs = intel_ring_begin(request, 0); + if (IS_ERR(cs)) + return PTR_ERR(cs); request->reserved_space -= LEGACY_REQUEST_SIZE; return 0; @@ -2222,7 +1647,7 @@ static int wait_for_space(struct drm_i915_gem_request *req, int bytes) return 0; } -int intel_ring_begin(struct drm_i915_gem_request *req, int num_dwords) +u32 *intel_ring_begin(struct drm_i915_gem_request *req, int num_dwords) { struct intel_ring *ring = req->ring; int remain_actual = ring->size - ring->tail; @@ -2230,6 +1655,7 @@ int intel_ring_begin(struct drm_i915_gem_request *req, int num_dwords) int bytes = num_dwords * sizeof(u32); int total_bytes, wait_bytes; bool need_wrap = false; + u32 *cs; total_bytes = bytes + req->reserved_space; @@ -2256,7 +1682,7 @@ int intel_ring_begin(struct drm_i915_gem_request *req, int num_dwords) if (wait_bytes > ring->space) { int ret = wait_for_space(req, wait_bytes); if (unlikely(ret)) - return ret; + return ERR_PTR(ret); } if (unlikely(need_wrap)) { @@ -2269,31 +1695,34 @@ int intel_ring_begin(struct drm_i915_gem_request *req, int num_dwords) ring->space -= remain_actual; } + GEM_BUG_ON(ring->tail > ring->size - bytes); + cs = ring->vaddr + ring->tail; + ring->tail += bytes; ring->space -= bytes; GEM_BUG_ON(ring->space < 0); - return 0; + + return cs; } /* Align the ring tail to a cacheline boundary */ int intel_ring_cacheline_align(struct drm_i915_gem_request *req) { - struct intel_ring *ring = req->ring; int num_dwords = - (ring->tail & (CACHELINE_BYTES - 1)) / sizeof(uint32_t); - int ret; + (req->ring->tail & (CACHELINE_BYTES - 1)) / sizeof(uint32_t); + u32 *cs; if (num_dwords == 0) return 0; num_dwords = CACHELINE_BYTES / sizeof(uint32_t) - num_dwords; - ret = intel_ring_begin(req, num_dwords); - if (ret) - return ret; + cs = intel_ring_begin(req, num_dwords); + if (IS_ERR(cs)) + return PTR_ERR(cs); while (num_dwords--) - intel_ring_emit(ring, MI_NOOP); + *cs++ = MI_NOOP; - intel_ring_advance(ring); + intel_ring_advance(req, cs); return 0; } @@ -2337,13 +1766,11 @@ static void gen6_bsd_submit_request(struct drm_i915_gem_request *request) static int gen6_bsd_ring_flush(struct drm_i915_gem_request *req, u32 mode) { - struct intel_ring *ring = req->ring; - uint32_t cmd; - int ret; + u32 cmd, *cs; - ret = intel_ring_begin(req, 4); - if (ret) - return ret; + cs = intel_ring_begin(req, 4); + if (IS_ERR(cs)) + return PTR_ERR(cs); cmd = MI_FLUSH_DW; if (INTEL_GEN(req->i915) >= 8) @@ -2365,16 +1792,16 @@ static int gen6_bsd_ring_flush(struct drm_i915_gem_request *req, u32 mode) if (mode & EMIT_INVALIDATE) cmd |= MI_INVALIDATE_TLB | MI_INVALIDATE_BSD; - intel_ring_emit(ring, cmd); - intel_ring_emit(ring, I915_GEM_HWS_SCRATCH_ADDR | MI_FLUSH_DW_USE_GTT); + *cs++ = cmd; + *cs++ = I915_GEM_HWS_SCRATCH_ADDR | MI_FLUSH_DW_USE_GTT; if (INTEL_GEN(req->i915) >= 8) { - intel_ring_emit(ring, 0); /* upper addr */ - intel_ring_emit(ring, 0); /* value */ + *cs++ = 0; /* upper addr */ + *cs++ = 0; /* value */ } else { - intel_ring_emit(ring, 0); - intel_ring_emit(ring, MI_NOOP); + *cs++ = 0; + *cs++ = MI_NOOP; } - intel_ring_advance(ring); + intel_ring_advance(req, cs); return 0; } @@ -2383,23 +1810,21 @@ gen8_emit_bb_start(struct drm_i915_gem_request *req, u64 offset, u32 len, unsigned int dispatch_flags) { - struct intel_ring *ring = req->ring; bool ppgtt = USES_PPGTT(req->i915) && !(dispatch_flags & I915_DISPATCH_SECURE); - int ret; + u32 *cs; - ret = intel_ring_begin(req, 4); - if (ret) - return ret; + cs = intel_ring_begin(req, 4); + if (IS_ERR(cs)) + return PTR_ERR(cs); /* FIXME(BDW): Address space and security selectors. */ - intel_ring_emit(ring, MI_BATCH_BUFFER_START_GEN8 | (ppgtt<<8) | - (dispatch_flags & I915_DISPATCH_RS ? - MI_BATCH_RESOURCE_STREAMER : 0)); - intel_ring_emit(ring, lower_32_bits(offset)); - intel_ring_emit(ring, upper_32_bits(offset)); - intel_ring_emit(ring, MI_NOOP); - intel_ring_advance(ring); + *cs++ = MI_BATCH_BUFFER_START_GEN8 | (ppgtt << 8) | (dispatch_flags & + I915_DISPATCH_RS ? MI_BATCH_RESOURCE_STREAMER : 0); + *cs++ = lower_32_bits(offset); + *cs++ = upper_32_bits(offset); + *cs++ = MI_NOOP; + intel_ring_advance(req, cs); return 0; } @@ -2409,22 +1834,19 @@ hsw_emit_bb_start(struct drm_i915_gem_request *req, u64 offset, u32 len, unsigned int dispatch_flags) { - struct intel_ring *ring = req->ring; - int ret; + u32 *cs; - ret = intel_ring_begin(req, 2); - if (ret) - return ret; + cs = intel_ring_begin(req, 2); + if (IS_ERR(cs)) + return PTR_ERR(cs); - intel_ring_emit(ring, - MI_BATCH_BUFFER_START | - (dispatch_flags & I915_DISPATCH_SECURE ? - 0 : MI_BATCH_PPGTT_HSW | MI_BATCH_NON_SECURE_HSW) | - (dispatch_flags & I915_DISPATCH_RS ? - MI_BATCH_RESOURCE_STREAMER : 0)); + *cs++ = MI_BATCH_BUFFER_START | (dispatch_flags & I915_DISPATCH_SECURE ? + 0 : MI_BATCH_PPGTT_HSW | MI_BATCH_NON_SECURE_HSW) | + (dispatch_flags & I915_DISPATCH_RS ? + MI_BATCH_RESOURCE_STREAMER : 0); /* bit0-7 is the length on GEN6+ */ - intel_ring_emit(ring, offset); - intel_ring_advance(ring); + *cs++ = offset; + intel_ring_advance(req, cs); return 0; } @@ -2434,20 +1856,17 @@ gen6_emit_bb_start(struct drm_i915_gem_request *req, u64 offset, u32 len, unsigned int dispatch_flags) { - struct intel_ring *ring = req->ring; - int ret; + u32 *cs; - ret = intel_ring_begin(req, 2); - if (ret) - return ret; + cs = intel_ring_begin(req, 2); + if (IS_ERR(cs)) + return PTR_ERR(cs); - intel_ring_emit(ring, - MI_BATCH_BUFFER_START | - (dispatch_flags & I915_DISPATCH_SECURE ? - 0 : MI_BATCH_NON_SECURE_I965)); + *cs++ = MI_BATCH_BUFFER_START | (dispatch_flags & I915_DISPATCH_SECURE ? + 0 : MI_BATCH_NON_SECURE_I965); /* bit0-7 is the length on GEN6+ */ - intel_ring_emit(ring, offset); - intel_ring_advance(ring); + *cs++ = offset; + intel_ring_advance(req, cs); return 0; } @@ -2456,13 +1875,11 @@ gen6_emit_bb_start(struct drm_i915_gem_request *req, static int gen6_ring_flush(struct drm_i915_gem_request *req, u32 mode) { - struct intel_ring *ring = req->ring; - uint32_t cmd; - int ret; + u32 cmd, *cs; - ret = intel_ring_begin(req, 4); - if (ret) - return ret; + cs = intel_ring_begin(req, 4); + if (IS_ERR(cs)) + return PTR_ERR(cs); cmd = MI_FLUSH_DW; if (INTEL_GEN(req->i915) >= 8) @@ -2483,17 +1900,16 @@ static int gen6_ring_flush(struct drm_i915_gem_request *req, u32 mode) */ if (mode & EMIT_INVALIDATE) cmd |= MI_INVALIDATE_TLB; - intel_ring_emit(ring, cmd); - intel_ring_emit(ring, - I915_GEM_HWS_SCRATCH_ADDR | MI_FLUSH_DW_USE_GTT); + *cs++ = cmd; + *cs++ = I915_GEM_HWS_SCRATCH_ADDR | MI_FLUSH_DW_USE_GTT; if (INTEL_GEN(req->i915) >= 8) { - intel_ring_emit(ring, 0); /* upper addr */ - intel_ring_emit(ring, 0); /* value */ + *cs++ = 0; /* upper addr */ + *cs++ = 0; /* value */ } else { - intel_ring_emit(ring, 0); - intel_ring_emit(ring, MI_NOOP); + *cs++ = 0; + *cs++ = MI_NOOP; } - intel_ring_advance(ring); + intel_ring_advance(req, cs); return 0; } diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h index 79c2b8d72322..0f29e07a9581 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.h +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h @@ -5,6 +5,7 @@ #include "i915_gem_batch_pool.h" #include "i915_gem_request.h" #include "i915_gem_timeline.h" +#include "i915_selftest.h" #define I915_CMD_HASH_ORDER 9 @@ -144,6 +145,7 @@ struct intel_ring { u32 head; u32 tail; + int space; int size; int effective_size; @@ -211,6 +213,11 @@ struct intel_engine_cs { struct intel_render_state *render_state; + atomic_t irq_count; + unsigned long irq_posted; +#define ENGINE_IRQ_BREADCRUMB 0 +#define ENGINE_IRQ_EXECLIST 1 + /* Rather than have every client wait upon all user interrupts, * with the herd waking after every interrupt and each doing the * heavyweight seqno dance, we delegate the task (of being the @@ -229,21 +236,21 @@ struct intel_engine_cs { */ struct intel_breadcrumbs { struct task_struct __rcu *irq_seqno_bh; /* bh for interrupts */ - bool irq_posted; spinlock_t lock; /* protects the lists of requests; irqsafe */ struct rb_root waiters; /* sorted by retirement, priority */ struct rb_root signals; /* sorted by retirement */ struct intel_wait *first_wait; /* oldest waiter by retirement */ struct task_struct *signaler; /* used for fence signalling */ - struct drm_i915_gem_request *first_signal; + struct drm_i915_gem_request __rcu *first_signal; struct timer_list fake_irq; /* used after a missed interrupt */ struct timer_list hangcheck; /* detect missed interrupts */ - unsigned long timeout; + unsigned int hangcheck_interrupts; bool irq_enabled : 1; bool rpm_wakelock : 1; + I915_SELFTEST_DECLARE(bool mock : 1); } breadcrumbs; /* @@ -285,7 +292,7 @@ struct intel_engine_cs { #define I915_DISPATCH_PINNED BIT(1) #define I915_DISPATCH_RS BIT(2) void (*emit_breadcrumb)(struct drm_i915_gem_request *req, - u32 *out); + u32 *cs); int emit_breadcrumb_sz; /* Pass the request to the hardware queue (e.g. directly into @@ -368,7 +375,7 @@ struct intel_engine_cs { /* AKA wait() */ int (*sync_to)(struct drm_i915_gem_request *req, struct drm_i915_gem_request *signal); - u32 *(*signal)(struct drm_i915_gem_request *req, u32 *out); + u32 *(*signal)(struct drm_i915_gem_request *req, u32 *cs); } semaphore; /* Execlists */ @@ -376,13 +383,11 @@ struct intel_engine_cs { struct execlist_port { struct drm_i915_gem_request *request; unsigned int count; + GEM_DEBUG_DECL(u32 context_id); } execlist_port[2]; struct rb_root execlist_queue; struct rb_node *execlist_first; unsigned int fw_domains; - bool disable_lite_restore_wa; - bool preempt_wa; - u32 ctx_desc_template; /* Contexts are pinned whilst they are active on the GPU. The last * context executed remains active whilst the GPU is idle - the @@ -492,21 +497,12 @@ void intel_engine_cleanup(struct intel_engine_cs *engine); void intel_legacy_submission_resume(struct drm_i915_private *dev_priv); -int __must_check intel_ring_begin(struct drm_i915_gem_request *req, int n); int __must_check intel_ring_cacheline_align(struct drm_i915_gem_request *req); -static inline void intel_ring_emit(struct intel_ring *ring, u32 data) -{ - *(uint32_t *)(ring->vaddr + ring->tail) = data; - ring->tail += 4; -} +u32 __must_check *intel_ring_begin(struct drm_i915_gem_request *req, int n); -static inline void intel_ring_emit_reg(struct intel_ring *ring, i915_reg_t reg) -{ - intel_ring_emit(ring, i915_mmio_reg_offset(reg)); -} - -static inline void intel_ring_advance(struct intel_ring *ring) +static inline void +intel_ring_advance(struct drm_i915_gem_request *req, u32 *cs) { /* Dummy function. * @@ -516,16 +512,18 @@ static inline void intel_ring_advance(struct intel_ring *ring) * reserved for the command packet (i.e. the value passed to * intel_ring_begin()). */ + GEM_BUG_ON((req->ring->vaddr + req->ring->tail) != cs); } -static inline u32 intel_ring_offset(struct intel_ring *ring, void *addr) +static inline u32 +intel_ring_offset(struct drm_i915_gem_request *req, void *addr) { /* Don't write ring->size (equivalent to 0) as that hangs some GPUs. */ - u32 offset = addr - ring->vaddr; - return offset & (ring->size - 1); + u32 offset = addr - req->ring->vaddr; + GEM_BUG_ON(offset > req->ring->size); + return offset & (req->ring->size - 1); } -int __intel_ring_space(int head, int tail, int size); void intel_ring_update_space(struct intel_ring *ring); void intel_engine_init_global_seqno(struct intel_engine_cs *engine, u32 seqno); @@ -558,10 +556,11 @@ static inline u32 intel_engine_last_submit(struct intel_engine_cs *engine) * wtih serialising this hint with anything, so document it as * a hint and nothing more. */ - return READ_ONCE(engine->timeline->last_submitted_seqno); + return READ_ONCE(engine->timeline->seqno); } int init_workarounds_ring(struct intel_engine_cs *engine); +int intel_ring_workarounds_emit(struct drm_i915_gem_request *req); void intel_engine_get_instdone(struct intel_engine_cs *engine, struct intel_instdone *instdone); @@ -583,10 +582,47 @@ static inline u32 intel_hws_seqno_address(struct intel_engine_cs *engine) /* intel_breadcrumbs.c -- user interrupt bottom-half for waiters */ int intel_engine_init_breadcrumbs(struct intel_engine_cs *engine); -static inline void intel_wait_init(struct intel_wait *wait, u32 seqno) +static inline void intel_wait_init(struct intel_wait *wait) { wait->tsk = current; +} + +static inline void intel_wait_init_for_seqno(struct intel_wait *wait, u32 seqno) +{ + wait->tsk = current; + wait->seqno = seqno; +} + +static inline bool intel_wait_has_seqno(const struct intel_wait *wait) +{ + return wait->seqno; +} + +static inline bool +intel_wait_update_seqno(struct intel_wait *wait, u32 seqno) +{ wait->seqno = seqno; + return intel_wait_has_seqno(wait); +} + +static inline bool +intel_wait_update_request(struct intel_wait *wait, + const struct drm_i915_gem_request *rq) +{ + return intel_wait_update_seqno(wait, i915_gem_request_global_seqno(rq)); +} + +static inline bool +intel_wait_check_seqno(const struct intel_wait *wait, u32 seqno) +{ + return wait->seqno == seqno; +} + +static inline bool +intel_wait_check_request(const struct intel_wait *wait, + const struct drm_i915_gem_request *rq) +{ + return intel_wait_check_seqno(wait, i915_gem_request_global_seqno(rq)); } static inline bool intel_wait_complete(const struct intel_wait *wait) @@ -599,6 +635,7 @@ bool intel_engine_add_wait(struct intel_engine_cs *engine, void intel_engine_remove_wait(struct intel_engine_cs *engine, struct intel_wait *wait); void intel_engine_enable_signaling(struct drm_i915_gem_request *request); +void intel_engine_cancel_signaling(struct drm_i915_gem_request *request); static inline bool intel_engine_has_waiter(const struct intel_engine_cs *engine) { @@ -631,6 +668,17 @@ static inline bool intel_engine_wakeup(const struct intel_engine_cs *engine) void intel_engine_reset_breadcrumbs(struct intel_engine_cs *engine); void intel_engine_fini_breadcrumbs(struct intel_engine_cs *engine); -unsigned int intel_breadcrumbs_busy(struct drm_i915_private *i915); +bool intel_breadcrumbs_busy(struct intel_engine_cs *engine); + +static inline u32 *gen8_emit_pipe_control(u32 *batch, u32 flags, u32 offset) +{ + memset(batch, 0, 6 * sizeof(u32)); + + batch[0] = GFX_OP_PIPE_CONTROL(6); + batch[1] = flags; + batch[2] = offset; + + return batch + 6; +} #endif /* _INTEL_RINGBUFFER_H_ */ diff --git a/drivers/gpu/drm/i915/intel_runtime_pm.c b/drivers/gpu/drm/i915/intel_runtime_pm.c index c0b7e95b5b8e..012bc358a33a 100644 --- a/drivers/gpu/drm/i915/intel_runtime_pm.c +++ b/drivers/gpu/drm/i915/intel_runtime_pm.c @@ -49,19 +49,6 @@ * present for a given platform. */ -#define for_each_power_well(i, power_well, domain_mask, power_domains) \ - for (i = 0; \ - i < (power_domains)->power_well_count && \ - ((power_well) = &(power_domains)->power_wells[i]); \ - i++) \ - for_each_if ((power_well)->domains & (domain_mask)) - -#define for_each_power_well_rev(i, power_well, domain_mask, power_domains) \ - for (i = (power_domains)->power_well_count - 1; \ - i >= 0 && ((power_well) = &(power_domains)->power_wells[i]);\ - i--) \ - for_each_if ((power_well)->domains & (domain_mask)) - bool intel_display_power_well_is_enabled(struct drm_i915_private *dev_priv, int power_well_id); @@ -106,6 +93,16 @@ intel_display_power_domain_str(enum intel_display_power_domain domain) return "PORT_DDI_D_LANES"; case POWER_DOMAIN_PORT_DDI_E_LANES: return "PORT_DDI_E_LANES"; + case POWER_DOMAIN_PORT_DDI_A_IO: + return "PORT_DDI_A_IO"; + case POWER_DOMAIN_PORT_DDI_B_IO: + return "PORT_DDI_B_IO"; + case POWER_DOMAIN_PORT_DDI_C_IO: + return "PORT_DDI_C_IO"; + case POWER_DOMAIN_PORT_DDI_D_IO: + return "PORT_DDI_D_IO"; + case POWER_DOMAIN_PORT_DDI_E_IO: + return "PORT_DDI_E_IO"; case POWER_DOMAIN_PORT_DSI: return "PORT_DSI"; case POWER_DOMAIN_PORT_CRT: @@ -198,19 +195,15 @@ static bool hsw_power_well_enabled(struct drm_i915_private *dev_priv, bool __intel_display_power_is_enabled(struct drm_i915_private *dev_priv, enum intel_display_power_domain domain) { - struct i915_power_domains *power_domains; struct i915_power_well *power_well; bool is_enabled; - int i; if (dev_priv->pm.suspended) return false; - power_domains = &dev_priv->power_domains; - is_enabled = true; - for_each_power_well_rev(i, power_well, BIT(domain), power_domains) { + for_each_power_domain_well_rev(dev_priv, power_well, BIT_ULL(domain)) { if (power_well->always_on) continue; @@ -385,124 +378,121 @@ static void hsw_set_power_well(struct drm_i915_private *dev_priv, } #define SKL_DISPLAY_POWERWELL_2_POWER_DOMAINS ( \ - BIT(POWER_DOMAIN_TRANSCODER_A) | \ - BIT(POWER_DOMAIN_PIPE_B) | \ - BIT(POWER_DOMAIN_TRANSCODER_B) | \ - BIT(POWER_DOMAIN_PIPE_C) | \ - BIT(POWER_DOMAIN_TRANSCODER_C) | \ - BIT(POWER_DOMAIN_PIPE_B_PANEL_FITTER) | \ - BIT(POWER_DOMAIN_PIPE_C_PANEL_FITTER) | \ - BIT(POWER_DOMAIN_PORT_DDI_B_LANES) | \ - BIT(POWER_DOMAIN_PORT_DDI_C_LANES) | \ - BIT(POWER_DOMAIN_PORT_DDI_D_LANES) | \ - BIT(POWER_DOMAIN_PORT_DDI_E_LANES) | \ - BIT(POWER_DOMAIN_AUX_B) | \ - BIT(POWER_DOMAIN_AUX_C) | \ - BIT(POWER_DOMAIN_AUX_D) | \ - BIT(POWER_DOMAIN_AUDIO) | \ - BIT(POWER_DOMAIN_VGA) | \ - BIT(POWER_DOMAIN_INIT)) -#define SKL_DISPLAY_DDI_A_E_POWER_DOMAINS ( \ - BIT(POWER_DOMAIN_PORT_DDI_A_LANES) | \ - BIT(POWER_DOMAIN_PORT_DDI_E_LANES) | \ - BIT(POWER_DOMAIN_INIT)) -#define SKL_DISPLAY_DDI_B_POWER_DOMAINS ( \ - BIT(POWER_DOMAIN_PORT_DDI_B_LANES) | \ - BIT(POWER_DOMAIN_INIT)) -#define SKL_DISPLAY_DDI_C_POWER_DOMAINS ( \ - BIT(POWER_DOMAIN_PORT_DDI_C_LANES) | \ - BIT(POWER_DOMAIN_INIT)) -#define SKL_DISPLAY_DDI_D_POWER_DOMAINS ( \ - BIT(POWER_DOMAIN_PORT_DDI_D_LANES) | \ - BIT(POWER_DOMAIN_INIT)) + BIT_ULL(POWER_DOMAIN_TRANSCODER_A) | \ + BIT_ULL(POWER_DOMAIN_PIPE_B) | \ + BIT_ULL(POWER_DOMAIN_TRANSCODER_B) | \ + BIT_ULL(POWER_DOMAIN_PIPE_C) | \ + BIT_ULL(POWER_DOMAIN_TRANSCODER_C) | \ + BIT_ULL(POWER_DOMAIN_PIPE_B_PANEL_FITTER) | \ + BIT_ULL(POWER_DOMAIN_PIPE_C_PANEL_FITTER) | \ + BIT_ULL(POWER_DOMAIN_PORT_DDI_B_LANES) | \ + BIT_ULL(POWER_DOMAIN_PORT_DDI_C_LANES) | \ + BIT_ULL(POWER_DOMAIN_PORT_DDI_D_LANES) | \ + BIT_ULL(POWER_DOMAIN_PORT_DDI_E_LANES) | \ + BIT_ULL(POWER_DOMAIN_AUX_B) | \ + BIT_ULL(POWER_DOMAIN_AUX_C) | \ + BIT_ULL(POWER_DOMAIN_AUX_D) | \ + BIT_ULL(POWER_DOMAIN_AUDIO) | \ + BIT_ULL(POWER_DOMAIN_VGA) | \ + BIT_ULL(POWER_DOMAIN_INIT)) +#define SKL_DISPLAY_DDI_IO_A_E_POWER_DOMAINS ( \ + BIT_ULL(POWER_DOMAIN_PORT_DDI_A_IO) | \ + BIT_ULL(POWER_DOMAIN_PORT_DDI_E_IO) | \ + BIT_ULL(POWER_DOMAIN_INIT)) +#define SKL_DISPLAY_DDI_IO_B_POWER_DOMAINS ( \ + BIT_ULL(POWER_DOMAIN_PORT_DDI_B_IO) | \ + BIT_ULL(POWER_DOMAIN_INIT)) +#define SKL_DISPLAY_DDI_IO_C_POWER_DOMAINS ( \ + BIT_ULL(POWER_DOMAIN_PORT_DDI_C_IO) | \ + BIT_ULL(POWER_DOMAIN_INIT)) +#define SKL_DISPLAY_DDI_IO_D_POWER_DOMAINS ( \ + BIT_ULL(POWER_DOMAIN_PORT_DDI_D_IO) | \ + BIT_ULL(POWER_DOMAIN_INIT)) #define SKL_DISPLAY_DC_OFF_POWER_DOMAINS ( \ SKL_DISPLAY_POWERWELL_2_POWER_DOMAINS | \ - BIT(POWER_DOMAIN_MODESET) | \ - BIT(POWER_DOMAIN_AUX_A) | \ - BIT(POWER_DOMAIN_INIT)) + BIT_ULL(POWER_DOMAIN_MODESET) | \ + BIT_ULL(POWER_DOMAIN_AUX_A) | \ + BIT_ULL(POWER_DOMAIN_INIT)) #define BXT_DISPLAY_POWERWELL_2_POWER_DOMAINS ( \ - BIT(POWER_DOMAIN_TRANSCODER_A) | \ - BIT(POWER_DOMAIN_PIPE_B) | \ - BIT(POWER_DOMAIN_TRANSCODER_B) | \ - BIT(POWER_DOMAIN_PIPE_C) | \ - BIT(POWER_DOMAIN_TRANSCODER_C) | \ - BIT(POWER_DOMAIN_PIPE_B_PANEL_FITTER) | \ - BIT(POWER_DOMAIN_PIPE_C_PANEL_FITTER) | \ - BIT(POWER_DOMAIN_PORT_DDI_B_LANES) | \ - BIT(POWER_DOMAIN_PORT_DDI_C_LANES) | \ - BIT(POWER_DOMAIN_AUX_B) | \ - BIT(POWER_DOMAIN_AUX_C) | \ - BIT(POWER_DOMAIN_AUDIO) | \ - BIT(POWER_DOMAIN_VGA) | \ - BIT(POWER_DOMAIN_GMBUS) | \ - BIT(POWER_DOMAIN_INIT)) + BIT_ULL(POWER_DOMAIN_TRANSCODER_A) | \ + BIT_ULL(POWER_DOMAIN_PIPE_B) | \ + BIT_ULL(POWER_DOMAIN_TRANSCODER_B) | \ + BIT_ULL(POWER_DOMAIN_PIPE_C) | \ + BIT_ULL(POWER_DOMAIN_TRANSCODER_C) | \ + BIT_ULL(POWER_DOMAIN_PIPE_B_PANEL_FITTER) | \ + BIT_ULL(POWER_DOMAIN_PIPE_C_PANEL_FITTER) | \ + BIT_ULL(POWER_DOMAIN_PORT_DDI_B_LANES) | \ + BIT_ULL(POWER_DOMAIN_PORT_DDI_C_LANES) | \ + BIT_ULL(POWER_DOMAIN_AUX_B) | \ + BIT_ULL(POWER_DOMAIN_AUX_C) | \ + BIT_ULL(POWER_DOMAIN_AUDIO) | \ + BIT_ULL(POWER_DOMAIN_VGA) | \ + BIT_ULL(POWER_DOMAIN_GMBUS) | \ + BIT_ULL(POWER_DOMAIN_INIT)) #define BXT_DISPLAY_DC_OFF_POWER_DOMAINS ( \ BXT_DISPLAY_POWERWELL_2_POWER_DOMAINS | \ - BIT(POWER_DOMAIN_MODESET) | \ - BIT(POWER_DOMAIN_AUX_A) | \ - BIT(POWER_DOMAIN_INIT)) + BIT_ULL(POWER_DOMAIN_MODESET) | \ + BIT_ULL(POWER_DOMAIN_AUX_A) | \ + BIT_ULL(POWER_DOMAIN_INIT)) #define BXT_DPIO_CMN_A_POWER_DOMAINS ( \ - BIT(POWER_DOMAIN_PORT_DDI_A_LANES) | \ - BIT(POWER_DOMAIN_AUX_A) | \ - BIT(POWER_DOMAIN_INIT)) + BIT_ULL(POWER_DOMAIN_PORT_DDI_A_LANES) | \ + BIT_ULL(POWER_DOMAIN_AUX_A) | \ + BIT_ULL(POWER_DOMAIN_INIT)) #define BXT_DPIO_CMN_BC_POWER_DOMAINS ( \ - BIT(POWER_DOMAIN_PORT_DDI_B_LANES) | \ - BIT(POWER_DOMAIN_PORT_DDI_C_LANES) | \ - BIT(POWER_DOMAIN_AUX_B) | \ - BIT(POWER_DOMAIN_AUX_C) | \ - BIT(POWER_DOMAIN_INIT)) + BIT_ULL(POWER_DOMAIN_PORT_DDI_B_LANES) | \ + BIT_ULL(POWER_DOMAIN_PORT_DDI_C_LANES) | \ + BIT_ULL(POWER_DOMAIN_AUX_B) | \ + BIT_ULL(POWER_DOMAIN_AUX_C) | \ + BIT_ULL(POWER_DOMAIN_INIT)) #define GLK_DISPLAY_POWERWELL_2_POWER_DOMAINS ( \ - BIT(POWER_DOMAIN_TRANSCODER_A) | \ - BIT(POWER_DOMAIN_PIPE_B) | \ - BIT(POWER_DOMAIN_TRANSCODER_B) | \ - BIT(POWER_DOMAIN_PIPE_C) | \ - BIT(POWER_DOMAIN_TRANSCODER_C) | \ - BIT(POWER_DOMAIN_PIPE_B_PANEL_FITTER) | \ - BIT(POWER_DOMAIN_PIPE_C_PANEL_FITTER) | \ - BIT(POWER_DOMAIN_PORT_DDI_B_LANES) | \ - BIT(POWER_DOMAIN_PORT_DDI_C_LANES) | \ - BIT(POWER_DOMAIN_AUX_B) | \ - BIT(POWER_DOMAIN_AUX_C) | \ - BIT(POWER_DOMAIN_AUDIO) | \ - BIT(POWER_DOMAIN_VGA) | \ - BIT(POWER_DOMAIN_INIT)) -#define GLK_DISPLAY_DDI_A_POWER_DOMAINS ( \ - BIT(POWER_DOMAIN_PORT_DDI_A_LANES) | \ - BIT(POWER_DOMAIN_INIT)) -#define GLK_DISPLAY_DDI_B_POWER_DOMAINS ( \ - BIT(POWER_DOMAIN_PORT_DDI_B_LANES) | \ - BIT(POWER_DOMAIN_INIT)) -#define GLK_DISPLAY_DDI_C_POWER_DOMAINS ( \ - BIT(POWER_DOMAIN_PORT_DDI_C_LANES) | \ - BIT(POWER_DOMAIN_INIT)) + BIT_ULL(POWER_DOMAIN_TRANSCODER_A) | \ + BIT_ULL(POWER_DOMAIN_PIPE_B) | \ + BIT_ULL(POWER_DOMAIN_TRANSCODER_B) | \ + BIT_ULL(POWER_DOMAIN_PIPE_C) | \ + BIT_ULL(POWER_DOMAIN_TRANSCODER_C) | \ + BIT_ULL(POWER_DOMAIN_PIPE_B_PANEL_FITTER) | \ + BIT_ULL(POWER_DOMAIN_PIPE_C_PANEL_FITTER) | \ + BIT_ULL(POWER_DOMAIN_PORT_DDI_B_LANES) | \ + BIT_ULL(POWER_DOMAIN_PORT_DDI_C_LANES) | \ + BIT_ULL(POWER_DOMAIN_AUX_B) | \ + BIT_ULL(POWER_DOMAIN_AUX_C) | \ + BIT_ULL(POWER_DOMAIN_AUDIO) | \ + BIT_ULL(POWER_DOMAIN_VGA) | \ + BIT_ULL(POWER_DOMAIN_INIT)) +#define GLK_DISPLAY_DDI_IO_A_POWER_DOMAINS ( \ + BIT_ULL(POWER_DOMAIN_PORT_DDI_A_IO)) +#define GLK_DISPLAY_DDI_IO_B_POWER_DOMAINS ( \ + BIT_ULL(POWER_DOMAIN_PORT_DDI_B_IO)) +#define GLK_DISPLAY_DDI_IO_C_POWER_DOMAINS ( \ + BIT_ULL(POWER_DOMAIN_PORT_DDI_C_IO)) #define GLK_DPIO_CMN_A_POWER_DOMAINS ( \ - BIT(POWER_DOMAIN_PORT_DDI_A_LANES) | \ - BIT(POWER_DOMAIN_AUX_A) | \ - BIT(POWER_DOMAIN_INIT)) + BIT_ULL(POWER_DOMAIN_PORT_DDI_A_LANES) | \ + BIT_ULL(POWER_DOMAIN_AUX_A) | \ + BIT_ULL(POWER_DOMAIN_INIT)) #define GLK_DPIO_CMN_B_POWER_DOMAINS ( \ - BIT(POWER_DOMAIN_PORT_DDI_B_LANES) | \ - BIT(POWER_DOMAIN_AUX_B) | \ - BIT(POWER_DOMAIN_INIT)) + BIT_ULL(POWER_DOMAIN_PORT_DDI_B_LANES) | \ + BIT_ULL(POWER_DOMAIN_AUX_B) | \ + BIT_ULL(POWER_DOMAIN_INIT)) #define GLK_DPIO_CMN_C_POWER_DOMAINS ( \ - BIT(POWER_DOMAIN_PORT_DDI_C_LANES) | \ - BIT(POWER_DOMAIN_AUX_C) | \ - BIT(POWER_DOMAIN_INIT)) + BIT_ULL(POWER_DOMAIN_PORT_DDI_C_LANES) | \ + BIT_ULL(POWER_DOMAIN_AUX_C) | \ + BIT_ULL(POWER_DOMAIN_INIT)) #define GLK_DISPLAY_AUX_A_POWER_DOMAINS ( \ - BIT(POWER_DOMAIN_AUX_A) | \ - BIT(POWER_DOMAIN_INIT)) + BIT_ULL(POWER_DOMAIN_AUX_A) | \ + BIT_ULL(POWER_DOMAIN_INIT)) #define GLK_DISPLAY_AUX_B_POWER_DOMAINS ( \ - BIT(POWER_DOMAIN_AUX_B) | \ - BIT(POWER_DOMAIN_INIT)) + BIT_ULL(POWER_DOMAIN_AUX_B) | \ + BIT_ULL(POWER_DOMAIN_INIT)) #define GLK_DISPLAY_AUX_C_POWER_DOMAINS ( \ - BIT(POWER_DOMAIN_AUX_C) | \ - BIT(POWER_DOMAIN_INIT)) + BIT_ULL(POWER_DOMAIN_AUX_C) | \ + BIT_ULL(POWER_DOMAIN_INIT)) #define GLK_DISPLAY_DC_OFF_POWER_DOMAINS ( \ GLK_DISPLAY_POWERWELL_2_POWER_DOMAINS | \ - BIT(POWER_DOMAIN_MODESET) | \ - BIT(POWER_DOMAIN_AUX_A) | \ - BIT(POWER_DOMAIN_INIT)) + BIT_ULL(POWER_DOMAIN_MODESET) | \ + BIT_ULL(POWER_DOMAIN_AUX_A) | \ + BIT_ULL(POWER_DOMAIN_INIT)) static void assert_can_enable_dc9(struct drm_i915_private *dev_priv) { @@ -732,7 +722,7 @@ gen9_sanitize_power_well_requests(struct drm_i915_private *dev_priv, * other request bits to be set, so WARN for those. */ if (power_well_id == SKL_DISP_PW_1 || - ((IS_SKYLAKE(dev_priv) || IS_KABYLAKE(dev_priv)) && + (IS_GEN9_BC(dev_priv) && power_well_id == SKL_DISP_PW_MISC_IO)) DRM_DEBUG_DRIVER("Clearing auxiliary requests for %s forced on " "by DMC\n", power_well->name); @@ -847,14 +837,14 @@ static void skl_set_power_well(struct drm_i915_private *dev_priv, static void hsw_power_well_sync_hw(struct drm_i915_private *dev_priv, struct i915_power_well *power_well) { - hsw_set_power_well(dev_priv, power_well, power_well->count > 0); - - /* - * We're taking over the BIOS, so clear any requests made by it since - * the driver is in charge now. - */ - if (I915_READ(HSW_PWR_WELL_BIOS) & HSW_PWR_WELL_ENABLE_REQUEST) + /* Take over the request bit if set by BIOS. */ + if (I915_READ(HSW_PWR_WELL_BIOS) & HSW_PWR_WELL_ENABLE_REQUEST) { + if (!(I915_READ(HSW_PWR_WELL_DRIVER) & + HSW_PWR_WELL_ENABLE_REQUEST)) + I915_WRITE(HSW_PWR_WELL_DRIVER, + HSW_PWR_WELL_ENABLE_REQUEST); I915_WRITE(HSW_PWR_WELL_BIOS, 0); + } } static void hsw_power_well_enable(struct drm_i915_private *dev_priv, @@ -881,10 +871,17 @@ static bool skl_power_well_enabled(struct drm_i915_private *dev_priv, static void skl_power_well_sync_hw(struct drm_i915_private *dev_priv, struct i915_power_well *power_well) { - skl_set_power_well(dev_priv, power_well, power_well->count > 0); + uint32_t mask = SKL_POWER_WELL_REQ(power_well->id); + uint32_t bios_req = I915_READ(HSW_PWR_WELL_BIOS); + + /* Take over the request bit if set by BIOS. */ + if (bios_req & mask) { + uint32_t drv_req = I915_READ(HSW_PWR_WELL_DRIVER); - /* Clear any request made by BIOS as driver is taking over */ - I915_WRITE(HSW_PWR_WELL_BIOS, 0); + if (!(drv_req & mask)) + I915_WRITE(HSW_PWR_WELL_DRIVER, drv_req | mask); + I915_WRITE(HSW_PWR_WELL_BIOS, bios_req & ~mask); + } } static void skl_power_well_enable(struct drm_i915_private *dev_priv, @@ -917,16 +914,6 @@ static bool bxt_dpio_cmn_power_well_enabled(struct drm_i915_private *dev_priv, return bxt_ddi_phy_is_enabled(dev_priv, power_well->data); } -static void bxt_dpio_cmn_power_well_sync_hw(struct drm_i915_private *dev_priv, - struct i915_power_well *power_well) -{ - if (power_well->count > 0) - bxt_dpio_cmn_power_well_enable(dev_priv, power_well); - else - bxt_dpio_cmn_power_well_disable(dev_priv, power_well); -} - - static void bxt_verify_ddi_phy_power_wells(struct drm_i915_private *dev_priv) { struct i915_power_well *power_well; @@ -964,10 +951,12 @@ static void gen9_assert_dbuf_enabled(struct drm_i915_private *dev_priv) static void gen9_dc_off_power_well_enable(struct drm_i915_private *dev_priv, struct i915_power_well *power_well) { + struct intel_cdclk_state cdclk_state = {}; + gen9_set_dc_state(dev_priv, DC_STATE_DISABLE); - WARN_ON(dev_priv->cdclk_freq != - dev_priv->display.get_display_clock_speed(dev_priv)); + dev_priv->display.get_cdclk(dev_priv, &cdclk_state); + WARN_ON(!intel_cdclk_state_compare(&dev_priv->cdclk.hw, &cdclk_state)); gen9_assert_dbuf_enabled(dev_priv); @@ -987,13 +976,9 @@ static void gen9_dc_off_power_well_disable(struct drm_i915_private *dev_priv, gen9_enable_dc5(dev_priv); } -static void gen9_dc_off_power_well_sync_hw(struct drm_i915_private *dev_priv, - struct i915_power_well *power_well) +static void i9xx_power_well_sync_hw_noop(struct drm_i915_private *dev_priv, + struct i915_power_well *power_well) { - if (power_well->count > 0) - gen9_dc_off_power_well_enable(dev_priv, power_well); - else - gen9_dc_off_power_well_disable(dev_priv, power_well); } static void i9xx_always_on_power_well_noop(struct drm_i915_private *dev_priv, @@ -1043,12 +1028,6 @@ out: mutex_unlock(&dev_priv->rps.hw_lock); } -static void vlv_power_well_sync_hw(struct drm_i915_private *dev_priv, - struct i915_power_well *power_well) -{ - vlv_set_power_well(dev_priv, power_well, power_well->count > 0); -} - static void vlv_power_well_enable(struct drm_i915_private *dev_priv, struct i915_power_well *power_well) { @@ -1249,7 +1228,7 @@ static void vlv_dpio_cmn_power_well_disable(struct drm_i915_private *dev_priv, vlv_set_power_well(dev_priv, power_well, false); } -#define POWER_DOMAIN_MASK (BIT(POWER_DOMAIN_NUM) - 1) +#define POWER_DOMAIN_MASK (GENMASK_ULL(POWER_DOMAIN_NUM - 1, 0)) static struct i915_power_well *lookup_power_well(struct drm_i915_private *dev_priv, int power_well_id) @@ -1659,14 +1638,6 @@ out: mutex_unlock(&dev_priv->rps.hw_lock); } -static void chv_pipe_power_well_sync_hw(struct drm_i915_private *dev_priv, - struct i915_power_well *power_well) -{ - WARN_ON_ONCE(power_well->id != PIPE_A); - - chv_set_pipe_power_well(dev_priv, power_well, power_well->count > 0); -} - static void chv_pipe_power_well_enable(struct drm_i915_private *dev_priv, struct i915_power_well *power_well) { @@ -1693,9 +1664,8 @@ __intel_display_power_get_domain(struct drm_i915_private *dev_priv, { struct i915_power_domains *power_domains = &dev_priv->power_domains; struct i915_power_well *power_well; - int i; - for_each_power_well(i, power_well, BIT(domain), power_domains) + for_each_power_domain_well(dev_priv, power_well, BIT_ULL(domain)) intel_power_well_get(dev_priv, power_well); power_domains->domain_use_count[domain]++; @@ -1779,7 +1749,6 @@ void intel_display_power_put(struct drm_i915_private *dev_priv, { struct i915_power_domains *power_domains; struct i915_power_well *power_well; - int i; power_domains = &dev_priv->power_domains; @@ -1790,7 +1759,7 @@ void intel_display_power_put(struct drm_i915_private *dev_priv, intel_display_power_domain_str(domain)); power_domains->domain_use_count[domain]--; - for_each_power_well_rev(i, power_well, BIT(domain), power_domains) + for_each_power_domain_well_rev(dev_priv, power_well, BIT_ULL(domain)) intel_power_well_put(dev_priv, power_well); mutex_unlock(&power_domains->lock); @@ -1799,134 +1768,134 @@ void intel_display_power_put(struct drm_i915_private *dev_priv, } #define HSW_DISPLAY_POWER_DOMAINS ( \ - BIT(POWER_DOMAIN_PIPE_B) | \ - BIT(POWER_DOMAIN_PIPE_C) | \ - BIT(POWER_DOMAIN_PIPE_A_PANEL_FITTER) | \ - BIT(POWER_DOMAIN_PIPE_B_PANEL_FITTER) | \ - BIT(POWER_DOMAIN_PIPE_C_PANEL_FITTER) | \ - BIT(POWER_DOMAIN_TRANSCODER_A) | \ - BIT(POWER_DOMAIN_TRANSCODER_B) | \ - BIT(POWER_DOMAIN_TRANSCODER_C) | \ - BIT(POWER_DOMAIN_PORT_DDI_B_LANES) | \ - BIT(POWER_DOMAIN_PORT_DDI_C_LANES) | \ - BIT(POWER_DOMAIN_PORT_DDI_D_LANES) | \ - BIT(POWER_DOMAIN_PORT_CRT) | /* DDI E */ \ - BIT(POWER_DOMAIN_VGA) | \ - BIT(POWER_DOMAIN_AUDIO) | \ - BIT(POWER_DOMAIN_INIT)) + BIT_ULL(POWER_DOMAIN_PIPE_B) | \ + BIT_ULL(POWER_DOMAIN_PIPE_C) | \ + BIT_ULL(POWER_DOMAIN_PIPE_A_PANEL_FITTER) | \ + BIT_ULL(POWER_DOMAIN_PIPE_B_PANEL_FITTER) | \ + BIT_ULL(POWER_DOMAIN_PIPE_C_PANEL_FITTER) | \ + BIT_ULL(POWER_DOMAIN_TRANSCODER_A) | \ + BIT_ULL(POWER_DOMAIN_TRANSCODER_B) | \ + BIT_ULL(POWER_DOMAIN_TRANSCODER_C) | \ + BIT_ULL(POWER_DOMAIN_PORT_DDI_B_LANES) | \ + BIT_ULL(POWER_DOMAIN_PORT_DDI_C_LANES) | \ + BIT_ULL(POWER_DOMAIN_PORT_DDI_D_LANES) | \ + BIT_ULL(POWER_DOMAIN_PORT_CRT) | /* DDI E */ \ + BIT_ULL(POWER_DOMAIN_VGA) | \ + BIT_ULL(POWER_DOMAIN_AUDIO) | \ + BIT_ULL(POWER_DOMAIN_INIT)) #define BDW_DISPLAY_POWER_DOMAINS ( \ - BIT(POWER_DOMAIN_PIPE_B) | \ - BIT(POWER_DOMAIN_PIPE_C) | \ - BIT(POWER_DOMAIN_PIPE_B_PANEL_FITTER) | \ - BIT(POWER_DOMAIN_PIPE_C_PANEL_FITTER) | \ - BIT(POWER_DOMAIN_TRANSCODER_A) | \ - BIT(POWER_DOMAIN_TRANSCODER_B) | \ - BIT(POWER_DOMAIN_TRANSCODER_C) | \ - BIT(POWER_DOMAIN_PORT_DDI_B_LANES) | \ - BIT(POWER_DOMAIN_PORT_DDI_C_LANES) | \ - BIT(POWER_DOMAIN_PORT_DDI_D_LANES) | \ - BIT(POWER_DOMAIN_PORT_CRT) | /* DDI E */ \ - BIT(POWER_DOMAIN_VGA) | \ - BIT(POWER_DOMAIN_AUDIO) | \ - BIT(POWER_DOMAIN_INIT)) + BIT_ULL(POWER_DOMAIN_PIPE_B) | \ + BIT_ULL(POWER_DOMAIN_PIPE_C) | \ + BIT_ULL(POWER_DOMAIN_PIPE_B_PANEL_FITTER) | \ + BIT_ULL(POWER_DOMAIN_PIPE_C_PANEL_FITTER) | \ + BIT_ULL(POWER_DOMAIN_TRANSCODER_A) | \ + BIT_ULL(POWER_DOMAIN_TRANSCODER_B) | \ + BIT_ULL(POWER_DOMAIN_TRANSCODER_C) | \ + BIT_ULL(POWER_DOMAIN_PORT_DDI_B_LANES) | \ + BIT_ULL(POWER_DOMAIN_PORT_DDI_C_LANES) | \ + BIT_ULL(POWER_DOMAIN_PORT_DDI_D_LANES) | \ + BIT_ULL(POWER_DOMAIN_PORT_CRT) | /* DDI E */ \ + BIT_ULL(POWER_DOMAIN_VGA) | \ + BIT_ULL(POWER_DOMAIN_AUDIO) | \ + BIT_ULL(POWER_DOMAIN_INIT)) #define VLV_DISPLAY_POWER_DOMAINS ( \ - BIT(POWER_DOMAIN_PIPE_A) | \ - BIT(POWER_DOMAIN_PIPE_B) | \ - BIT(POWER_DOMAIN_PIPE_A_PANEL_FITTER) | \ - BIT(POWER_DOMAIN_PIPE_B_PANEL_FITTER) | \ - BIT(POWER_DOMAIN_TRANSCODER_A) | \ - BIT(POWER_DOMAIN_TRANSCODER_B) | \ - BIT(POWER_DOMAIN_PORT_DDI_B_LANES) | \ - BIT(POWER_DOMAIN_PORT_DDI_C_LANES) | \ - BIT(POWER_DOMAIN_PORT_DSI) | \ - BIT(POWER_DOMAIN_PORT_CRT) | \ - BIT(POWER_DOMAIN_VGA) | \ - BIT(POWER_DOMAIN_AUDIO) | \ - BIT(POWER_DOMAIN_AUX_B) | \ - BIT(POWER_DOMAIN_AUX_C) | \ - BIT(POWER_DOMAIN_GMBUS) | \ - BIT(POWER_DOMAIN_INIT)) + BIT_ULL(POWER_DOMAIN_PIPE_A) | \ + BIT_ULL(POWER_DOMAIN_PIPE_B) | \ + BIT_ULL(POWER_DOMAIN_PIPE_A_PANEL_FITTER) | \ + BIT_ULL(POWER_DOMAIN_PIPE_B_PANEL_FITTER) | \ + BIT_ULL(POWER_DOMAIN_TRANSCODER_A) | \ + BIT_ULL(POWER_DOMAIN_TRANSCODER_B) | \ + BIT_ULL(POWER_DOMAIN_PORT_DDI_B_LANES) | \ + BIT_ULL(POWER_DOMAIN_PORT_DDI_C_LANES) | \ + BIT_ULL(POWER_DOMAIN_PORT_DSI) | \ + BIT_ULL(POWER_DOMAIN_PORT_CRT) | \ + BIT_ULL(POWER_DOMAIN_VGA) | \ + BIT_ULL(POWER_DOMAIN_AUDIO) | \ + BIT_ULL(POWER_DOMAIN_AUX_B) | \ + BIT_ULL(POWER_DOMAIN_AUX_C) | \ + BIT_ULL(POWER_DOMAIN_GMBUS) | \ + BIT_ULL(POWER_DOMAIN_INIT)) #define VLV_DPIO_CMN_BC_POWER_DOMAINS ( \ - BIT(POWER_DOMAIN_PORT_DDI_B_LANES) | \ - BIT(POWER_DOMAIN_PORT_DDI_C_LANES) | \ - BIT(POWER_DOMAIN_PORT_CRT) | \ - BIT(POWER_DOMAIN_AUX_B) | \ - BIT(POWER_DOMAIN_AUX_C) | \ - BIT(POWER_DOMAIN_INIT)) + BIT_ULL(POWER_DOMAIN_PORT_DDI_B_LANES) | \ + BIT_ULL(POWER_DOMAIN_PORT_DDI_C_LANES) | \ + BIT_ULL(POWER_DOMAIN_PORT_CRT) | \ + BIT_ULL(POWER_DOMAIN_AUX_B) | \ + BIT_ULL(POWER_DOMAIN_AUX_C) | \ + BIT_ULL(POWER_DOMAIN_INIT)) #define VLV_DPIO_TX_B_LANES_01_POWER_DOMAINS ( \ - BIT(POWER_DOMAIN_PORT_DDI_B_LANES) | \ - BIT(POWER_DOMAIN_AUX_B) | \ - BIT(POWER_DOMAIN_INIT)) + BIT_ULL(POWER_DOMAIN_PORT_DDI_B_LANES) | \ + BIT_ULL(POWER_DOMAIN_AUX_B) | \ + BIT_ULL(POWER_DOMAIN_INIT)) #define VLV_DPIO_TX_B_LANES_23_POWER_DOMAINS ( \ - BIT(POWER_DOMAIN_PORT_DDI_B_LANES) | \ - BIT(POWER_DOMAIN_AUX_B) | \ - BIT(POWER_DOMAIN_INIT)) + BIT_ULL(POWER_DOMAIN_PORT_DDI_B_LANES) | \ + BIT_ULL(POWER_DOMAIN_AUX_B) | \ + BIT_ULL(POWER_DOMAIN_INIT)) #define VLV_DPIO_TX_C_LANES_01_POWER_DOMAINS ( \ - BIT(POWER_DOMAIN_PORT_DDI_C_LANES) | \ - BIT(POWER_DOMAIN_AUX_C) | \ - BIT(POWER_DOMAIN_INIT)) + BIT_ULL(POWER_DOMAIN_PORT_DDI_C_LANES) | \ + BIT_ULL(POWER_DOMAIN_AUX_C) | \ + BIT_ULL(POWER_DOMAIN_INIT)) #define VLV_DPIO_TX_C_LANES_23_POWER_DOMAINS ( \ - BIT(POWER_DOMAIN_PORT_DDI_C_LANES) | \ - BIT(POWER_DOMAIN_AUX_C) | \ - BIT(POWER_DOMAIN_INIT)) + BIT_ULL(POWER_DOMAIN_PORT_DDI_C_LANES) | \ + BIT_ULL(POWER_DOMAIN_AUX_C) | \ + BIT_ULL(POWER_DOMAIN_INIT)) #define CHV_DISPLAY_POWER_DOMAINS ( \ - BIT(POWER_DOMAIN_PIPE_A) | \ - BIT(POWER_DOMAIN_PIPE_B) | \ - BIT(POWER_DOMAIN_PIPE_C) | \ - BIT(POWER_DOMAIN_PIPE_A_PANEL_FITTER) | \ - BIT(POWER_DOMAIN_PIPE_B_PANEL_FITTER) | \ - BIT(POWER_DOMAIN_PIPE_C_PANEL_FITTER) | \ - BIT(POWER_DOMAIN_TRANSCODER_A) | \ - BIT(POWER_DOMAIN_TRANSCODER_B) | \ - BIT(POWER_DOMAIN_TRANSCODER_C) | \ - BIT(POWER_DOMAIN_PORT_DDI_B_LANES) | \ - BIT(POWER_DOMAIN_PORT_DDI_C_LANES) | \ - BIT(POWER_DOMAIN_PORT_DDI_D_LANES) | \ - BIT(POWER_DOMAIN_PORT_DSI) | \ - BIT(POWER_DOMAIN_VGA) | \ - BIT(POWER_DOMAIN_AUDIO) | \ - BIT(POWER_DOMAIN_AUX_B) | \ - BIT(POWER_DOMAIN_AUX_C) | \ - BIT(POWER_DOMAIN_AUX_D) | \ - BIT(POWER_DOMAIN_GMBUS) | \ - BIT(POWER_DOMAIN_INIT)) + BIT_ULL(POWER_DOMAIN_PIPE_A) | \ + BIT_ULL(POWER_DOMAIN_PIPE_B) | \ + BIT_ULL(POWER_DOMAIN_PIPE_C) | \ + BIT_ULL(POWER_DOMAIN_PIPE_A_PANEL_FITTER) | \ + BIT_ULL(POWER_DOMAIN_PIPE_B_PANEL_FITTER) | \ + BIT_ULL(POWER_DOMAIN_PIPE_C_PANEL_FITTER) | \ + BIT_ULL(POWER_DOMAIN_TRANSCODER_A) | \ + BIT_ULL(POWER_DOMAIN_TRANSCODER_B) | \ + BIT_ULL(POWER_DOMAIN_TRANSCODER_C) | \ + BIT_ULL(POWER_DOMAIN_PORT_DDI_B_LANES) | \ + BIT_ULL(POWER_DOMAIN_PORT_DDI_C_LANES) | \ + BIT_ULL(POWER_DOMAIN_PORT_DDI_D_LANES) | \ + BIT_ULL(POWER_DOMAIN_PORT_DSI) | \ + BIT_ULL(POWER_DOMAIN_VGA) | \ + BIT_ULL(POWER_DOMAIN_AUDIO) | \ + BIT_ULL(POWER_DOMAIN_AUX_B) | \ + BIT_ULL(POWER_DOMAIN_AUX_C) | \ + BIT_ULL(POWER_DOMAIN_AUX_D) | \ + BIT_ULL(POWER_DOMAIN_GMBUS) | \ + BIT_ULL(POWER_DOMAIN_INIT)) #define CHV_DPIO_CMN_BC_POWER_DOMAINS ( \ - BIT(POWER_DOMAIN_PORT_DDI_B_LANES) | \ - BIT(POWER_DOMAIN_PORT_DDI_C_LANES) | \ - BIT(POWER_DOMAIN_AUX_B) | \ - BIT(POWER_DOMAIN_AUX_C) | \ - BIT(POWER_DOMAIN_INIT)) + BIT_ULL(POWER_DOMAIN_PORT_DDI_B_LANES) | \ + BIT_ULL(POWER_DOMAIN_PORT_DDI_C_LANES) | \ + BIT_ULL(POWER_DOMAIN_AUX_B) | \ + BIT_ULL(POWER_DOMAIN_AUX_C) | \ + BIT_ULL(POWER_DOMAIN_INIT)) #define CHV_DPIO_CMN_D_POWER_DOMAINS ( \ - BIT(POWER_DOMAIN_PORT_DDI_D_LANES) | \ - BIT(POWER_DOMAIN_AUX_D) | \ - BIT(POWER_DOMAIN_INIT)) + BIT_ULL(POWER_DOMAIN_PORT_DDI_D_LANES) | \ + BIT_ULL(POWER_DOMAIN_AUX_D) | \ + BIT_ULL(POWER_DOMAIN_INIT)) static const struct i915_power_well_ops i9xx_always_on_power_well_ops = { - .sync_hw = i9xx_always_on_power_well_noop, + .sync_hw = i9xx_power_well_sync_hw_noop, .enable = i9xx_always_on_power_well_noop, .disable = i9xx_always_on_power_well_noop, .is_enabled = i9xx_always_on_power_well_enabled, }; static const struct i915_power_well_ops chv_pipe_power_well_ops = { - .sync_hw = chv_pipe_power_well_sync_hw, + .sync_hw = i9xx_power_well_sync_hw_noop, .enable = chv_pipe_power_well_enable, .disable = chv_pipe_power_well_disable, .is_enabled = chv_pipe_power_well_enabled, }; static const struct i915_power_well_ops chv_dpio_cmn_power_well_ops = { - .sync_hw = vlv_power_well_sync_hw, + .sync_hw = i9xx_power_well_sync_hw_noop, .enable = chv_dpio_cmn_power_well_enable, .disable = chv_dpio_cmn_power_well_disable, .is_enabled = vlv_power_well_enabled, @@ -1956,14 +1925,14 @@ static const struct i915_power_well_ops skl_power_well_ops = { }; static const struct i915_power_well_ops gen9_dc_off_power_well_ops = { - .sync_hw = gen9_dc_off_power_well_sync_hw, + .sync_hw = i9xx_power_well_sync_hw_noop, .enable = gen9_dc_off_power_well_enable, .disable = gen9_dc_off_power_well_disable, .is_enabled = gen9_dc_off_power_well_enabled, }; static const struct i915_power_well_ops bxt_dpio_cmn_power_well_ops = { - .sync_hw = bxt_dpio_cmn_power_well_sync_hw, + .sync_hw = i9xx_power_well_sync_hw_noop, .enable = bxt_dpio_cmn_power_well_enable, .disable = bxt_dpio_cmn_power_well_disable, .is_enabled = bxt_dpio_cmn_power_well_enabled, @@ -1998,21 +1967,21 @@ static struct i915_power_well bdw_power_wells[] = { }; static const struct i915_power_well_ops vlv_display_power_well_ops = { - .sync_hw = vlv_power_well_sync_hw, + .sync_hw = i9xx_power_well_sync_hw_noop, .enable = vlv_display_power_well_enable, .disable = vlv_display_power_well_disable, .is_enabled = vlv_power_well_enabled, }; static const struct i915_power_well_ops vlv_dpio_cmn_power_well_ops = { - .sync_hw = vlv_power_well_sync_hw, + .sync_hw = i9xx_power_well_sync_hw_noop, .enable = vlv_dpio_cmn_power_well_enable, .disable = vlv_dpio_cmn_power_well_disable, .is_enabled = vlv_power_well_enabled, }; static const struct i915_power_well_ops vlv_dpio_power_well_ops = { - .sync_hw = vlv_power_well_sync_hw, + .sync_hw = i9xx_power_well_sync_hw_noop, .enable = vlv_power_well_enable, .disable = vlv_power_well_disable, .is_enabled = vlv_power_well_enabled, @@ -2155,26 +2124,26 @@ static struct i915_power_well skl_power_wells[] = { .id = SKL_DISP_PW_2, }, { - .name = "DDI A/E power well", - .domains = SKL_DISPLAY_DDI_A_E_POWER_DOMAINS, + .name = "DDI A/E IO power well", + .domains = SKL_DISPLAY_DDI_IO_A_E_POWER_DOMAINS, .ops = &skl_power_well_ops, .id = SKL_DISP_PW_DDI_A_E, }, { - .name = "DDI B power well", - .domains = SKL_DISPLAY_DDI_B_POWER_DOMAINS, + .name = "DDI B IO power well", + .domains = SKL_DISPLAY_DDI_IO_B_POWER_DOMAINS, .ops = &skl_power_well_ops, .id = SKL_DISP_PW_DDI_B, }, { - .name = "DDI C power well", - .domains = SKL_DISPLAY_DDI_C_POWER_DOMAINS, + .name = "DDI C IO power well", + .domains = SKL_DISPLAY_DDI_IO_C_POWER_DOMAINS, .ops = &skl_power_well_ops, .id = SKL_DISP_PW_DDI_C, }, { - .name = "DDI D power well", - .domains = SKL_DISPLAY_DDI_D_POWER_DOMAINS, + .name = "DDI D IO power well", + .domains = SKL_DISPLAY_DDI_IO_D_POWER_DOMAINS, .ops = &skl_power_well_ops, .id = SKL_DISP_PW_DDI_D, }, @@ -2287,20 +2256,20 @@ static struct i915_power_well glk_power_wells[] = { .id = GLK_DISP_PW_AUX_C, }, { - .name = "DDI A power well", - .domains = GLK_DISPLAY_DDI_A_POWER_DOMAINS, + .name = "DDI A IO power well", + .domains = GLK_DISPLAY_DDI_IO_A_POWER_DOMAINS, .ops = &skl_power_well_ops, .id = GLK_DISP_PW_DDI_A, }, { - .name = "DDI B power well", - .domains = GLK_DISPLAY_DDI_B_POWER_DOMAINS, + .name = "DDI B IO power well", + .domains = GLK_DISPLAY_DDI_IO_B_POWER_DOMAINS, .ops = &skl_power_well_ops, .id = SKL_DISP_PW_DDI_B, }, { - .name = "DDI C power well", - .domains = GLK_DISPLAY_DDI_C_POWER_DOMAINS, + .name = "DDI C IO power well", + .domains = GLK_DISPLAY_DDI_IO_C_POWER_DOMAINS, .ops = &skl_power_well_ops, .id = SKL_DISP_PW_DDI_C, }, @@ -2323,7 +2292,7 @@ static uint32_t get_allowed_dc_mask(const struct drm_i915_private *dev_priv, int requested_dc; int max_dc; - if (IS_SKYLAKE(dev_priv) || IS_KABYLAKE(dev_priv)) { + if (IS_GEN9_BC(dev_priv)) { max_dc = 2; mask = 0; } else if (IS_GEN9_LP(dev_priv)) { @@ -2386,7 +2355,7 @@ int intel_power_domains_init(struct drm_i915_private *dev_priv) dev_priv->csr.allowed_dc_mask = get_allowed_dc_mask(dev_priv, i915.enable_dc); - BUILD_BUG_ON(POWER_DOMAIN_NUM > 31); + BUILD_BUG_ON(POWER_DOMAIN_NUM > 64); mutex_init(&power_domains->lock); @@ -2398,7 +2367,7 @@ int intel_power_domains_init(struct drm_i915_private *dev_priv) set_power_wells(power_domains, hsw_power_wells); } else if (IS_BROADWELL(dev_priv)) { set_power_wells(power_domains, bdw_power_wells); - } else if (IS_SKYLAKE(dev_priv) || IS_KABYLAKE(dev_priv)) { + } else if (IS_GEN9_BC(dev_priv)) { set_power_wells(power_domains, skl_power_wells); } else if (IS_BROXTON(dev_priv)) { set_power_wells(power_domains, bxt_power_wells); @@ -2454,10 +2423,9 @@ static void intel_power_domains_sync_hw(struct drm_i915_private *dev_priv) { struct i915_power_domains *power_domains = &dev_priv->power_domains; struct i915_power_well *power_well; - int i; mutex_lock(&power_domains->lock); - for_each_power_well(i, power_well, POWER_DOMAIN_MASK, power_domains) { + for_each_power_well(dev_priv, power_well) { power_well->ops->sync_hw(dev_priv, power_well); power_well->hw_enabled = power_well->ops->is_enabled(dev_priv, power_well); @@ -2722,7 +2690,10 @@ static void vlv_cmnlane_wa(struct drm_i915_private *dev_priv) * @resume: Called from resume code paths or not * * This function initializes the hardware power domain state and enables all - * power domains using intel_display_set_init_power(). + * power wells belonging to the INIT power domain. Power wells in other + * domains (and not in the INIT domain) are referenced or disabled during the + * modeset state HW readout. After that the reference count of each power well + * must match its HW enabled state, see intel_power_domains_verify_state(). */ void intel_power_domains_init_hw(struct drm_i915_private *dev_priv, bool resume) { @@ -2730,7 +2701,7 @@ void intel_power_domains_init_hw(struct drm_i915_private *dev_priv, bool resume) power_domains->initializing = true; - if (IS_SKYLAKE(dev_priv) || IS_KABYLAKE(dev_priv)) { + if (IS_GEN9_BC(dev_priv)) { skl_display_core_init(dev_priv, resume); } else if (IS_GEN9_LP(dev_priv)) { bxt_display_core_init(dev_priv, resume); @@ -2769,12 +2740,92 @@ void intel_power_domains_suspend(struct drm_i915_private *dev_priv) if (!i915.disable_power_well) intel_display_power_put(dev_priv, POWER_DOMAIN_INIT); - if (IS_SKYLAKE(dev_priv) || IS_KABYLAKE(dev_priv)) + if (IS_GEN9_BC(dev_priv)) skl_display_core_uninit(dev_priv); else if (IS_GEN9_LP(dev_priv)) bxt_display_core_uninit(dev_priv); } +static void intel_power_domains_dump_info(struct drm_i915_private *dev_priv) +{ + struct i915_power_domains *power_domains = &dev_priv->power_domains; + struct i915_power_well *power_well; + + for_each_power_well(dev_priv, power_well) { + enum intel_display_power_domain domain; + + DRM_DEBUG_DRIVER("%-25s %d\n", + power_well->name, power_well->count); + + for_each_power_domain(domain, power_well->domains) + DRM_DEBUG_DRIVER(" %-23s %d\n", + intel_display_power_domain_str(domain), + power_domains->domain_use_count[domain]); + } +} + +/** + * intel_power_domains_verify_state - verify the HW/SW state for all power wells + * @dev_priv: i915 device instance + * + * Verify if the reference count of each power well matches its HW enabled + * state and the total refcount of the domains it belongs to. This must be + * called after modeset HW state sanitization, which is responsible for + * acquiring reference counts for any power wells in use and disabling the + * ones left on by BIOS but not required by any active output. + */ +void intel_power_domains_verify_state(struct drm_i915_private *dev_priv) +{ + struct i915_power_domains *power_domains = &dev_priv->power_domains; + struct i915_power_well *power_well; + bool dump_domain_info; + + mutex_lock(&power_domains->lock); + + dump_domain_info = false; + for_each_power_well(dev_priv, power_well) { + enum intel_display_power_domain domain; + int domains_count; + bool enabled; + + /* + * Power wells not belonging to any domain (like the MISC_IO + * and PW1 power wells) are under FW control, so ignore them, + * since their state can change asynchronously. + */ + if (!power_well->domains) + continue; + + enabled = power_well->ops->is_enabled(dev_priv, power_well); + if ((power_well->count || power_well->always_on) != enabled) + DRM_ERROR("power well %s state mismatch (refcount %d/enabled %d)", + power_well->name, power_well->count, enabled); + + domains_count = 0; + for_each_power_domain(domain, power_well->domains) + domains_count += power_domains->domain_use_count[domain]; + + if (power_well->count != domains_count) { + DRM_ERROR("power well %s refcount/domain refcount mismatch " + "(refcount %d/domains refcount %d)\n", + power_well->name, power_well->count, + domains_count); + dump_domain_info = true; + } + } + + if (dump_domain_info) { + static bool dumped; + + if (!dumped) { + intel_power_domains_dump_info(dev_priv); + dumped = true; + } + } + + mutex_unlock(&power_domains->lock); +} + /** * intel_runtime_pm_get - grab a runtime pm reference * @dev_priv: i915 device instance diff --git a/drivers/gpu/drm/i915/intel_sdvo.c b/drivers/gpu/drm/i915/intel_sdvo.c index 2ad13903a054..816a6f5a3fd9 100644 --- a/drivers/gpu/drm/i915/intel_sdvo.c +++ b/drivers/gpu/drm/i915/intel_sdvo.c @@ -2981,6 +2981,7 @@ bool intel_sdvo_init(struct drm_i915_private *dev_priv, /* encoder type will be decided later */ intel_encoder = &intel_sdvo->base; intel_encoder->type = INTEL_OUTPUT_SDVO; + intel_encoder->power_domain = POWER_DOMAIN_PORT_OTHER; intel_encoder->port = port; drm_encoder_init(&dev_priv->drm, &intel_encoder->base, &intel_sdvo_enc_funcs, 0, diff --git a/drivers/gpu/drm/i915/intel_sideband.c b/drivers/gpu/drm/i915/intel_sideband.c index 1a840bf92eea..9f782b5eb6e6 100644 --- a/drivers/gpu/drm/i915/intel_sideband.c +++ b/drivers/gpu/drm/i915/intel_sideband.c @@ -60,8 +60,7 @@ static int vlv_sideband_rw(struct drm_i915_private *dev_priv, u32 devfn, } I915_WRITE(VLV_IOSF_ADDR, addr); - if (!is_read) - I915_WRITE(VLV_IOSF_DATA, *val); + I915_WRITE(VLV_IOSF_DATA, is_read ? 0 : *val); I915_WRITE(VLV_IOSF_DOORBELL_REQ, cmd); if (intel_wait_for_register(dev_priv, @@ -74,7 +73,6 @@ static int vlv_sideband_rw(struct drm_i915_private *dev_priv, u32 devfn, if (is_read) *val = I915_READ(VLV_IOSF_DATA); - I915_WRITE(VLV_IOSF_DATA, 0); return 0; } @@ -93,14 +91,18 @@ u32 vlv_punit_read(struct drm_i915_private *dev_priv, u32 addr) return val; } -void vlv_punit_write(struct drm_i915_private *dev_priv, u32 addr, u32 val) +int vlv_punit_write(struct drm_i915_private *dev_priv, u32 addr, u32 val) { + int err; + WARN_ON(!mutex_is_locked(&dev_priv->rps.hw_lock)); mutex_lock(&dev_priv->sb_lock); - vlv_sideband_rw(dev_priv, PCI_DEVFN(0, 0), IOSF_PORT_PUNIT, - SB_CRWRDA_NP, addr, &val); + err = vlv_sideband_rw(dev_priv, PCI_DEVFN(0, 0), IOSF_PORT_PUNIT, + SB_CRWRDA_NP, addr, &val); mutex_unlock(&dev_priv->sb_lock); + + return err; } u32 vlv_bunit_read(struct drm_i915_private *dev_priv, u32 reg) diff --git a/drivers/gpu/drm/i915/intel_sprite.c b/drivers/gpu/drm/i915/intel_sprite.c index 9ef54688872a..27e0752d1578 100644 --- a/drivers/gpu/drm/i915/intel_sprite.c +++ b/drivers/gpu/drm/i915/intel_sprite.c @@ -219,13 +219,22 @@ skl_update_plane(struct drm_plane *drm_plane, uint32_t src_w = drm_rect_width(&plane_state->base.src) >> 16; uint32_t src_h = drm_rect_height(&plane_state->base.src) >> 16; - plane_ctl = PLANE_CTL_ENABLE | - PLANE_CTL_PIPE_GAMMA_ENABLE | - PLANE_CTL_PIPE_CSC_ENABLE; + plane_ctl = PLANE_CTL_ENABLE; + + if (IS_GEMINILAKE(dev_priv)) { + I915_WRITE(PLANE_COLOR_CTL(pipe, plane_id), + PLANE_COLOR_PIPE_GAMMA_ENABLE | + PLANE_COLOR_PIPE_CSC_ENABLE | + PLANE_COLOR_PLANE_GAMMA_DISABLE); + } else { + plane_ctl |= + PLANE_CTL_PIPE_GAMMA_ENABLE | + PLANE_CTL_PIPE_CSC_ENABLE | + PLANE_CTL_PLANE_GAMMA_DISABLE; + } plane_ctl |= skl_plane_ctl_format(fb->format->format); plane_ctl |= skl_plane_ctl_tiling(fb->modifier); - plane_ctl |= skl_plane_ctl_rotation(rotation); if (key->flags) { diff --git a/drivers/gpu/drm/i915/intel_tv.c b/drivers/gpu/drm/i915/intel_tv.c index eb692e4ffe01..6ed1a3ce47b7 100644 --- a/drivers/gpu/drm/i915/intel_tv.c +++ b/drivers/gpu/drm/i915/intel_tv.c @@ -1621,6 +1621,7 @@ intel_tv_init(struct drm_i915_private *dev_priv) intel_connector_attach_encoder(intel_connector, intel_encoder); intel_encoder->type = INTEL_OUTPUT_TVOUT; + intel_encoder->power_domain = POWER_DOMAIN_PORT_OTHER; intel_encoder->port = PORT_NONE; intel_encoder->crtc_mask = (1 << 0) | (1 << 1); intel_encoder->cloneable = 0; diff --git a/drivers/gpu/drm/i915/intel_uncore.c b/drivers/gpu/drm/i915/intel_uncore.c index abe08885a5ba..441c51fd9746 100644 --- a/drivers/gpu/drm/i915/intel_uncore.c +++ b/drivers/gpu/drm/i915/intel_uncore.c @@ -133,6 +133,13 @@ fw_domains_put(struct drm_i915_private *dev_priv, enum forcewake_domains fw_doma } static void +vgpu_fw_domains_nop(struct drm_i915_private *dev_priv, + enum forcewake_domains fw_domains) +{ + /* Guest driver doesn't need to takes care forcewake. */ +} + +static void fw_domains_posting_read(struct drm_i915_private *dev_priv) { struct intel_uncore_forcewake_domain *d; @@ -635,33 +642,6 @@ find_fw_domain(struct drm_i915_private *dev_priv, u32 offset) return entry->domains; } -static void -intel_fw_table_check(struct drm_i915_private *dev_priv) -{ - const struct intel_forcewake_range *ranges; - unsigned int num_ranges; - s32 prev; - unsigned int i; - - if (!IS_ENABLED(CONFIG_DRM_I915_DEBUG)) - return; - - ranges = dev_priv->uncore.fw_domains_table; - if (!ranges) - return; - - num_ranges = dev_priv->uncore.fw_domains_table_entries; - - for (i = 0, prev = -1; i < num_ranges; i++, ranges++) { - WARN_ON_ONCE(IS_GEN9(dev_priv) && - (prev + 1) != (s32)ranges->start); - WARN_ON_ONCE(prev >= (s32)ranges->start); - prev = ranges->start; - WARN_ON_ONCE(prev >= (s32)ranges->end); - prev = ranges->end; - } -} - #define GEN_FW_RANGE(s, e, d) \ { .start = (s), .end = (e), .domains = (d) } @@ -700,23 +680,6 @@ static const i915_reg_t gen8_shadowed_regs[] = { /* TODO: Other registers are not yet used */ }; -static void intel_shadow_table_check(void) -{ - const i915_reg_t *reg = gen8_shadowed_regs; - s32 prev; - u32 offset; - unsigned int i; - - if (!IS_ENABLED(CONFIG_DRM_I915_DEBUG)) - return; - - for (i = 0, prev = -1; i < ARRAY_SIZE(gen8_shadowed_regs); i++, reg++) { - offset = i915_mmio_reg_offset(*reg); - WARN_ON_ONCE(prev >= (s32)offset); - prev = offset; - } -} - static int mmio_reg_cmp(u32 key, const i915_reg_t *reg) { u32 offset = i915_mmio_reg_offset(*reg); @@ -985,29 +948,19 @@ static inline void __force_wake_auto(struct drm_i915_private *dev_priv, ___force_wake_auto(dev_priv, fw_domains); } -#define __gen6_read(x) \ -static u##x \ -gen6_read##x(struct drm_i915_private *dev_priv, i915_reg_t reg, bool trace) { \ - enum forcewake_domains fw_engine; \ - GEN6_READ_HEADER(x); \ - fw_engine = __gen6_reg_read_fw_domains(offset); \ - if (fw_engine) \ - __force_wake_auto(dev_priv, fw_engine); \ - val = __raw_i915_read##x(dev_priv, reg); \ - GEN6_READ_FOOTER; \ -} - -#define __fwtable_read(x) \ +#define __gen_read(func, x) \ static u##x \ -fwtable_read##x(struct drm_i915_private *dev_priv, i915_reg_t reg, bool trace) { \ +func##_read##x(struct drm_i915_private *dev_priv, i915_reg_t reg, bool trace) { \ enum forcewake_domains fw_engine; \ GEN6_READ_HEADER(x); \ - fw_engine = __fwtable_reg_read_fw_domains(offset); \ + fw_engine = __##func##_reg_read_fw_domains(offset); \ if (fw_engine) \ __force_wake_auto(dev_priv, fw_engine); \ val = __raw_i915_read##x(dev_priv, reg); \ GEN6_READ_FOOTER; \ } +#define __gen6_read(x) __gen_read(gen6, x) +#define __fwtable_read(x) __gen_read(fwtable, x) #define __gen9_decoupled_read(x) \ static u##x \ @@ -1045,34 +998,6 @@ __gen6_read(64) #undef GEN6_READ_FOOTER #undef GEN6_READ_HEADER -#define VGPU_READ_HEADER(x) \ - unsigned long irqflags; \ - u##x val = 0; \ - assert_rpm_device_not_suspended(dev_priv); \ - spin_lock_irqsave(&dev_priv->uncore.lock, irqflags) - -#define VGPU_READ_FOOTER \ - spin_unlock_irqrestore(&dev_priv->uncore.lock, irqflags); \ - trace_i915_reg_rw(false, reg, val, sizeof(val), trace); \ - return val - -#define __vgpu_read(x) \ -static u##x \ -vgpu_read##x(struct drm_i915_private *dev_priv, i915_reg_t reg, bool trace) { \ - VGPU_READ_HEADER(x); \ - val = __raw_i915_read##x(dev_priv, reg); \ - VGPU_READ_FOOTER; \ -} - -__vgpu_read(8) -__vgpu_read(16) -__vgpu_read(32) -__vgpu_read(64) - -#undef __vgpu_read -#undef VGPU_READ_FOOTER -#undef VGPU_READ_HEADER - #define GEN2_WRITE_HEADER \ trace_i915_reg_rw(true, reg, val, sizeof(val), trace); \ assert_rpm_wakelock_held(dev_priv); \ @@ -1136,29 +1061,19 @@ gen6_write##x(struct drm_i915_private *dev_priv, i915_reg_t reg, u##x val, bool GEN6_WRITE_FOOTER; \ } -#define __gen8_write(x) \ -static void \ -gen8_write##x(struct drm_i915_private *dev_priv, i915_reg_t reg, u##x val, bool trace) { \ - enum forcewake_domains fw_engine; \ - GEN6_WRITE_HEADER; \ - fw_engine = __gen8_reg_write_fw_domains(offset); \ - if (fw_engine) \ - __force_wake_auto(dev_priv, fw_engine); \ - __raw_i915_write##x(dev_priv, reg, val); \ - GEN6_WRITE_FOOTER; \ -} - -#define __fwtable_write(x) \ +#define __gen_write(func, x) \ static void \ -fwtable_write##x(struct drm_i915_private *dev_priv, i915_reg_t reg, u##x val, bool trace) { \ +func##_write##x(struct drm_i915_private *dev_priv, i915_reg_t reg, u##x val, bool trace) { \ enum forcewake_domains fw_engine; \ GEN6_WRITE_HEADER; \ - fw_engine = __fwtable_reg_write_fw_domains(offset); \ + fw_engine = __##func##_reg_write_fw_domains(offset); \ if (fw_engine) \ __force_wake_auto(dev_priv, fw_engine); \ __raw_i915_write##x(dev_priv, reg, val); \ GEN6_WRITE_FOOTER; \ } +#define __gen8_write(x) __gen_write(gen8, x) +#define __fwtable_write(x) __gen_write(fwtable, x) #define __gen9_decoupled_write(x) \ static void \ @@ -1195,31 +1110,6 @@ __gen6_write(32) #undef GEN6_WRITE_FOOTER #undef GEN6_WRITE_HEADER -#define VGPU_WRITE_HEADER \ - unsigned long irqflags; \ - trace_i915_reg_rw(true, reg, val, sizeof(val), trace); \ - assert_rpm_device_not_suspended(dev_priv); \ - spin_lock_irqsave(&dev_priv->uncore.lock, irqflags) - -#define VGPU_WRITE_FOOTER \ - spin_unlock_irqrestore(&dev_priv->uncore.lock, irqflags) - -#define __vgpu_write(x) \ -static void vgpu_write##x(struct drm_i915_private *dev_priv, \ - i915_reg_t reg, u##x val, bool trace) { \ - VGPU_WRITE_HEADER; \ - __raw_i915_write##x(dev_priv, reg, val); \ - VGPU_WRITE_FOOTER; \ -} - -__vgpu_write(8) -__vgpu_write(16) -__vgpu_write(32) - -#undef __vgpu_write -#undef VGPU_WRITE_FOOTER -#undef VGPU_WRITE_HEADER - #define ASSIGN_WRITE_MMIO_VFUNCS(x) \ do { \ dev_priv->uncore.funcs.mmio_writeb = x##_write8; \ @@ -1375,6 +1265,11 @@ static void intel_uncore_fw_domains_init(struct drm_i915_private *dev_priv) FORCEWAKE, FORCEWAKE_ACK); } + if (intel_vgpu_active(dev_priv)) { + dev_priv->uncore.funcs.force_wake_get = vgpu_fw_domains_nop; + dev_priv->uncore.funcs.force_wake_put = vgpu_fw_domains_nop; + } + /* All future platforms are expected to require complex power gating */ WARN_ON(dev_priv->uncore.fw_domains == 0); } @@ -1445,15 +1340,6 @@ void intel_uncore_init(struct drm_i915_private *dev_priv) break; } - intel_fw_table_check(dev_priv); - if (INTEL_GEN(dev_priv) >= 8) - intel_shadow_table_check(); - - if (intel_vgpu_active(dev_priv)) { - ASSIGN_WRITE_MMIO_VFUNCS(vgpu); - ASSIGN_READ_MMIO_VFUNCS(vgpu); - } - i915_check_and_clear_faults(dev_priv); } #undef ASSIGN_WRITE_MMIO_VFUNCS @@ -1971,3 +1857,7 @@ intel_uncore_forcewake_for_reg(struct drm_i915_private *dev_priv, return fw_domains; } + +#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) +#include "selftests/intel_uncore.c" +#endif diff --git a/drivers/gpu/drm/i915/selftests/huge_gem_object.c b/drivers/gpu/drm/i915/selftests/huge_gem_object.c new file mode 100644 index 000000000000..4e681fc13be4 --- /dev/null +++ b/drivers/gpu/drm/i915/selftests/huge_gem_object.c @@ -0,0 +1,135 @@ +/* + * Copyright © 2016 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + */ + +#include "huge_gem_object.h" + +static void huge_free_pages(struct drm_i915_gem_object *obj, + struct sg_table *pages) +{ + unsigned long nreal = obj->scratch / PAGE_SIZE; + struct scatterlist *sg; + + for (sg = pages->sgl; sg && nreal--; sg = __sg_next(sg)) + __free_page(sg_page(sg)); + + sg_free_table(pages); + kfree(pages); +} + +static struct sg_table * +huge_get_pages(struct drm_i915_gem_object *obj) +{ +#define GFP (GFP_KERNEL | __GFP_NOWARN | __GFP_NORETRY) + const unsigned long nreal = obj->scratch / PAGE_SIZE; + const unsigned long npages = obj->base.size / PAGE_SIZE; + struct scatterlist *sg, *src, *end; + struct sg_table *pages; + unsigned long n; + + pages = kmalloc(sizeof(*pages), GFP); + if (!pages) + return ERR_PTR(-ENOMEM); + + if (sg_alloc_table(pages, npages, GFP)) { + kfree(pages); + return ERR_PTR(-ENOMEM); + } + + sg = pages->sgl; + for (n = 0; n < nreal; n++) { + struct page *page; + + page = alloc_page(GFP | __GFP_HIGHMEM); + if (!page) { + sg_mark_end(sg); + goto err; + } + + sg_set_page(sg, page, PAGE_SIZE, 0); + sg = __sg_next(sg); + } + if (nreal < npages) { + for (end = sg, src = pages->sgl; sg; sg = __sg_next(sg)) { + sg_set_page(sg, sg_page(src), PAGE_SIZE, 0); + src = __sg_next(src); + if (src == end) + src = pages->sgl; + } + } + + if (i915_gem_gtt_prepare_pages(obj, pages)) + goto err; + + return pages; + +err: + huge_free_pages(obj, pages); + return ERR_PTR(-ENOMEM); +#undef GFP +} + +static void huge_put_pages(struct drm_i915_gem_object *obj, + struct sg_table *pages) +{ + i915_gem_gtt_finish_pages(obj, pages); + huge_free_pages(obj, pages); + + obj->mm.dirty = false; +} + +static const struct drm_i915_gem_object_ops huge_ops = { + .flags = I915_GEM_OBJECT_HAS_STRUCT_PAGE | + I915_GEM_OBJECT_IS_SHRINKABLE, + .get_pages = huge_get_pages, + .put_pages = huge_put_pages, +}; + +struct drm_i915_gem_object * +huge_gem_object(struct drm_i915_private *i915, + phys_addr_t phys_size, + dma_addr_t dma_size) +{ + struct drm_i915_gem_object *obj; + + GEM_BUG_ON(!phys_size || phys_size > dma_size); + GEM_BUG_ON(!IS_ALIGNED(phys_size, PAGE_SIZE)); + GEM_BUG_ON(!IS_ALIGNED(dma_size, I915_GTT_PAGE_SIZE)); + + if (overflows_type(dma_size, obj->base.size)) + return ERR_PTR(-E2BIG); + + obj = i915_gem_object_alloc(i915); + if (!obj) + return ERR_PTR(-ENOMEM); + + drm_gem_private_object_init(&i915->drm, &obj->base, dma_size); + i915_gem_object_init(obj, &huge_ops); + + obj->base.write_domain = I915_GEM_DOMAIN_CPU; + obj->base.read_domains = I915_GEM_DOMAIN_CPU; + obj->cache_level = HAS_LLC(i915) ? I915_CACHE_LLC : I915_CACHE_NONE; + obj->scratch = phys_size; + + return obj; +} diff --git a/drivers/gpu/drm/i915/selftests/huge_gem_object.h b/drivers/gpu/drm/i915/selftests/huge_gem_object.h new file mode 100644 index 000000000000..a6133a9e8029 --- /dev/null +++ b/drivers/gpu/drm/i915/selftests/huge_gem_object.h @@ -0,0 +1,45 @@ +/* + * Copyright © 2016 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + */ + +#ifndef __HUGE_GEM_OBJECT_H +#define __HUGE_GEM_OBJECT_H + +struct drm_i915_gem_object * +huge_gem_object(struct drm_i915_private *i915, + phys_addr_t phys_size, + dma_addr_t dma_size); + +static inline phys_addr_t +huge_gem_object_phys_size(struct drm_i915_gem_object *obj) +{ + return obj->scratch; +} + +static inline dma_addr_t +huge_gem_object_dma_size(struct drm_i915_gem_object *obj) +{ + return obj->base.size; +} + +#endif /* !__HUGE_GEM_OBJECT_H */ diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_coherency.c b/drivers/gpu/drm/i915/selftests/i915_gem_coherency.c new file mode 100644 index 000000000000..f08d0179b3df --- /dev/null +++ b/drivers/gpu/drm/i915/selftests/i915_gem_coherency.c @@ -0,0 +1,385 @@ +/* + * Copyright © 2017 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + */ + +#include <linux/prime_numbers.h> + +#include "../i915_selftest.h" +#include "i915_random.h" + +static int cpu_set(struct drm_i915_gem_object *obj, + unsigned long offset, + u32 v) +{ + unsigned int needs_clflush; + struct page *page; + typeof(v) *map; + int err; + + err = i915_gem_obj_prepare_shmem_write(obj, &needs_clflush); + if (err) + return err; + + page = i915_gem_object_get_page(obj, offset >> PAGE_SHIFT); + map = kmap_atomic(page); + if (needs_clflush & CLFLUSH_BEFORE) + clflush(map+offset_in_page(offset) / sizeof(*map)); + map[offset_in_page(offset) / sizeof(*map)] = v; + if (needs_clflush & CLFLUSH_AFTER) + clflush(map+offset_in_page(offset) / sizeof(*map)); + kunmap_atomic(map); + + i915_gem_obj_finish_shmem_access(obj); + return 0; +} + +static int cpu_get(struct drm_i915_gem_object *obj, + unsigned long offset, + u32 *v) +{ + unsigned int needs_clflush; + struct page *page; + typeof(v) map; + int err; + + err = i915_gem_obj_prepare_shmem_read(obj, &needs_clflush); + if (err) + return err; + + page = i915_gem_object_get_page(obj, offset >> PAGE_SHIFT); + map = kmap_atomic(page); + if (needs_clflush & CLFLUSH_BEFORE) + clflush(map+offset_in_page(offset) / sizeof(*map)); + *v = map[offset_in_page(offset) / sizeof(*map)]; + kunmap_atomic(map); + + i915_gem_obj_finish_shmem_access(obj); + return 0; +} + +static int gtt_set(struct drm_i915_gem_object *obj, + unsigned long offset, + u32 v) +{ + struct i915_vma *vma; + typeof(v) *map; + int err; + + err = i915_gem_object_set_to_gtt_domain(obj, true); + if (err) + return err; + + vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0, PIN_MAPPABLE); + if (IS_ERR(vma)) + return PTR_ERR(vma); + + map = i915_vma_pin_iomap(vma); + i915_vma_unpin(vma); + if (IS_ERR(map)) + return PTR_ERR(map); + + map[offset / sizeof(*map)] = v; + i915_vma_unpin_iomap(vma); + + return 0; +} + +static int gtt_get(struct drm_i915_gem_object *obj, + unsigned long offset, + u32 *v) +{ + struct i915_vma *vma; + typeof(v) map; + int err; + + err = i915_gem_object_set_to_gtt_domain(obj, false); + if (err) + return err; + + vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0, PIN_MAPPABLE); + if (IS_ERR(vma)) + return PTR_ERR(vma); + + map = i915_vma_pin_iomap(vma); + i915_vma_unpin(vma); + if (IS_ERR(map)) + return PTR_ERR(map); + + *v = map[offset / sizeof(*map)]; + i915_vma_unpin_iomap(vma); + + return 0; +} + +static int wc_set(struct drm_i915_gem_object *obj, + unsigned long offset, + u32 v) +{ + typeof(v) *map; + int err; + + /* XXX GTT write followed by WC write go missing */ + i915_gem_object_flush_gtt_write_domain(obj); + + err = i915_gem_object_set_to_gtt_domain(obj, true); + if (err) + return err; + + map = i915_gem_object_pin_map(obj, I915_MAP_WC); + if (IS_ERR(map)) + return PTR_ERR(map); + + map[offset / sizeof(*map)] = v; + i915_gem_object_unpin_map(obj); + + return 0; +} + +static int wc_get(struct drm_i915_gem_object *obj, + unsigned long offset, + u32 *v) +{ + typeof(v) map; + int err; + + /* XXX WC write followed by GTT write go missing */ + i915_gem_object_flush_gtt_write_domain(obj); + + err = i915_gem_object_set_to_gtt_domain(obj, false); + if (err) + return err; + + map = i915_gem_object_pin_map(obj, I915_MAP_WC); + if (IS_ERR(map)) + return PTR_ERR(map); + + *v = map[offset / sizeof(*map)]; + i915_gem_object_unpin_map(obj); + + return 0; +} + +static int gpu_set(struct drm_i915_gem_object *obj, + unsigned long offset, + u32 v) +{ + struct drm_i915_private *i915 = to_i915(obj->base.dev); + struct drm_i915_gem_request *rq; + struct i915_vma *vma; + u32 *cs; + int err; + + err = i915_gem_object_set_to_gtt_domain(obj, true); + if (err) + return err; + + vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0, 0); + if (IS_ERR(vma)) + return PTR_ERR(vma); + + rq = i915_gem_request_alloc(i915->engine[RCS], i915->kernel_context); + if (IS_ERR(rq)) { + i915_vma_unpin(vma); + return PTR_ERR(rq); + } + + cs = intel_ring_begin(rq, 4); + if (IS_ERR(cs)) { + __i915_add_request(rq, false); + i915_vma_unpin(vma); + return PTR_ERR(cs); + } + + if (INTEL_GEN(i915) >= 8) { + *cs++ = MI_STORE_DWORD_IMM_GEN4 | 1 << 22; + *cs++ = lower_32_bits(i915_ggtt_offset(vma) + offset); + *cs++ = upper_32_bits(i915_ggtt_offset(vma) + offset); + *cs++ = v; + } else if (INTEL_GEN(i915) >= 4) { + *cs++ = MI_STORE_DWORD_IMM_GEN4 | 1 << 22; + *cs++ = 0; + *cs++ = i915_ggtt_offset(vma) + offset; + *cs++ = v; + } else { + *cs++ = MI_STORE_DWORD_IMM | 1 << 22; + *cs++ = i915_ggtt_offset(vma) + offset; + *cs++ = v; + *cs++ = MI_NOOP; + } + intel_ring_advance(rq, cs); + + i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE); + i915_vma_unpin(vma); + + reservation_object_lock(obj->resv, NULL); + reservation_object_add_excl_fence(obj->resv, &rq->fence); + reservation_object_unlock(obj->resv); + + __i915_add_request(rq, true); + + return 0; +} + +static bool always_valid(struct drm_i915_private *i915) +{ + return true; +} + +static bool needs_mi_store_dword(struct drm_i915_private *i915) +{ + return igt_can_mi_store_dword_imm(i915); +} + +static const struct igt_coherency_mode { + const char *name; + int (*set)(struct drm_i915_gem_object *, unsigned long offset, u32 v); + int (*get)(struct drm_i915_gem_object *, unsigned long offset, u32 *v); + bool (*valid)(struct drm_i915_private *i915); +} igt_coherency_mode[] = { + { "cpu", cpu_set, cpu_get, always_valid }, + { "gtt", gtt_set, gtt_get, always_valid }, + { "wc", wc_set, wc_get, always_valid }, + { "gpu", gpu_set, NULL, needs_mi_store_dword }, + { }, +}; + +static int igt_gem_coherency(void *arg) +{ + const unsigned int ncachelines = PAGE_SIZE/64; + I915_RND_STATE(prng); + struct drm_i915_private *i915 = arg; + const struct igt_coherency_mode *read, *write, *over; + struct drm_i915_gem_object *obj; + unsigned long count, n; + u32 *offsets, *values; + int err = 0; + + /* We repeatedly write, overwrite and read from a sequence of + * cachelines in order to try and detect incoherency (unflushed writes + * from either the CPU or GPU). Each setter/getter uses our cache + * domain API which should prevent incoherency. + */ + + offsets = kmalloc_array(ncachelines, 2*sizeof(u32), GFP_KERNEL); + if (!offsets) + return -ENOMEM; + for (count = 0; count < ncachelines; count++) + offsets[count] = count * 64 + 4 * (count % 16); + + values = offsets + ncachelines; + + mutex_lock(&i915->drm.struct_mutex); + for (over = igt_coherency_mode; over->name; over++) { + if (!over->set) + continue; + + if (!over->valid(i915)) + continue; + + for (write = igt_coherency_mode; write->name; write++) { + if (!write->set) + continue; + + if (!write->valid(i915)) + continue; + + for (read = igt_coherency_mode; read->name; read++) { + if (!read->get) + continue; + + if (!read->valid(i915)) + continue; + + for_each_prime_number_from(count, 1, ncachelines) { + obj = i915_gem_object_create_internal(i915, PAGE_SIZE); + if (IS_ERR(obj)) { + err = PTR_ERR(obj); + goto unlock; + } + + i915_random_reorder(offsets, ncachelines, &prng); + for (n = 0; n < count; n++) + values[n] = prandom_u32_state(&prng); + + for (n = 0; n < count; n++) { + err = over->set(obj, offsets[n], ~values[n]); + if (err) { + pr_err("Failed to set stale value[%ld/%ld] in object using %s, err=%d\n", + n, count, over->name, err); + goto put_object; + } + } + + for (n = 0; n < count; n++) { + err = write->set(obj, offsets[n], values[n]); + if (err) { + pr_err("Failed to set value[%ld/%ld] in object using %s, err=%d\n", + n, count, write->name, err); + goto put_object; + } + } + + for (n = 0; n < count; n++) { + u32 found; + + err = read->get(obj, offsets[n], &found); + if (err) { + pr_err("Failed to get value[%ld/%ld] in object using %s, err=%d\n", + n, count, read->name, err); + goto put_object; + } + + if (found != values[n]) { + pr_err("Value[%ld/%ld] mismatch, (overwrite with %s) wrote [%s] %x read [%s] %x (inverse %x), at offset %x\n", + n, count, over->name, + write->name, values[n], + read->name, found, + ~values[n], offsets[n]); + err = -EINVAL; + goto put_object; + } + } + + __i915_gem_object_release_unless_active(obj); + } + } + } + } +unlock: + mutex_unlock(&i915->drm.struct_mutex); + kfree(offsets); + return err; + +put_object: + __i915_gem_object_release_unless_active(obj); + goto unlock; +} + +int i915_gem_coherency_live_selftests(struct drm_i915_private *i915) +{ + static const struct i915_subtest tests[] = { + SUBTEST(igt_gem_coherency), + }; + + return i915_subtests(tests, i915); +} diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_context.c b/drivers/gpu/drm/i915/selftests/i915_gem_context.c new file mode 100644 index 000000000000..3813a19a6179 --- /dev/null +++ b/drivers/gpu/drm/i915/selftests/i915_gem_context.c @@ -0,0 +1,459 @@ +/* + * Copyright © 2017 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + */ + +#include "../i915_selftest.h" + +#include "mock_drm.h" +#include "huge_gem_object.h" + +#define DW_PER_PAGE (PAGE_SIZE / sizeof(u32)) + +static struct i915_vma * +gpu_fill_dw(struct i915_vma *vma, u64 offset, unsigned long count, u32 value) +{ + struct drm_i915_gem_object *obj; + const int gen = INTEL_GEN(vma->vm->i915); + unsigned long n, size; + u32 *cmd; + int err; + + GEM_BUG_ON(!igt_can_mi_store_dword_imm(vma->vm->i915)); + + size = (4 * count + 1) * sizeof(u32); + size = round_up(size, PAGE_SIZE); + obj = i915_gem_object_create_internal(vma->vm->i915, size); + if (IS_ERR(obj)) + return ERR_CAST(obj); + + cmd = i915_gem_object_pin_map(obj, I915_MAP_WB); + if (IS_ERR(cmd)) { + err = PTR_ERR(cmd); + goto err; + } + + GEM_BUG_ON(offset + (count - 1) * PAGE_SIZE > vma->node.size); + offset += vma->node.start; + + for (n = 0; n < count; n++) { + if (gen >= 8) { + *cmd++ = MI_STORE_DWORD_IMM_GEN4; + *cmd++ = lower_32_bits(offset); + *cmd++ = upper_32_bits(offset); + *cmd++ = value; + } else if (gen >= 4) { + *cmd++ = MI_STORE_DWORD_IMM_GEN4 | + (gen < 6 ? 1 << 22 : 0); + *cmd++ = 0; + *cmd++ = offset; + *cmd++ = value; + } else { + *cmd++ = MI_STORE_DWORD_IMM | 1 << 22; + *cmd++ = offset; + *cmd++ = value; + } + offset += PAGE_SIZE; + } + *cmd = MI_BATCH_BUFFER_END; + i915_gem_object_unpin_map(obj); + + err = i915_gem_object_set_to_gtt_domain(obj, false); + if (err) + goto err; + + vma = i915_vma_instance(obj, vma->vm, NULL); + if (IS_ERR(vma)) { + err = PTR_ERR(vma); + goto err; + } + + err = i915_vma_pin(vma, 0, 0, PIN_USER); + if (err) + goto err; + + return vma; + +err: + i915_gem_object_put(obj); + return ERR_PTR(err); +} + +static unsigned long real_page_count(struct drm_i915_gem_object *obj) +{ + return huge_gem_object_phys_size(obj) >> PAGE_SHIFT; +} + +static unsigned long fake_page_count(struct drm_i915_gem_object *obj) +{ + return huge_gem_object_dma_size(obj) >> PAGE_SHIFT; +} + +static int gpu_fill(struct drm_i915_gem_object *obj, + struct i915_gem_context *ctx, + struct intel_engine_cs *engine, + unsigned int dw) +{ + struct drm_i915_private *i915 = to_i915(obj->base.dev); + struct i915_address_space *vm = + ctx->ppgtt ? &ctx->ppgtt->base : &i915->ggtt.base; + struct drm_i915_gem_request *rq; + struct i915_vma *vma; + struct i915_vma *batch; + unsigned int flags; + int err; + + GEM_BUG_ON(obj->base.size > vm->total); + + vma = i915_vma_instance(obj, vm, NULL); + if (IS_ERR(vma)) + return PTR_ERR(vma); + + err = i915_gem_object_set_to_gtt_domain(obj, false); + if (err) + return err; + + err = i915_vma_pin(vma, 0, 0, PIN_HIGH | PIN_USER); + if (err) + return err; + + /* Within the GTT the huge objects maps every page onto + * its 1024 real pages (using phys_pfn = dma_pfn % 1024). + * We set the nth dword within the page using the nth + * mapping via the GTT - this should exercise the GTT mapping + * whilst checking that each context provides a unique view + * into the object. + */ + batch = gpu_fill_dw(vma, + (dw * real_page_count(obj)) << PAGE_SHIFT | + (dw * sizeof(u32)), + real_page_count(obj), + dw); + if (IS_ERR(batch)) { + err = PTR_ERR(batch); + goto err_vma; + } + + rq = i915_gem_request_alloc(engine, ctx); + if (IS_ERR(rq)) { + err = PTR_ERR(rq); + goto err_batch; + } + + err = engine->emit_flush(rq, EMIT_INVALIDATE); + if (err) + goto err_request; + + err = i915_switch_context(rq); + if (err) + goto err_request; + + flags = 0; + if (INTEL_GEN(vm->i915) <= 5) + flags |= I915_DISPATCH_SECURE; + + err = engine->emit_bb_start(rq, + batch->node.start, batch->node.size, + flags); + if (err) + goto err_request; + + i915_vma_move_to_active(batch, rq, 0); + i915_gem_object_set_active_reference(batch->obj); + i915_vma_unpin(batch); + i915_vma_close(batch); + + i915_vma_move_to_active(vma, rq, 0); + i915_vma_unpin(vma); + + reservation_object_lock(obj->resv, NULL); + reservation_object_add_excl_fence(obj->resv, &rq->fence); + reservation_object_unlock(obj->resv); + + __i915_add_request(rq, true); + + return 0; + +err_request: + __i915_add_request(rq, false); +err_batch: + i915_vma_unpin(batch); +err_vma: + i915_vma_unpin(vma); + return err; +} + +static int cpu_fill(struct drm_i915_gem_object *obj, u32 value) +{ + const bool has_llc = HAS_LLC(to_i915(obj->base.dev)); + unsigned int n, m, need_flush; + int err; + + err = i915_gem_obj_prepare_shmem_write(obj, &need_flush); + if (err) + return err; + + for (n = 0; n < real_page_count(obj); n++) { + u32 *map; + + map = kmap_atomic(i915_gem_object_get_page(obj, n)); + for (m = 0; m < DW_PER_PAGE; m++) + map[m] = value; + if (!has_llc) + drm_clflush_virt_range(map, PAGE_SIZE); + kunmap_atomic(map); + } + + i915_gem_obj_finish_shmem_access(obj); + obj->base.read_domains = I915_GEM_DOMAIN_GTT | I915_GEM_DOMAIN_CPU; + obj->base.write_domain = 0; + return 0; +} + +static int cpu_check(struct drm_i915_gem_object *obj, unsigned int max) +{ + unsigned int n, m, needs_flush; + int err; + + err = i915_gem_obj_prepare_shmem_read(obj, &needs_flush); + if (err) + return err; + + for (n = 0; n < real_page_count(obj); n++) { + u32 *map; + + map = kmap_atomic(i915_gem_object_get_page(obj, n)); + if (needs_flush & CLFLUSH_BEFORE) + drm_clflush_virt_range(map, PAGE_SIZE); + + for (m = 0; m < max; m++) { + if (map[m] != m) { + pr_err("Invalid value at page %d, offset %d: found %x expected %x\n", + n, m, map[m], m); + err = -EINVAL; + goto out_unmap; + } + } + + for (; m < DW_PER_PAGE; m++) { + if (map[m] != 0xdeadbeef) { + pr_err("Invalid value at page %d, offset %d: found %x expected %x\n", + n, m, map[m], 0xdeadbeef); + err = -EINVAL; + goto out_unmap; + } + } + +out_unmap: + kunmap_atomic(map); + if (err) + break; + } + + i915_gem_obj_finish_shmem_access(obj); + return err; +} + +static struct drm_i915_gem_object * +create_test_object(struct i915_gem_context *ctx, + struct drm_file *file, + struct list_head *objects) +{ + struct drm_i915_gem_object *obj; + struct i915_address_space *vm = + ctx->ppgtt ? &ctx->ppgtt->base : &ctx->i915->ggtt.base; + u64 size; + u32 handle; + int err; + + size = min(vm->total / 2, 1024ull * DW_PER_PAGE * PAGE_SIZE); + size = round_down(size, DW_PER_PAGE * PAGE_SIZE); + + obj = huge_gem_object(ctx->i915, DW_PER_PAGE * PAGE_SIZE, size); + if (IS_ERR(obj)) + return obj; + + /* tie the handle to the drm_file for easy reaping */ + err = drm_gem_handle_create(file, &obj->base, &handle); + i915_gem_object_put(obj); + if (err) + return ERR_PTR(err); + + err = cpu_fill(obj, 0xdeadbeef); + if (err) { + pr_err("Failed to fill object with cpu, err=%d\n", + err); + return ERR_PTR(err); + } + + list_add_tail(&obj->st_link, objects); + return obj; +} + +static unsigned long max_dwords(struct drm_i915_gem_object *obj) +{ + unsigned long npages = fake_page_count(obj); + + GEM_BUG_ON(!IS_ALIGNED(npages, DW_PER_PAGE)); + return npages / DW_PER_PAGE; +} + +static int igt_ctx_exec(void *arg) +{ + struct drm_i915_private *i915 = arg; + struct drm_file *file = mock_file(i915); + struct drm_i915_gem_object *obj; + IGT_TIMEOUT(end_time); + LIST_HEAD(objects); + unsigned long ncontexts, ndwords, dw; + bool first_shared_gtt = true; + int err; + + /* Create a few different contexts (with different mm) and write + * through each ctx/mm using the GPU making sure those writes end + * up in the expected pages of our obj. + */ + + mutex_lock(&i915->drm.struct_mutex); + + ncontexts = 0; + ndwords = 0; + dw = 0; + while (!time_after(jiffies, end_time)) { + struct intel_engine_cs *engine; + struct i915_gem_context *ctx; + unsigned int id; + + if (first_shared_gtt) { + ctx = __create_hw_context(i915, file->driver_priv); + first_shared_gtt = false; + } else { + ctx = i915_gem_create_context(i915, file->driver_priv); + } + if (IS_ERR(ctx)) { + err = PTR_ERR(ctx); + goto out_unlock; + } + + for_each_engine(engine, i915, id) { + if (dw == 0) { + obj = create_test_object(ctx, file, &objects); + if (IS_ERR(obj)) { + err = PTR_ERR(obj); + goto out_unlock; + } + } + + err = gpu_fill(obj, ctx, engine, dw); + if (err) { + pr_err("Failed to fill dword %lu [%lu/%lu] with gpu (%s) in ctx %u [full-ppgtt? %s], err=%d\n", + ndwords, dw, max_dwords(obj), + engine->name, ctx->hw_id, + yesno(!!ctx->ppgtt), err); + goto out_unlock; + } + + if (++dw == max_dwords(obj)) + dw = 0; + ndwords++; + } + ncontexts++; + } + pr_info("Submitted %lu contexts (across %u engines), filling %lu dwords\n", + ncontexts, INTEL_INFO(i915)->num_rings, ndwords); + + dw = 0; + list_for_each_entry(obj, &objects, st_link) { + unsigned int rem = + min_t(unsigned int, ndwords - dw, max_dwords(obj)); + + err = cpu_check(obj, rem); + if (err) + break; + + dw += rem; + } + +out_unlock: + mutex_unlock(&i915->drm.struct_mutex); + + mock_file_free(i915, file); + return err; +} + +static int fake_aliasing_ppgtt_enable(struct drm_i915_private *i915) +{ + struct drm_i915_gem_object *obj; + int err; + + err = i915_gem_init_aliasing_ppgtt(i915); + if (err) + return err; + + list_for_each_entry(obj, &i915->mm.bound_list, global_link) { + struct i915_vma *vma; + + vma = i915_vma_instance(obj, &i915->ggtt.base, NULL); + if (IS_ERR(vma)) + continue; + + vma->flags &= ~I915_VMA_LOCAL_BIND; + } + + return 0; +} + +static void fake_aliasing_ppgtt_disable(struct drm_i915_private *i915) +{ + i915_gem_fini_aliasing_ppgtt(i915); +} + +int i915_gem_context_live_selftests(struct drm_i915_private *dev_priv) +{ + static const struct i915_subtest tests[] = { + SUBTEST(igt_ctx_exec), + }; + bool fake_alias = false; + int err; + + /* Install a fake aliasing gtt for exercise */ + if (USES_PPGTT(dev_priv) && !dev_priv->mm.aliasing_ppgtt) { + mutex_lock(&dev_priv->drm.struct_mutex); + err = fake_aliasing_ppgtt_enable(dev_priv); + mutex_unlock(&dev_priv->drm.struct_mutex); + if (err) + return err; + + GEM_BUG_ON(!dev_priv->mm.aliasing_ppgtt); + fake_alias = true; + } + + err = i915_subtests(tests, dev_priv); + + if (fake_alias) { + mutex_lock(&dev_priv->drm.struct_mutex); + fake_aliasing_ppgtt_disable(dev_priv); + mutex_unlock(&dev_priv->drm.struct_mutex); + } + + return err; +} diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_dmabuf.c b/drivers/gpu/drm/i915/selftests/i915_gem_dmabuf.c new file mode 100644 index 000000000000..817bef74bbcb --- /dev/null +++ b/drivers/gpu/drm/i915/selftests/i915_gem_dmabuf.c @@ -0,0 +1,303 @@ +/* + * Copyright © 2016 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + */ + +#include "../i915_selftest.h" + +#include "mock_gem_device.h" +#include "mock_dmabuf.h" + +static int igt_dmabuf_export(void *arg) +{ + struct drm_i915_private *i915 = arg; + struct drm_i915_gem_object *obj; + struct dma_buf *dmabuf; + + obj = i915_gem_object_create(i915, PAGE_SIZE); + if (IS_ERR(obj)) + return PTR_ERR(obj); + + dmabuf = i915_gem_prime_export(&i915->drm, &obj->base, 0); + i915_gem_object_put(obj); + if (IS_ERR(dmabuf)) { + pr_err("i915_gem_prime_export failed with err=%d\n", + (int)PTR_ERR(dmabuf)); + return PTR_ERR(dmabuf); + } + + dma_buf_put(dmabuf); + return 0; +} + +static int igt_dmabuf_import_self(void *arg) +{ + struct drm_i915_private *i915 = arg; + struct drm_i915_gem_object *obj; + struct drm_gem_object *import; + struct dma_buf *dmabuf; + int err; + + obj = i915_gem_object_create(i915, PAGE_SIZE); + if (IS_ERR(obj)) + return PTR_ERR(obj); + + dmabuf = i915_gem_prime_export(&i915->drm, &obj->base, 0); + if (IS_ERR(dmabuf)) { + pr_err("i915_gem_prime_export failed with err=%d\n", + (int)PTR_ERR(dmabuf)); + err = PTR_ERR(dmabuf); + goto out; + } + + import = i915_gem_prime_import(&i915->drm, dmabuf); + if (IS_ERR(import)) { + pr_err("i915_gem_prime_import failed with err=%d\n", + (int)PTR_ERR(import)); + err = PTR_ERR(import); + goto out_dmabuf; + } + + if (import != &obj->base) { + pr_err("i915_gem_prime_import created a new object!\n"); + err = -EINVAL; + goto out_import; + } + + err = 0; +out_import: + i915_gem_object_put(to_intel_bo(import)); +out_dmabuf: + dma_buf_put(dmabuf); +out: + i915_gem_object_put(obj); + return err; +} + +static int igt_dmabuf_import(void *arg) +{ + struct drm_i915_private *i915 = arg; + struct drm_i915_gem_object *obj; + struct dma_buf *dmabuf; + void *obj_map, *dma_map; + u32 pattern[] = { 0, 0xaa, 0xcc, 0x55, 0xff }; + int err, i; + + dmabuf = mock_dmabuf(1); + if (IS_ERR(dmabuf)) + return PTR_ERR(dmabuf); + + obj = to_intel_bo(i915_gem_prime_import(&i915->drm, dmabuf)); + if (IS_ERR(obj)) { + pr_err("i915_gem_prime_import failed with err=%d\n", + (int)PTR_ERR(obj)); + err = PTR_ERR(obj); + goto out_dmabuf; + } + + if (obj->base.dev != &i915->drm) { + pr_err("i915_gem_prime_import created a non-i915 object!\n"); + err = -EINVAL; + goto out_obj; + } + + if (obj->base.size != PAGE_SIZE) { + pr_err("i915_gem_prime_import is wrong size found %lld, expected %ld\n", + (long long)obj->base.size, PAGE_SIZE); + err = -EINVAL; + goto out_obj; + } + + dma_map = dma_buf_vmap(dmabuf); + if (!dma_map) { + pr_err("dma_buf_vmap failed\n"); + err = -ENOMEM; + goto out_obj; + } + + if (0) { /* Can not yet map dmabuf */ + obj_map = i915_gem_object_pin_map(obj, I915_MAP_WB); + if (IS_ERR(obj_map)) { + err = PTR_ERR(obj_map); + pr_err("i915_gem_object_pin_map failed with err=%d\n", err); + goto out_dma_map; + } + + for (i = 0; i < ARRAY_SIZE(pattern); i++) { + memset(dma_map, pattern[i], PAGE_SIZE); + if (memchr_inv(obj_map, pattern[i], PAGE_SIZE)) { + err = -EINVAL; + pr_err("imported vmap not all set to %x!\n", pattern[i]); + i915_gem_object_unpin_map(obj); + goto out_dma_map; + } + } + + for (i = 0; i < ARRAY_SIZE(pattern); i++) { + memset(obj_map, pattern[i], PAGE_SIZE); + if (memchr_inv(dma_map, pattern[i], PAGE_SIZE)) { + err = -EINVAL; + pr_err("exported vmap not all set to %x!\n", pattern[i]); + i915_gem_object_unpin_map(obj); + goto out_dma_map; + } + } + + i915_gem_object_unpin_map(obj); + } + + err = 0; +out_dma_map: + dma_buf_vunmap(dmabuf, dma_map); +out_obj: + i915_gem_object_put(obj); +out_dmabuf: + dma_buf_put(dmabuf); + return err; +} + +static int igt_dmabuf_import_ownership(void *arg) +{ + struct drm_i915_private *i915 = arg; + struct drm_i915_gem_object *obj; + struct dma_buf *dmabuf; + void *ptr; + int err; + + dmabuf = mock_dmabuf(1); + if (IS_ERR(dmabuf)) + return PTR_ERR(dmabuf); + + ptr = dma_buf_vmap(dmabuf); + if (!ptr) { + pr_err("dma_buf_vmap failed\n"); + err = -ENOMEM; + goto err_dmabuf; + } + + memset(ptr, 0xc5, PAGE_SIZE); + dma_buf_vunmap(dmabuf, ptr); + + obj = to_intel_bo(i915_gem_prime_import(&i915->drm, dmabuf)); + if (IS_ERR(obj)) { + pr_err("i915_gem_prime_import failed with err=%d\n", + (int)PTR_ERR(obj)); + err = PTR_ERR(obj); + goto err_dmabuf; + } + + dma_buf_put(dmabuf); + + err = i915_gem_object_pin_pages(obj); + if (err) { + pr_err("i915_gem_object_pin_pages failed with err=%d\n", err); + goto out_obj; + } + + err = 0; + i915_gem_object_unpin_pages(obj); +out_obj: + i915_gem_object_put(obj); + return err; + +err_dmabuf: + dma_buf_put(dmabuf); + return err; +} + +static int igt_dmabuf_export_vmap(void *arg) +{ + struct drm_i915_private *i915 = arg; + struct drm_i915_gem_object *obj; + struct dma_buf *dmabuf; + void *ptr; + int err; + + obj = i915_gem_object_create(i915, PAGE_SIZE); + if (IS_ERR(obj)) + return PTR_ERR(obj); + + dmabuf = i915_gem_prime_export(&i915->drm, &obj->base, 0); + if (IS_ERR(dmabuf)) { + pr_err("i915_gem_prime_export failed with err=%d\n", + (int)PTR_ERR(dmabuf)); + err = PTR_ERR(dmabuf); + goto err_obj; + } + i915_gem_object_put(obj); + + ptr = dma_buf_vmap(dmabuf); + if (IS_ERR(ptr)) { + err = PTR_ERR(ptr); + pr_err("dma_buf_vmap failed with err=%d\n", err); + goto out; + } + + if (memchr_inv(ptr, 0, dmabuf->size)) { + pr_err("Exported object not initialiased to zero!\n"); + err = -EINVAL; + goto out; + } + + memset(ptr, 0xc5, dmabuf->size); + + err = 0; + dma_buf_vunmap(dmabuf, ptr); +out: + dma_buf_put(dmabuf); + return err; + +err_obj: + i915_gem_object_put(obj); + return err; +} + +int i915_gem_dmabuf_mock_selftests(void) +{ + static const struct i915_subtest tests[] = { + SUBTEST(igt_dmabuf_export), + SUBTEST(igt_dmabuf_import_self), + SUBTEST(igt_dmabuf_import), + SUBTEST(igt_dmabuf_import_ownership), + SUBTEST(igt_dmabuf_export_vmap), + }; + struct drm_i915_private *i915; + int err; + + i915 = mock_gem_device(); + if (!i915) + return -ENOMEM; + + err = i915_subtests(tests, i915); + + drm_dev_unref(&i915->drm); + return err; +} + +int i915_gem_dmabuf_live_selftests(struct drm_i915_private *i915) +{ + static const struct i915_subtest tests[] = { + SUBTEST(igt_dmabuf_export), + }; + + return i915_subtests(tests, i915); +} diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_evict.c b/drivers/gpu/drm/i915/selftests/i915_gem_evict.c new file mode 100644 index 000000000000..97af353db218 --- /dev/null +++ b/drivers/gpu/drm/i915/selftests/i915_gem_evict.c @@ -0,0 +1,260 @@ +/* + * Copyright © 2016 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + */ + +#include "../i915_selftest.h" + +#include "mock_gem_device.h" + +static int populate_ggtt(struct drm_i915_private *i915) +{ + struct drm_i915_gem_object *obj; + u64 size; + + for (size = 0; + size + I915_GTT_PAGE_SIZE <= i915->ggtt.base.total; + size += I915_GTT_PAGE_SIZE) { + struct i915_vma *vma; + + obj = i915_gem_object_create_internal(i915, I915_GTT_PAGE_SIZE); + if (IS_ERR(obj)) + return PTR_ERR(obj); + + vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0, 0); + if (IS_ERR(vma)) + return PTR_ERR(vma); + } + + if (!list_empty(&i915->mm.unbound_list)) { + size = 0; + list_for_each_entry(obj, &i915->mm.unbound_list, global_link) + size++; + + pr_err("Found %lld objects unbound!\n", size); + return -EINVAL; + } + + if (list_empty(&i915->ggtt.base.inactive_list)) { + pr_err("No objects on the GGTT inactive list!\n"); + return -EINVAL; + } + + return 0; +} + +static void unpin_ggtt(struct drm_i915_private *i915) +{ + struct i915_vma *vma; + + list_for_each_entry(vma, &i915->ggtt.base.inactive_list, vm_link) + i915_vma_unpin(vma); +} + +static void cleanup_objects(struct drm_i915_private *i915) +{ + struct drm_i915_gem_object *obj, *on; + + list_for_each_entry_safe(obj, on, &i915->mm.unbound_list, global_link) + i915_gem_object_put(obj); + + list_for_each_entry_safe(obj, on, &i915->mm.bound_list, global_link) + i915_gem_object_put(obj); + + mutex_unlock(&i915->drm.struct_mutex); + + i915_gem_drain_freed_objects(i915); + + mutex_lock(&i915->drm.struct_mutex); +} + +static int igt_evict_something(void *arg) +{ + struct drm_i915_private *i915 = arg; + struct i915_ggtt *ggtt = &i915->ggtt; + int err; + + /* Fill the GGTT with pinned objects and try to evict one. */ + + err = populate_ggtt(i915); + if (err) + goto cleanup; + + /* Everything is pinned, nothing should happen */ + err = i915_gem_evict_something(&ggtt->base, + I915_GTT_PAGE_SIZE, 0, 0, + 0, U64_MAX, + 0); + if (err != -ENOSPC) { + pr_err("i915_gem_evict_something failed on a full GGTT with err=%d\n", + err); + goto cleanup; + } + + unpin_ggtt(i915); + + /* Everything is unpinned, we should be able to evict something */ + err = i915_gem_evict_something(&ggtt->base, + I915_GTT_PAGE_SIZE, 0, 0, + 0, U64_MAX, + 0); + if (err) { + pr_err("i915_gem_evict_something failed on a full GGTT with err=%d\n", + err); + goto cleanup; + } + +cleanup: + cleanup_objects(i915); + return err; +} + +static int igt_overcommit(void *arg) +{ + struct drm_i915_private *i915 = arg; + struct drm_i915_gem_object *obj; + struct i915_vma *vma; + int err; + + /* Fill the GGTT with pinned objects and then try to pin one more. + * We expect it to fail. + */ + + err = populate_ggtt(i915); + if (err) + goto cleanup; + + obj = i915_gem_object_create_internal(i915, I915_GTT_PAGE_SIZE); + if (IS_ERR(obj)) { + err = PTR_ERR(obj); + goto cleanup; + } + + list_move(&obj->global_link, &i915->mm.unbound_list); + + vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0, 0); + if (!IS_ERR(vma) || PTR_ERR(vma) != -ENOSPC) { + pr_err("Failed to evict+insert, i915_gem_object_ggtt_pin returned err=%d\n", (int)PTR_ERR(vma)); + err = -EINVAL; + goto cleanup; + } + +cleanup: + cleanup_objects(i915); + return err; +} + +static int igt_evict_for_vma(void *arg) +{ + struct drm_i915_private *i915 = arg; + struct i915_ggtt *ggtt = &i915->ggtt; + struct drm_mm_node target = { + .start = 0, + .size = 4096, + }; + int err; + + /* Fill the GGTT with pinned objects and try to evict a range. */ + + err = populate_ggtt(i915); + if (err) + goto cleanup; + + /* Everything is pinned, nothing should happen */ + err = i915_gem_evict_for_node(&ggtt->base, &target, 0); + if (err != -ENOSPC) { + pr_err("i915_gem_evict_for_node on a full GGTT returned err=%d\n", + err); + goto cleanup; + } + + unpin_ggtt(i915); + + /* Everything is unpinned, we should be able to evict the node */ + err = i915_gem_evict_for_node(&ggtt->base, &target, 0); + if (err) { + pr_err("i915_gem_evict_for_node returned err=%d\n", + err); + goto cleanup; + } + +cleanup: + cleanup_objects(i915); + return err; +} + +static int igt_evict_vm(void *arg) +{ + struct drm_i915_private *i915 = arg; + struct i915_ggtt *ggtt = &i915->ggtt; + int err; + + /* Fill the GGTT with pinned objects and try to evict everything. */ + + err = populate_ggtt(i915); + if (err) + goto cleanup; + + /* Everything is pinned, nothing should happen */ + err = i915_gem_evict_vm(&ggtt->base, false); + if (err) { + pr_err("i915_gem_evict_vm on a full GGTT returned err=%d]\n", + err); + goto cleanup; + } + + unpin_ggtt(i915); + + err = i915_gem_evict_vm(&ggtt->base, false); + if (err) { + pr_err("i915_gem_evict_vm on a full GGTT returned err=%d]\n", + err); + goto cleanup; + } + +cleanup: + cleanup_objects(i915); + return err; +} + +int i915_gem_evict_mock_selftests(void) +{ + static const struct i915_subtest tests[] = { + SUBTEST(igt_evict_something), + SUBTEST(igt_evict_for_vma), + SUBTEST(igt_evict_vm), + SUBTEST(igt_overcommit), + }; + struct drm_i915_private *i915; + int err; + + i915 = mock_gem_device(); + if (!i915) + return -ENOMEM; + + mutex_lock(&i915->drm.struct_mutex); + err = i915_subtests(tests, i915); + mutex_unlock(&i915->drm.struct_mutex); + + drm_dev_unref(&i915->drm); + return err; +} diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c b/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c new file mode 100644 index 000000000000..0f3fa34377c6 --- /dev/null +++ b/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c @@ -0,0 +1,1556 @@ +/* + * Copyright © 2016 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + */ + +#include <linux/list_sort.h> +#include <linux/prime_numbers.h> + +#include "../i915_selftest.h" +#include "i915_random.h" + +#include "mock_context.h" +#include "mock_drm.h" +#include "mock_gem_device.h" + +static void fake_free_pages(struct drm_i915_gem_object *obj, + struct sg_table *pages) +{ + sg_free_table(pages); + kfree(pages); +} + +static struct sg_table * +fake_get_pages(struct drm_i915_gem_object *obj) +{ +#define GFP (GFP_KERNEL | __GFP_NOWARN | __GFP_NORETRY) +#define PFN_BIAS 0x1000 + struct sg_table *pages; + struct scatterlist *sg; + typeof(obj->base.size) rem; + + pages = kmalloc(sizeof(*pages), GFP); + if (!pages) + return ERR_PTR(-ENOMEM); + + rem = round_up(obj->base.size, BIT(31)) >> 31; + if (sg_alloc_table(pages, rem, GFP)) { + kfree(pages); + return ERR_PTR(-ENOMEM); + } + + rem = obj->base.size; + for (sg = pages->sgl; sg; sg = sg_next(sg)) { + unsigned long len = min_t(typeof(rem), rem, BIT(31)); + + GEM_BUG_ON(!len); + sg_set_page(sg, pfn_to_page(PFN_BIAS), len, 0); + sg_dma_address(sg) = page_to_phys(sg_page(sg)); + sg_dma_len(sg) = len; + + rem -= len; + } + GEM_BUG_ON(rem); + + obj->mm.madv = I915_MADV_DONTNEED; + return pages; +#undef GFP +} + +static void fake_put_pages(struct drm_i915_gem_object *obj, + struct sg_table *pages) +{ + fake_free_pages(obj, pages); + obj->mm.dirty = false; + obj->mm.madv = I915_MADV_WILLNEED; +} + +static const struct drm_i915_gem_object_ops fake_ops = { + .flags = I915_GEM_OBJECT_IS_SHRINKABLE, + .get_pages = fake_get_pages, + .put_pages = fake_put_pages, +}; + +static struct drm_i915_gem_object * +fake_dma_object(struct drm_i915_private *i915, u64 size) +{ + struct drm_i915_gem_object *obj; + + GEM_BUG_ON(!size); + GEM_BUG_ON(!IS_ALIGNED(size, I915_GTT_PAGE_SIZE)); + + if (overflows_type(size, obj->base.size)) + return ERR_PTR(-E2BIG); + + obj = i915_gem_object_alloc(i915); + if (!obj) + return ERR_PTR(-ENOMEM); + + drm_gem_private_object_init(&i915->drm, &obj->base, size); + i915_gem_object_init(obj, &fake_ops); + + obj->base.write_domain = I915_GEM_DOMAIN_CPU; + obj->base.read_domains = I915_GEM_DOMAIN_CPU; + obj->cache_level = I915_CACHE_NONE; + + /* Preallocate the "backing storage" */ + if (i915_gem_object_pin_pages(obj)) + return ERR_PTR(-ENOMEM); + + i915_gem_object_unpin_pages(obj); + return obj; +} + +static int igt_ppgtt_alloc(void *arg) +{ + struct drm_i915_private *dev_priv = arg; + struct i915_hw_ppgtt *ppgtt; + u64 size, last; + int err; + + /* Allocate a ppggt and try to fill the entire range */ + + if (!USES_PPGTT(dev_priv)) + return 0; + + ppgtt = kzalloc(sizeof(*ppgtt), GFP_KERNEL); + if (!ppgtt) + return -ENOMEM; + + mutex_lock(&dev_priv->drm.struct_mutex); + err = __hw_ppgtt_init(ppgtt, dev_priv); + if (err) + goto err_ppgtt; + + if (!ppgtt->base.allocate_va_range) + goto err_ppgtt_cleanup; + + /* Check we can allocate the entire range */ + for (size = 4096; + size <= ppgtt->base.total; + size <<= 2) { + err = ppgtt->base.allocate_va_range(&ppgtt->base, 0, size); + if (err) { + if (err == -ENOMEM) { + pr_info("[1] Ran out of memory for va_range [0 + %llx] [bit %d]\n", + size, ilog2(size)); + err = 0; /* virtual space too large! */ + } + goto err_ppgtt_cleanup; + } + + ppgtt->base.clear_range(&ppgtt->base, 0, size); + } + + /* Check we can incrementally allocate the entire range */ + for (last = 0, size = 4096; + size <= ppgtt->base.total; + last = size, size <<= 2) { + err = ppgtt->base.allocate_va_range(&ppgtt->base, + last, size - last); + if (err) { + if (err == -ENOMEM) { + pr_info("[2] Ran out of memory for va_range [%llx + %llx] [bit %d]\n", + last, size - last, ilog2(size)); + err = 0; /* virtual space too large! */ + } + goto err_ppgtt_cleanup; + } + } + +err_ppgtt_cleanup: + ppgtt->base.cleanup(&ppgtt->base); +err_ppgtt: + mutex_unlock(&dev_priv->drm.struct_mutex); + kfree(ppgtt); + return err; +} + +static int lowlevel_hole(struct drm_i915_private *i915, + struct i915_address_space *vm, + u64 hole_start, u64 hole_end, + unsigned long end_time) +{ + I915_RND_STATE(seed_prng); + unsigned int size; + + /* Keep creating larger objects until one cannot fit into the hole */ + for (size = 12; (hole_end - hole_start) >> size; size++) { + I915_RND_SUBSTATE(prng, seed_prng); + struct drm_i915_gem_object *obj; + unsigned int *order, count, n; + u64 hole_size; + + hole_size = (hole_end - hole_start) >> size; + if (hole_size > KMALLOC_MAX_SIZE / sizeof(u32)) + hole_size = KMALLOC_MAX_SIZE / sizeof(u32); + count = hole_size; + do { + count >>= 1; + order = i915_random_order(count, &prng); + } while (!order && count); + if (!order) + break; + + GEM_BUG_ON(count * BIT_ULL(size) > vm->total); + GEM_BUG_ON(hole_start + count * BIT_ULL(size) > hole_end); + + /* Ignore allocation failures (i.e. don't report them as + * a test failure) as we are purposefully allocating very + * large objects without checking that we have sufficient + * memory. We expect to hit -ENOMEM. + */ + + obj = fake_dma_object(i915, BIT_ULL(size)); + if (IS_ERR(obj)) { + kfree(order); + break; + } + + GEM_BUG_ON(obj->base.size != BIT_ULL(size)); + + if (i915_gem_object_pin_pages(obj)) { + i915_gem_object_put(obj); + kfree(order); + break; + } + + for (n = 0; n < count; n++) { + u64 addr = hole_start + order[n] * BIT_ULL(size); + + GEM_BUG_ON(addr + BIT_ULL(size) > vm->total); + + if (igt_timeout(end_time, + "%s timed out before %d/%d\n", + __func__, n, count)) { + hole_end = hole_start; /* quit */ + break; + } + + if (vm->allocate_va_range && + vm->allocate_va_range(vm, addr, BIT_ULL(size))) + break; + + vm->insert_entries(vm, obj->mm.pages, addr, + I915_CACHE_NONE, 0); + } + count = n; + + i915_random_reorder(order, count, &prng); + for (n = 0; n < count; n++) { + u64 addr = hole_start + order[n] * BIT_ULL(size); + + GEM_BUG_ON(addr + BIT_ULL(size) > vm->total); + vm->clear_range(vm, addr, BIT_ULL(size)); + } + + i915_gem_object_unpin_pages(obj); + i915_gem_object_put(obj); + + kfree(order); + } + + return 0; +} + +static void close_object_list(struct list_head *objects, + struct i915_address_space *vm) +{ + struct drm_i915_gem_object *obj, *on; + int ignored; + + list_for_each_entry_safe(obj, on, objects, st_link) { + struct i915_vma *vma; + + vma = i915_vma_instance(obj, vm, NULL); + if (!IS_ERR(vma)) + ignored = i915_vma_unbind(vma); + /* Only ppgtt vma may be closed before the object is freed */ + if (!IS_ERR(vma) && !i915_vma_is_ggtt(vma)) + i915_vma_close(vma); + + list_del(&obj->st_link); + i915_gem_object_put(obj); + } +} + +static int fill_hole(struct drm_i915_private *i915, + struct i915_address_space *vm, + u64 hole_start, u64 hole_end, + unsigned long end_time) +{ + const u64 hole_size = hole_end - hole_start; + struct drm_i915_gem_object *obj; + const unsigned long max_pages = + min_t(u64, ULONG_MAX - 1, hole_size/2 >> PAGE_SHIFT); + const unsigned long max_step = max(int_sqrt(max_pages), 2UL); + unsigned long npages, prime, flags; + struct i915_vma *vma; + LIST_HEAD(objects); + int err; + + /* Try binding many VMA working inwards from either edge */ + + flags = PIN_OFFSET_FIXED | PIN_USER; + if (i915_is_ggtt(vm)) + flags |= PIN_GLOBAL; + + for_each_prime_number_from(prime, 2, max_step) { + for (npages = 1; npages <= max_pages; npages *= prime) { + const u64 full_size = npages << PAGE_SHIFT; + const struct { + const char *name; + u64 offset; + int step; + } phases[] = { + { "top-down", hole_end, -1, }, + { "bottom-up", hole_start, 1, }, + { } + }, *p; + + obj = fake_dma_object(i915, full_size); + if (IS_ERR(obj)) + break; + + list_add(&obj->st_link, &objects); + + /* Align differing sized objects against the edges, and + * check we don't walk off into the void when binding + * them into the GTT. + */ + for (p = phases; p->name; p++) { + u64 offset; + + offset = p->offset; + list_for_each_entry(obj, &objects, st_link) { + vma = i915_vma_instance(obj, vm, NULL); + if (IS_ERR(vma)) + continue; + + if (p->step < 0) { + if (offset < hole_start + obj->base.size) + break; + offset -= obj->base.size; + } + + err = i915_vma_pin(vma, 0, 0, offset | flags); + if (err) { + pr_err("%s(%s) pin (forward) failed with err=%d on size=%lu pages (prime=%lu), offset=%llx\n", + __func__, p->name, err, npages, prime, offset); + goto err; + } + + if (!drm_mm_node_allocated(&vma->node) || + i915_vma_misplaced(vma, 0, 0, offset | flags)) { + pr_err("%s(%s) (forward) insert failed: vma.node=%llx + %llx [allocated? %d], expected offset %llx\n", + __func__, p->name, vma->node.start, vma->node.size, drm_mm_node_allocated(&vma->node), + offset); + err = -EINVAL; + goto err; + } + + i915_vma_unpin(vma); + + if (p->step > 0) { + if (offset + obj->base.size > hole_end) + break; + offset += obj->base.size; + } + } + + offset = p->offset; + list_for_each_entry(obj, &objects, st_link) { + vma = i915_vma_instance(obj, vm, NULL); + if (IS_ERR(vma)) + continue; + + if (p->step < 0) { + if (offset < hole_start + obj->base.size) + break; + offset -= obj->base.size; + } + + if (!drm_mm_node_allocated(&vma->node) || + i915_vma_misplaced(vma, 0, 0, offset | flags)) { + pr_err("%s(%s) (forward) moved vma.node=%llx + %llx, expected offset %llx\n", + __func__, p->name, vma->node.start, vma->node.size, + offset); + err = -EINVAL; + goto err; + } + + err = i915_vma_unbind(vma); + if (err) { + pr_err("%s(%s) (forward) unbind of vma.node=%llx + %llx failed with err=%d\n", + __func__, p->name, vma->node.start, vma->node.size, + err); + goto err; + } + + if (p->step > 0) { + if (offset + obj->base.size > hole_end) + break; + offset += obj->base.size; + } + } + + offset = p->offset; + list_for_each_entry_reverse(obj, &objects, st_link) { + vma = i915_vma_instance(obj, vm, NULL); + if (IS_ERR(vma)) + continue; + + if (p->step < 0) { + if (offset < hole_start + obj->base.size) + break; + offset -= obj->base.size; + } + + err = i915_vma_pin(vma, 0, 0, offset | flags); + if (err) { + pr_err("%s(%s) pin (backward) failed with err=%d on size=%lu pages (prime=%lu), offset=%llx\n", + __func__, p->name, err, npages, prime, offset); + goto err; + } + + if (!drm_mm_node_allocated(&vma->node) || + i915_vma_misplaced(vma, 0, 0, offset | flags)) { + pr_err("%s(%s) (backward) insert failed: vma.node=%llx + %llx [allocated? %d], expected offset %llx\n", + __func__, p->name, vma->node.start, vma->node.size, drm_mm_node_allocated(&vma->node), + offset); + err = -EINVAL; + goto err; + } + + i915_vma_unpin(vma); + + if (p->step > 0) { + if (offset + obj->base.size > hole_end) + break; + offset += obj->base.size; + } + } + + offset = p->offset; + list_for_each_entry_reverse(obj, &objects, st_link) { + vma = i915_vma_instance(obj, vm, NULL); + if (IS_ERR(vma)) + continue; + + if (p->step < 0) { + if (offset < hole_start + obj->base.size) + break; + offset -= obj->base.size; + } + + if (!drm_mm_node_allocated(&vma->node) || + i915_vma_misplaced(vma, 0, 0, offset | flags)) { + pr_err("%s(%s) (backward) moved vma.node=%llx + %llx [allocated? %d], expected offset %llx\n", + __func__, p->name, vma->node.start, vma->node.size, drm_mm_node_allocated(&vma->node), + offset); + err = -EINVAL; + goto err; + } + + err = i915_vma_unbind(vma); + if (err) { + pr_err("%s(%s) (backward) unbind of vma.node=%llx + %llx failed with err=%d\n", + __func__, p->name, vma->node.start, vma->node.size, + err); + goto err; + } + + if (p->step > 0) { + if (offset + obj->base.size > hole_end) + break; + offset += obj->base.size; + } + } + } + + if (igt_timeout(end_time, "%s timed out (npages=%lu, prime=%lu)\n", + __func__, npages, prime)) { + err = -EINTR; + goto err; + } + } + + close_object_list(&objects, vm); + } + + return 0; + +err: + close_object_list(&objects, vm); + return err; +} + +static int walk_hole(struct drm_i915_private *i915, + struct i915_address_space *vm, + u64 hole_start, u64 hole_end, + unsigned long end_time) +{ + const u64 hole_size = hole_end - hole_start; + const unsigned long max_pages = + min_t(u64, ULONG_MAX - 1, hole_size >> PAGE_SHIFT); + unsigned long flags; + u64 size; + + /* Try binding a single VMA in different positions within the hole */ + + flags = PIN_OFFSET_FIXED | PIN_USER; + if (i915_is_ggtt(vm)) + flags |= PIN_GLOBAL; + + for_each_prime_number_from(size, 1, max_pages) { + struct drm_i915_gem_object *obj; + struct i915_vma *vma; + u64 addr; + int err = 0; + + obj = fake_dma_object(i915, size << PAGE_SHIFT); + if (IS_ERR(obj)) + break; + + vma = i915_vma_instance(obj, vm, NULL); + if (IS_ERR(vma)) { + err = PTR_ERR(vma); + goto err; + } + + for (addr = hole_start; + addr + obj->base.size < hole_end; + addr += obj->base.size) { + err = i915_vma_pin(vma, 0, 0, addr | flags); + if (err) { + pr_err("%s bind failed at %llx + %llx [hole %llx- %llx] with err=%d\n", + __func__, addr, vma->size, + hole_start, hole_end, err); + goto err; + } + i915_vma_unpin(vma); + + if (!drm_mm_node_allocated(&vma->node) || + i915_vma_misplaced(vma, 0, 0, addr | flags)) { + pr_err("%s incorrect at %llx + %llx\n", + __func__, addr, vma->size); + err = -EINVAL; + goto err; + } + + err = i915_vma_unbind(vma); + if (err) { + pr_err("%s unbind failed at %llx + %llx with err=%d\n", + __func__, addr, vma->size, err); + goto err; + } + + GEM_BUG_ON(drm_mm_node_allocated(&vma->node)); + + if (igt_timeout(end_time, + "%s timed out at %llx\n", + __func__, addr)) { + err = -EINTR; + goto err; + } + } + +err: + if (!i915_vma_is_ggtt(vma)) + i915_vma_close(vma); + i915_gem_object_put(obj); + if (err) + return err; + } + + return 0; +} + +static int pot_hole(struct drm_i915_private *i915, + struct i915_address_space *vm, + u64 hole_start, u64 hole_end, + unsigned long end_time) +{ + struct drm_i915_gem_object *obj; + struct i915_vma *vma; + unsigned long flags; + unsigned int pot; + int err = 0; + + flags = PIN_OFFSET_FIXED | PIN_USER; + if (i915_is_ggtt(vm)) + flags |= PIN_GLOBAL; + + obj = i915_gem_object_create_internal(i915, 2 * I915_GTT_PAGE_SIZE); + if (IS_ERR(obj)) + return PTR_ERR(obj); + + vma = i915_vma_instance(obj, vm, NULL); + if (IS_ERR(vma)) { + err = PTR_ERR(vma); + goto err_obj; + } + + /* Insert a pair of pages across every pot boundary within the hole */ + for (pot = fls64(hole_end - 1) - 1; + pot > ilog2(2 * I915_GTT_PAGE_SIZE); + pot--) { + u64 step = BIT_ULL(pot); + u64 addr; + + for (addr = round_up(hole_start + I915_GTT_PAGE_SIZE, step) - I915_GTT_PAGE_SIZE; + addr <= round_down(hole_end - 2*I915_GTT_PAGE_SIZE, step) - I915_GTT_PAGE_SIZE; + addr += step) { + err = i915_vma_pin(vma, 0, 0, addr | flags); + if (err) { + pr_err("%s failed to pin object at %llx in hole [%llx - %llx], with err=%d\n", + __func__, + addr, + hole_start, hole_end, + err); + goto err; + } + + if (!drm_mm_node_allocated(&vma->node) || + i915_vma_misplaced(vma, 0, 0, addr | flags)) { + pr_err("%s incorrect at %llx + %llx\n", + __func__, addr, vma->size); + i915_vma_unpin(vma); + err = i915_vma_unbind(vma); + err = -EINVAL; + goto err; + } + + i915_vma_unpin(vma); + err = i915_vma_unbind(vma); + GEM_BUG_ON(err); + } + + if (igt_timeout(end_time, + "%s timed out after %d/%d\n", + __func__, pot, fls64(hole_end - 1) - 1)) { + err = -EINTR; + goto err; + } + } + +err: + if (!i915_vma_is_ggtt(vma)) + i915_vma_close(vma); +err_obj: + i915_gem_object_put(obj); + return err; +} + +static int drunk_hole(struct drm_i915_private *i915, + struct i915_address_space *vm, + u64 hole_start, u64 hole_end, + unsigned long end_time) +{ + I915_RND_STATE(prng); + unsigned int size; + unsigned long flags; + + flags = PIN_OFFSET_FIXED | PIN_USER; + if (i915_is_ggtt(vm)) + flags |= PIN_GLOBAL; + + /* Keep creating larger objects until one cannot fit into the hole */ + for (size = 12; (hole_end - hole_start) >> size; size++) { + struct drm_i915_gem_object *obj; + unsigned int *order, count, n; + struct i915_vma *vma; + u64 hole_size; + int err; + + hole_size = (hole_end - hole_start) >> size; + if (hole_size > KMALLOC_MAX_SIZE / sizeof(u32)) + hole_size = KMALLOC_MAX_SIZE / sizeof(u32); + count = hole_size; + do { + count >>= 1; + order = i915_random_order(count, &prng); + } while (!order && count); + if (!order) + break; + + /* Ignore allocation failures (i.e. don't report them as + * a test failure) as we are purposefully allocating very + * large objects without checking that we have sufficient + * memory. We expect to hit -ENOMEM. + */ + + obj = fake_dma_object(i915, BIT_ULL(size)); + if (IS_ERR(obj)) { + kfree(order); + break; + } + + vma = i915_vma_instance(obj, vm, NULL); + if (IS_ERR(vma)) { + err = PTR_ERR(vma); + goto err_obj; + } + + GEM_BUG_ON(vma->size != BIT_ULL(size)); + + for (n = 0; n < count; n++) { + u64 addr = hole_start + order[n] * BIT_ULL(size); + + err = i915_vma_pin(vma, 0, 0, addr | flags); + if (err) { + pr_err("%s failed to pin object at %llx + %llx in hole [%llx - %llx], with err=%d\n", + __func__, + addr, BIT_ULL(size), + hole_start, hole_end, + err); + goto err; + } + + if (!drm_mm_node_allocated(&vma->node) || + i915_vma_misplaced(vma, 0, 0, addr | flags)) { + pr_err("%s incorrect at %llx + %llx\n", + __func__, addr, BIT_ULL(size)); + i915_vma_unpin(vma); + err = i915_vma_unbind(vma); + err = -EINVAL; + goto err; + } + + i915_vma_unpin(vma); + err = i915_vma_unbind(vma); + GEM_BUG_ON(err); + + if (igt_timeout(end_time, + "%s timed out after %d/%d\n", + __func__, n, count)) { + err = -EINTR; + goto err; + } + } + +err: + if (!i915_vma_is_ggtt(vma)) + i915_vma_close(vma); +err_obj: + i915_gem_object_put(obj); + kfree(order); + if (err) + return err; + } + + return 0; +} + +static int __shrink_hole(struct drm_i915_private *i915, + struct i915_address_space *vm, + u64 hole_start, u64 hole_end, + unsigned long end_time) +{ + struct drm_i915_gem_object *obj; + unsigned long flags = PIN_OFFSET_FIXED | PIN_USER; + unsigned int order = 12; + LIST_HEAD(objects); + int err = 0; + u64 addr; + + /* Keep creating larger objects until one cannot fit into the hole */ + for (addr = hole_start; addr < hole_end; ) { + struct i915_vma *vma; + u64 size = BIT_ULL(order++); + + size = min(size, hole_end - addr); + obj = fake_dma_object(i915, size); + if (IS_ERR(obj)) { + err = PTR_ERR(obj); + break; + } + + list_add(&obj->st_link, &objects); + + vma = i915_vma_instance(obj, vm, NULL); + if (IS_ERR(vma)) { + err = PTR_ERR(vma); + break; + } + + GEM_BUG_ON(vma->size != size); + + err = i915_vma_pin(vma, 0, 0, addr | flags); + if (err) { + pr_err("%s failed to pin object at %llx + %llx in hole [%llx - %llx], with err=%d\n", + __func__, addr, size, hole_start, hole_end, err); + break; + } + + if (!drm_mm_node_allocated(&vma->node) || + i915_vma_misplaced(vma, 0, 0, addr | flags)) { + pr_err("%s incorrect at %llx + %llx\n", + __func__, addr, size); + i915_vma_unpin(vma); + err = i915_vma_unbind(vma); + err = -EINVAL; + break; + } + + i915_vma_unpin(vma); + addr += size; + + if (igt_timeout(end_time, + "%s timed out at ofset %llx [%llx - %llx]\n", + __func__, addr, hole_start, hole_end)) { + err = -EINTR; + break; + } + } + + close_object_list(&objects, vm); + return err; +} + +static int shrink_hole(struct drm_i915_private *i915, + struct i915_address_space *vm, + u64 hole_start, u64 hole_end, + unsigned long end_time) +{ + unsigned long prime; + int err; + + vm->fault_attr.probability = 999; + atomic_set(&vm->fault_attr.times, -1); + + for_each_prime_number_from(prime, 0, ULONG_MAX - 1) { + vm->fault_attr.interval = prime; + err = __shrink_hole(i915, vm, hole_start, hole_end, end_time); + if (err) + break; + } + + memset(&vm->fault_attr, 0, sizeof(vm->fault_attr)); + + return err; +} + +static int exercise_ppgtt(struct drm_i915_private *dev_priv, + int (*func)(struct drm_i915_private *i915, + struct i915_address_space *vm, + u64 hole_start, u64 hole_end, + unsigned long end_time)) +{ + struct drm_file *file; + struct i915_hw_ppgtt *ppgtt; + IGT_TIMEOUT(end_time); + int err; + + if (!USES_FULL_PPGTT(dev_priv)) + return 0; + + file = mock_file(dev_priv); + if (IS_ERR(file)) + return PTR_ERR(file); + + mutex_lock(&dev_priv->drm.struct_mutex); + ppgtt = i915_ppgtt_create(dev_priv, file->driver_priv, "mock"); + if (IS_ERR(ppgtt)) { + err = PTR_ERR(ppgtt); + goto out_unlock; + } + GEM_BUG_ON(offset_in_page(ppgtt->base.total)); + GEM_BUG_ON(ppgtt->base.closed); + + err = func(dev_priv, &ppgtt->base, 0, ppgtt->base.total, end_time); + + i915_ppgtt_close(&ppgtt->base); + i915_ppgtt_put(ppgtt); +out_unlock: + mutex_unlock(&dev_priv->drm.struct_mutex); + + mock_file_free(dev_priv, file); + return err; +} + +static int igt_ppgtt_fill(void *arg) +{ + return exercise_ppgtt(arg, fill_hole); +} + +static int igt_ppgtt_walk(void *arg) +{ + return exercise_ppgtt(arg, walk_hole); +} + +static int igt_ppgtt_pot(void *arg) +{ + return exercise_ppgtt(arg, pot_hole); +} + +static int igt_ppgtt_drunk(void *arg) +{ + return exercise_ppgtt(arg, drunk_hole); +} + +static int igt_ppgtt_lowlevel(void *arg) +{ + return exercise_ppgtt(arg, lowlevel_hole); +} + +static int igt_ppgtt_shrink(void *arg) +{ + return exercise_ppgtt(arg, shrink_hole); +} + +static int sort_holes(void *priv, struct list_head *A, struct list_head *B) +{ + struct drm_mm_node *a = list_entry(A, typeof(*a), hole_stack); + struct drm_mm_node *b = list_entry(B, typeof(*b), hole_stack); + + if (a->start < b->start) + return -1; + else + return 1; +} + +static int exercise_ggtt(struct drm_i915_private *i915, + int (*func)(struct drm_i915_private *i915, + struct i915_address_space *vm, + u64 hole_start, u64 hole_end, + unsigned long end_time)) +{ + struct i915_ggtt *ggtt = &i915->ggtt; + u64 hole_start, hole_end, last = 0; + struct drm_mm_node *node; + IGT_TIMEOUT(end_time); + int err; + + mutex_lock(&i915->drm.struct_mutex); +restart: + list_sort(NULL, &ggtt->base.mm.hole_stack, sort_holes); + drm_mm_for_each_hole(node, &ggtt->base.mm, hole_start, hole_end) { + if (hole_start < last) + continue; + + if (ggtt->base.mm.color_adjust) + ggtt->base.mm.color_adjust(node, 0, + &hole_start, &hole_end); + if (hole_start >= hole_end) + continue; + + err = func(i915, &ggtt->base, hole_start, hole_end, end_time); + if (err) + break; + + /* As we have manipulated the drm_mm, the list may be corrupt */ + last = hole_end; + goto restart; + } + mutex_unlock(&i915->drm.struct_mutex); + + return err; +} + +static int igt_ggtt_fill(void *arg) +{ + return exercise_ggtt(arg, fill_hole); +} + +static int igt_ggtt_walk(void *arg) +{ + return exercise_ggtt(arg, walk_hole); +} + +static int igt_ggtt_pot(void *arg) +{ + return exercise_ggtt(arg, pot_hole); +} + +static int igt_ggtt_drunk(void *arg) +{ + return exercise_ggtt(arg, drunk_hole); +} + +static int igt_ggtt_lowlevel(void *arg) +{ + return exercise_ggtt(arg, lowlevel_hole); +} + +static int igt_ggtt_page(void *arg) +{ + const unsigned int count = PAGE_SIZE/sizeof(u32); + I915_RND_STATE(prng); + struct drm_i915_private *i915 = arg; + struct i915_ggtt *ggtt = &i915->ggtt; + struct drm_i915_gem_object *obj; + struct drm_mm_node tmp; + unsigned int *order, n; + int err; + + mutex_lock(&i915->drm.struct_mutex); + + obj = i915_gem_object_create_internal(i915, PAGE_SIZE); + if (IS_ERR(obj)) { + err = PTR_ERR(obj); + goto out_unlock; + } + + err = i915_gem_object_pin_pages(obj); + if (err) + goto out_free; + + memset(&tmp, 0, sizeof(tmp)); + err = drm_mm_insert_node_in_range(&ggtt->base.mm, &tmp, + 1024 * PAGE_SIZE, 0, + I915_COLOR_UNEVICTABLE, + 0, ggtt->mappable_end, + DRM_MM_INSERT_LOW); + if (err) + goto out_unpin; + + order = i915_random_order(count, &prng); + if (!order) { + err = -ENOMEM; + goto out_remove; + } + + for (n = 0; n < count; n++) { + u64 offset = tmp.start + order[n] * PAGE_SIZE; + u32 __iomem *vaddr; + + ggtt->base.insert_page(&ggtt->base, + i915_gem_object_get_dma_address(obj, 0), + offset, I915_CACHE_NONE, 0); + + vaddr = io_mapping_map_atomic_wc(&ggtt->mappable, offset); + iowrite32(n, vaddr + n); + io_mapping_unmap_atomic(vaddr); + + wmb(); + ggtt->base.clear_range(&ggtt->base, offset, PAGE_SIZE); + } + + i915_random_reorder(order, count, &prng); + for (n = 0; n < count; n++) { + u64 offset = tmp.start + order[n] * PAGE_SIZE; + u32 __iomem *vaddr; + u32 val; + + ggtt->base.insert_page(&ggtt->base, + i915_gem_object_get_dma_address(obj, 0), + offset, I915_CACHE_NONE, 0); + + vaddr = io_mapping_map_atomic_wc(&ggtt->mappable, offset); + val = ioread32(vaddr + n); + io_mapping_unmap_atomic(vaddr); + + ggtt->base.clear_range(&ggtt->base, offset, PAGE_SIZE); + + if (val != n) { + pr_err("insert page failed: found %d, expected %d\n", + val, n); + err = -EINVAL; + break; + } + } + + kfree(order); +out_remove: + drm_mm_remove_node(&tmp); +out_unpin: + i915_gem_object_unpin_pages(obj); +out_free: + i915_gem_object_put(obj); +out_unlock: + mutex_unlock(&i915->drm.struct_mutex); + return err; +} + +static void track_vma_bind(struct i915_vma *vma) +{ + struct drm_i915_gem_object *obj = vma->obj; + + obj->bind_count++; /* track for eviction later */ + __i915_gem_object_pin_pages(obj); + + vma->pages = obj->mm.pages; + list_move_tail(&vma->vm_link, &vma->vm->inactive_list); +} + +static int exercise_mock(struct drm_i915_private *i915, + int (*func)(struct drm_i915_private *i915, + struct i915_address_space *vm, + u64 hole_start, u64 hole_end, + unsigned long end_time)) +{ + struct i915_gem_context *ctx; + struct i915_hw_ppgtt *ppgtt; + IGT_TIMEOUT(end_time); + int err; + + ctx = mock_context(i915, "mock"); + if (!ctx) + return -ENOMEM; + + ppgtt = ctx->ppgtt; + GEM_BUG_ON(!ppgtt); + + err = func(i915, &ppgtt->base, 0, ppgtt->base.total, end_time); + + mock_context_close(ctx); + return err; +} + +static int igt_mock_fill(void *arg) +{ + return exercise_mock(arg, fill_hole); +} + +static int igt_mock_walk(void *arg) +{ + return exercise_mock(arg, walk_hole); +} + +static int igt_mock_pot(void *arg) +{ + return exercise_mock(arg, pot_hole); +} + +static int igt_mock_drunk(void *arg) +{ + return exercise_mock(arg, drunk_hole); +} + +static int igt_gtt_reserve(void *arg) +{ + struct drm_i915_private *i915 = arg; + struct drm_i915_gem_object *obj, *on; + LIST_HEAD(objects); + u64 total; + int err; + + /* i915_gem_gtt_reserve() tries to reserve the precise range + * for the node, and evicts if it has to. So our test checks that + * it can give us the requsted space and prevent overlaps. + */ + + /* Start by filling the GGTT */ + for (total = 0; + total + 2*I915_GTT_PAGE_SIZE <= i915->ggtt.base.total; + total += 2*I915_GTT_PAGE_SIZE) { + struct i915_vma *vma; + + obj = i915_gem_object_create_internal(i915, 2*PAGE_SIZE); + if (IS_ERR(obj)) { + err = PTR_ERR(obj); + goto out; + } + + err = i915_gem_object_pin_pages(obj); + if (err) { + i915_gem_object_put(obj); + goto out; + } + + list_add(&obj->st_link, &objects); + + vma = i915_vma_instance(obj, &i915->ggtt.base, NULL); + if (IS_ERR(vma)) { + err = PTR_ERR(vma); + goto out; + } + + err = i915_gem_gtt_reserve(&i915->ggtt.base, &vma->node, + obj->base.size, + total, + obj->cache_level, + 0); + if (err) { + pr_err("i915_gem_gtt_reserve (pass 1) failed at %llu/%llu with err=%d\n", + total, i915->ggtt.base.total, err); + goto out; + } + track_vma_bind(vma); + + GEM_BUG_ON(!drm_mm_node_allocated(&vma->node)); + if (vma->node.start != total || + vma->node.size != 2*I915_GTT_PAGE_SIZE) { + pr_err("i915_gem_gtt_reserve (pass 1) placement failed, found (%llx + %llx), expected (%llx + %lx)\n", + vma->node.start, vma->node.size, + total, 2*I915_GTT_PAGE_SIZE); + err = -EINVAL; + goto out; + } + } + + /* Now we start forcing evictions */ + for (total = I915_GTT_PAGE_SIZE; + total + 2*I915_GTT_PAGE_SIZE <= i915->ggtt.base.total; + total += 2*I915_GTT_PAGE_SIZE) { + struct i915_vma *vma; + + obj = i915_gem_object_create_internal(i915, 2*PAGE_SIZE); + if (IS_ERR(obj)) { + err = PTR_ERR(obj); + goto out; + } + + err = i915_gem_object_pin_pages(obj); + if (err) { + i915_gem_object_put(obj); + goto out; + } + + list_add(&obj->st_link, &objects); + + vma = i915_vma_instance(obj, &i915->ggtt.base, NULL); + if (IS_ERR(vma)) { + err = PTR_ERR(vma); + goto out; + } + + err = i915_gem_gtt_reserve(&i915->ggtt.base, &vma->node, + obj->base.size, + total, + obj->cache_level, + 0); + if (err) { + pr_err("i915_gem_gtt_reserve (pass 2) failed at %llu/%llu with err=%d\n", + total, i915->ggtt.base.total, err); + goto out; + } + track_vma_bind(vma); + + GEM_BUG_ON(!drm_mm_node_allocated(&vma->node)); + if (vma->node.start != total || + vma->node.size != 2*I915_GTT_PAGE_SIZE) { + pr_err("i915_gem_gtt_reserve (pass 2) placement failed, found (%llx + %llx), expected (%llx + %lx)\n", + vma->node.start, vma->node.size, + total, 2*I915_GTT_PAGE_SIZE); + err = -EINVAL; + goto out; + } + } + + /* And then try at random */ + list_for_each_entry_safe(obj, on, &objects, st_link) { + struct i915_vma *vma; + u64 offset; + + vma = i915_vma_instance(obj, &i915->ggtt.base, NULL); + if (IS_ERR(vma)) { + err = PTR_ERR(vma); + goto out; + } + + err = i915_vma_unbind(vma); + if (err) { + pr_err("i915_vma_unbind failed with err=%d!\n", err); + goto out; + } + + offset = random_offset(0, i915->ggtt.base.total, + 2*I915_GTT_PAGE_SIZE, + I915_GTT_MIN_ALIGNMENT); + + err = i915_gem_gtt_reserve(&i915->ggtt.base, &vma->node, + obj->base.size, + offset, + obj->cache_level, + 0); + if (err) { + pr_err("i915_gem_gtt_reserve (pass 3) failed at %llu/%llu with err=%d\n", + total, i915->ggtt.base.total, err); + goto out; + } + track_vma_bind(vma); + + GEM_BUG_ON(!drm_mm_node_allocated(&vma->node)); + if (vma->node.start != offset || + vma->node.size != 2*I915_GTT_PAGE_SIZE) { + pr_err("i915_gem_gtt_reserve (pass 3) placement failed, found (%llx + %llx), expected (%llx + %lx)\n", + vma->node.start, vma->node.size, + offset, 2*I915_GTT_PAGE_SIZE); + err = -EINVAL; + goto out; + } + } + +out: + list_for_each_entry_safe(obj, on, &objects, st_link) { + i915_gem_object_unpin_pages(obj); + i915_gem_object_put(obj); + } + return err; +} + +static int igt_gtt_insert(void *arg) +{ + struct drm_i915_private *i915 = arg; + struct drm_i915_gem_object *obj, *on; + struct drm_mm_node tmp = {}; + const struct invalid_insert { + u64 size; + u64 alignment; + u64 start, end; + } invalid_insert[] = { + { + i915->ggtt.base.total + I915_GTT_PAGE_SIZE, 0, + 0, i915->ggtt.base.total, + }, + { + 2*I915_GTT_PAGE_SIZE, 0, + 0, I915_GTT_PAGE_SIZE, + }, + { + -(u64)I915_GTT_PAGE_SIZE, 0, + 0, 4*I915_GTT_PAGE_SIZE, + }, + { + -(u64)2*I915_GTT_PAGE_SIZE, 2*I915_GTT_PAGE_SIZE, + 0, 4*I915_GTT_PAGE_SIZE, + }, + { + I915_GTT_PAGE_SIZE, I915_GTT_MIN_ALIGNMENT << 1, + I915_GTT_MIN_ALIGNMENT, I915_GTT_MIN_ALIGNMENT << 1, + }, + {} + }, *ii; + LIST_HEAD(objects); + u64 total; + int err; + + /* i915_gem_gtt_insert() tries to allocate some free space in the GTT + * to the node, evicting if required. + */ + + /* Check a couple of obviously invalid requests */ + for (ii = invalid_insert; ii->size; ii++) { + err = i915_gem_gtt_insert(&i915->ggtt.base, &tmp, + ii->size, ii->alignment, + I915_COLOR_UNEVICTABLE, + ii->start, ii->end, + 0); + if (err != -ENOSPC) { + pr_err("Invalid i915_gem_gtt_insert(.size=%llx, .alignment=%llx, .start=%llx, .end=%llx) succeeded (err=%d)\n", + ii->size, ii->alignment, ii->start, ii->end, + err); + return -EINVAL; + } + } + + /* Start by filling the GGTT */ + for (total = 0; + total + I915_GTT_PAGE_SIZE <= i915->ggtt.base.total; + total += I915_GTT_PAGE_SIZE) { + struct i915_vma *vma; + + obj = i915_gem_object_create_internal(i915, I915_GTT_PAGE_SIZE); + if (IS_ERR(obj)) { + err = PTR_ERR(obj); + goto out; + } + + err = i915_gem_object_pin_pages(obj); + if (err) { + i915_gem_object_put(obj); + goto out; + } + + list_add(&obj->st_link, &objects); + + vma = i915_vma_instance(obj, &i915->ggtt.base, NULL); + if (IS_ERR(vma)) { + err = PTR_ERR(vma); + goto out; + } + + err = i915_gem_gtt_insert(&i915->ggtt.base, &vma->node, + obj->base.size, 0, obj->cache_level, + 0, i915->ggtt.base.total, + 0); + if (err == -ENOSPC) { + /* maxed out the GGTT space */ + i915_gem_object_put(obj); + break; + } + if (err) { + pr_err("i915_gem_gtt_insert (pass 1) failed at %llu/%llu with err=%d\n", + total, i915->ggtt.base.total, err); + goto out; + } + track_vma_bind(vma); + __i915_vma_pin(vma); + + GEM_BUG_ON(!drm_mm_node_allocated(&vma->node)); + } + + list_for_each_entry(obj, &objects, st_link) { + struct i915_vma *vma; + + vma = i915_vma_instance(obj, &i915->ggtt.base, NULL); + if (IS_ERR(vma)) { + err = PTR_ERR(vma); + goto out; + } + + if (!drm_mm_node_allocated(&vma->node)) { + pr_err("VMA was unexpectedly evicted!\n"); + err = -EINVAL; + goto out; + } + + __i915_vma_unpin(vma); + } + + /* If we then reinsert, we should find the same hole */ + list_for_each_entry_safe(obj, on, &objects, st_link) { + struct i915_vma *vma; + u64 offset; + + vma = i915_vma_instance(obj, &i915->ggtt.base, NULL); + if (IS_ERR(vma)) { + err = PTR_ERR(vma); + goto out; + } + + GEM_BUG_ON(!drm_mm_node_allocated(&vma->node)); + offset = vma->node.start; + + err = i915_vma_unbind(vma); + if (err) { + pr_err("i915_vma_unbind failed with err=%d!\n", err); + goto out; + } + + err = i915_gem_gtt_insert(&i915->ggtt.base, &vma->node, + obj->base.size, 0, obj->cache_level, + 0, i915->ggtt.base.total, + 0); + if (err) { + pr_err("i915_gem_gtt_insert (pass 2) failed at %llu/%llu with err=%d\n", + total, i915->ggtt.base.total, err); + goto out; + } + track_vma_bind(vma); + + GEM_BUG_ON(!drm_mm_node_allocated(&vma->node)); + if (vma->node.start != offset) { + pr_err("i915_gem_gtt_insert did not return node to its previous location (the only hole), expected address %llx, found %llx\n", + offset, vma->node.start); + err = -EINVAL; + goto out; + } + } + + /* And then force evictions */ + for (total = 0; + total + 2*I915_GTT_PAGE_SIZE <= i915->ggtt.base.total; + total += 2*I915_GTT_PAGE_SIZE) { + struct i915_vma *vma; + + obj = i915_gem_object_create_internal(i915, 2*I915_GTT_PAGE_SIZE); + if (IS_ERR(obj)) { + err = PTR_ERR(obj); + goto out; + } + + err = i915_gem_object_pin_pages(obj); + if (err) { + i915_gem_object_put(obj); + goto out; + } + + list_add(&obj->st_link, &objects); + + vma = i915_vma_instance(obj, &i915->ggtt.base, NULL); + if (IS_ERR(vma)) { + err = PTR_ERR(vma); + goto out; + } + + err = i915_gem_gtt_insert(&i915->ggtt.base, &vma->node, + obj->base.size, 0, obj->cache_level, + 0, i915->ggtt.base.total, + 0); + if (err) { + pr_err("i915_gem_gtt_insert (pass 3) failed at %llu/%llu with err=%d\n", + total, i915->ggtt.base.total, err); + goto out; + } + track_vma_bind(vma); + + GEM_BUG_ON(!drm_mm_node_allocated(&vma->node)); + } + +out: + list_for_each_entry_safe(obj, on, &objects, st_link) { + i915_gem_object_unpin_pages(obj); + i915_gem_object_put(obj); + } + return err; +} + +int i915_gem_gtt_mock_selftests(void) +{ + static const struct i915_subtest tests[] = { + SUBTEST(igt_mock_drunk), + SUBTEST(igt_mock_walk), + SUBTEST(igt_mock_pot), + SUBTEST(igt_mock_fill), + SUBTEST(igt_gtt_reserve), + SUBTEST(igt_gtt_insert), + }; + struct drm_i915_private *i915; + int err; + + i915 = mock_gem_device(); + if (!i915) + return -ENOMEM; + + mutex_lock(&i915->drm.struct_mutex); + err = i915_subtests(tests, i915); + mutex_unlock(&i915->drm.struct_mutex); + + drm_dev_unref(&i915->drm); + return err; +} + +int i915_gem_gtt_live_selftests(struct drm_i915_private *i915) +{ + static const struct i915_subtest tests[] = { + SUBTEST(igt_ppgtt_alloc), + SUBTEST(igt_ppgtt_lowlevel), + SUBTEST(igt_ppgtt_drunk), + SUBTEST(igt_ppgtt_walk), + SUBTEST(igt_ppgtt_pot), + SUBTEST(igt_ppgtt_fill), + SUBTEST(igt_ppgtt_shrink), + SUBTEST(igt_ggtt_lowlevel), + SUBTEST(igt_ggtt_drunk), + SUBTEST(igt_ggtt_walk), + SUBTEST(igt_ggtt_pot), + SUBTEST(igt_ggtt_fill), + SUBTEST(igt_ggtt_page), + }; + + GEM_BUG_ON(offset_in_page(i915->ggtt.base.total)); + + return i915_subtests(tests, i915); +} diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_object.c b/drivers/gpu/drm/i915/selftests/i915_gem_object.c new file mode 100644 index 000000000000..67d82bf1407f --- /dev/null +++ b/drivers/gpu/drm/i915/selftests/i915_gem_object.c @@ -0,0 +1,600 @@ +/* + * Copyright © 2016 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + */ + +#include "../i915_selftest.h" + +#include "mock_gem_device.h" +#include "huge_gem_object.h" + +static int igt_gem_object(void *arg) +{ + struct drm_i915_private *i915 = arg; + struct drm_i915_gem_object *obj; + int err = -ENOMEM; + + /* Basic test to ensure we can create an object */ + + obj = i915_gem_object_create(i915, PAGE_SIZE); + if (IS_ERR(obj)) { + err = PTR_ERR(obj); + pr_err("i915_gem_object_create failed, err=%d\n", err); + goto out; + } + + err = 0; + i915_gem_object_put(obj); +out: + return err; +} + +static int igt_phys_object(void *arg) +{ + struct drm_i915_private *i915 = arg; + struct drm_i915_gem_object *obj; + int err; + + /* Create an object and bind it to a contiguous set of physical pages, + * i.e. exercise the i915_gem_object_phys API. + */ + + obj = i915_gem_object_create(i915, PAGE_SIZE); + if (IS_ERR(obj)) { + err = PTR_ERR(obj); + pr_err("i915_gem_object_create failed, err=%d\n", err); + goto out; + } + + mutex_lock(&i915->drm.struct_mutex); + err = i915_gem_object_attach_phys(obj, PAGE_SIZE); + mutex_unlock(&i915->drm.struct_mutex); + if (err) { + pr_err("i915_gem_object_attach_phys failed, err=%d\n", err); + goto out_obj; + } + + if (obj->ops != &i915_gem_phys_ops) { + pr_err("i915_gem_object_attach_phys did not create a phys object\n"); + err = -EINVAL; + goto out_obj; + } + + if (!atomic_read(&obj->mm.pages_pin_count)) { + pr_err("i915_gem_object_attach_phys did not pin its phys pages\n"); + err = -EINVAL; + goto out_obj; + } + + /* Make the object dirty so that put_pages must do copy back the data */ + mutex_lock(&i915->drm.struct_mutex); + err = i915_gem_object_set_to_gtt_domain(obj, true); + mutex_unlock(&i915->drm.struct_mutex); + if (err) { + pr_err("i915_gem_object_set_to_gtt_domain failed with err=%d\n", + err); + goto out_obj; + } + +out_obj: + i915_gem_object_put(obj); +out: + return err; +} + +static int igt_gem_huge(void *arg) +{ + const unsigned int nreal = 509; /* just to be awkward */ + struct drm_i915_private *i915 = arg; + struct drm_i915_gem_object *obj; + unsigned int n; + int err; + + /* Basic sanitycheck of our huge fake object allocation */ + + obj = huge_gem_object(i915, + nreal * PAGE_SIZE, + i915->ggtt.base.total + PAGE_SIZE); + if (IS_ERR(obj)) + return PTR_ERR(obj); + + err = i915_gem_object_pin_pages(obj); + if (err) { + pr_err("Failed to allocate %u pages (%lu total), err=%d\n", + nreal, obj->base.size / PAGE_SIZE, err); + goto out; + } + + for (n = 0; n < obj->base.size / PAGE_SIZE; n++) { + if (i915_gem_object_get_page(obj, n) != + i915_gem_object_get_page(obj, n % nreal)) { + pr_err("Page lookup mismatch at index %u [%u]\n", + n, n % nreal); + err = -EINVAL; + goto out_unpin; + } + } + +out_unpin: + i915_gem_object_unpin_pages(obj); +out: + i915_gem_object_put(obj); + return err; +} + +struct tile { + unsigned int width; + unsigned int height; + unsigned int stride; + unsigned int size; + unsigned int tiling; + unsigned int swizzle; +}; + +static u64 swizzle_bit(unsigned int bit, u64 offset) +{ + return (offset & BIT_ULL(bit)) >> (bit - 6); +} + +static u64 tiled_offset(const struct tile *tile, u64 v) +{ + u64 x, y; + + if (tile->tiling == I915_TILING_NONE) + return v; + + y = div64_u64_rem(v, tile->stride, &x); + v = div64_u64_rem(y, tile->height, &y) * tile->stride * tile->height; + + if (tile->tiling == I915_TILING_X) { + v += y * tile->width; + v += div64_u64_rem(x, tile->width, &x) << tile->size; + v += x; + } else { + const unsigned int ytile_span = 16; + const unsigned int ytile_height = 32 * ytile_span; + + v += y * ytile_span; + v += div64_u64_rem(x, ytile_span, &x) * ytile_height; + v += x; + } + + switch (tile->swizzle) { + case I915_BIT_6_SWIZZLE_9: + v ^= swizzle_bit(9, v); + break; + case I915_BIT_6_SWIZZLE_9_10: + v ^= swizzle_bit(9, v) ^ swizzle_bit(10, v); + break; + case I915_BIT_6_SWIZZLE_9_11: + v ^= swizzle_bit(9, v) ^ swizzle_bit(11, v); + break; + case I915_BIT_6_SWIZZLE_9_10_11: + v ^= swizzle_bit(9, v) ^ swizzle_bit(10, v) ^ swizzle_bit(11, v); + break; + } + + return v; +} + +static int check_partial_mapping(struct drm_i915_gem_object *obj, + const struct tile *tile, + unsigned long end_time) +{ + const unsigned int nreal = obj->scratch / PAGE_SIZE; + const unsigned long npages = obj->base.size / PAGE_SIZE; + struct i915_vma *vma; + unsigned long page; + int err; + + if (igt_timeout(end_time, + "%s: timed out before tiling=%d stride=%d\n", + __func__, tile->tiling, tile->stride)) + return -EINTR; + + err = i915_gem_object_set_tiling(obj, tile->tiling, tile->stride); + if (err) + return err; + + GEM_BUG_ON(i915_gem_object_get_tiling(obj) != tile->tiling); + GEM_BUG_ON(i915_gem_object_get_stride(obj) != tile->stride); + + for_each_prime_number_from(page, 1, npages) { + struct i915_ggtt_view view = + compute_partial_view(obj, page, MIN_CHUNK_PAGES); + u32 __iomem *io; + struct page *p; + unsigned int n; + u64 offset; + u32 *cpu; + + GEM_BUG_ON(view.partial.size > nreal); + + err = i915_gem_object_set_to_gtt_domain(obj, true); + if (err) + return err; + + vma = i915_gem_object_ggtt_pin(obj, &view, 0, 0, PIN_MAPPABLE); + if (IS_ERR(vma)) { + pr_err("Failed to pin partial view: offset=%lu\n", + page); + return PTR_ERR(vma); + } + + n = page - view.partial.offset; + GEM_BUG_ON(n >= view.partial.size); + + io = i915_vma_pin_iomap(vma); + i915_vma_unpin(vma); + if (IS_ERR(io)) { + pr_err("Failed to iomap partial view: offset=%lu\n", + page); + return PTR_ERR(io); + } + + err = i915_vma_get_fence(vma); + if (err) { + pr_err("Failed to get fence for partial view: offset=%lu\n", + page); + i915_vma_unpin_iomap(vma); + return err; + } + + iowrite32(page, io + n * PAGE_SIZE/sizeof(*io)); + i915_vma_unpin_iomap(vma); + + offset = tiled_offset(tile, page << PAGE_SHIFT); + if (offset >= obj->base.size) + continue; + + i915_gem_object_flush_gtt_write_domain(obj); + + p = i915_gem_object_get_page(obj, offset >> PAGE_SHIFT); + cpu = kmap(p) + offset_in_page(offset); + drm_clflush_virt_range(cpu, sizeof(*cpu)); + if (*cpu != (u32)page) { + pr_err("Partial view for %lu [%u] (offset=%llu, size=%u [%llu, row size %u], fence=%d, tiling=%d, stride=%d) misalignment, expected write to page (%llu + %u [0x%llx]) of 0x%x, found 0x%x\n", + page, n, + view.partial.offset, + view.partial.size, + vma->size >> PAGE_SHIFT, + tile_row_pages(obj), + vma->fence ? vma->fence->id : -1, tile->tiling, tile->stride, + offset >> PAGE_SHIFT, + (unsigned int)offset_in_page(offset), + offset, + (u32)page, *cpu); + err = -EINVAL; + } + *cpu = 0; + drm_clflush_virt_range(cpu, sizeof(*cpu)); + kunmap(p); + if (err) + return err; + } + + return 0; +} + +static int igt_partial_tiling(void *arg) +{ + const unsigned int nreal = 1 << 12; /* largest tile row x2 */ + struct drm_i915_private *i915 = arg; + struct drm_i915_gem_object *obj; + int tiling; + int err; + + /* We want to check the page mapping and fencing of a large object + * mmapped through the GTT. The object we create is larger than can + * possibly be mmaped as a whole, and so we must use partial GGTT vma. + * We then check that a write through each partial GGTT vma ends up + * in the right set of pages within the object, and with the expected + * tiling, which we verify by manual swizzling. + */ + + obj = huge_gem_object(i915, + nreal << PAGE_SHIFT, + (1 + next_prime_number(i915->ggtt.base.total >> PAGE_SHIFT)) << PAGE_SHIFT); + if (IS_ERR(obj)) + return PTR_ERR(obj); + + err = i915_gem_object_pin_pages(obj); + if (err) { + pr_err("Failed to allocate %u pages (%lu total), err=%d\n", + nreal, obj->base.size / PAGE_SIZE, err); + goto out; + } + + mutex_lock(&i915->drm.struct_mutex); + + if (1) { + IGT_TIMEOUT(end); + struct tile tile; + + tile.height = 1; + tile.width = 1; + tile.size = 0; + tile.stride = 0; + tile.swizzle = I915_BIT_6_SWIZZLE_NONE; + tile.tiling = I915_TILING_NONE; + + err = check_partial_mapping(obj, &tile, end); + if (err && err != -EINTR) + goto out_unlock; + } + + for (tiling = I915_TILING_X; tiling <= I915_TILING_Y; tiling++) { + IGT_TIMEOUT(end); + unsigned int max_pitch; + unsigned int pitch; + struct tile tile; + + tile.tiling = tiling; + switch (tiling) { + case I915_TILING_X: + tile.swizzle = i915->mm.bit_6_swizzle_x; + break; + case I915_TILING_Y: + tile.swizzle = i915->mm.bit_6_swizzle_y; + break; + } + + if (tile.swizzle == I915_BIT_6_SWIZZLE_UNKNOWN || + tile.swizzle == I915_BIT_6_SWIZZLE_9_10_17) + continue; + + if (INTEL_GEN(i915) <= 2) { + tile.height = 16; + tile.width = 128; + tile.size = 11; + } else if (tile.tiling == I915_TILING_Y && + HAS_128_BYTE_Y_TILING(i915)) { + tile.height = 32; + tile.width = 128; + tile.size = 12; + } else { + tile.height = 8; + tile.width = 512; + tile.size = 12; + } + + if (INTEL_GEN(i915) < 4) + max_pitch = 8192 / tile.width; + else if (INTEL_GEN(i915) < 7) + max_pitch = 128 * I965_FENCE_MAX_PITCH_VAL / tile.width; + else + max_pitch = 128 * GEN7_FENCE_MAX_PITCH_VAL / tile.width; + + for (pitch = max_pitch; pitch; pitch >>= 1) { + tile.stride = tile.width * pitch; + err = check_partial_mapping(obj, &tile, end); + if (err == -EINTR) + goto next_tiling; + if (err) + goto out_unlock; + + if (pitch > 2 && INTEL_GEN(i915) >= 4) { + tile.stride = tile.width * (pitch - 1); + err = check_partial_mapping(obj, &tile, end); + if (err == -EINTR) + goto next_tiling; + if (err) + goto out_unlock; + } + + if (pitch < max_pitch && INTEL_GEN(i915) >= 4) { + tile.stride = tile.width * (pitch + 1); + err = check_partial_mapping(obj, &tile, end); + if (err == -EINTR) + goto next_tiling; + if (err) + goto out_unlock; + } + } + + if (INTEL_GEN(i915) >= 4) { + for_each_prime_number(pitch, max_pitch) { + tile.stride = tile.width * pitch; + err = check_partial_mapping(obj, &tile, end); + if (err == -EINTR) + goto next_tiling; + if (err) + goto out_unlock; + } + } + +next_tiling: ; + } + +out_unlock: + mutex_unlock(&i915->drm.struct_mutex); + i915_gem_object_unpin_pages(obj); +out: + i915_gem_object_put(obj); + return err; +} + +static int make_obj_busy(struct drm_i915_gem_object *obj) +{ + struct drm_i915_private *i915 = to_i915(obj->base.dev); + struct drm_i915_gem_request *rq; + struct i915_vma *vma; + int err; + + vma = i915_vma_instance(obj, &i915->ggtt.base, NULL); + if (IS_ERR(vma)) + return PTR_ERR(vma); + + err = i915_vma_pin(vma, 0, 0, PIN_USER); + if (err) + return err; + + rq = i915_gem_request_alloc(i915->engine[RCS], i915->kernel_context); + if (IS_ERR(rq)) { + i915_vma_unpin(vma); + return PTR_ERR(rq); + } + + i915_vma_move_to_active(vma, rq, 0); + i915_add_request(rq); + + i915_gem_object_set_active_reference(obj); + i915_vma_unpin(vma); + return 0; +} + +static bool assert_mmap_offset(struct drm_i915_private *i915, + unsigned long size, + int expected) +{ + struct drm_i915_gem_object *obj; + int err; + + obj = i915_gem_object_create_internal(i915, size); + if (IS_ERR(obj)) + return PTR_ERR(obj); + + err = i915_gem_object_create_mmap_offset(obj); + i915_gem_object_put(obj); + + return err == expected; +} + +static int igt_mmap_offset_exhaustion(void *arg) +{ + struct drm_i915_private *i915 = arg; + struct drm_mm *mm = &i915->drm.vma_offset_manager->vm_addr_space_mm; + struct drm_i915_gem_object *obj; + struct drm_mm_node resv, *hole; + u64 hole_start, hole_end; + int loop, err; + + /* Trim the device mmap space to only a page */ + memset(&resv, 0, sizeof(resv)); + drm_mm_for_each_hole(hole, mm, hole_start, hole_end) { + resv.start = hole_start; + resv.size = hole_end - hole_start - 1; /* PAGE_SIZE units */ + err = drm_mm_reserve_node(mm, &resv); + if (err) { + pr_err("Failed to trim VMA manager, err=%d\n", err); + return err; + } + break; + } + + /* Just fits! */ + if (!assert_mmap_offset(i915, PAGE_SIZE, 0)) { + pr_err("Unable to insert object into single page hole\n"); + err = -EINVAL; + goto out; + } + + /* Too large */ + if (!assert_mmap_offset(i915, 2*PAGE_SIZE, -ENOSPC)) { + pr_err("Unexpectedly succeeded in inserting too large object into single page hole\n"); + err = -EINVAL; + goto out; + } + + /* Fill the hole, further allocation attempts should then fail */ + obj = i915_gem_object_create_internal(i915, PAGE_SIZE); + if (IS_ERR(obj)) { + err = PTR_ERR(obj); + goto out; + } + + err = i915_gem_object_create_mmap_offset(obj); + if (err) { + pr_err("Unable to insert object into reclaimed hole\n"); + goto err_obj; + } + + if (!assert_mmap_offset(i915, PAGE_SIZE, -ENOSPC)) { + pr_err("Unexpectedly succeeded in inserting object into no holes!\n"); + err = -EINVAL; + goto err_obj; + } + + i915_gem_object_put(obj); + + /* Now fill with busy dead objects that we expect to reap */ + for (loop = 0; loop < 3; loop++) { + obj = i915_gem_object_create_internal(i915, PAGE_SIZE); + if (IS_ERR(obj)) { + err = PTR_ERR(obj); + goto out; + } + + mutex_lock(&i915->drm.struct_mutex); + err = make_obj_busy(obj); + mutex_unlock(&i915->drm.struct_mutex); + if (err) { + pr_err("[loop %d] Failed to busy the object\n", loop); + goto err_obj; + } + + GEM_BUG_ON(!i915_gem_object_is_active(obj)); + err = i915_gem_object_create_mmap_offset(obj); + if (err) { + pr_err("[loop %d] i915_gem_object_create_mmap_offset failed with err=%d\n", + loop, err); + goto out; + } + } + +out: + drm_mm_remove_node(&resv); + return err; +err_obj: + i915_gem_object_put(obj); + goto out; +} + +int i915_gem_object_mock_selftests(void) +{ + static const struct i915_subtest tests[] = { + SUBTEST(igt_gem_object), + SUBTEST(igt_phys_object), + }; + struct drm_i915_private *i915; + int err; + + i915 = mock_gem_device(); + if (!i915) + return -ENOMEM; + + err = i915_subtests(tests, i915); + + drm_dev_unref(&i915->drm); + return err; +} + +int i915_gem_object_live_selftests(struct drm_i915_private *i915) +{ + static const struct i915_subtest tests[] = { + SUBTEST(igt_gem_huge), + SUBTEST(igt_partial_tiling), + SUBTEST(igt_mmap_offset_exhaustion), + }; + + return i915_subtests(tests, i915); +} diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_request.c b/drivers/gpu/drm/i915/selftests/i915_gem_request.c new file mode 100644 index 000000000000..42bdeac93324 --- /dev/null +++ b/drivers/gpu/drm/i915/selftests/i915_gem_request.c @@ -0,0 +1,882 @@ +/* + * Copyright © 2016 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + */ + +#include <linux/prime_numbers.h> + +#include "../i915_selftest.h" + +#include "mock_context.h" +#include "mock_gem_device.h" + +static int igt_add_request(void *arg) +{ + struct drm_i915_private *i915 = arg; + struct drm_i915_gem_request *request; + int err = -ENOMEM; + + /* Basic preliminary test to create a request and let it loose! */ + + mutex_lock(&i915->drm.struct_mutex); + request = mock_request(i915->engine[RCS], + i915->kernel_context, + HZ / 10); + if (!request) + goto out_unlock; + + i915_add_request(request); + + err = 0; +out_unlock: + mutex_unlock(&i915->drm.struct_mutex); + return err; +} + +static int igt_wait_request(void *arg) +{ + const long T = HZ / 4; + struct drm_i915_private *i915 = arg; + struct drm_i915_gem_request *request; + int err = -EINVAL; + + /* Submit a request, then wait upon it */ + + mutex_lock(&i915->drm.struct_mutex); + request = mock_request(i915->engine[RCS], i915->kernel_context, T); + if (!request) { + err = -ENOMEM; + goto out_unlock; + } + + if (i915_wait_request(request, I915_WAIT_LOCKED, 0) != -ETIME) { + pr_err("request wait (busy query) succeeded (expected timeout before submit!)\n"); + goto out_unlock; + } + + if (i915_wait_request(request, I915_WAIT_LOCKED, T) != -ETIME) { + pr_err("request wait succeeded (expected timeout before submit!)\n"); + goto out_unlock; + } + + if (i915_gem_request_completed(request)) { + pr_err("request completed before submit!!\n"); + goto out_unlock; + } + + i915_add_request(request); + + if (i915_wait_request(request, I915_WAIT_LOCKED, 0) != -ETIME) { + pr_err("request wait (busy query) succeeded (expected timeout after submit!)\n"); + goto out_unlock; + } + + if (i915_gem_request_completed(request)) { + pr_err("request completed immediately!\n"); + goto out_unlock; + } + + if (i915_wait_request(request, I915_WAIT_LOCKED, T / 2) != -ETIME) { + pr_err("request wait succeeded (expected timeout!)\n"); + goto out_unlock; + } + + if (i915_wait_request(request, I915_WAIT_LOCKED, T) == -ETIME) { + pr_err("request wait timed out!\n"); + goto out_unlock; + } + + if (!i915_gem_request_completed(request)) { + pr_err("request not complete after waiting!\n"); + goto out_unlock; + } + + if (i915_wait_request(request, I915_WAIT_LOCKED, T) == -ETIME) { + pr_err("request wait timed out when already complete!\n"); + goto out_unlock; + } + + err = 0; +out_unlock: + mock_device_flush(i915); + mutex_unlock(&i915->drm.struct_mutex); + return err; +} + +static int igt_fence_wait(void *arg) +{ + const long T = HZ / 4; + struct drm_i915_private *i915 = arg; + struct drm_i915_gem_request *request; + int err = -EINVAL; + + /* Submit a request, treat it as a fence and wait upon it */ + + mutex_lock(&i915->drm.struct_mutex); + request = mock_request(i915->engine[RCS], i915->kernel_context, T); + if (!request) { + err = -ENOMEM; + goto out_locked; + } + mutex_unlock(&i915->drm.struct_mutex); /* safe as we are single user */ + + if (dma_fence_wait_timeout(&request->fence, false, T) != -ETIME) { + pr_err("fence wait success before submit (expected timeout)!\n"); + goto out_device; + } + + mutex_lock(&i915->drm.struct_mutex); + i915_add_request(request); + mutex_unlock(&i915->drm.struct_mutex); + + if (dma_fence_is_signaled(&request->fence)) { + pr_err("fence signaled immediately!\n"); + goto out_device; + } + + if (dma_fence_wait_timeout(&request->fence, false, T / 2) != -ETIME) { + pr_err("fence wait success after submit (expected timeout)!\n"); + goto out_device; + } + + if (dma_fence_wait_timeout(&request->fence, false, T) <= 0) { + pr_err("fence wait timed out (expected success)!\n"); + goto out_device; + } + + if (!dma_fence_is_signaled(&request->fence)) { + pr_err("fence unsignaled after waiting!\n"); + goto out_device; + } + + if (dma_fence_wait_timeout(&request->fence, false, T) <= 0) { + pr_err("fence wait timed out when complete (expected success)!\n"); + goto out_device; + } + + err = 0; +out_device: + mutex_lock(&i915->drm.struct_mutex); +out_locked: + mock_device_flush(i915); + mutex_unlock(&i915->drm.struct_mutex); + return err; +} + +static int igt_request_rewind(void *arg) +{ + struct drm_i915_private *i915 = arg; + struct drm_i915_gem_request *request, *vip; + struct i915_gem_context *ctx[2]; + int err = -EINVAL; + + mutex_lock(&i915->drm.struct_mutex); + ctx[0] = mock_context(i915, "A"); + request = mock_request(i915->engine[RCS], ctx[0], 2 * HZ); + if (!request) { + err = -ENOMEM; + goto err_context_0; + } + + i915_gem_request_get(request); + i915_add_request(request); + + ctx[1] = mock_context(i915, "B"); + vip = mock_request(i915->engine[RCS], ctx[1], 0); + if (!vip) { + err = -ENOMEM; + goto err_context_1; + } + + /* Simulate preemption by manual reordering */ + if (!mock_cancel_request(request)) { + pr_err("failed to cancel request (already executed)!\n"); + i915_add_request(vip); + goto err_context_1; + } + i915_gem_request_get(vip); + i915_add_request(vip); + request->engine->submit_request(request); + + mutex_unlock(&i915->drm.struct_mutex); + + if (i915_wait_request(vip, 0, HZ) == -ETIME) { + pr_err("timed out waiting for high priority request, vip.seqno=%d, current seqno=%d\n", + vip->global_seqno, intel_engine_get_seqno(i915->engine[RCS])); + goto err; + } + + if (i915_gem_request_completed(request)) { + pr_err("low priority request already completed\n"); + goto err; + } + + err = 0; +err: + i915_gem_request_put(vip); + mutex_lock(&i915->drm.struct_mutex); +err_context_1: + mock_context_close(ctx[1]); + i915_gem_request_put(request); +err_context_0: + mock_context_close(ctx[0]); + mock_device_flush(i915); + mutex_unlock(&i915->drm.struct_mutex); + return err; +} + +int i915_gem_request_mock_selftests(void) +{ + static const struct i915_subtest tests[] = { + SUBTEST(igt_add_request), + SUBTEST(igt_wait_request), + SUBTEST(igt_fence_wait), + SUBTEST(igt_request_rewind), + }; + struct drm_i915_private *i915; + int err; + + i915 = mock_gem_device(); + if (!i915) + return -ENOMEM; + + err = i915_subtests(tests, i915); + drm_dev_unref(&i915->drm); + + return err; +} + +struct live_test { + struct drm_i915_private *i915; + const char *func; + const char *name; + + unsigned int reset_count; +}; + +static int begin_live_test(struct live_test *t, + struct drm_i915_private *i915, + const char *func, + const char *name) +{ + int err; + + t->i915 = i915; + t->func = func; + t->name = name; + + err = i915_gem_wait_for_idle(i915, I915_WAIT_LOCKED); + if (err) { + pr_err("%s(%s): failed to idle before, with err=%d!", + func, name, err); + return err; + } + + i915_gem_retire_requests(i915); + + i915->gpu_error.missed_irq_rings = 0; + t->reset_count = i915_reset_count(&i915->gpu_error); + + return 0; +} + +static int end_live_test(struct live_test *t) +{ + struct drm_i915_private *i915 = t->i915; + + if (wait_for(intel_execlists_idle(i915), 1)) { + pr_err("%s(%s): GPU not idle\n", t->func, t->name); + return -EIO; + } + + if (t->reset_count != i915_reset_count(&i915->gpu_error)) { + pr_err("%s(%s): GPU was reset %d times!\n", + t->func, t->name, + i915_reset_count(&i915->gpu_error) - t->reset_count); + return -EIO; + } + + if (i915->gpu_error.missed_irq_rings) { + pr_err("%s(%s): Missed interrupts on engines %lx\n", + t->func, t->name, i915->gpu_error.missed_irq_rings); + return -EIO; + } + + return 0; +} + +static int live_nop_request(void *arg) +{ + struct drm_i915_private *i915 = arg; + struct intel_engine_cs *engine; + struct live_test t; + unsigned int id; + int err; + + /* Submit various sized batches of empty requests, to each engine + * (individually), and wait for the batch to complete. We can check + * the overhead of submitting requests to the hardware. + */ + + mutex_lock(&i915->drm.struct_mutex); + + for_each_engine(engine, i915, id) { + IGT_TIMEOUT(end_time); + struct drm_i915_gem_request *request; + unsigned long n, prime; + ktime_t times[2] = {}; + + err = begin_live_test(&t, i915, __func__, engine->name); + if (err) + goto out_unlock; + + for_each_prime_number_from(prime, 1, 8192) { + times[1] = ktime_get_raw(); + + for (n = 0; n < prime; n++) { + request = i915_gem_request_alloc(engine, + i915->kernel_context); + if (IS_ERR(request)) { + err = PTR_ERR(request); + goto out_unlock; + } + + /* This space is left intentionally blank. + * + * We do not actually want to perform any + * action with this request, we just want + * to measure the latency in allocation + * and submission of our breadcrumbs - + * ensuring that the bare request is sufficient + * for the system to work (i.e. proper HEAD + * tracking of the rings, interrupt handling, + * etc). It also gives us the lowest bounds + * for latency. + */ + + i915_add_request(request); + } + i915_wait_request(request, + I915_WAIT_LOCKED, + MAX_SCHEDULE_TIMEOUT); + + times[1] = ktime_sub(ktime_get_raw(), times[1]); + if (prime == 1) + times[0] = times[1]; + + if (__igt_timeout(end_time, NULL)) + break; + } + + err = end_live_test(&t); + if (err) + goto out_unlock; + + pr_info("Request latencies on %s: 1 = %lluns, %lu = %lluns\n", + engine->name, + ktime_to_ns(times[0]), + prime, div64_u64(ktime_to_ns(times[1]), prime)); + } + +out_unlock: + mutex_unlock(&i915->drm.struct_mutex); + return err; +} + +static struct i915_vma *empty_batch(struct drm_i915_private *i915) +{ + struct drm_i915_gem_object *obj; + struct i915_vma *vma; + u32 *cmd; + int err; + + obj = i915_gem_object_create_internal(i915, PAGE_SIZE); + if (IS_ERR(obj)) + return ERR_CAST(obj); + + cmd = i915_gem_object_pin_map(obj, I915_MAP_WB); + if (IS_ERR(cmd)) { + err = PTR_ERR(cmd); + goto err; + } + *cmd = MI_BATCH_BUFFER_END; + i915_gem_object_unpin_map(obj); + + err = i915_gem_object_set_to_gtt_domain(obj, false); + if (err) + goto err; + + vma = i915_vma_instance(obj, &i915->ggtt.base, NULL); + if (IS_ERR(vma)) { + err = PTR_ERR(vma); + goto err; + } + + err = i915_vma_pin(vma, 0, 0, PIN_USER | PIN_GLOBAL); + if (err) + goto err; + + return vma; + +err: + i915_gem_object_put(obj); + return ERR_PTR(err); +} + +static struct drm_i915_gem_request * +empty_request(struct intel_engine_cs *engine, + struct i915_vma *batch) +{ + struct drm_i915_gem_request *request; + int err; + + request = i915_gem_request_alloc(engine, + engine->i915->kernel_context); + if (IS_ERR(request)) + return request; + + err = engine->emit_flush(request, EMIT_INVALIDATE); + if (err) + goto out_request; + + err = i915_switch_context(request); + if (err) + goto out_request; + + err = engine->emit_bb_start(request, + batch->node.start, + batch->node.size, + I915_DISPATCH_SECURE); + if (err) + goto out_request; + +out_request: + __i915_add_request(request, err == 0); + return err ? ERR_PTR(err) : request; +} + +static int live_empty_request(void *arg) +{ + struct drm_i915_private *i915 = arg; + struct intel_engine_cs *engine; + struct live_test t; + struct i915_vma *batch; + unsigned int id; + int err = 0; + + /* Submit various sized batches of empty requests, to each engine + * (individually), and wait for the batch to complete. We can check + * the overhead of submitting requests to the hardware. + */ + + mutex_lock(&i915->drm.struct_mutex); + + batch = empty_batch(i915); + if (IS_ERR(batch)) { + err = PTR_ERR(batch); + goto out_unlock; + } + + for_each_engine(engine, i915, id) { + IGT_TIMEOUT(end_time); + struct drm_i915_gem_request *request; + unsigned long n, prime; + ktime_t times[2] = {}; + + err = begin_live_test(&t, i915, __func__, engine->name); + if (err) + goto out_batch; + + /* Warmup / preload */ + request = empty_request(engine, batch); + if (IS_ERR(request)) { + err = PTR_ERR(request); + goto out_batch; + } + i915_wait_request(request, + I915_WAIT_LOCKED, + MAX_SCHEDULE_TIMEOUT); + + for_each_prime_number_from(prime, 1, 8192) { + times[1] = ktime_get_raw(); + + for (n = 0; n < prime; n++) { + request = empty_request(engine, batch); + if (IS_ERR(request)) { + err = PTR_ERR(request); + goto out_batch; + } + } + i915_wait_request(request, + I915_WAIT_LOCKED, + MAX_SCHEDULE_TIMEOUT); + + times[1] = ktime_sub(ktime_get_raw(), times[1]); + if (prime == 1) + times[0] = times[1]; + + if (__igt_timeout(end_time, NULL)) + break; + } + + err = end_live_test(&t); + if (err) + goto out_batch; + + pr_info("Batch latencies on %s: 1 = %lluns, %lu = %lluns\n", + engine->name, + ktime_to_ns(times[0]), + prime, div64_u64(ktime_to_ns(times[1]), prime)); + } + +out_batch: + i915_vma_unpin(batch); + i915_vma_put(batch); +out_unlock: + mutex_unlock(&i915->drm.struct_mutex); + return err; +} + +static struct i915_vma *recursive_batch(struct drm_i915_private *i915) +{ + struct i915_gem_context *ctx = i915->kernel_context; + struct i915_address_space *vm = ctx->ppgtt ? &ctx->ppgtt->base : &i915->ggtt.base; + struct drm_i915_gem_object *obj; + const int gen = INTEL_GEN(i915); + struct i915_vma *vma; + u32 *cmd; + int err; + + obj = i915_gem_object_create_internal(i915, PAGE_SIZE); + if (IS_ERR(obj)) + return ERR_CAST(obj); + + vma = i915_vma_instance(obj, vm, NULL); + if (IS_ERR(vma)) { + err = PTR_ERR(vma); + goto err; + } + + err = i915_vma_pin(vma, 0, 0, PIN_USER); + if (err) + goto err; + + err = i915_gem_object_set_to_gtt_domain(obj, true); + if (err) + goto err; + + cmd = i915_gem_object_pin_map(obj, I915_MAP_WC); + if (IS_ERR(cmd)) { + err = PTR_ERR(cmd); + goto err; + } + + if (gen >= 8) { + *cmd++ = MI_BATCH_BUFFER_START | 1 << 8 | 1; + *cmd++ = lower_32_bits(vma->node.start); + *cmd++ = upper_32_bits(vma->node.start); + } else if (gen >= 6) { + *cmd++ = MI_BATCH_BUFFER_START | 1 << 8; + *cmd++ = lower_32_bits(vma->node.start); + } else if (gen >= 4) { + *cmd++ = MI_BATCH_BUFFER_START | MI_BATCH_GTT; + *cmd++ = lower_32_bits(vma->node.start); + } else { + *cmd++ = MI_BATCH_BUFFER_START | MI_BATCH_GTT | 1; + *cmd++ = lower_32_bits(vma->node.start); + } + *cmd++ = MI_BATCH_BUFFER_END; /* terminate early in case of error */ + + wmb(); + i915_gem_object_unpin_map(obj); + + return vma; + +err: + i915_gem_object_put(obj); + return ERR_PTR(err); +} + +static int recursive_batch_resolve(struct i915_vma *batch) +{ + u32 *cmd; + + cmd = i915_gem_object_pin_map(batch->obj, I915_MAP_WC); + if (IS_ERR(cmd)) + return PTR_ERR(cmd); + + *cmd = MI_BATCH_BUFFER_END; + wmb(); + + i915_gem_object_unpin_map(batch->obj); + + return 0; +} + +static int live_all_engines(void *arg) +{ + struct drm_i915_private *i915 = arg; + struct intel_engine_cs *engine; + struct drm_i915_gem_request *request[I915_NUM_ENGINES]; + struct i915_vma *batch; + struct live_test t; + unsigned int id; + int err; + + /* Check we can submit requests to all engines simultaneously. We + * send a recursive batch to each engine - checking that we don't + * block doing so, and that they don't complete too soon. + */ + + mutex_lock(&i915->drm.struct_mutex); + + err = begin_live_test(&t, i915, __func__, ""); + if (err) + goto out_unlock; + + batch = recursive_batch(i915); + if (IS_ERR(batch)) { + err = PTR_ERR(batch); + pr_err("%s: Unable to create batch, err=%d\n", __func__, err); + goto out_unlock; + } + + for_each_engine(engine, i915, id) { + request[id] = i915_gem_request_alloc(engine, + i915->kernel_context); + if (IS_ERR(request[id])) { + err = PTR_ERR(request[id]); + pr_err("%s: Request allocation failed with err=%d\n", + __func__, err); + goto out_request; + } + + err = engine->emit_flush(request[id], EMIT_INVALIDATE); + GEM_BUG_ON(err); + + err = i915_switch_context(request[id]); + GEM_BUG_ON(err); + + err = engine->emit_bb_start(request[id], + batch->node.start, + batch->node.size, + 0); + GEM_BUG_ON(err); + request[id]->batch = batch; + + if (!i915_gem_object_has_active_reference(batch->obj)) { + i915_gem_object_get(batch->obj); + i915_gem_object_set_active_reference(batch->obj); + } + + i915_vma_move_to_active(batch, request[id], 0); + i915_gem_request_get(request[id]); + i915_add_request(request[id]); + } + + for_each_engine(engine, i915, id) { + if (i915_gem_request_completed(request[id])) { + pr_err("%s(%s): request completed too early!\n", + __func__, engine->name); + err = -EINVAL; + goto out_request; + } + } + + err = recursive_batch_resolve(batch); + if (err) { + pr_err("%s: failed to resolve batch, err=%d\n", __func__, err); + goto out_request; + } + + for_each_engine(engine, i915, id) { + long timeout; + + timeout = i915_wait_request(request[id], + I915_WAIT_LOCKED, + MAX_SCHEDULE_TIMEOUT); + if (timeout < 0) { + err = timeout; + pr_err("%s: error waiting for request on %s, err=%d\n", + __func__, engine->name, err); + goto out_request; + } + + GEM_BUG_ON(!i915_gem_request_completed(request[id])); + i915_gem_request_put(request[id]); + request[id] = NULL; + } + + err = end_live_test(&t); + +out_request: + for_each_engine(engine, i915, id) + if (request[id]) + i915_gem_request_put(request[id]); + i915_vma_unpin(batch); + i915_vma_put(batch); +out_unlock: + mutex_unlock(&i915->drm.struct_mutex); + return err; +} + +static int live_sequential_engines(void *arg) +{ + struct drm_i915_private *i915 = arg; + struct drm_i915_gem_request *request[I915_NUM_ENGINES] = {}; + struct drm_i915_gem_request *prev = NULL; + struct intel_engine_cs *engine; + struct live_test t; + unsigned int id; + int err; + + /* Check we can submit requests to all engines sequentially, such + * that each successive request waits for the earlier ones. This + * tests that we don't execute requests out of order, even though + * they are running on independent engines. + */ + + mutex_lock(&i915->drm.struct_mutex); + + err = begin_live_test(&t, i915, __func__, ""); + if (err) + goto out_unlock; + + for_each_engine(engine, i915, id) { + struct i915_vma *batch; + + batch = recursive_batch(i915); + if (IS_ERR(batch)) { + err = PTR_ERR(batch); + pr_err("%s: Unable to create batch for %s, err=%d\n", + __func__, engine->name, err); + goto out_unlock; + } + + request[id] = i915_gem_request_alloc(engine, + i915->kernel_context); + if (IS_ERR(request[id])) { + err = PTR_ERR(request[id]); + pr_err("%s: Request allocation failed for %s with err=%d\n", + __func__, engine->name, err); + goto out_request; + } + + if (prev) { + err = i915_gem_request_await_dma_fence(request[id], + &prev->fence); + if (err) { + i915_add_request(request[id]); + pr_err("%s: Request await failed for %s with err=%d\n", + __func__, engine->name, err); + goto out_request; + } + } + + err = engine->emit_flush(request[id], EMIT_INVALIDATE); + GEM_BUG_ON(err); + + err = i915_switch_context(request[id]); + GEM_BUG_ON(err); + + err = engine->emit_bb_start(request[id], + batch->node.start, + batch->node.size, + 0); + GEM_BUG_ON(err); + request[id]->batch = batch; + + i915_vma_move_to_active(batch, request[id], 0); + i915_gem_object_set_active_reference(batch->obj); + i915_vma_get(batch); + + i915_gem_request_get(request[id]); + i915_add_request(request[id]); + + prev = request[id]; + } + + for_each_engine(engine, i915, id) { + long timeout; + + if (i915_gem_request_completed(request[id])) { + pr_err("%s(%s): request completed too early!\n", + __func__, engine->name); + err = -EINVAL; + goto out_request; + } + + err = recursive_batch_resolve(request[id]->batch); + if (err) { + pr_err("%s: failed to resolve batch, err=%d\n", + __func__, err); + goto out_request; + } + + timeout = i915_wait_request(request[id], + I915_WAIT_LOCKED, + MAX_SCHEDULE_TIMEOUT); + if (timeout < 0) { + err = timeout; + pr_err("%s: error waiting for request on %s, err=%d\n", + __func__, engine->name, err); + goto out_request; + } + + GEM_BUG_ON(!i915_gem_request_completed(request[id])); + } + + err = end_live_test(&t); + +out_request: + for_each_engine(engine, i915, id) { + u32 *cmd; + + if (!request[id]) + break; + + cmd = i915_gem_object_pin_map(request[id]->batch->obj, + I915_MAP_WC); + if (!IS_ERR(cmd)) { + *cmd = MI_BATCH_BUFFER_END; + wmb(); + i915_gem_object_unpin_map(request[id]->batch->obj); + } + + i915_vma_put(request[id]->batch); + i915_gem_request_put(request[id]); + } +out_unlock: + mutex_unlock(&i915->drm.struct_mutex); + return err; +} + +int i915_gem_request_live_selftests(struct drm_i915_private *i915) +{ + static const struct i915_subtest tests[] = { + SUBTEST(live_nop_request), + SUBTEST(live_all_engines), + SUBTEST(live_sequential_engines), + SUBTEST(live_empty_request), + }; + return i915_subtests(tests, i915); +} diff --git a/drivers/gpu/drm/i915/selftests/i915_live_selftests.h b/drivers/gpu/drm/i915/selftests/i915_live_selftests.h new file mode 100644 index 000000000000..18b174d855ca --- /dev/null +++ b/drivers/gpu/drm/i915/selftests/i915_live_selftests.h @@ -0,0 +1,19 @@ +/* List each unit test as selftest(name, function) + * + * The name is used as both an enum and expanded as subtest__name to create + * a module parameter. It must be unique and legal for a C identifier. + * + * The function should be of type int function(void). It may be conditionally + * compiled using #if IS_ENABLED(DRM_I915_SELFTEST). + * + * Tests are executed in order by igt/drv_selftest + */ +selftest(sanitycheck, i915_live_sanitycheck) /* keep first (igt selfcheck) */ +selftest(uncore, intel_uncore_live_selftests) +selftest(requests, i915_gem_request_live_selftests) +selftest(objects, i915_gem_object_live_selftests) +selftest(dmabuf, i915_gem_dmabuf_live_selftests) +selftest(coherency, i915_gem_coherency_live_selftests) +selftest(gtt, i915_gem_gtt_live_selftests) +selftest(contexts, i915_gem_context_live_selftests) +selftest(hangcheck, intel_hangcheck_live_selftests) diff --git a/drivers/gpu/drm/i915/selftests/i915_mock_selftests.h b/drivers/gpu/drm/i915/selftests/i915_mock_selftests.h new file mode 100644 index 000000000000..be9a9ebf5692 --- /dev/null +++ b/drivers/gpu/drm/i915/selftests/i915_mock_selftests.h @@ -0,0 +1,20 @@ +/* List each unit test as selftest(name, function) + * + * The name is used as both an enum and expanded as subtest__name to create + * a module parameter. It must be unique and legal for a C identifier. + * + * The function should be of type int function(void). It may be conditionally + * compiled using #if IS_ENABLED(DRM_I915_SELFTEST). + * + * Tests are executed in order by igt/drv_selftest + */ +selftest(sanitycheck, i915_mock_sanitycheck) /* keep first (igt selfcheck) */ +selftest(scatterlist, scatterlist_mock_selftests) +selftest(uncore, intel_uncore_mock_selftests) +selftest(breadcrumbs, intel_breadcrumbs_mock_selftests) +selftest(requests, i915_gem_request_mock_selftests) +selftest(objects, i915_gem_object_mock_selftests) +selftest(dmabuf, i915_gem_dmabuf_mock_selftests) +selftest(vma, i915_vma_mock_selftests) +selftest(evict, i915_gem_evict_mock_selftests) +selftest(gtt, i915_gem_gtt_mock_selftests) diff --git a/drivers/gpu/drm/i915/selftests/i915_random.c b/drivers/gpu/drm/i915/selftests/i915_random.c new file mode 100644 index 000000000000..c17c83c30637 --- /dev/null +++ b/drivers/gpu/drm/i915/selftests/i915_random.c @@ -0,0 +1,63 @@ +/* + * Copyright © 2016 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + */ + +#include <linux/bitops.h> +#include <linux/kernel.h> +#include <linux/random.h> +#include <linux/slab.h> +#include <linux/types.h> + +#include "i915_random.h" + +static inline u32 i915_prandom_u32_max_state(u32 ep_ro, struct rnd_state *state) +{ + return upper_32_bits((u64)prandom_u32_state(state) * ep_ro); +} + +void i915_random_reorder(unsigned int *order, unsigned int count, + struct rnd_state *state) +{ + unsigned int i, j; + + for (i = 0; i < count; i++) { + BUILD_BUG_ON(sizeof(unsigned int) > sizeof(u32)); + j = i915_prandom_u32_max_state(count, state); + swap(order[i], order[j]); + } +} + +unsigned int *i915_random_order(unsigned int count, struct rnd_state *state) +{ + unsigned int *order, i; + + order = kmalloc_array(count, sizeof(*order), GFP_TEMPORARY); + if (!order) + return order; + + for (i = 0; i < count; i++) + order[i] = i; + + i915_random_reorder(order, count, state); + return order; +} diff --git a/drivers/gpu/drm/i915/selftests/i915_random.h b/drivers/gpu/drm/i915/selftests/i915_random.h new file mode 100644 index 000000000000..b9c334ce6cd9 --- /dev/null +++ b/drivers/gpu/drm/i915/selftests/i915_random.h @@ -0,0 +1,50 @@ +/* + * Copyright © 2016 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + */ + +#ifndef __I915_SELFTESTS_RANDOM_H__ +#define __I915_SELFTESTS_RANDOM_H__ + +#include <linux/random.h> + +#include "../i915_selftest.h" + +#define I915_RND_STATE_INITIALIZER(x) ({ \ + struct rnd_state state__; \ + prandom_seed_state(&state__, (x)); \ + state__; \ +}) + +#define I915_RND_STATE(name__) \ + struct rnd_state name__ = I915_RND_STATE_INITIALIZER(i915_selftest.random_seed) + +#define I915_RND_SUBSTATE(name__, parent__) \ + struct rnd_state name__ = I915_RND_STATE_INITIALIZER(prandom_u32_state(&(parent__))) + +unsigned int *i915_random_order(unsigned int count, + struct rnd_state *state); +void i915_random_reorder(unsigned int *order, + unsigned int count, + struct rnd_state *state); + +#endif /* !__I915_SELFTESTS_RANDOM_H__ */ diff --git a/drivers/gpu/drm/i915/selftests/i915_selftest.c b/drivers/gpu/drm/i915/selftests/i915_selftest.c new file mode 100644 index 000000000000..6ba3abb10c6f --- /dev/null +++ b/drivers/gpu/drm/i915/selftests/i915_selftest.c @@ -0,0 +1,250 @@ +/* + * Copyright © 2016 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include <linux/random.h> + +#include "../i915_drv.h" +#include "../i915_selftest.h" + +struct i915_selftest i915_selftest __read_mostly = { + .timeout_ms = 1000, +}; + +int i915_mock_sanitycheck(void) +{ + pr_info(DRIVER_NAME ": %s() - ok!\n", __func__); + return 0; +} + +int i915_live_sanitycheck(struct drm_i915_private *i915) +{ + pr_info("%s: %s() - ok!\n", i915->drm.driver->name, __func__); + return 0; +} + +enum { +#define selftest(name, func) mock_##name, +#include "i915_mock_selftests.h" +#undef selftest +}; + +enum { +#define selftest(name, func) live_##name, +#include "i915_live_selftests.h" +#undef selftest +}; + +struct selftest { + bool enabled; + const char *name; + union { + int (*mock)(void); + int (*live)(struct drm_i915_private *); + }; +}; + +#define selftest(n, f) [mock_##n] = { .name = #n, .mock = f }, +static struct selftest mock_selftests[] = { +#include "i915_mock_selftests.h" +}; +#undef selftest + +#define selftest(n, f) [live_##n] = { .name = #n, .live = f }, +static struct selftest live_selftests[] = { +#include "i915_live_selftests.h" +}; +#undef selftest + +/* Embed the line number into the parameter name so that we can order tests */ +#define selftest(n, func) selftest_0(n, func, param(n)) +#define param(n) __PASTE(igt__, __PASTE(__LINE__, __mock_##n)) +#define selftest_0(n, func, id) \ +module_param_named(id, mock_selftests[mock_##n].enabled, bool, 0400); +#include "i915_mock_selftests.h" +#undef selftest_0 +#undef param + +#define param(n) __PASTE(igt__, __PASTE(__LINE__, __live_##n)) +#define selftest_0(n, func, id) \ +module_param_named(id, live_selftests[live_##n].enabled, bool, 0400); +#include "i915_live_selftests.h" +#undef selftest_0 +#undef param +#undef selftest + +static void set_default_test_all(struct selftest *st, unsigned int count) +{ + unsigned int i; + + for (i = 0; i < count; i++) + if (st[i].enabled) + return; + + for (i = 0; i < count; i++) + st[i].enabled = true; +} + +static int __run_selftests(const char *name, + struct selftest *st, + unsigned int count, + void *data) +{ + int err = 0; + + while (!i915_selftest.random_seed) + i915_selftest.random_seed = get_random_int(); + + i915_selftest.timeout_jiffies = + i915_selftest.timeout_ms ? + msecs_to_jiffies_timeout(i915_selftest.timeout_ms) : + MAX_SCHEDULE_TIMEOUT; + + set_default_test_all(st, count); + + pr_info(DRIVER_NAME ": Performing %s selftests with st_random_seed=0x%x st_timeout=%u\n", + name, i915_selftest.random_seed, i915_selftest.timeout_ms); + + /* Tests are listed in order in i915_*_selftests.h */ + for (; count--; st++) { + if (!st->enabled) + continue; + + cond_resched(); + if (signal_pending(current)) + return -EINTR; + + pr_debug(DRIVER_NAME ": Running %s\n", st->name); + if (data) + err = st->live(data); + else + err = st->mock(); + if (err == -EINTR && !signal_pending(current)) + err = 0; + if (err) + break; + } + + if (WARN(err > 0 || err == -ENOTTY, + "%s returned %d, conflicting with selftest's magic values!\n", + st->name, err)) + err = -1; + + return err; +} + +#define run_selftests(x, data) \ + __run_selftests(#x, x##_selftests, ARRAY_SIZE(x##_selftests), data) + +int i915_mock_selftests(void) +{ + int err; + + if (!i915_selftest.mock) + return 0; + + err = run_selftests(mock, NULL); + if (err) { + i915_selftest.mock = err; + return err; + } + + if (i915_selftest.mock < 0) { + i915_selftest.mock = -ENOTTY; + return 1; + } + + return 0; +} + +int i915_live_selftests(struct pci_dev *pdev) +{ + int err; + + if (!i915_selftest.live) + return 0; + + err = run_selftests(live, to_i915(pci_get_drvdata(pdev))); + if (err) { + i915_selftest.live = err; + return err; + } + + if (i915_selftest.live < 0) { + i915_selftest.live = -ENOTTY; + return 1; + } + + return 0; +} + +int __i915_subtests(const char *caller, + const struct i915_subtest *st, + unsigned int count, + void *data) +{ + int err; + + for (; count--; st++) { + cond_resched(); + if (signal_pending(current)) + return -EINTR; + + pr_debug(DRIVER_NAME ": Running %s/%s\n", caller, st->name); + err = st->func(data); + if (err && err != -EINTR) { + pr_err(DRIVER_NAME "/%s: %s failed with error %d\n", + caller, st->name, err); + return err; + } + } + + return 0; +} + +bool __igt_timeout(unsigned long timeout, const char *fmt, ...) +{ + va_list va; + + if (!signal_pending(current)) { + cond_resched(); + if (time_before(jiffies, timeout)) + return false; + } + + if (fmt) { + va_start(va, fmt); + vprintk(fmt, va); + va_end(va); + } + + return true; +} + +module_param_named(st_random_seed, i915_selftest.random_seed, uint, 0400); +module_param_named(st_timeout, i915_selftest.timeout_ms, uint, 0400); + +module_param_named_unsafe(mock_selftests, i915_selftest.mock, int, 0400); +MODULE_PARM_DESC(mock_selftests, "Run selftests before loading, using mock hardware (0:disabled [default], 1:run tests then load driver, -1:run tests then exit module)"); + +module_param_named_unsafe(live_selftests, i915_selftest.live, int, 0400); +MODULE_PARM_DESC(live_selftests, "Run selftests after driver initialisation on the live system (0:disabled [default], 1:run tests then continue, -1:run tests then exit module)"); diff --git a/drivers/gpu/drm/i915/selftests/i915_vma.c b/drivers/gpu/drm/i915/selftests/i915_vma.c new file mode 100644 index 000000000000..ad56566e24db --- /dev/null +++ b/drivers/gpu/drm/i915/selftests/i915_vma.c @@ -0,0 +1,746 @@ +/* + * Copyright © 2016 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + */ + +#include <linux/prime_numbers.h> + +#include "../i915_selftest.h" + +#include "mock_gem_device.h" +#include "mock_context.h" + +static bool assert_vma(struct i915_vma *vma, + struct drm_i915_gem_object *obj, + struct i915_gem_context *ctx) +{ + bool ok = true; + + if (vma->vm != &ctx->ppgtt->base) { + pr_err("VMA created with wrong VM\n"); + ok = false; + } + + if (vma->size != obj->base.size) { + pr_err("VMA created with wrong size, found %llu, expected %zu\n", + vma->size, obj->base.size); + ok = false; + } + + if (vma->ggtt_view.type != I915_GGTT_VIEW_NORMAL) { + pr_err("VMA created with wrong type [%d]\n", + vma->ggtt_view.type); + ok = false; + } + + return ok; +} + +static struct i915_vma * +checked_vma_instance(struct drm_i915_gem_object *obj, + struct i915_address_space *vm, + struct i915_ggtt_view *view) +{ + struct i915_vma *vma; + bool ok = true; + + vma = i915_vma_instance(obj, vm, view); + if (IS_ERR(vma)) + return vma; + + /* Manual checks, will be reinforced by i915_vma_compare! */ + if (vma->vm != vm) { + pr_err("VMA's vm [%p] does not match request [%p]\n", + vma->vm, vm); + ok = false; + } + + if (i915_is_ggtt(vm) != i915_vma_is_ggtt(vma)) { + pr_err("VMA ggtt status [%d] does not match parent [%d]\n", + i915_vma_is_ggtt(vma), i915_is_ggtt(vm)); + ok = false; + } + + if (i915_vma_compare(vma, vm, view)) { + pr_err("i915_vma_compare failed with create parmaters!\n"); + return ERR_PTR(-EINVAL); + } + + if (i915_vma_compare(vma, vma->vm, + i915_vma_is_ggtt(vma) ? &vma->ggtt_view : NULL)) { + pr_err("i915_vma_compare failed with itself\n"); + return ERR_PTR(-EINVAL); + } + + if (!ok) { + pr_err("i915_vma_compare failed to detect the difference!\n"); + return ERR_PTR(-EINVAL); + } + + return vma; +} + +static int create_vmas(struct drm_i915_private *i915, + struct list_head *objects, + struct list_head *contexts) +{ + struct drm_i915_gem_object *obj; + struct i915_gem_context *ctx; + int pinned; + + list_for_each_entry(obj, objects, st_link) { + for (pinned = 0; pinned <= 1; pinned++) { + list_for_each_entry(ctx, contexts, link) { + struct i915_address_space *vm = + &ctx->ppgtt->base; + struct i915_vma *vma; + int err; + + vma = checked_vma_instance(obj, vm, NULL); + if (IS_ERR(vma)) + return PTR_ERR(vma); + + if (!assert_vma(vma, obj, ctx)) { + pr_err("VMA lookup/create failed\n"); + return -EINVAL; + } + + if (!pinned) { + err = i915_vma_pin(vma, 0, 0, PIN_USER); + if (err) { + pr_err("Failed to pin VMA\n"); + return err; + } + } else { + i915_vma_unpin(vma); + } + } + } + } + + return 0; +} + +static int igt_vma_create(void *arg) +{ + struct drm_i915_private *i915 = arg; + struct drm_i915_gem_object *obj, *on; + struct i915_gem_context *ctx, *cn; + unsigned long num_obj, num_ctx; + unsigned long no, nc; + IGT_TIMEOUT(end_time); + LIST_HEAD(contexts); + LIST_HEAD(objects); + int err; + + /* Exercise creating many vma amonst many objections, checking the + * vma creation and lookup routines. + */ + + no = 0; + for_each_prime_number(num_obj, ULONG_MAX - 1) { + for (; no < num_obj; no++) { + obj = i915_gem_object_create_internal(i915, PAGE_SIZE); + if (IS_ERR(obj)) + goto out; + + list_add(&obj->st_link, &objects); + } + + nc = 0; + for_each_prime_number(num_ctx, MAX_CONTEXT_HW_ID) { + for (; nc < num_ctx; nc++) { + ctx = mock_context(i915, "mock"); + if (!ctx) + goto out; + + list_move(&ctx->link, &contexts); + } + + err = create_vmas(i915, &objects, &contexts); + if (err) + goto out; + + if (igt_timeout(end_time, + "%s timed out: after %lu objects in %lu contexts\n", + __func__, no, nc)) + goto end; + } + + list_for_each_entry_safe(ctx, cn, &contexts, link) + mock_context_close(ctx); + } + +end: + /* Final pass to lookup all created contexts */ + err = create_vmas(i915, &objects, &contexts); +out: + list_for_each_entry_safe(ctx, cn, &contexts, link) + mock_context_close(ctx); + + list_for_each_entry_safe(obj, on, &objects, st_link) + i915_gem_object_put(obj); + return err; +} + +struct pin_mode { + u64 size; + u64 flags; + bool (*assert)(const struct i915_vma *, + const struct pin_mode *mode, + int result); + const char *string; +}; + +static bool assert_pin_valid(const struct i915_vma *vma, + const struct pin_mode *mode, + int result) +{ + if (result) + return false; + + if (i915_vma_misplaced(vma, mode->size, 0, mode->flags)) + return false; + + return true; +} + +__maybe_unused +static bool assert_pin_e2big(const struct i915_vma *vma, + const struct pin_mode *mode, + int result) +{ + return result == -E2BIG; +} + +__maybe_unused +static bool assert_pin_enospc(const struct i915_vma *vma, + const struct pin_mode *mode, + int result) +{ + return result == -ENOSPC; +} + +__maybe_unused +static bool assert_pin_einval(const struct i915_vma *vma, + const struct pin_mode *mode, + int result) +{ + return result == -EINVAL; +} + +static int igt_vma_pin1(void *arg) +{ + struct drm_i915_private *i915 = arg; + const struct pin_mode modes[] = { +#define VALID(sz, fl) { .size = (sz), .flags = (fl), .assert = assert_pin_valid, .string = #sz ", " #fl ", (valid) " } +#define __INVALID(sz, fl, check, eval) { .size = (sz), .flags = (fl), .assert = (check), .string = #sz ", " #fl ", (invalid " #eval ")" } +#define INVALID(sz, fl) __INVALID(sz, fl, assert_pin_einval, EINVAL) +#define TOOBIG(sz, fl) __INVALID(sz, fl, assert_pin_e2big, E2BIG) +#define NOSPACE(sz, fl) __INVALID(sz, fl, assert_pin_enospc, ENOSPC) + VALID(0, PIN_GLOBAL), + VALID(0, PIN_GLOBAL | PIN_MAPPABLE), + + VALID(0, PIN_GLOBAL | PIN_OFFSET_BIAS | 4096), + VALID(0, PIN_GLOBAL | PIN_OFFSET_BIAS | 8192), + VALID(0, PIN_GLOBAL | PIN_OFFSET_BIAS | (i915->ggtt.mappable_end - 4096)), + VALID(0, PIN_GLOBAL | PIN_MAPPABLE | PIN_OFFSET_BIAS | (i915->ggtt.mappable_end - 4096)), + VALID(0, PIN_GLOBAL | PIN_OFFSET_BIAS | (i915->ggtt.base.total - 4096)), + + VALID(0, PIN_GLOBAL | PIN_MAPPABLE | PIN_OFFSET_FIXED | (i915->ggtt.mappable_end - 4096)), + INVALID(0, PIN_GLOBAL | PIN_MAPPABLE | PIN_OFFSET_FIXED | i915->ggtt.mappable_end), + VALID(0, PIN_GLOBAL | PIN_OFFSET_FIXED | (i915->ggtt.base.total - 4096)), + INVALID(0, PIN_GLOBAL | PIN_OFFSET_FIXED | i915->ggtt.base.total), + INVALID(0, PIN_GLOBAL | PIN_OFFSET_FIXED | round_down(U64_MAX, PAGE_SIZE)), + + VALID(4096, PIN_GLOBAL), + VALID(8192, PIN_GLOBAL), + VALID(i915->ggtt.mappable_end - 4096, PIN_GLOBAL | PIN_MAPPABLE), + VALID(i915->ggtt.mappable_end, PIN_GLOBAL | PIN_MAPPABLE), + TOOBIG(i915->ggtt.mappable_end + 4096, PIN_GLOBAL | PIN_MAPPABLE), + VALID(i915->ggtt.base.total - 4096, PIN_GLOBAL), + VALID(i915->ggtt.base.total, PIN_GLOBAL), + TOOBIG(i915->ggtt.base.total + 4096, PIN_GLOBAL), + TOOBIG(round_down(U64_MAX, PAGE_SIZE), PIN_GLOBAL), + INVALID(8192, PIN_GLOBAL | PIN_MAPPABLE | PIN_OFFSET_FIXED | (i915->ggtt.mappable_end - 4096)), + INVALID(8192, PIN_GLOBAL | PIN_OFFSET_FIXED | (i915->ggtt.base.total - 4096)), + INVALID(8192, PIN_GLOBAL | PIN_OFFSET_FIXED | (round_down(U64_MAX, PAGE_SIZE) - 4096)), + + VALID(8192, PIN_GLOBAL | PIN_OFFSET_BIAS | (i915->ggtt.mappable_end - 4096)), + +#if !IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM) + /* Misusing BIAS is a programming error (it is not controllable + * from userspace) so when debugging is enabled, it explodes. + * However, the tests are still quite interesting for checking + * variable start, end and size. + */ + NOSPACE(0, PIN_GLOBAL | PIN_MAPPABLE | PIN_OFFSET_BIAS | i915->ggtt.mappable_end), + NOSPACE(0, PIN_GLOBAL | PIN_OFFSET_BIAS | i915->ggtt.base.total), + NOSPACE(8192, PIN_GLOBAL | PIN_MAPPABLE | PIN_OFFSET_BIAS | (i915->ggtt.mappable_end - 4096)), + NOSPACE(8192, PIN_GLOBAL | PIN_OFFSET_BIAS | (i915->ggtt.base.total - 4096)), +#endif + { }, +#undef NOSPACE +#undef TOOBIG +#undef INVALID +#undef __INVALID +#undef VALID + }, *m; + struct drm_i915_gem_object *obj; + struct i915_vma *vma; + int err = -EINVAL; + + /* Exercise all the weird and wonderful i915_vma_pin requests, + * focusing on error handling of boundary conditions. + */ + + GEM_BUG_ON(!drm_mm_clean(&i915->ggtt.base.mm)); + + obj = i915_gem_object_create_internal(i915, PAGE_SIZE); + if (IS_ERR(obj)) + return PTR_ERR(obj); + + vma = checked_vma_instance(obj, &i915->ggtt.base, NULL); + if (IS_ERR(vma)) + goto out; + + for (m = modes; m->assert; m++) { + err = i915_vma_pin(vma, m->size, 0, m->flags); + if (!m->assert(vma, m, err)) { + pr_err("%s to pin single page into GGTT with mode[%d:%s]: size=%llx flags=%llx, err=%d\n", + m->assert == assert_pin_valid ? "Failed" : "Unexpectedly succeeded", + (int)(m - modes), m->string, m->size, m->flags, + err); + if (!err) + i915_vma_unpin(vma); + err = -EINVAL; + goto out; + } + + if (!err) { + i915_vma_unpin(vma); + err = i915_vma_unbind(vma); + if (err) { + pr_err("Failed to unbind single page from GGTT, err=%d\n", err); + goto out; + } + } + } + + err = 0; +out: + i915_gem_object_put(obj); + return err; +} + +static unsigned long rotated_index(const struct intel_rotation_info *r, + unsigned int n, + unsigned int x, + unsigned int y) +{ + return (r->plane[n].stride * (r->plane[n].height - y - 1) + + r->plane[n].offset + x); +} + +static struct scatterlist * +assert_rotated(struct drm_i915_gem_object *obj, + const struct intel_rotation_info *r, unsigned int n, + struct scatterlist *sg) +{ + unsigned int x, y; + + for (x = 0; x < r->plane[n].width; x++) { + for (y = 0; y < r->plane[n].height; y++) { + unsigned long src_idx; + dma_addr_t src; + + if (!sg) { + pr_err("Invalid sg table: too short at plane %d, (%d, %d)!\n", + n, x, y); + return ERR_PTR(-EINVAL); + } + + src_idx = rotated_index(r, n, x, y); + src = i915_gem_object_get_dma_address(obj, src_idx); + + if (sg_dma_len(sg) != PAGE_SIZE) { + pr_err("Invalid sg.length, found %d, expected %lu for rotated page (%d, %d) [src index %lu]\n", + sg_dma_len(sg), PAGE_SIZE, + x, y, src_idx); + return ERR_PTR(-EINVAL); + } + + if (sg_dma_address(sg) != src) { + pr_err("Invalid address for rotated page (%d, %d) [src index %lu]\n", + x, y, src_idx); + return ERR_PTR(-EINVAL); + } + + sg = sg_next(sg); + } + } + + return sg; +} + +static unsigned int rotated_size(const struct intel_rotation_plane_info *a, + const struct intel_rotation_plane_info *b) +{ + return a->width * a->height + b->width * b->height; +} + +static int igt_vma_rotate(void *arg) +{ + struct drm_i915_private *i915 = arg; + struct i915_address_space *vm = &i915->ggtt.base; + struct drm_i915_gem_object *obj; + const struct intel_rotation_plane_info planes[] = { + { .width = 1, .height = 1, .stride = 1 }, + { .width = 2, .height = 2, .stride = 2 }, + { .width = 4, .height = 4, .stride = 4 }, + { .width = 8, .height = 8, .stride = 8 }, + + { .width = 3, .height = 5, .stride = 3 }, + { .width = 3, .height = 5, .stride = 4 }, + { .width = 3, .height = 5, .stride = 5 }, + + { .width = 5, .height = 3, .stride = 5 }, + { .width = 5, .height = 3, .stride = 7 }, + { .width = 5, .height = 3, .stride = 9 }, + + { .width = 4, .height = 6, .stride = 6 }, + { .width = 6, .height = 4, .stride = 6 }, + { } + }, *a, *b; + const unsigned int max_pages = 64; + int err = -ENOMEM; + + /* Create VMA for many different combinations of planes and check + * that the page layout within the rotated VMA match our expectations. + */ + + obj = i915_gem_object_create_internal(i915, max_pages * PAGE_SIZE); + if (IS_ERR(obj)) + goto out; + + for (a = planes; a->width; a++) { + for (b = planes + ARRAY_SIZE(planes); b-- != planes; ) { + struct i915_ggtt_view view; + unsigned int n, max_offset; + + max_offset = max(a->stride * a->height, + b->stride * b->height); + GEM_BUG_ON(max_offset > max_pages); + max_offset = max_pages - max_offset; + + view.type = I915_GGTT_VIEW_ROTATED; + view.rotated.plane[0] = *a; + view.rotated.plane[1] = *b; + + for_each_prime_number_from(view.rotated.plane[0].offset, 0, max_offset) { + for_each_prime_number_from(view.rotated.plane[1].offset, 0, max_offset) { + struct scatterlist *sg; + struct i915_vma *vma; + + vma = checked_vma_instance(obj, vm, &view); + if (IS_ERR(vma)) { + err = PTR_ERR(vma); + goto out_object; + } + + err = i915_vma_pin(vma, 0, 0, PIN_GLOBAL); + if (err) { + pr_err("Failed to pin VMA, err=%d\n", err); + goto out_object; + } + + if (vma->size != rotated_size(a, b) * PAGE_SIZE) { + pr_err("VMA is wrong size, expected %lu, found %llu\n", + PAGE_SIZE * rotated_size(a, b), vma->size); + err = -EINVAL; + goto out_object; + } + + if (vma->pages->nents != rotated_size(a, b)) { + pr_err("sg table is wrong sizeo, expected %u, found %u nents\n", + rotated_size(a, b), vma->pages->nents); + err = -EINVAL; + goto out_object; + } + + if (vma->node.size < vma->size) { + pr_err("VMA binding too small, expected %llu, found %llu\n", + vma->size, vma->node.size); + err = -EINVAL; + goto out_object; + } + + if (vma->pages == obj->mm.pages) { + pr_err("VMA using unrotated object pages!\n"); + err = -EINVAL; + goto out_object; + } + + sg = vma->pages->sgl; + for (n = 0; n < ARRAY_SIZE(view.rotated.plane); n++) { + sg = assert_rotated(obj, &view.rotated, n, sg); + if (IS_ERR(sg)) { + pr_err("Inconsistent VMA pages for plane %d: [(%d, %d, %d, %d), (%d, %d, %d, %d)]\n", n, + view.rotated.plane[0].width, + view.rotated.plane[0].height, + view.rotated.plane[0].stride, + view.rotated.plane[0].offset, + view.rotated.plane[1].width, + view.rotated.plane[1].height, + view.rotated.plane[1].stride, + view.rotated.plane[1].offset); + err = -EINVAL; + goto out_object; + } + } + + i915_vma_unpin(vma); + } + } + } + } + +out_object: + i915_gem_object_put(obj); +out: + return err; +} + +static bool assert_partial(struct drm_i915_gem_object *obj, + struct i915_vma *vma, + unsigned long offset, + unsigned long size) +{ + struct sgt_iter sgt; + dma_addr_t dma; + + for_each_sgt_dma(dma, sgt, vma->pages) { + dma_addr_t src; + + if (!size) { + pr_err("Partial scattergather list too long\n"); + return false; + } + + src = i915_gem_object_get_dma_address(obj, offset); + if (src != dma) { + pr_err("DMA mismatch for partial page offset %lu\n", + offset); + return false; + } + + offset++; + size--; + } + + return true; +} + +static bool assert_pin(struct i915_vma *vma, + struct i915_ggtt_view *view, + u64 size, + const char *name) +{ + bool ok = true; + + if (vma->size != size) { + pr_err("(%s) VMA is wrong size, expected %llu, found %llu\n", + name, size, vma->size); + ok = false; + } + + if (vma->node.size < vma->size) { + pr_err("(%s) VMA binding too small, expected %llu, found %llu\n", + name, vma->size, vma->node.size); + ok = false; + } + + if (view && view->type != I915_GGTT_VIEW_NORMAL) { + if (memcmp(&vma->ggtt_view, view, sizeof(*view))) { + pr_err("(%s) VMA mismatch upon creation!\n", + name); + ok = false; + } + + if (vma->pages == vma->obj->mm.pages) { + pr_err("(%s) VMA using original object pages!\n", + name); + ok = false; + } + } else { + if (vma->ggtt_view.type != I915_GGTT_VIEW_NORMAL) { + pr_err("Not the normal ggtt view! Found %d\n", + vma->ggtt_view.type); + ok = false; + } + + if (vma->pages != vma->obj->mm.pages) { + pr_err("VMA not using object pages!\n"); + ok = false; + } + } + + return ok; +} + +static int igt_vma_partial(void *arg) +{ + struct drm_i915_private *i915 = arg; + struct i915_address_space *vm = &i915->ggtt.base; + const unsigned int npages = 1021; /* prime! */ + struct drm_i915_gem_object *obj; + const struct phase { + const char *name; + } phases[] = { + { "create" }, + { "lookup" }, + { }, + }, *p; + unsigned int sz, offset; + struct i915_vma *vma; + int err = -ENOMEM; + + /* Create lots of different VMA for the object and check that + * we are returned the same VMA when we later request the same range. + */ + + obj = i915_gem_object_create_internal(i915, npages*PAGE_SIZE); + if (IS_ERR(obj)) + goto out; + + for (p = phases; p->name; p++) { /* exercise both create/lookup */ + unsigned int count, nvma; + + nvma = 0; + for_each_prime_number_from(sz, 1, npages) { + for_each_prime_number_from(offset, 0, npages - sz) { + struct i915_ggtt_view view; + + view.type = I915_GGTT_VIEW_PARTIAL; + view.partial.offset = offset; + view.partial.size = sz; + + if (sz == npages) + view.type = I915_GGTT_VIEW_NORMAL; + + vma = checked_vma_instance(obj, vm, &view); + if (IS_ERR(vma)) { + err = PTR_ERR(vma); + goto out_object; + } + + err = i915_vma_pin(vma, 0, 0, PIN_GLOBAL); + if (err) + goto out_object; + + if (!assert_pin(vma, &view, sz*PAGE_SIZE, p->name)) { + pr_err("(%s) Inconsistent partial pinning for (offset=%d, size=%d)\n", + p->name, offset, sz); + err = -EINVAL; + goto out_object; + } + + if (!assert_partial(obj, vma, offset, sz)) { + pr_err("(%s) Inconsistent partial pages for (offset=%d, size=%d)\n", + p->name, offset, sz); + err = -EINVAL; + goto out_object; + } + + i915_vma_unpin(vma); + nvma++; + } + } + + count = 0; + list_for_each_entry(vma, &obj->vma_list, obj_link) + count++; + if (count != nvma) { + pr_err("(%s) All partial vma were not recorded on the obj->vma_list: found %u, expected %u\n", + p->name, count, nvma); + err = -EINVAL; + goto out_object; + } + + /* Check that we did create the whole object mapping */ + vma = checked_vma_instance(obj, vm, NULL); + if (IS_ERR(vma)) { + err = PTR_ERR(vma); + goto out_object; + } + + err = i915_vma_pin(vma, 0, 0, PIN_GLOBAL); + if (err) + goto out_object; + + if (!assert_pin(vma, NULL, obj->base.size, p->name)) { + pr_err("(%s) inconsistent full pin\n", p->name); + err = -EINVAL; + goto out_object; + } + + i915_vma_unpin(vma); + + count = 0; + list_for_each_entry(vma, &obj->vma_list, obj_link) + count++; + if (count != nvma) { + pr_err("(%s) allocated an extra full vma!\n", p->name); + err = -EINVAL; + goto out_object; + } + } + +out_object: + i915_gem_object_put(obj); +out: + return err; +} + +int i915_vma_mock_selftests(void) +{ + static const struct i915_subtest tests[] = { + SUBTEST(igt_vma_create), + SUBTEST(igt_vma_pin1), + SUBTEST(igt_vma_rotate), + SUBTEST(igt_vma_partial), + }; + struct drm_i915_private *i915; + int err; + + i915 = mock_gem_device(); + if (!i915) + return -ENOMEM; + + mutex_lock(&i915->drm.struct_mutex); + err = i915_subtests(tests, i915); + mutex_unlock(&i915->drm.struct_mutex); + + drm_dev_unref(&i915->drm); + return err; +} + diff --git a/drivers/gpu/drm/i915/selftests/intel_breadcrumbs.c b/drivers/gpu/drm/i915/selftests/intel_breadcrumbs.c new file mode 100644 index 000000000000..621be1ca53d8 --- /dev/null +++ b/drivers/gpu/drm/i915/selftests/intel_breadcrumbs.c @@ -0,0 +1,481 @@ +/* + * Copyright © 2016 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + */ + +#include "../i915_selftest.h" +#include "i915_random.h" + +#include "mock_gem_device.h" +#include "mock_engine.h" + +static int check_rbtree(struct intel_engine_cs *engine, + const unsigned long *bitmap, + const struct intel_wait *waiters, + const int count) +{ + struct intel_breadcrumbs *b = &engine->breadcrumbs; + struct rb_node *rb; + int n; + + if (&b->first_wait->node != rb_first(&b->waiters)) { + pr_err("First waiter does not match first element of wait-tree\n"); + return -EINVAL; + } + + n = find_first_bit(bitmap, count); + for (rb = rb_first(&b->waiters); rb; rb = rb_next(rb)) { + struct intel_wait *w = container_of(rb, typeof(*w), node); + int idx = w - waiters; + + if (!test_bit(idx, bitmap)) { + pr_err("waiter[%d, seqno=%d] removed but still in wait-tree\n", + idx, w->seqno); + return -EINVAL; + } + + if (n != idx) { + pr_err("waiter[%d, seqno=%d] does not match expected next element in tree [%d]\n", + idx, w->seqno, n); + return -EINVAL; + } + + n = find_next_bit(bitmap, count, n + 1); + } + + return 0; +} + +static int check_completion(struct intel_engine_cs *engine, + const unsigned long *bitmap, + const struct intel_wait *waiters, + const int count) +{ + int n; + + for (n = 0; n < count; n++) { + if (intel_wait_complete(&waiters[n]) != !!test_bit(n, bitmap)) + continue; + + pr_err("waiter[%d, seqno=%d] is %s, but expected %s\n", + n, waiters[n].seqno, + intel_wait_complete(&waiters[n]) ? "complete" : "active", + test_bit(n, bitmap) ? "active" : "complete"); + return -EINVAL; + } + + return 0; +} + +static int check_rbtree_empty(struct intel_engine_cs *engine) +{ + struct intel_breadcrumbs *b = &engine->breadcrumbs; + + if (b->first_wait) { + pr_err("Empty breadcrumbs still has a waiter\n"); + return -EINVAL; + } + + if (!RB_EMPTY_ROOT(&b->waiters)) { + pr_err("Empty breadcrumbs, but wait-tree not empty\n"); + return -EINVAL; + } + + return 0; +} + +static int igt_random_insert_remove(void *arg) +{ + const u32 seqno_bias = 0x1000; + I915_RND_STATE(prng); + struct intel_engine_cs *engine = arg; + struct intel_wait *waiters; + const int count = 4096; + unsigned int *order; + unsigned long *bitmap; + int err = -ENOMEM; + int n; + + mock_engine_reset(engine); + + waiters = drm_malloc_gfp(count, sizeof(*waiters), GFP_TEMPORARY); + if (!waiters) + goto out_engines; + + bitmap = kcalloc(DIV_ROUND_UP(count, BITS_PER_LONG), sizeof(*bitmap), + GFP_TEMPORARY); + if (!bitmap) + goto out_waiters; + + order = i915_random_order(count, &prng); + if (!order) + goto out_bitmap; + + for (n = 0; n < count; n++) + intel_wait_init_for_seqno(&waiters[n], seqno_bias + n); + + err = check_rbtree(engine, bitmap, waiters, count); + if (err) + goto out_order; + + /* Add and remove waiters into the rbtree in random order. At each + * step, we verify that the rbtree is correctly ordered. + */ + for (n = 0; n < count; n++) { + int i = order[n]; + + intel_engine_add_wait(engine, &waiters[i]); + __set_bit(i, bitmap); + + err = check_rbtree(engine, bitmap, waiters, count); + if (err) + goto out_order; + } + + i915_random_reorder(order, count, &prng); + for (n = 0; n < count; n++) { + int i = order[n]; + + intel_engine_remove_wait(engine, &waiters[i]); + __clear_bit(i, bitmap); + + err = check_rbtree(engine, bitmap, waiters, count); + if (err) + goto out_order; + } + + err = check_rbtree_empty(engine); +out_order: + kfree(order); +out_bitmap: + kfree(bitmap); +out_waiters: + drm_free_large(waiters); +out_engines: + mock_engine_flush(engine); + return err; +} + +static int igt_insert_complete(void *arg) +{ + const u32 seqno_bias = 0x1000; + struct intel_engine_cs *engine = arg; + struct intel_wait *waiters; + const int count = 4096; + unsigned long *bitmap; + int err = -ENOMEM; + int n, m; + + mock_engine_reset(engine); + + waiters = drm_malloc_gfp(count, sizeof(*waiters), GFP_TEMPORARY); + if (!waiters) + goto out_engines; + + bitmap = kcalloc(DIV_ROUND_UP(count, BITS_PER_LONG), sizeof(*bitmap), + GFP_TEMPORARY); + if (!bitmap) + goto out_waiters; + + for (n = 0; n < count; n++) { + intel_wait_init_for_seqno(&waiters[n], n + seqno_bias); + intel_engine_add_wait(engine, &waiters[n]); + __set_bit(n, bitmap); + } + err = check_rbtree(engine, bitmap, waiters, count); + if (err) + goto out_bitmap; + + /* On each step, we advance the seqno so that several waiters are then + * complete (we increase the seqno by increasingly larger values to + * retire more and more waiters at once). All retired waiters should + * be woken and removed from the rbtree, and so that we check. + */ + for (n = 0; n < count; n = m) { + int seqno = 2 * n; + + GEM_BUG_ON(find_first_bit(bitmap, count) != n); + + if (intel_wait_complete(&waiters[n])) { + pr_err("waiter[%d, seqno=%d] completed too early\n", + n, waiters[n].seqno); + err = -EINVAL; + goto out_bitmap; + } + + /* complete the following waiters */ + mock_seqno_advance(engine, seqno + seqno_bias); + for (m = n; m <= seqno; m++) { + if (m == count) + break; + + GEM_BUG_ON(!test_bit(m, bitmap)); + __clear_bit(m, bitmap); + } + + intel_engine_remove_wait(engine, &waiters[n]); + RB_CLEAR_NODE(&waiters[n].node); + + err = check_rbtree(engine, bitmap, waiters, count); + if (err) { + pr_err("rbtree corrupt after seqno advance to %d\n", + seqno + seqno_bias); + goto out_bitmap; + } + + err = check_completion(engine, bitmap, waiters, count); + if (err) { + pr_err("completions after seqno advance to %d failed\n", + seqno + seqno_bias); + goto out_bitmap; + } + } + + err = check_rbtree_empty(engine); +out_bitmap: + kfree(bitmap); +out_waiters: + drm_free_large(waiters); +out_engines: + mock_engine_flush(engine); + return err; +} + +struct igt_wakeup { + struct task_struct *tsk; + atomic_t *ready, *set, *done; + struct intel_engine_cs *engine; + unsigned long flags; +#define STOP 0 +#define IDLE 1 + wait_queue_head_t *wq; + u32 seqno; +}; + +static int wait_atomic(atomic_t *p) +{ + schedule(); + return 0; +} + +static int wait_atomic_timeout(atomic_t *p) +{ + return schedule_timeout(10 * HZ) ? 0 : -ETIMEDOUT; +} + +static bool wait_for_ready(struct igt_wakeup *w) +{ + DEFINE_WAIT(ready); + + set_bit(IDLE, &w->flags); + if (atomic_dec_and_test(w->done)) + wake_up_atomic_t(w->done); + + if (test_bit(STOP, &w->flags)) + goto out; + + for (;;) { + prepare_to_wait(w->wq, &ready, TASK_INTERRUPTIBLE); + if (atomic_read(w->ready) == 0) + break; + + schedule(); + } + finish_wait(w->wq, &ready); + +out: + clear_bit(IDLE, &w->flags); + if (atomic_dec_and_test(w->set)) + wake_up_atomic_t(w->set); + + return !test_bit(STOP, &w->flags); +} + +static int igt_wakeup_thread(void *arg) +{ + struct igt_wakeup *w = arg; + struct intel_wait wait; + + while (wait_for_ready(w)) { + GEM_BUG_ON(kthread_should_stop()); + + intel_wait_init_for_seqno(&wait, w->seqno); + intel_engine_add_wait(w->engine, &wait); + for (;;) { + set_current_state(TASK_UNINTERRUPTIBLE); + if (i915_seqno_passed(intel_engine_get_seqno(w->engine), + w->seqno)) + break; + + if (test_bit(STOP, &w->flags)) /* emergency escape */ + break; + + schedule(); + } + intel_engine_remove_wait(w->engine, &wait); + __set_current_state(TASK_RUNNING); + } + + return 0; +} + +static void igt_wake_all_sync(atomic_t *ready, + atomic_t *set, + atomic_t *done, + wait_queue_head_t *wq, + int count) +{ + atomic_set(set, count); + atomic_set(ready, 0); + wake_up_all(wq); + + wait_on_atomic_t(set, wait_atomic, TASK_UNINTERRUPTIBLE); + atomic_set(ready, count); + atomic_set(done, count); +} + +static int igt_wakeup(void *arg) +{ + I915_RND_STATE(prng); + const int state = TASK_UNINTERRUPTIBLE; + struct intel_engine_cs *engine = arg; + struct igt_wakeup *waiters; + DECLARE_WAIT_QUEUE_HEAD_ONSTACK(wq); + const int count = 4096; + const u32 max_seqno = count / 4; + atomic_t ready, set, done; + int err = -ENOMEM; + int n, step; + + mock_engine_reset(engine); + + waiters = drm_malloc_gfp(count, sizeof(*waiters), GFP_TEMPORARY); + if (!waiters) + goto out_engines; + + /* Create a large number of threads, each waiting on a random seqno. + * Multiple waiters will be waiting for the same seqno. + */ + atomic_set(&ready, count); + for (n = 0; n < count; n++) { + waiters[n].wq = &wq; + waiters[n].ready = &ready; + waiters[n].set = &set; + waiters[n].done = &done; + waiters[n].engine = engine; + waiters[n].flags = BIT(IDLE); + + waiters[n].tsk = kthread_run(igt_wakeup_thread, &waiters[n], + "i915/igt:%d", n); + if (IS_ERR(waiters[n].tsk)) + goto out_waiters; + + get_task_struct(waiters[n].tsk); + } + + for (step = 1; step <= max_seqno; step <<= 1) { + u32 seqno; + + /* The waiter threads start paused as we assign them a random + * seqno and reset the engine. Once the engine is reset, + * we signal that the threads may begin their wait upon their + * seqno. + */ + for (n = 0; n < count; n++) { + GEM_BUG_ON(!test_bit(IDLE, &waiters[n].flags)); + waiters[n].seqno = + 1 + prandom_u32_state(&prng) % max_seqno; + } + mock_seqno_advance(engine, 0); + igt_wake_all_sync(&ready, &set, &done, &wq, count); + + /* Simulate the GPU doing chunks of work, with one or more + * seqno appearing to finish at the same time. A random number + * of threads will be waiting upon the update and hopefully be + * woken. + */ + for (seqno = 1; seqno <= max_seqno + step; seqno += step) { + usleep_range(50, 500); + mock_seqno_advance(engine, seqno); + } + GEM_BUG_ON(intel_engine_get_seqno(engine) < 1 + max_seqno); + + /* With the seqno now beyond any of the waiting threads, they + * should all be woken, see that they are complete and signal + * that they are ready for the next test. We wait until all + * threads are complete and waiting for us (i.e. not a seqno). + */ + err = wait_on_atomic_t(&done, wait_atomic_timeout, state); + if (err) { + pr_err("Timed out waiting for %d remaining waiters\n", + atomic_read(&done)); + break; + } + + err = check_rbtree_empty(engine); + if (err) + break; + } + +out_waiters: + for (n = 0; n < count; n++) { + if (IS_ERR(waiters[n].tsk)) + break; + + set_bit(STOP, &waiters[n].flags); + } + mock_seqno_advance(engine, INT_MAX); /* wakeup any broken waiters */ + igt_wake_all_sync(&ready, &set, &done, &wq, n); + + for (n = 0; n < count; n++) { + if (IS_ERR(waiters[n].tsk)) + break; + + kthread_stop(waiters[n].tsk); + put_task_struct(waiters[n].tsk); + } + + drm_free_large(waiters); +out_engines: + mock_engine_flush(engine); + return err; +} + +int intel_breadcrumbs_mock_selftests(void) +{ + static const struct i915_subtest tests[] = { + SUBTEST(igt_random_insert_remove), + SUBTEST(igt_insert_complete), + SUBTEST(igt_wakeup), + }; + struct drm_i915_private *i915; + int err; + + i915 = mock_gem_device(); + if (!i915) + return -ENOMEM; + + err = i915_subtests(tests, i915->engine[RCS]); + drm_dev_unref(&i915->drm); + + return err; +} diff --git a/drivers/gpu/drm/i915/selftests/intel_hangcheck.c b/drivers/gpu/drm/i915/selftests/intel_hangcheck.c new file mode 100644 index 000000000000..d4acee6730e9 --- /dev/null +++ b/drivers/gpu/drm/i915/selftests/intel_hangcheck.c @@ -0,0 +1,537 @@ +/* + * Copyright © 2016 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + */ + +#include "../i915_selftest.h" + +struct hang { + struct drm_i915_private *i915; + struct drm_i915_gem_object *hws; + struct drm_i915_gem_object *obj; + u32 *seqno; + u32 *batch; +}; + +static int hang_init(struct hang *h, struct drm_i915_private *i915) +{ + void *vaddr; + int err; + + memset(h, 0, sizeof(*h)); + h->i915 = i915; + + h->hws = i915_gem_object_create_internal(i915, PAGE_SIZE); + if (IS_ERR(h->hws)) + return PTR_ERR(h->hws); + + h->obj = i915_gem_object_create_internal(i915, PAGE_SIZE); + if (IS_ERR(h->obj)) { + err = PTR_ERR(h->obj); + goto err_hws; + } + + i915_gem_object_set_cache_level(h->hws, I915_CACHE_LLC); + vaddr = i915_gem_object_pin_map(h->hws, I915_MAP_WB); + if (IS_ERR(vaddr)) { + err = PTR_ERR(vaddr); + goto err_obj; + } + h->seqno = memset(vaddr, 0xff, PAGE_SIZE); + + vaddr = i915_gem_object_pin_map(h->obj, + HAS_LLC(i915) ? I915_MAP_WB : I915_MAP_WC); + if (IS_ERR(vaddr)) { + err = PTR_ERR(vaddr); + goto err_unpin_hws; + } + h->batch = vaddr; + + return 0; + +err_unpin_hws: + i915_gem_object_unpin_map(h->hws); +err_obj: + i915_gem_object_put(h->obj); +err_hws: + i915_gem_object_put(h->hws); + return err; +} + +static u64 hws_address(const struct i915_vma *hws, + const struct drm_i915_gem_request *rq) +{ + return hws->node.start + offset_in_page(sizeof(u32)*rq->fence.context); +} + +static int emit_recurse_batch(struct hang *h, + struct drm_i915_gem_request *rq) +{ + struct drm_i915_private *i915 = h->i915; + struct i915_address_space *vm = rq->ctx->ppgtt ? &rq->ctx->ppgtt->base : &i915->ggtt.base; + struct i915_vma *hws, *vma; + unsigned int flags; + u32 *batch; + int err; + + vma = i915_vma_instance(h->obj, vm, NULL); + if (IS_ERR(vma)) + return PTR_ERR(vma); + + hws = i915_vma_instance(h->hws, vm, NULL); + if (IS_ERR(hws)) + return PTR_ERR(hws); + + err = i915_vma_pin(vma, 0, 0, PIN_USER); + if (err) + return err; + + err = i915_vma_pin(hws, 0, 0, PIN_USER); + if (err) + goto unpin_vma; + + err = rq->engine->emit_flush(rq, EMIT_INVALIDATE); + if (err) + goto unpin_hws; + + err = i915_switch_context(rq); + if (err) + goto unpin_hws; + + i915_vma_move_to_active(vma, rq, 0); + if (!i915_gem_object_has_active_reference(vma->obj)) { + i915_gem_object_get(vma->obj); + i915_gem_object_set_active_reference(vma->obj); + } + + i915_vma_move_to_active(hws, rq, 0); + if (!i915_gem_object_has_active_reference(hws->obj)) { + i915_gem_object_get(hws->obj); + i915_gem_object_set_active_reference(hws->obj); + } + + batch = h->batch; + if (INTEL_GEN(i915) >= 8) { + *batch++ = MI_STORE_DWORD_IMM_GEN4; + *batch++ = lower_32_bits(hws_address(hws, rq)); + *batch++ = upper_32_bits(hws_address(hws, rq)); + *batch++ = rq->fence.seqno; + *batch++ = MI_BATCH_BUFFER_START | 1 << 8 | 1; + *batch++ = lower_32_bits(vma->node.start); + *batch++ = upper_32_bits(vma->node.start); + } else if (INTEL_GEN(i915) >= 6) { + *batch++ = MI_STORE_DWORD_IMM_GEN4; + *batch++ = 0; + *batch++ = lower_32_bits(hws_address(hws, rq)); + *batch++ = rq->fence.seqno; + *batch++ = MI_BATCH_BUFFER_START | 1 << 8; + *batch++ = lower_32_bits(vma->node.start); + } else if (INTEL_GEN(i915) >= 4) { + *batch++ = MI_STORE_DWORD_IMM_GEN4 | 1 << 22; + *batch++ = 0; + *batch++ = lower_32_bits(hws_address(hws, rq)); + *batch++ = rq->fence.seqno; + *batch++ = MI_BATCH_BUFFER_START | 2 << 6; + *batch++ = lower_32_bits(vma->node.start); + } else { + *batch++ = MI_STORE_DWORD_IMM; + *batch++ = lower_32_bits(hws_address(hws, rq)); + *batch++ = rq->fence.seqno; + *batch++ = MI_BATCH_BUFFER_START | 2 << 6 | 1; + *batch++ = lower_32_bits(vma->node.start); + } + *batch++ = MI_BATCH_BUFFER_END; /* not reached */ + + flags = 0; + if (INTEL_GEN(vm->i915) <= 5) + flags |= I915_DISPATCH_SECURE; + + err = rq->engine->emit_bb_start(rq, vma->node.start, PAGE_SIZE, flags); + +unpin_hws: + i915_vma_unpin(hws); +unpin_vma: + i915_vma_unpin(vma); + return err; +} + +static struct drm_i915_gem_request * +hang_create_request(struct hang *h, + struct intel_engine_cs *engine, + struct i915_gem_context *ctx) +{ + struct drm_i915_gem_request *rq; + int err; + + if (i915_gem_object_is_active(h->obj)) { + struct drm_i915_gem_object *obj; + void *vaddr; + + obj = i915_gem_object_create_internal(h->i915, PAGE_SIZE); + if (IS_ERR(obj)) + return ERR_CAST(obj); + + vaddr = i915_gem_object_pin_map(obj, + HAS_LLC(h->i915) ? I915_MAP_WB : I915_MAP_WC); + if (IS_ERR(vaddr)) { + i915_gem_object_put(obj); + return ERR_CAST(vaddr); + } + + i915_gem_object_unpin_map(h->obj); + i915_gem_object_put(h->obj); + + h->obj = obj; + h->batch = vaddr; + } + + rq = i915_gem_request_alloc(engine, ctx); + if (IS_ERR(rq)) + return rq; + + err = emit_recurse_batch(h, rq); + if (err) { + __i915_add_request(rq, false); + return ERR_PTR(err); + } + + return rq; +} + +static u32 hws_seqno(const struct hang *h, + const struct drm_i915_gem_request *rq) +{ + return READ_ONCE(h->seqno[rq->fence.context % (PAGE_SIZE/sizeof(u32))]); +} + +static void hang_fini(struct hang *h) +{ + *h->batch = MI_BATCH_BUFFER_END; + wmb(); + + i915_gem_object_unpin_map(h->obj); + i915_gem_object_put(h->obj); + + i915_gem_object_unpin_map(h->hws); + i915_gem_object_put(h->hws); + + i915_gem_wait_for_idle(h->i915, I915_WAIT_LOCKED); + i915_gem_retire_requests(h->i915); +} + +static int igt_hang_sanitycheck(void *arg) +{ + struct drm_i915_private *i915 = arg; + struct drm_i915_gem_request *rq; + struct intel_engine_cs *engine; + enum intel_engine_id id; + struct hang h; + int err; + + /* Basic check that we can execute our hanging batch */ + + if (!igt_can_mi_store_dword_imm(i915)) + return 0; + + mutex_lock(&i915->drm.struct_mutex); + err = hang_init(&h, i915); + if (err) + goto unlock; + + for_each_engine(engine, i915, id) { + long timeout; + + rq = hang_create_request(&h, engine, i915->kernel_context); + if (IS_ERR(rq)) { + err = PTR_ERR(rq); + pr_err("Failed to create request for %s, err=%d\n", + engine->name, err); + goto fini; + } + + i915_gem_request_get(rq); + + *h.batch = MI_BATCH_BUFFER_END; + __i915_add_request(rq, true); + + timeout = i915_wait_request(rq, + I915_WAIT_LOCKED, + MAX_SCHEDULE_TIMEOUT); + i915_gem_request_put(rq); + + if (timeout < 0) { + err = timeout; + pr_err("Wait for request failed on %s, err=%d\n", + engine->name, err); + goto fini; + } + } + +fini: + hang_fini(&h); +unlock: + mutex_unlock(&i915->drm.struct_mutex); + return err; +} + +static int igt_global_reset(void *arg) +{ + struct drm_i915_private *i915 = arg; + unsigned int reset_count; + int err = 0; + + /* Check that we can issue a global GPU reset */ + + set_bit(I915_RESET_IN_PROGRESS, &i915->gpu_error.flags); + + mutex_lock(&i915->drm.struct_mutex); + reset_count = i915_reset_count(&i915->gpu_error); + + i915_reset(i915); + + if (i915_reset_count(&i915->gpu_error) == reset_count) { + pr_err("No GPU reset recorded!\n"); + err = -EINVAL; + } + mutex_unlock(&i915->drm.struct_mutex); + + GEM_BUG_ON(test_bit(I915_RESET_IN_PROGRESS, &i915->gpu_error.flags)); + if (i915_terminally_wedged(&i915->gpu_error)) + err = -EIO; + + return err; +} + +static u32 fake_hangcheck(struct drm_i915_gem_request *rq) +{ + u32 reset_count; + + rq->engine->hangcheck.stalled = true; + rq->engine->hangcheck.seqno = intel_engine_get_seqno(rq->engine); + + reset_count = i915_reset_count(&rq->i915->gpu_error); + + set_bit(I915_RESET_IN_PROGRESS, &rq->i915->gpu_error.flags); + wake_up_all(&rq->i915->gpu_error.wait_queue); + + return reset_count; +} + +static bool wait_for_hang(struct hang *h, struct drm_i915_gem_request *rq) +{ + return !(wait_for_us(i915_seqno_passed(hws_seqno(h, rq), + rq->fence.seqno), + 10) && + wait_for(i915_seqno_passed(hws_seqno(h, rq), + rq->fence.seqno), + 1000)); +} + +static int igt_wait_reset(void *arg) +{ + struct drm_i915_private *i915 = arg; + struct drm_i915_gem_request *rq; + unsigned int reset_count; + struct hang h; + long timeout; + int err; + + /* Check that we detect a stuck waiter and issue a reset */ + + set_bit(I915_RESET_IN_PROGRESS, &i915->gpu_error.flags); + + mutex_lock(&i915->drm.struct_mutex); + err = hang_init(&h, i915); + if (err) + goto unlock; + + rq = hang_create_request(&h, i915->engine[RCS], i915->kernel_context); + if (IS_ERR(rq)) { + err = PTR_ERR(rq); + goto fini; + } + + i915_gem_request_get(rq); + __i915_add_request(rq, true); + + if (!wait_for_hang(&h, rq)) { + pr_err("Failed to start request %x\n", rq->fence.seqno); + err = -EIO; + goto out_rq; + } + + reset_count = fake_hangcheck(rq); + + timeout = i915_wait_request(rq, I915_WAIT_LOCKED, 10); + if (timeout < 0) { + pr_err("i915_wait_request failed on a stuck request: err=%ld\n", + timeout); + err = timeout; + goto out_rq; + } + GEM_BUG_ON(test_bit(I915_RESET_IN_PROGRESS, &i915->gpu_error.flags)); + + if (i915_reset_count(&i915->gpu_error) == reset_count) { + pr_err("No GPU reset recorded!\n"); + err = -EINVAL; + goto out_rq; + } + +out_rq: + i915_gem_request_put(rq); +fini: + hang_fini(&h); +unlock: + mutex_unlock(&i915->drm.struct_mutex); + + if (i915_terminally_wedged(&i915->gpu_error)) + return -EIO; + + return err; +} + +static int igt_reset_queue(void *arg) +{ + struct drm_i915_private *i915 = arg; + struct intel_engine_cs *engine; + enum intel_engine_id id; + struct hang h; + int err; + + /* Check that we replay pending requests following a hang */ + + if (!igt_can_mi_store_dword_imm(i915)) + return 0; + + mutex_lock(&i915->drm.struct_mutex); + err = hang_init(&h, i915); + if (err) + goto unlock; + + for_each_engine(engine, i915, id) { + struct drm_i915_gem_request *prev; + IGT_TIMEOUT(end_time); + unsigned int count; + + prev = hang_create_request(&h, engine, i915->kernel_context); + if (IS_ERR(prev)) { + err = PTR_ERR(prev); + goto fini; + } + + i915_gem_request_get(prev); + __i915_add_request(prev, true); + + count = 0; + do { + struct drm_i915_gem_request *rq; + unsigned int reset_count; + + rq = hang_create_request(&h, + engine, + i915->kernel_context); + if (IS_ERR(rq)) { + err = PTR_ERR(rq); + goto fini; + } + + i915_gem_request_get(rq); + __i915_add_request(rq, true); + + if (!wait_for_hang(&h, prev)) { + pr_err("Failed to start request %x\n", + prev->fence.seqno); + i915_gem_request_put(rq); + i915_gem_request_put(prev); + err = -EIO; + goto fini; + } + + reset_count = fake_hangcheck(prev); + + i915_reset(i915); + + GEM_BUG_ON(test_bit(I915_RESET_IN_PROGRESS, + &i915->gpu_error.flags)); + if (prev->fence.error != -EIO) { + pr_err("GPU reset not recorded on hanging request [fence.error=%d]!\n", + prev->fence.error); + i915_gem_request_put(rq); + i915_gem_request_put(prev); + err = -EINVAL; + goto fini; + } + + if (rq->fence.error) { + pr_err("Fence error status not zero [%d] after unrelated reset\n", + rq->fence.error); + i915_gem_request_put(rq); + i915_gem_request_put(prev); + err = -EINVAL; + goto fini; + } + + if (i915_reset_count(&i915->gpu_error) == reset_count) { + pr_err("No GPU reset recorded!\n"); + i915_gem_request_put(rq); + i915_gem_request_put(prev); + err = -EINVAL; + goto fini; + } + + i915_gem_request_put(prev); + prev = rq; + count++; + } while (time_before(jiffies, end_time)); + pr_info("%s: Completed %d resets\n", engine->name, count); + + *h.batch = MI_BATCH_BUFFER_END; + wmb(); + + i915_gem_request_put(prev); + } + +fini: + hang_fini(&h); +unlock: + mutex_unlock(&i915->drm.struct_mutex); + + if (i915_terminally_wedged(&i915->gpu_error)) + return -EIO; + + return err; +} + +int intel_hangcheck_live_selftests(struct drm_i915_private *i915) +{ + static const struct i915_subtest tests[] = { + SUBTEST(igt_hang_sanitycheck), + SUBTEST(igt_global_reset), + SUBTEST(igt_wait_reset), + SUBTEST(igt_reset_queue), + }; + + if (!intel_has_gpu_reset(i915)) + return 0; + + return i915_subtests(tests, i915); +} diff --git a/drivers/gpu/drm/i915/selftests/intel_uncore.c b/drivers/gpu/drm/i915/selftests/intel_uncore.c new file mode 100644 index 000000000000..2d0fef2cfca6 --- /dev/null +++ b/drivers/gpu/drm/i915/selftests/intel_uncore.c @@ -0,0 +1,182 @@ +/* + * Copyright © 2016 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + */ + +#include "../i915_selftest.h" + +static int intel_fw_table_check(const struct intel_forcewake_range *ranges, + unsigned int num_ranges, + bool is_watertight) +{ + unsigned int i; + s32 prev; + + for (i = 0, prev = -1; i < num_ranges; i++, ranges++) { + /* Check that the table is watertight */ + if (is_watertight && (prev + 1) != (s32)ranges->start) { + pr_err("%s: entry[%d]:(%x, %x) is not watertight to previous (%x)\n", + __func__, i, ranges->start, ranges->end, prev); + return -EINVAL; + } + + /* Check that the table never goes backwards */ + if (prev >= (s32)ranges->start) { + pr_err("%s: entry[%d]:(%x, %x) is less than the previous (%x)\n", + __func__, i, ranges->start, ranges->end, prev); + return -EINVAL; + } + + /* Check that the entry is valid */ + if (ranges->start >= ranges->end) { + pr_err("%s: entry[%d]:(%x, %x) has negative length\n", + __func__, i, ranges->start, ranges->end); + return -EINVAL; + } + + prev = ranges->end; + } + + return 0; +} + +static int intel_shadow_table_check(void) +{ + const i915_reg_t *reg = gen8_shadowed_regs; + unsigned int i; + s32 prev; + + for (i = 0, prev = -1; i < ARRAY_SIZE(gen8_shadowed_regs); i++, reg++) { + u32 offset = i915_mmio_reg_offset(*reg); + + if (prev >= (s32)offset) { + pr_err("%s: entry[%d]:(%x) is before previous (%x)\n", + __func__, i, offset, prev); + return -EINVAL; + } + + prev = offset; + } + + return 0; +} + +int intel_uncore_mock_selftests(void) +{ + struct { + const struct intel_forcewake_range *ranges; + unsigned int num_ranges; + bool is_watertight; + } fw[] = { + { __vlv_fw_ranges, ARRAY_SIZE(__vlv_fw_ranges), false }, + { __chv_fw_ranges, ARRAY_SIZE(__chv_fw_ranges), false }, + { __gen9_fw_ranges, ARRAY_SIZE(__gen9_fw_ranges), true }, + }; + int err, i; + + for (i = 0; i < ARRAY_SIZE(fw); i++) { + err = intel_fw_table_check(fw[i].ranges, + fw[i].num_ranges, + fw[i].is_watertight); + if (err) + return err; + } + + err = intel_shadow_table_check(); + if (err) + return err; + + return 0; +} + +static int intel_uncore_check_forcewake_domains(struct drm_i915_private *dev_priv) +{ +#define FW_RANGE 0x40000 + unsigned long *valid; + u32 offset; + int err; + + if (!HAS_FPGA_DBG_UNCLAIMED(dev_priv) && + !IS_VALLEYVIEW(dev_priv) && + !IS_CHERRYVIEW(dev_priv)) + return 0; + + if (IS_VALLEYVIEW(dev_priv)) /* XXX system lockup! */ + return 0; + + if (IS_BROADWELL(dev_priv)) /* XXX random GPU hang afterwards! */ + return 0; + + valid = kzalloc(BITS_TO_LONGS(FW_RANGE) * sizeof(*valid), + GFP_TEMPORARY); + if (!valid) + return -ENOMEM; + + intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL); + + check_for_unclaimed_mmio(dev_priv); + for (offset = 0; offset < FW_RANGE; offset += 4) { + i915_reg_t reg = { offset }; + + (void)I915_READ_FW(reg); + if (!check_for_unclaimed_mmio(dev_priv)) + set_bit(offset, valid); + } + + intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL); + + err = 0; + for_each_set_bit(offset, valid, FW_RANGE) { + i915_reg_t reg = { offset }; + + intel_uncore_forcewake_reset(dev_priv, false); + check_for_unclaimed_mmio(dev_priv); + + (void)I915_READ(reg); + if (check_for_unclaimed_mmio(dev_priv)) { + pr_err("Unclaimed mmio read to register 0x%04x\n", + offset); + err = -EINVAL; + } + } + + kfree(valid); + return err; +} + +int intel_uncore_live_selftests(struct drm_i915_private *i915) +{ + int err; + + /* Confirm the table we load is still valid */ + err = intel_fw_table_check(i915->uncore.fw_domains_table, + i915->uncore.fw_domains_table_entries, + INTEL_GEN(i915) >= 9); + if (err) + return err; + + err = intel_uncore_check_forcewake_domains(i915); + if (err) + return err; + + return 0; +} diff --git a/drivers/gpu/drm/i915/selftests/mock_context.c b/drivers/gpu/drm/i915/selftests/mock_context.c new file mode 100644 index 000000000000..8d3a90c3f8ac --- /dev/null +++ b/drivers/gpu/drm/i915/selftests/mock_context.c @@ -0,0 +1,78 @@ +/* + * Copyright © 2016 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + */ + +#include "mock_context.h" +#include "mock_gtt.h" + +struct i915_gem_context * +mock_context(struct drm_i915_private *i915, + const char *name) +{ + struct i915_gem_context *ctx; + int ret; + + ctx = kzalloc(sizeof(*ctx), GFP_KERNEL); + if (!ctx) + return NULL; + + kref_init(&ctx->ref); + INIT_LIST_HEAD(&ctx->link); + ctx->i915 = i915; + + ret = ida_simple_get(&i915->context_hw_ida, + 0, MAX_CONTEXT_HW_ID, GFP_KERNEL); + if (ret < 0) + goto err_free; + ctx->hw_id = ret; + + if (name) { + ctx->name = kstrdup(name, GFP_KERNEL); + if (!ctx->name) + goto err_put; + + ctx->ppgtt = mock_ppgtt(i915, name); + if (!ctx->ppgtt) + goto err_put; + } + + return ctx; + +err_free: + kfree(ctx); + return NULL; + +err_put: + i915_gem_context_set_closed(ctx); + i915_gem_context_put(ctx); + return NULL; +} + +void mock_context_close(struct i915_gem_context *ctx) +{ + i915_gem_context_set_closed(ctx); + + i915_ppgtt_close(&ctx->ppgtt->base); + + i915_gem_context_put(ctx); +} diff --git a/drivers/gpu/drm/i915/selftests/mock_context.h b/drivers/gpu/drm/i915/selftests/mock_context.h new file mode 100644 index 000000000000..2427e5c0916a --- /dev/null +++ b/drivers/gpu/drm/i915/selftests/mock_context.h @@ -0,0 +1,34 @@ +/* + * Copyright © 2016 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + */ + +#ifndef __MOCK_CONTEXT_H +#define __MOCK_CONTEXT_H + +struct i915_gem_context * +mock_context(struct drm_i915_private *i915, + const char *name); + +void mock_context_close(struct i915_gem_context *ctx); + +#endif /* !__MOCK_CONTEXT_H */ diff --git a/drivers/gpu/drm/i915/selftests/mock_dmabuf.c b/drivers/gpu/drm/i915/selftests/mock_dmabuf.c new file mode 100644 index 000000000000..99da8f4ef497 --- /dev/null +++ b/drivers/gpu/drm/i915/selftests/mock_dmabuf.c @@ -0,0 +1,176 @@ +/* + * Copyright © 2016 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + */ + +#include "mock_dmabuf.h" + +static struct sg_table *mock_map_dma_buf(struct dma_buf_attachment *attachment, + enum dma_data_direction dir) +{ + struct mock_dmabuf *mock = to_mock(attachment->dmabuf); + struct sg_table *st; + struct scatterlist *sg; + int i, err; + + st = kmalloc(sizeof(*st), GFP_KERNEL); + if (!st) + return ERR_PTR(-ENOMEM); + + err = sg_alloc_table(st, mock->npages, GFP_KERNEL); + if (err) + goto err_free; + + sg = st->sgl; + for (i = 0; i < mock->npages; i++) { + sg_set_page(sg, mock->pages[i], PAGE_SIZE, 0); + sg = sg_next(sg); + } + + if (!dma_map_sg(attachment->dev, st->sgl, st->nents, dir)) { + err = -ENOMEM; + goto err_st; + } + + return st; + +err_st: + sg_free_table(st); +err_free: + kfree(st); + return ERR_PTR(err); +} + +static void mock_unmap_dma_buf(struct dma_buf_attachment *attachment, + struct sg_table *st, + enum dma_data_direction dir) +{ + dma_unmap_sg(attachment->dev, st->sgl, st->nents, dir); + sg_free_table(st); + kfree(st); +} + +static void mock_dmabuf_release(struct dma_buf *dma_buf) +{ + struct mock_dmabuf *mock = to_mock(dma_buf); + int i; + + for (i = 0; i < mock->npages; i++) + put_page(mock->pages[i]); + + kfree(mock); +} + +static void *mock_dmabuf_vmap(struct dma_buf *dma_buf) +{ + struct mock_dmabuf *mock = to_mock(dma_buf); + + return vm_map_ram(mock->pages, mock->npages, 0, PAGE_KERNEL); +} + +static void mock_dmabuf_vunmap(struct dma_buf *dma_buf, void *vaddr) +{ + struct mock_dmabuf *mock = to_mock(dma_buf); + + vm_unmap_ram(vaddr, mock->npages); +} + +static void *mock_dmabuf_kmap_atomic(struct dma_buf *dma_buf, unsigned long page_num) +{ + struct mock_dmabuf *mock = to_mock(dma_buf); + + return kmap_atomic(mock->pages[page_num]); +} + +static void mock_dmabuf_kunmap_atomic(struct dma_buf *dma_buf, unsigned long page_num, void *addr) +{ + kunmap_atomic(addr); +} + +static void *mock_dmabuf_kmap(struct dma_buf *dma_buf, unsigned long page_num) +{ + struct mock_dmabuf *mock = to_mock(dma_buf); + + return kmap(mock->pages[page_num]); +} + +static void mock_dmabuf_kunmap(struct dma_buf *dma_buf, unsigned long page_num, void *addr) +{ + struct mock_dmabuf *mock = to_mock(dma_buf); + + return kunmap(mock->pages[page_num]); +} + +static int mock_dmabuf_mmap(struct dma_buf *dma_buf, struct vm_area_struct *vma) +{ + return -ENODEV; +} + +static const struct dma_buf_ops mock_dmabuf_ops = { + .map_dma_buf = mock_map_dma_buf, + .unmap_dma_buf = mock_unmap_dma_buf, + .release = mock_dmabuf_release, + .kmap = mock_dmabuf_kmap, + .kmap_atomic = mock_dmabuf_kmap_atomic, + .kunmap = mock_dmabuf_kunmap, + .kunmap_atomic = mock_dmabuf_kunmap_atomic, + .mmap = mock_dmabuf_mmap, + .vmap = mock_dmabuf_vmap, + .vunmap = mock_dmabuf_vunmap, +}; + +static struct dma_buf *mock_dmabuf(int npages) +{ + struct mock_dmabuf *mock; + DEFINE_DMA_BUF_EXPORT_INFO(exp_info); + struct dma_buf *dmabuf; + int i; + + mock = kmalloc(sizeof(*mock) + npages * sizeof(struct page *), + GFP_KERNEL); + if (!mock) + return ERR_PTR(-ENOMEM); + + mock->npages = npages; + for (i = 0; i < npages; i++) { + mock->pages[i] = alloc_page(GFP_KERNEL); + if (!mock->pages[i]) + goto err; + } + + exp_info.ops = &mock_dmabuf_ops; + exp_info.size = npages * PAGE_SIZE; + exp_info.flags = O_CLOEXEC; + exp_info.priv = mock; + + dmabuf = dma_buf_export(&exp_info); + if (IS_ERR(dmabuf)) + goto err; + + return dmabuf; + +err: + while (i--) + put_page(mock->pages[i]); + kfree(mock); + return ERR_PTR(-ENOMEM); +} diff --git a/drivers/gpu/drm/i915/selftests/mock_dmabuf.h b/drivers/gpu/drm/i915/selftests/mock_dmabuf.h new file mode 100644 index 000000000000..ec80613159b9 --- /dev/null +++ b/drivers/gpu/drm/i915/selftests/mock_dmabuf.h @@ -0,0 +1,41 @@ + +/* + * Copyright © 2016 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + */ + +#ifndef __MOCK_DMABUF_H__ +#define __MOCK_DMABUF_H__ + +#include <linux/dma-buf.h> + +struct mock_dmabuf { + int npages; + struct page *pages[]; +}; + +static struct mock_dmabuf *to_mock(struct dma_buf *buf) +{ + return buf->priv; +} + +#endif /* !__MOCK_DMABUF_H__ */ diff --git a/drivers/gpu/drm/i915/selftests/mock_drm.c b/drivers/gpu/drm/i915/selftests/mock_drm.c new file mode 100644 index 000000000000..113dec05c7dc --- /dev/null +++ b/drivers/gpu/drm/i915/selftests/mock_drm.c @@ -0,0 +1,54 @@ +/* + * Copyright © 2017 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + */ + +#include "mock_drm.h" + +static inline struct inode fake_inode(struct drm_i915_private *i915) +{ + return (struct inode){ .i_rdev = i915->drm.primary->index }; +} + +struct drm_file *mock_file(struct drm_i915_private *i915) +{ + struct inode inode = fake_inode(i915); + struct file filp = {}; + struct drm_file *file; + int err; + + err = drm_open(&inode, &filp); + if (unlikely(err)) + return ERR_PTR(err); + + file = filp.private_data; + file->authenticated = true; + return file; +} + +void mock_file_free(struct drm_i915_private *i915, struct drm_file *file) +{ + struct inode inode = fake_inode(i915); + struct file filp = { .private_data = file }; + + drm_release(&inode, &filp); +} diff --git a/drivers/gpu/drm/i915/selftests/mock_drm.h b/drivers/gpu/drm/i915/selftests/mock_drm.h new file mode 100644 index 000000000000..b39beee9f8f6 --- /dev/null +++ b/drivers/gpu/drm/i915/selftests/mock_drm.h @@ -0,0 +1,31 @@ +/* + * Copyright © 2017 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + */ + +#ifndef __MOCK_DRM_H +#define __MOCK_DRM_H + +struct drm_file *mock_file(struct drm_i915_private *i915); +void mock_file_free(struct drm_i915_private *i915, struct drm_file *file); + +#endif /* !__MOCK_DRM_H */ diff --git a/drivers/gpu/drm/i915/selftests/mock_engine.c b/drivers/gpu/drm/i915/selftests/mock_engine.c new file mode 100644 index 000000000000..8d5ba037064c --- /dev/null +++ b/drivers/gpu/drm/i915/selftests/mock_engine.c @@ -0,0 +1,207 @@ +/* + * Copyright © 2016 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + */ + +#include "mock_engine.h" +#include "mock_request.h" + +static struct mock_request *first_request(struct mock_engine *engine) +{ + return list_first_entry_or_null(&engine->hw_queue, + struct mock_request, + link); +} + +static void hw_delay_complete(unsigned long data) +{ + struct mock_engine *engine = (typeof(engine))data; + struct mock_request *request; + + spin_lock(&engine->hw_lock); + + request = first_request(engine); + if (request) { + list_del_init(&request->link); + mock_seqno_advance(&engine->base, request->base.global_seqno); + } + + request = first_request(engine); + if (request) + mod_timer(&engine->hw_delay, jiffies + request->delay); + + spin_unlock(&engine->hw_lock); +} + +static int mock_context_pin(struct intel_engine_cs *engine, + struct i915_gem_context *ctx) +{ + i915_gem_context_get(ctx); + return 0; +} + +static void mock_context_unpin(struct intel_engine_cs *engine, + struct i915_gem_context *ctx) +{ + i915_gem_context_put(ctx); +} + +static int mock_request_alloc(struct drm_i915_gem_request *request) +{ + struct mock_request *mock = container_of(request, typeof(*mock), base); + + INIT_LIST_HEAD(&mock->link); + mock->delay = 0; + + request->ring = request->engine->buffer; + return 0; +} + +static int mock_emit_flush(struct drm_i915_gem_request *request, + unsigned int flags) +{ + return 0; +} + +static void mock_emit_breadcrumb(struct drm_i915_gem_request *request, + u32 *flags) +{ +} + +static void mock_submit_request(struct drm_i915_gem_request *request) +{ + struct mock_request *mock = container_of(request, typeof(*mock), base); + struct mock_engine *engine = + container_of(request->engine, typeof(*engine), base); + + i915_gem_request_submit(request); + GEM_BUG_ON(!request->global_seqno); + + spin_lock_irq(&engine->hw_lock); + list_add_tail(&mock->link, &engine->hw_queue); + if (mock->link.prev == &engine->hw_queue) + mod_timer(&engine->hw_delay, jiffies + mock->delay); + spin_unlock_irq(&engine->hw_lock); +} + +static struct intel_ring *mock_ring(struct intel_engine_cs *engine) +{ + const unsigned long sz = roundup_pow_of_two(sizeof(struct intel_ring)); + struct intel_ring *ring; + + ring = kzalloc(sizeof(*ring) + sz, GFP_KERNEL); + if (!ring) + return NULL; + + ring->engine = engine; + ring->size = sz; + ring->effective_size = sz; + ring->vaddr = (void *)(ring + 1); + + INIT_LIST_HEAD(&ring->request_list); + ring->last_retired_head = -1; + intel_ring_update_space(ring); + + return ring; +} + +struct intel_engine_cs *mock_engine(struct drm_i915_private *i915, + const char *name) +{ + struct mock_engine *engine; + static int id; + + engine = kzalloc(sizeof(*engine) + PAGE_SIZE, GFP_KERNEL); + if (!engine) + return NULL; + + engine->base.buffer = mock_ring(&engine->base); + if (!engine->base.buffer) { + kfree(engine); + return NULL; + } + + /* minimal engine setup for requests */ + engine->base.i915 = i915; + engine->base.name = name; + engine->base.id = id++; + engine->base.status_page.page_addr = (void *)(engine + 1); + + engine->base.context_pin = mock_context_pin; + engine->base.context_unpin = mock_context_unpin; + engine->base.request_alloc = mock_request_alloc; + engine->base.emit_flush = mock_emit_flush; + engine->base.emit_breadcrumb = mock_emit_breadcrumb; + engine->base.submit_request = mock_submit_request; + + engine->base.timeline = + &i915->gt.global_timeline.engine[engine->base.id]; + + intel_engine_init_breadcrumbs(&engine->base); + engine->base.breadcrumbs.mock = true; /* prevent touching HW for irqs */ + + /* fake hw queue */ + spin_lock_init(&engine->hw_lock); + setup_timer(&engine->hw_delay, + hw_delay_complete, + (unsigned long)engine); + INIT_LIST_HEAD(&engine->hw_queue); + + return &engine->base; +} + +void mock_engine_flush(struct intel_engine_cs *engine) +{ + struct mock_engine *mock = + container_of(engine, typeof(*mock), base); + struct mock_request *request, *rn; + + del_timer_sync(&mock->hw_delay); + + spin_lock_irq(&mock->hw_lock); + list_for_each_entry_safe(request, rn, &mock->hw_queue, link) { + list_del_init(&request->link); + mock_seqno_advance(&mock->base, request->base.global_seqno); + } + spin_unlock_irq(&mock->hw_lock); +} + +void mock_engine_reset(struct intel_engine_cs *engine) +{ + intel_write_status_page(engine, I915_GEM_HWS_INDEX, 0); +} + +void mock_engine_free(struct intel_engine_cs *engine) +{ + struct mock_engine *mock = + container_of(engine, typeof(*mock), base); + + GEM_BUG_ON(timer_pending(&mock->hw_delay)); + + if (engine->last_retired_context) + engine->context_unpin(engine, engine->last_retired_context); + + intel_engine_fini_breadcrumbs(engine); + + kfree(engine->buffer); + kfree(engine); +} diff --git a/drivers/gpu/drm/i915/selftests/mock_engine.h b/drivers/gpu/drm/i915/selftests/mock_engine.h new file mode 100644 index 000000000000..e5e240216ba3 --- /dev/null +++ b/drivers/gpu/drm/i915/selftests/mock_engine.h @@ -0,0 +1,54 @@ +/* + * Copyright © 2016 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + */ + +#ifndef __MOCK_ENGINE_H__ +#define __MOCK_ENGINE_H__ + +#include <linux/list.h> +#include <linux/spinlock.h> +#include <linux/timer.h> + +#include "../intel_ringbuffer.h" + +struct mock_engine { + struct intel_engine_cs base; + + spinlock_t hw_lock; + struct list_head hw_queue; + struct timer_list hw_delay; +}; + +struct intel_engine_cs *mock_engine(struct drm_i915_private *i915, + const char *name); +void mock_engine_flush(struct intel_engine_cs *engine); +void mock_engine_reset(struct intel_engine_cs *engine); +void mock_engine_free(struct intel_engine_cs *engine); + +static inline void mock_seqno_advance(struct intel_engine_cs *engine, u32 seqno) +{ + intel_write_status_page(engine, I915_GEM_HWS_INDEX, seqno); + intel_engine_wakeup(engine); +} + +#endif /* !__MOCK_ENGINE_H__ */ diff --git a/drivers/gpu/drm/i915/selftests/mock_gem_device.c b/drivers/gpu/drm/i915/selftests/mock_gem_device.c new file mode 100644 index 000000000000..6a8258eacdcb --- /dev/null +++ b/drivers/gpu/drm/i915/selftests/mock_gem_device.c @@ -0,0 +1,226 @@ +/* + * Copyright © 2016 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + */ + +#include <linux/pm_runtime.h> + +#include "mock_engine.h" +#include "mock_context.h" +#include "mock_request.h" +#include "mock_gem_device.h" +#include "mock_gem_object.h" +#include "mock_gtt.h" + +void mock_device_flush(struct drm_i915_private *i915) +{ + struct intel_engine_cs *engine; + enum intel_engine_id id; + + lockdep_assert_held(&i915->drm.struct_mutex); + + for_each_engine(engine, i915, id) + mock_engine_flush(engine); + + i915_gem_retire_requests(i915); +} + +static void mock_device_release(struct drm_device *dev) +{ + struct drm_i915_private *i915 = to_i915(dev); + struct intel_engine_cs *engine; + enum intel_engine_id id; + + mutex_lock(&i915->drm.struct_mutex); + mock_device_flush(i915); + mutex_unlock(&i915->drm.struct_mutex); + + cancel_delayed_work_sync(&i915->gt.retire_work); + cancel_delayed_work_sync(&i915->gt.idle_work); + + mutex_lock(&i915->drm.struct_mutex); + for_each_engine(engine, i915, id) + mock_engine_free(engine); + i915_gem_context_fini(i915); + mutex_unlock(&i915->drm.struct_mutex); + + drain_workqueue(i915->wq); + i915_gem_drain_freed_objects(i915); + + mutex_lock(&i915->drm.struct_mutex); + mock_fini_ggtt(i915); + i915_gem_timeline_fini(&i915->gt.global_timeline); + mutex_unlock(&i915->drm.struct_mutex); + + destroy_workqueue(i915->wq); + + kmem_cache_destroy(i915->dependencies); + kmem_cache_destroy(i915->requests); + kmem_cache_destroy(i915->vmas); + kmem_cache_destroy(i915->objects); + + drm_dev_fini(&i915->drm); + put_device(&i915->drm.pdev->dev); +} + +static struct drm_driver mock_driver = { + .name = "mock", + .driver_features = DRIVER_GEM, + .release = mock_device_release, + + .gem_close_object = i915_gem_close_object, + .gem_free_object_unlocked = i915_gem_free_object, +}; + +static void release_dev(struct device *dev) +{ + struct pci_dev *pdev = to_pci_dev(dev); + + kfree(pdev); +} + +static void mock_retire_work_handler(struct work_struct *work) +{ +} + +static void mock_idle_work_handler(struct work_struct *work) +{ +} + +struct drm_i915_private *mock_gem_device(void) +{ + struct drm_i915_private *i915; + struct intel_engine_cs *engine; + enum intel_engine_id id; + struct pci_dev *pdev; + int err; + + pdev = kzalloc(sizeof(*pdev) + sizeof(*i915), GFP_KERNEL); + if (!pdev) + goto err; + + device_initialize(&pdev->dev); + pdev->dev.release = release_dev; + dev_set_name(&pdev->dev, "mock"); + dma_coerce_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(32)); + + pm_runtime_dont_use_autosuspend(&pdev->dev); + pm_runtime_get_sync(&pdev->dev); + + i915 = (struct drm_i915_private *)(pdev + 1); + pci_set_drvdata(pdev, i915); + + err = drm_dev_init(&i915->drm, &mock_driver, &pdev->dev); + if (err) { + pr_err("Failed to initialise mock GEM device: err=%d\n", err); + goto put_device; + } + i915->drm.pdev = pdev; + i915->drm.dev_private = i915; + + /* Using the global GTT may ask questions about KMS users, so prepare */ + drm_mode_config_init(&i915->drm); + + mkwrite_device_info(i915)->gen = -1; + + spin_lock_init(&i915->mm.object_stat_lock); + + init_waitqueue_head(&i915->gpu_error.wait_queue); + init_waitqueue_head(&i915->gpu_error.reset_queue); + + i915->wq = alloc_ordered_workqueue("mock", 0); + if (!i915->wq) + goto put_device; + + INIT_WORK(&i915->mm.free_work, __i915_gem_free_work); + init_llist_head(&i915->mm.free_list); + INIT_LIST_HEAD(&i915->mm.unbound_list); + INIT_LIST_HEAD(&i915->mm.bound_list); + + ida_init(&i915->context_hw_ida); + + INIT_DELAYED_WORK(&i915->gt.retire_work, mock_retire_work_handler); + INIT_DELAYED_WORK(&i915->gt.idle_work, mock_idle_work_handler); + + i915->gt.awake = true; + + i915->objects = KMEM_CACHE(mock_object, SLAB_HWCACHE_ALIGN); + if (!i915->objects) + goto err_wq; + + i915->vmas = KMEM_CACHE(i915_vma, SLAB_HWCACHE_ALIGN); + if (!i915->vmas) + goto err_objects; + + i915->requests = KMEM_CACHE(mock_request, + SLAB_HWCACHE_ALIGN | + SLAB_RECLAIM_ACCOUNT | + SLAB_DESTROY_BY_RCU); + if (!i915->requests) + goto err_vmas; + + i915->dependencies = KMEM_CACHE(i915_dependency, + SLAB_HWCACHE_ALIGN | + SLAB_RECLAIM_ACCOUNT); + if (!i915->dependencies) + goto err_requests; + + mutex_lock(&i915->drm.struct_mutex); + INIT_LIST_HEAD(&i915->gt.timelines); + err = i915_gem_timeline_init__global(i915); + if (err) { + mutex_unlock(&i915->drm.struct_mutex); + goto err_dependencies; + } + + mock_init_ggtt(i915); + mutex_unlock(&i915->drm.struct_mutex); + + mkwrite_device_info(i915)->ring_mask = BIT(0); + i915->engine[RCS] = mock_engine(i915, "mock"); + if (!i915->engine[RCS]) + goto err_dependencies; + + i915->kernel_context = mock_context(i915, NULL); + if (!i915->kernel_context) + goto err_engine; + + return i915; + +err_engine: + for_each_engine(engine, i915, id) + mock_engine_free(engine); +err_dependencies: + kmem_cache_destroy(i915->dependencies); +err_requests: + kmem_cache_destroy(i915->requests); +err_vmas: + kmem_cache_destroy(i915->vmas); +err_objects: + kmem_cache_destroy(i915->objects); +err_wq: + destroy_workqueue(i915->wq); +put_device: + put_device(&pdev->dev); +err: + return NULL; +} diff --git a/drivers/gpu/drm/i915/selftests/mock_gem_device.h b/drivers/gpu/drm/i915/selftests/mock_gem_device.h new file mode 100644 index 000000000000..4cca4d57f52c --- /dev/null +++ b/drivers/gpu/drm/i915/selftests/mock_gem_device.h @@ -0,0 +1,9 @@ +#ifndef __MOCK_GEM_DEVICE_H__ +#define __MOCK_GEM_DEVICE_H__ + +struct drm_i915_private; + +struct drm_i915_private *mock_gem_device(void); +void mock_device_flush(struct drm_i915_private *i915); + +#endif /* !__MOCK_GEM_DEVICE_H__ */ diff --git a/drivers/gpu/drm/i915/selftests/mock_gem_object.h b/drivers/gpu/drm/i915/selftests/mock_gem_object.h new file mode 100644 index 000000000000..9fbf67321662 --- /dev/null +++ b/drivers/gpu/drm/i915/selftests/mock_gem_object.h @@ -0,0 +1,8 @@ +#ifndef __MOCK_GEM_OBJECT_H__ +#define __MOCK_GEM_OBJECT_H__ + +struct mock_object { + struct drm_i915_gem_object base; +}; + +#endif /* !__MOCK_GEM_OBJECT_H__ */ diff --git a/drivers/gpu/drm/i915/selftests/mock_gtt.c b/drivers/gpu/drm/i915/selftests/mock_gtt.c new file mode 100644 index 000000000000..a61309c7cb3e --- /dev/null +++ b/drivers/gpu/drm/i915/selftests/mock_gtt.c @@ -0,0 +1,138 @@ +/* + * Copyright © 2016 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + */ + +#include "mock_gtt.h" + +static void mock_insert_page(struct i915_address_space *vm, + dma_addr_t addr, + u64 offset, + enum i915_cache_level level, + u32 flags) +{ +} + +static void mock_insert_entries(struct i915_address_space *vm, + struct sg_table *st, + u64 start, + enum i915_cache_level level, u32 flags) +{ +} + +static int mock_bind_ppgtt(struct i915_vma *vma, + enum i915_cache_level cache_level, + u32 flags) +{ + GEM_BUG_ON(flags & I915_VMA_GLOBAL_BIND); + vma->pages = vma->obj->mm.pages; + vma->flags |= I915_VMA_LOCAL_BIND; + return 0; +} + +static void mock_unbind_ppgtt(struct i915_vma *vma) +{ +} + +static void mock_cleanup(struct i915_address_space *vm) +{ +} + +struct i915_hw_ppgtt * +mock_ppgtt(struct drm_i915_private *i915, + const char *name) +{ + struct i915_hw_ppgtt *ppgtt; + + ppgtt = kzalloc(sizeof(*ppgtt), GFP_KERNEL); + if (!ppgtt) + return NULL; + + kref_init(&ppgtt->ref); + ppgtt->base.i915 = i915; + ppgtt->base.total = round_down(U64_MAX, PAGE_SIZE); + ppgtt->base.file = ERR_PTR(-ENODEV); + + INIT_LIST_HEAD(&ppgtt->base.active_list); + INIT_LIST_HEAD(&ppgtt->base.inactive_list); + INIT_LIST_HEAD(&ppgtt->base.unbound_list); + + INIT_LIST_HEAD(&ppgtt->base.global_link); + drm_mm_init(&ppgtt->base.mm, 0, ppgtt->base.total); + i915_gem_timeline_init(i915, &ppgtt->base.timeline, name); + + ppgtt->base.clear_range = nop_clear_range; + ppgtt->base.insert_page = mock_insert_page; + ppgtt->base.insert_entries = mock_insert_entries; + ppgtt->base.bind_vma = mock_bind_ppgtt; + ppgtt->base.unbind_vma = mock_unbind_ppgtt; + ppgtt->base.cleanup = mock_cleanup; + + return ppgtt; +} + +static int mock_bind_ggtt(struct i915_vma *vma, + enum i915_cache_level cache_level, + u32 flags) +{ + int err; + + err = i915_get_ggtt_vma_pages(vma); + if (err) + return err; + + vma->flags |= I915_VMA_GLOBAL_BIND | I915_VMA_LOCAL_BIND; + return 0; +} + +static void mock_unbind_ggtt(struct i915_vma *vma) +{ +} + +void mock_init_ggtt(struct drm_i915_private *i915) +{ + struct i915_ggtt *ggtt = &i915->ggtt; + + INIT_LIST_HEAD(&i915->vm_list); + + ggtt->base.i915 = i915; + + ggtt->mappable_base = 0; + ggtt->mappable_end = 2048 * PAGE_SIZE; + ggtt->base.total = 4096 * PAGE_SIZE; + + ggtt->base.clear_range = nop_clear_range; + ggtt->base.insert_page = mock_insert_page; + ggtt->base.insert_entries = mock_insert_entries; + ggtt->base.bind_vma = mock_bind_ggtt; + ggtt->base.unbind_vma = mock_unbind_ggtt; + ggtt->base.cleanup = mock_cleanup; + + i915_address_space_init(&ggtt->base, i915, "global"); +} + +void mock_fini_ggtt(struct drm_i915_private *i915) +{ + struct i915_ggtt *ggtt = &i915->ggtt; + + i915_address_space_fini(&ggtt->base); +} diff --git a/drivers/gpu/drm/i915/selftests/mock_gtt.h b/drivers/gpu/drm/i915/selftests/mock_gtt.h new file mode 100644 index 000000000000..9a0a833bb545 --- /dev/null +++ b/drivers/gpu/drm/i915/selftests/mock_gtt.h @@ -0,0 +1,35 @@ +/* + * Copyright © 2016 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + */ + +#ifndef __MOCK_GTT_H +#define __MOCK_GTT_H + +void mock_init_ggtt(struct drm_i915_private *i915); +void mock_fini_ggtt(struct drm_i915_private *i915); + +struct i915_hw_ppgtt * +mock_ppgtt(struct drm_i915_private *i915, + const char *name); + +#endif /* !__MOCK_GTT_H */ diff --git a/drivers/gpu/drm/i915/selftests/mock_request.c b/drivers/gpu/drm/i915/selftests/mock_request.c new file mode 100644 index 000000000000..0e8d2e7f8c70 --- /dev/null +++ b/drivers/gpu/drm/i915/selftests/mock_request.c @@ -0,0 +1,63 @@ +/* + * Copyright © 2016 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + */ + +#include "mock_engine.h" +#include "mock_request.h" + +struct drm_i915_gem_request * +mock_request(struct intel_engine_cs *engine, + struct i915_gem_context *context, + unsigned long delay) +{ + struct drm_i915_gem_request *request; + struct mock_request *mock; + + /* NB the i915->requests slab cache is enlarged to fit mock_request */ + request = i915_gem_request_alloc(engine, context); + if (!request) + return NULL; + + mock = container_of(request, typeof(*mock), base); + mock->delay = delay; + + return &mock->base; +} + +bool mock_cancel_request(struct drm_i915_gem_request *request) +{ + struct mock_request *mock = container_of(request, typeof(*mock), base); + struct mock_engine *engine = + container_of(request->engine, typeof(*engine), base); + bool was_queued; + + spin_lock_irq(&engine->hw_lock); + was_queued = !list_empty(&mock->link); + list_del_init(&mock->link); + spin_unlock_irq(&engine->hw_lock); + + if (was_queued) + i915_gem_request_unsubmit(request); + + return was_queued; +} diff --git a/drivers/gpu/drm/i915/selftests/mock_request.h b/drivers/gpu/drm/i915/selftests/mock_request.h new file mode 100644 index 000000000000..4dea74c8e96d --- /dev/null +++ b/drivers/gpu/drm/i915/selftests/mock_request.h @@ -0,0 +1,46 @@ +/* + * Copyright © 2016 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + */ + +#ifndef __MOCK_REQUEST__ +#define __MOCK_REQUEST__ + +#include <linux/list.h> + +#include "../i915_gem_request.h" + +struct mock_request { + struct drm_i915_gem_request base; + + struct list_head link; + unsigned long delay; +}; + +struct drm_i915_gem_request * +mock_request(struct intel_engine_cs *engine, + struct i915_gem_context *context, + unsigned long delay); + +bool mock_cancel_request(struct drm_i915_gem_request *request); + +#endif /* !__MOCK_REQUEST__ */ diff --git a/drivers/gpu/drm/i915/selftests/scatterlist.c b/drivers/gpu/drm/i915/selftests/scatterlist.c new file mode 100644 index 000000000000..eb2cda8e2b9f --- /dev/null +++ b/drivers/gpu/drm/i915/selftests/scatterlist.c @@ -0,0 +1,355 @@ +/* + * Copyright © 2016 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include <linux/prime_numbers.h> +#include <linux/random.h> + +#include "../i915_selftest.h" + +#define PFN_BIAS (1 << 10) + +struct pfn_table { + struct sg_table st; + unsigned long start, end; +}; + +typedef unsigned int (*npages_fn_t)(unsigned long n, + unsigned long count, + struct rnd_state *rnd); + +static noinline int expect_pfn_sg(struct pfn_table *pt, + npages_fn_t npages_fn, + struct rnd_state *rnd, + const char *who, + unsigned long timeout) +{ + struct scatterlist *sg; + unsigned long pfn, n; + + pfn = pt->start; + for_each_sg(pt->st.sgl, sg, pt->st.nents, n) { + struct page *page = sg_page(sg); + unsigned int npages = npages_fn(n, pt->st.nents, rnd); + + if (page_to_pfn(page) != pfn) { + pr_err("%s: %s left pages out of order, expected pfn %lu, found pfn %lu (using for_each_sg)\n", + __func__, who, pfn, page_to_pfn(page)); + return -EINVAL; + } + + if (sg->length != npages * PAGE_SIZE) { + pr_err("%s: %s copied wrong sg length, expected size %lu, found %u (using for_each_sg)\n", + __func__, who, npages * PAGE_SIZE, sg->length); + return -EINVAL; + } + + if (igt_timeout(timeout, "%s timed out\n", who)) + return -EINTR; + + pfn += npages; + } + if (pfn != pt->end) { + pr_err("%s: %s finished on wrong pfn, expected %lu, found %lu\n", + __func__, who, pt->end, pfn); + return -EINVAL; + } + + return 0; +} + +static noinline int expect_pfn_sg_page_iter(struct pfn_table *pt, + const char *who, + unsigned long timeout) +{ + struct sg_page_iter sgiter; + unsigned long pfn; + + pfn = pt->start; + for_each_sg_page(pt->st.sgl, &sgiter, pt->st.nents, 0) { + struct page *page = sg_page_iter_page(&sgiter); + + if (page != pfn_to_page(pfn)) { + pr_err("%s: %s left pages out of order, expected pfn %lu, found pfn %lu (using for_each_sg_page)\n", + __func__, who, pfn, page_to_pfn(page)); + return -EINVAL; + } + + if (igt_timeout(timeout, "%s timed out\n", who)) + return -EINTR; + + pfn++; + } + if (pfn != pt->end) { + pr_err("%s: %s finished on wrong pfn, expected %lu, found %lu\n", + __func__, who, pt->end, pfn); + return -EINVAL; + } + + return 0; +} + +static noinline int expect_pfn_sgtiter(struct pfn_table *pt, + const char *who, + unsigned long timeout) +{ + struct sgt_iter sgt; + struct page *page; + unsigned long pfn; + + pfn = pt->start; + for_each_sgt_page(page, sgt, &pt->st) { + if (page != pfn_to_page(pfn)) { + pr_err("%s: %s left pages out of order, expected pfn %lu, found pfn %lu (using for_each_sgt_page)\n", + __func__, who, pfn, page_to_pfn(page)); + return -EINVAL; + } + + if (igt_timeout(timeout, "%s timed out\n", who)) + return -EINTR; + + pfn++; + } + if (pfn != pt->end) { + pr_err("%s: %s finished on wrong pfn, expected %lu, found %lu\n", + __func__, who, pt->end, pfn); + return -EINVAL; + } + + return 0; +} + +static int expect_pfn_sgtable(struct pfn_table *pt, + npages_fn_t npages_fn, + struct rnd_state *rnd, + const char *who, + unsigned long timeout) +{ + int err; + + err = expect_pfn_sg(pt, npages_fn, rnd, who, timeout); + if (err) + return err; + + err = expect_pfn_sg_page_iter(pt, who, timeout); + if (err) + return err; + + err = expect_pfn_sgtiter(pt, who, timeout); + if (err) + return err; + + return 0; +} + +static unsigned int one(unsigned long n, + unsigned long count, + struct rnd_state *rnd) +{ + return 1; +} + +static unsigned int grow(unsigned long n, + unsigned long count, + struct rnd_state *rnd) +{ + return n + 1; +} + +static unsigned int shrink(unsigned long n, + unsigned long count, + struct rnd_state *rnd) +{ + return count - n; +} + +static unsigned int random(unsigned long n, + unsigned long count, + struct rnd_state *rnd) +{ + return 1 + (prandom_u32_state(rnd) % 1024); +} + +static int alloc_table(struct pfn_table *pt, + unsigned long count, unsigned long max, + npages_fn_t npages_fn, + struct rnd_state *rnd, + int alloc_error) +{ + struct scatterlist *sg; + unsigned long n, pfn; + + if (sg_alloc_table(&pt->st, max, + GFP_KERNEL | __GFP_NORETRY | __GFP_NOWARN)) + return alloc_error; + + /* count should be less than 20 to prevent overflowing sg->length */ + GEM_BUG_ON(overflows_type(count * PAGE_SIZE, sg->length)); + + /* Construct a table where each scatterlist contains different number + * of entries. The idea is to check that we can iterate the individual + * pages from inside the coalesced lists. + */ + pt->start = PFN_BIAS; + pfn = pt->start; + sg = pt->st.sgl; + for (n = 0; n < count; n++) { + unsigned long npages = npages_fn(n, count, rnd); + + /* Nobody expects the Sparse Memmap! */ + if (pfn_to_page(pfn + npages) != pfn_to_page(pfn) + npages) { + sg_free_table(&pt->st); + return -ENOSPC; + } + + if (n) + sg = sg_next(sg); + sg_set_page(sg, pfn_to_page(pfn), npages * PAGE_SIZE, 0); + + GEM_BUG_ON(page_to_pfn(sg_page(sg)) != pfn); + GEM_BUG_ON(sg->length != npages * PAGE_SIZE); + GEM_BUG_ON(sg->offset != 0); + + pfn += npages; + } + sg_mark_end(sg); + pt->st.nents = n; + pt->end = pfn; + + return 0; +} + +static const npages_fn_t npages_funcs[] = { + one, + grow, + shrink, + random, + NULL, +}; + +static int igt_sg_alloc(void *ignored) +{ + IGT_TIMEOUT(end_time); + const unsigned long max_order = 20; /* approximating a 4GiB object */ + struct rnd_state prng; + unsigned long prime; + int alloc_error = -ENOMEM; + + for_each_prime_number(prime, max_order) { + unsigned long size = BIT(prime); + int offset; + + for (offset = -1; offset <= 1; offset++) { + unsigned long sz = size + offset; + const npages_fn_t *npages; + struct pfn_table pt; + int err; + + for (npages = npages_funcs; *npages; npages++) { + prandom_seed_state(&prng, + i915_selftest.random_seed); + err = alloc_table(&pt, sz, sz, *npages, &prng, + alloc_error); + if (err == -ENOSPC) + break; + if (err) + return err; + + prandom_seed_state(&prng, + i915_selftest.random_seed); + err = expect_pfn_sgtable(&pt, *npages, &prng, + "sg_alloc_table", + end_time); + sg_free_table(&pt.st); + if (err) + return err; + } + } + + /* Test at least one continuation before accepting oom */ + if (size > SG_MAX_SINGLE_ALLOC) + alloc_error = -ENOSPC; + } + + return 0; +} + +static int igt_sg_trim(void *ignored) +{ + IGT_TIMEOUT(end_time); + const unsigned long max = PAGE_SIZE; /* not prime! */ + struct pfn_table pt; + unsigned long prime; + int alloc_error = -ENOMEM; + + for_each_prime_number(prime, max) { + const npages_fn_t *npages; + int err; + + for (npages = npages_funcs; *npages; npages++) { + struct rnd_state prng; + + prandom_seed_state(&prng, i915_selftest.random_seed); + err = alloc_table(&pt, prime, max, *npages, &prng, + alloc_error); + if (err == -ENOSPC) + break; + if (err) + return err; + + if (i915_sg_trim(&pt.st)) { + if (pt.st.orig_nents != prime || + pt.st.nents != prime) { + pr_err("i915_sg_trim failed (nents %u, orig_nents %u), expected %lu\n", + pt.st.nents, pt.st.orig_nents, prime); + err = -EINVAL; + } else { + prandom_seed_state(&prng, + i915_selftest.random_seed); + err = expect_pfn_sgtable(&pt, + *npages, &prng, + "i915_sg_trim", + end_time); + } + } + sg_free_table(&pt.st); + if (err) + return err; + } + + /* Test at least one continuation before accepting oom */ + if (prime > SG_MAX_SINGLE_ALLOC) + alloc_error = -ENOSPC; + } + + return 0; +} + +int scatterlist_mock_selftests(void) +{ + static const struct i915_subtest tests[] = { + SUBTEST(igt_sg_alloc), + SUBTEST(igt_sg_trim), + }; + + return i915_subtests(tests, NULL); +} |