diff options
Diffstat (limited to 'drivers/gpu/drm/i915/intel_pm.c')
-rw-r--r-- | drivers/gpu/drm/i915/intel_pm.c | 354 |
1 files changed, 257 insertions, 97 deletions
diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index 555b896d2bda..eadc15cddbeb 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -67,7 +67,7 @@ static void skl_init_clock_gating(struct drm_device *dev) gen9_init_clock_gating(dev); - if (INTEL_REVID(dev) == SKL_REVID_A0) { + if (INTEL_REVID(dev) <= SKL_REVID_B0) { /* * WaDisableSDEUnitClockGating:skl * WaSetGAPSunitClckGateDisable:skl @@ -75,6 +75,10 @@ static void skl_init_clock_gating(struct drm_device *dev) I915_WRITE(GEN8_UCGCTL6, I915_READ(GEN8_UCGCTL6) | GEN8_GAPSUNIT_CLOCK_GATE_DISABLE | GEN8_SDEUNIT_CLOCK_GATE_DISABLE); + + /* WaDisableVFUnitClockGating:skl */ + I915_WRITE(GEN6_UCGCTL2, I915_READ(GEN6_UCGCTL2) | + GEN6_VFUNIT_CLOCK_GATE_DISABLE); } if (INTEL_REVID(dev) <= SKL_REVID_D0) { @@ -84,8 +88,7 @@ static void skl_init_clock_gating(struct drm_device *dev) /* WaDisableChickenBitTSGBarrierAckForFFSliceCS:skl */ I915_WRITE(FF_SLICE_CS_CHICKEN2, - I915_READ(FF_SLICE_CS_CHICKEN2) | - GEN9_TSG_BARRIER_ACK_DISABLE); + _MASKED_BIT_ENABLE(GEN9_TSG_BARRIER_ACK_DISABLE)); } if (INTEL_REVID(dev) <= SKL_REVID_E0) @@ -94,6 +97,26 @@ static void skl_init_clock_gating(struct drm_device *dev) GEN8_LQSC_RO_PERF_DIS); } +static void bxt_init_clock_gating(struct drm_device *dev) +{ + struct drm_i915_private *dev_priv = dev->dev_private; + + gen9_init_clock_gating(dev); + + /* + * FIXME: + * GEN8_SDEUNIT_CLOCK_GATE_DISABLE applies on A0 only. + * GEN8_HDCUNIT_CLOCK_GATE_DISABLE_HDCREQ applies on 3x6 GT SKUs only. + */ + /* WaDisableSDEUnitClockGating:bxt */ + I915_WRITE(GEN8_UCGCTL6, I915_READ(GEN8_UCGCTL6) | + GEN8_SDEUNIT_CLOCK_GATE_DISABLE | + GEN8_HDCUNIT_CLOCK_GATE_DISABLE_HDCREQ); + + /* FIXME: apply on A0 only */ + I915_WRITE(TILECTL, I915_READ(TILECTL) | TILECTL_TLBPF); +} + static void i915_pineview_get_mem_freq(struct drm_device *dev) { struct drm_i915_private *dev_priv = dev->dev_private; @@ -1792,7 +1815,7 @@ hsw_compute_linetime_wm(struct drm_device *dev, struct drm_crtc *crtc) linetime = DIV_ROUND_CLOSEST(mode->crtc_htotal * 1000 * 8, mode->crtc_clock); ips_linetime = DIV_ROUND_CLOSEST(mode->crtc_htotal * 1000 * 8, - intel_ddi_get_cdclk_freq(dev_priv)); + dev_priv->display.get_display_clock_speed(dev_priv->dev)); return PIPE_WM_LINETIME_IPS_LINETIME(ips_linetime) | PIPE_WM_LINETIME_TIME(linetime); @@ -1923,7 +1946,7 @@ static void intel_fixup_cur_wm_latency(struct drm_device *dev, uint16_t wm[5]) int ilk_wm_max_level(const struct drm_device *dev) { /* how many WM levels are we expecting */ - if (IS_GEN9(dev)) + if (INTEL_INFO(dev)->gen >= 9) return 7; else if (IS_HASWELL(dev) || IS_BROADWELL(dev)) return 4; @@ -2536,6 +2559,7 @@ static bool ilk_disable_lp_wm(struct drm_device *dev) */ #define SKL_DDB_SIZE 896 /* in blocks */ +#define BXT_DDB_SIZE 512 static void skl_ddb_get_pipe_allocation_limits(struct drm_device *dev, @@ -2554,7 +2578,10 @@ skl_ddb_get_pipe_allocation_limits(struct drm_device *dev, return; } - ddb_size = SKL_DDB_SIZE; + if (IS_BROXTON(dev)) + ddb_size = BXT_DDB_SIZE; + else + ddb_size = SKL_DDB_SIZE; ddb_size -= 4; /* 4 blocks for bypass path allocation */ @@ -2610,8 +2637,18 @@ void skl_ddb_get_hw_state(struct drm_i915_private *dev_priv, } static unsigned int -skl_plane_relative_data_rate(const struct intel_plane_wm_parameters *p) +skl_plane_relative_data_rate(const struct intel_plane_wm_parameters *p, int y) { + + /* for planar format */ + if (p->y_bytes_per_pixel) { + if (y) /* y-plane data rate */ + return p->horiz_pixels * p->vert_pixels * p->y_bytes_per_pixel; + else /* uv-plane data rate */ + return (p->horiz_pixels/2) * (p->vert_pixels/2) * p->bytes_per_pixel; + } + + /* for packed formats */ return p->horiz_pixels * p->vert_pixels * p->bytes_per_pixel; } @@ -2634,7 +2671,10 @@ skl_get_total_relative_data_rate(struct intel_crtc *intel_crtc, if (!p->enabled) continue; - total_data_rate += skl_plane_relative_data_rate(p); + total_data_rate += skl_plane_relative_data_rate(p, 0); /* packed/uv */ + if (p->y_bytes_per_pixel) { + total_data_rate += skl_plane_relative_data_rate(p, 1); /* y-plane */ + } } return total_data_rate; @@ -2653,6 +2693,7 @@ skl_allocate_pipe_ddb(struct drm_crtc *crtc, struct skl_ddb_entry *alloc = &ddb->pipe[pipe]; uint16_t alloc_size, start, cursor_blocks; uint16_t minimum[I915_MAX_PLANES]; + uint16_t y_minimum[I915_MAX_PLANES]; unsigned int total_data_rate; int plane; @@ -2681,6 +2722,8 @@ skl_allocate_pipe_ddb(struct drm_crtc *crtc, minimum[plane] = 8; alloc_size -= minimum[plane]; + y_minimum[plane] = p->y_bytes_per_pixel ? 8 : 0; + alloc_size -= y_minimum[plane]; } /* @@ -2694,16 +2737,17 @@ skl_allocate_pipe_ddb(struct drm_crtc *crtc, start = alloc->start; for (plane = 0; plane < intel_num_planes(intel_crtc); plane++) { const struct intel_plane_wm_parameters *p; - unsigned int data_rate; - uint16_t plane_blocks; + unsigned int data_rate, y_data_rate; + uint16_t plane_blocks, y_plane_blocks = 0; p = ¶ms->plane[plane]; if (!p->enabled) continue; - data_rate = skl_plane_relative_data_rate(p); + data_rate = skl_plane_relative_data_rate(p, 0); /* + * allocation for (packed formats) or (uv-plane part of planar format): * promote the expression to 64 bits to avoid overflowing, the * result is < available as data_rate / total_data_rate < 1 */ @@ -2715,6 +2759,22 @@ skl_allocate_pipe_ddb(struct drm_crtc *crtc, ddb->plane[pipe][plane].end = start + plane_blocks; start += plane_blocks; + + /* + * allocation for y_plane part of planar format: + */ + if (p->y_bytes_per_pixel) { + y_data_rate = skl_plane_relative_data_rate(p, 1); + y_plane_blocks = y_minimum[plane]; + y_plane_blocks += div_u64((uint64_t)alloc_size * y_data_rate, + total_data_rate); + + ddb->y_plane[pipe][plane].start = start; + ddb->y_plane[pipe][plane].end = start + y_plane_blocks; + + start += y_plane_blocks; + } + } } @@ -2827,13 +2887,18 @@ static void skl_compute_wm_pipe_parameters(struct drm_crtc *crtc, p->pixel_rate = skl_pipe_pixel_rate(intel_crtc->config); fb = crtc->primary->state->fb; + /* For planar: Bpp is for uv plane, y_Bpp is for y plane */ if (fb) { p->plane[0].enabled = true; - p->plane[0].bytes_per_pixel = fb->bits_per_pixel / 8; + p->plane[0].bytes_per_pixel = fb->pixel_format == DRM_FORMAT_NV12 ? + drm_format_plane_cpp(fb->pixel_format, 1) : fb->bits_per_pixel / 8; + p->plane[0].y_bytes_per_pixel = fb->pixel_format == DRM_FORMAT_NV12 ? + drm_format_plane_cpp(fb->pixel_format, 0) : 0; p->plane[0].tiling = fb->modifier[0]; } else { p->plane[0].enabled = false; p->plane[0].bytes_per_pixel = 0; + p->plane[0].y_bytes_per_pixel = 0; p->plane[0].tiling = DRM_FORMAT_MOD_NONE; } p->plane[0].horiz_pixels = intel_crtc->config->pipe_src_w; @@ -2841,6 +2906,7 @@ static void skl_compute_wm_pipe_parameters(struct drm_crtc *crtc, p->plane[0].rotation = crtc->primary->state->rotation; fb = crtc->cursor->state->fb; + p->cursor.y_bytes_per_pixel = 0; if (fb) { p->cursor.enabled = true; p->cursor.bytes_per_pixel = fb->bits_per_pixel / 8; @@ -2876,22 +2942,25 @@ static bool skl_compute_plane_wm(const struct drm_i915_private *dev_priv, uint32_t plane_bytes_per_line, plane_blocks_per_line; uint32_t res_blocks, res_lines; uint32_t selected_result; + uint8_t bytes_per_pixel; if (latency == 0 || !p->active || !p_params->enabled) return false; + bytes_per_pixel = p_params->y_bytes_per_pixel ? + p_params->y_bytes_per_pixel : + p_params->bytes_per_pixel; method1 = skl_wm_method1(p->pixel_rate, - p_params->bytes_per_pixel, + bytes_per_pixel, latency); method2 = skl_wm_method2(p->pixel_rate, p->pipe_htotal, p_params->horiz_pixels, - p_params->bytes_per_pixel, + bytes_per_pixel, p_params->tiling, latency); - plane_bytes_per_line = p_params->horiz_pixels * - p_params->bytes_per_pixel; + plane_bytes_per_line = p_params->horiz_pixels * bytes_per_pixel; plane_blocks_per_line = DIV_ROUND_UP(plane_bytes_per_line, 512); if (p_params->tiling == I915_FORMAT_MOD_Y_TILED || @@ -3108,10 +3177,14 @@ static void skl_write_wm_values(struct drm_i915_private *dev_priv, new->plane_trans[pipe][i]); I915_WRITE(CUR_WM_TRANS(pipe), new->cursor_trans[pipe]); - for (i = 0; i < intel_num_planes(crtc); i++) + for (i = 0; i < intel_num_planes(crtc); i++) { skl_ddb_entry_write(dev_priv, PLANE_BUF_CFG(pipe, i), &new->ddb.plane[pipe][i]); + skl_ddb_entry_write(dev_priv, + PLANE_NV12_BUF_CFG(pipe, i), + &new->ddb.y_plane[pipe][i]); + } skl_ddb_entry_write(dev_priv, CUR_BUF_CFG(pipe), &new->ddb.cursor[pipe]); @@ -3176,7 +3249,7 @@ static void skl_flush_wm_values(struct drm_i915_private *dev_priv, { struct drm_device *dev = dev_priv->dev; struct skl_ddb_allocation *cur_ddb, *new_ddb; - bool reallocated[I915_MAX_PIPES] = {false, false, false}; + bool reallocated[I915_MAX_PIPES] = {}; struct intel_crtc *crtc; enum pipe pipe; @@ -3269,6 +3342,7 @@ static bool skl_update_pipe_wm(struct drm_crtc *crtc, return false; intel_crtc->wm.skl_active = *pipe_wm; + return true; } @@ -3362,8 +3436,16 @@ skl_update_sprite_wm(struct drm_plane *plane, struct drm_crtc *crtc, intel_plane->wm.scaled = scaled; intel_plane->wm.horiz_pixels = sprite_width; intel_plane->wm.vert_pixels = sprite_height; - intel_plane->wm.bytes_per_pixel = pixel_size; intel_plane->wm.tiling = DRM_FORMAT_MOD_NONE; + + /* For planar: Bpp is for UV plane, y_Bpp is for Y plane */ + intel_plane->wm.bytes_per_pixel = + (fb && fb->pixel_format == DRM_FORMAT_NV12) ? + drm_format_plane_cpp(plane->state->fb->pixel_format, 1) : pixel_size; + intel_plane->wm.y_bytes_per_pixel = + (fb && fb->pixel_format == DRM_FORMAT_NV12) ? + drm_format_plane_cpp(plane->state->fb->pixel_format, 0) : 0; + /* * Framebuffer can be NULL on plane disable, but it does not * matter for watermarks if we assume no tiling in that case. @@ -3928,6 +4010,8 @@ static void gen6_set_rps_thresholds(struct drm_i915_private *dev_priv, u8 val) GEN6_RP_DOWN_IDLE_AVG); dev_priv->rps.power = new_power; + dev_priv->rps.up_threshold = threshold_up; + dev_priv->rps.down_threshold = threshold_down; dev_priv->rps.last_adj = 0; } @@ -3999,8 +4083,11 @@ static void valleyview_set_rps(struct drm_device *dev, u8 val) "Odd GPU freq value\n")) val &= ~1; - if (val != dev_priv->rps.cur_freq) + if (val != dev_priv->rps.cur_freq) { vlv_punit_write(dev_priv, PUNIT_REG_GPU_FREQ_REQ, val); + if (!IS_CHERRYVIEW(dev_priv)) + gen6_set_rps_thresholds(dev_priv, val); + } I915_WRITE(GEN6_PMINTRMSK, gen6_rps_pm_mask(dev_priv, val)); @@ -4008,50 +4095,25 @@ static void valleyview_set_rps(struct drm_device *dev, u8 val) trace_intel_gpu_freq_change(intel_gpu_freq(dev_priv, val)); } -/* vlv_set_rps_idle: Set the frequency to Rpn if Gfx clocks are down +/* vlv_set_rps_idle: Set the frequency to idle, if Gfx clocks are down * * * If Gfx is Idle, then - * 1. Mask Turbo interrupts - * 2. Bring up Gfx clock - * 3. Change the freq to Rpn and wait till P-Unit updates freq - * 4. Clear the Force GFX CLK ON bit so that Gfx can down - * 5. Unmask Turbo interrupts + * 1. Forcewake Media well. + * 2. Request idle freq. + * 3. Release Forcewake of Media well. */ static void vlv_set_rps_idle(struct drm_i915_private *dev_priv) { - struct drm_device *dev = dev_priv->dev; u32 val = dev_priv->rps.idle_freq; - /* CHV and latest VLV don't need to force the gfx clock */ - if (IS_CHERRYVIEW(dev) || dev->pdev->revision >= 0xd) { - valleyview_set_rps(dev_priv->dev, val); - return; - } - - /* - * When we are idle. Drop to min voltage state. - */ - if (dev_priv->rps.cur_freq <= val) return; - /* Mask turbo interrupt so that they will not come in between */ - I915_WRITE(GEN6_PMINTRMSK, - gen6_sanitize_rps_pm_mask(dev_priv, ~0)); - - vlv_force_gfx_clock(dev_priv, true); - - dev_priv->rps.cur_freq = val; - - vlv_punit_write(dev_priv, PUNIT_REG_GPU_FREQ_REQ, val); - - if (wait_for(((vlv_punit_read(dev_priv, PUNIT_REG_GPU_FREQ_STS)) - & GENFREQSTATUS) == 0, 100)) - DRM_ERROR("timed out waiting for Punit\n"); - - vlv_force_gfx_clock(dev_priv, false); - - I915_WRITE(GEN6_PMINTRMSK, gen6_rps_pm_mask(dev_priv, val)); + /* Wake up the media well, as that takes a lot less + * power than the Render well. */ + intel_uncore_forcewake_get(dev_priv, FORCEWAKE_MEDIA); + valleyview_set_rps(dev_priv->dev, val); + intel_uncore_forcewake_put(dev_priv, FORCEWAKE_MEDIA); } void gen6_rps_busy(struct drm_i915_private *dev_priv) @@ -4080,21 +4142,47 @@ void gen6_rps_idle(struct drm_i915_private *dev_priv) I915_WRITE(GEN6_PMINTRMSK, 0xffffffff); } mutex_unlock(&dev_priv->rps.hw_lock); + + spin_lock(&dev_priv->rps.client_lock); + while (!list_empty(&dev_priv->rps.clients)) + list_del_init(dev_priv->rps.clients.next); + spin_unlock(&dev_priv->rps.client_lock); } -void gen6_rps_boost(struct drm_i915_private *dev_priv) +void gen6_rps_boost(struct drm_i915_private *dev_priv, + struct intel_rps_client *rps, + unsigned long submitted) { - u32 val; + /* This is intentionally racy! We peek at the state here, then + * validate inside the RPS worker. + */ + if (!(dev_priv->mm.busy && + dev_priv->rps.enabled && + dev_priv->rps.cur_freq < dev_priv->rps.max_freq_softlimit)) + return; - mutex_lock(&dev_priv->rps.hw_lock); - val = dev_priv->rps.max_freq_softlimit; - if (dev_priv->rps.enabled && - dev_priv->mm.busy && - dev_priv->rps.cur_freq < val) { - intel_set_rps(dev_priv->dev, val); - dev_priv->rps.last_adj = 0; + /* Force a RPS boost (and don't count it against the client) if + * the GPU is severely congested. + */ + if (rps && time_after(jiffies, submitted + DRM_I915_THROTTLE_JIFFIES)) + rps = NULL; + + spin_lock(&dev_priv->rps.client_lock); + if (rps == NULL || list_empty(&rps->link)) { + spin_lock_irq(&dev_priv->irq_lock); + if (dev_priv->rps.interrupts_enabled) { + dev_priv->rps.client_boost = true; + queue_work(dev_priv->wq, &dev_priv->rps.work); + } + spin_unlock_irq(&dev_priv->irq_lock); + + if (rps != NULL) { + list_add(&rps->link, &dev_priv->rps.clients); + rps->boosts++; + } else + dev_priv->rps.boosts++; } - mutex_unlock(&dev_priv->rps.hw_lock); + spin_unlock(&dev_priv->rps.client_lock); } void intel_set_rps(struct drm_device *dev, u8 val) @@ -4248,8 +4336,8 @@ static void gen6_init_rps_frequencies(struct drm_device *dev) if (dev_priv->rps.min_freq_softlimit == 0) { if (IS_HASWELL(dev) || IS_BROADWELL(dev)) dev_priv->rps.min_freq_softlimit = - /* max(RPe, 450 MHz) */ - max(dev_priv->rps.efficient_freq, (u8) 9); + max_t(int, dev_priv->rps.efficient_freq, + intel_freq_opcode(dev_priv, 450)); else dev_priv->rps.min_freq_softlimit = dev_priv->rps.min_freq; @@ -4323,8 +4411,13 @@ static void gen9_enable_rc6(struct drm_device *dev) GEN6_RC_CTL_EI_MODE(1) | rc6_mask); - /* 3b: Enable Coarse Power Gating only when RC6 is enabled */ - I915_WRITE(GEN9_PG_ENABLE, (rc6_mask & GEN6_RC_CTL_RC6_ENABLE) ? 3 : 0); + /* + * 3b: Enable Coarse Power Gating only when RC6 is enabled. + * WaDisableRenderPowerGating:skl,bxt - Render PG need to be disabled with RC6. + */ + I915_WRITE(GEN9_PG_ENABLE, (rc6_mask & GEN6_RC_CTL_RC6_ENABLE) ? + GEN9_MEDIA_PG_ENABLE : 0); + intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL); @@ -4663,24 +4756,6 @@ static int cherryview_rps_guar_freq(struct drm_i915_private *dev_priv) return rp1; } -static int cherryview_rps_min_freq(struct drm_i915_private *dev_priv) -{ - struct drm_device *dev = dev_priv->dev; - u32 val, rpn; - - if (dev->pdev->revision >= 0x20) { - val = vlv_punit_read(dev_priv, FB_GFX_FMIN_AT_VMIN_FUSE); - rpn = ((val >> FB_GFX_FMIN_AT_VMIN_FUSE_SHIFT) & - FB_GFX_FREQ_FUSE_MASK); - } else { /* For pre-production hardware */ - val = vlv_punit_read(dev_priv, PUNIT_GPU_STATUS_REG); - rpn = ((val >> PUNIT_GPU_STATIS_GFX_MIN_FREQ_SHIFT) & - PUNIT_GPU_STATUS_GFX_MIN_FREQ_MASK); - } - - return rpn; -} - static int valleyview_rps_guar_freq(struct drm_i915_private *dev_priv) { u32 val, rp1; @@ -4887,9 +4962,9 @@ static void cherryview_init_gt_powersave(struct drm_device *dev) mutex_lock(&dev_priv->rps.hw_lock); - mutex_lock(&dev_priv->dpio_lock); + mutex_lock(&dev_priv->sb_lock); val = vlv_cck_read(dev_priv, CCK_FUSE_REG); - mutex_unlock(&dev_priv->dpio_lock); + mutex_unlock(&dev_priv->sb_lock); switch ((val >> 2) & 0x7) { case 0: @@ -4932,7 +5007,8 @@ static void cherryview_init_gt_powersave(struct drm_device *dev) intel_gpu_freq(dev_priv, dev_priv->rps.rp1_freq), dev_priv->rps.rp1_freq); - dev_priv->rps.min_freq = cherryview_rps_min_freq(dev_priv); + /* PUnit validated range is only [RPe, RP0] */ + dev_priv->rps.min_freq = dev_priv->rps.efficient_freq; DRM_DEBUG_DRIVER("min GPU freq: %d MHz (%u)\n", intel_gpu_freq(dev_priv, dev_priv->rps.min_freq), dev_priv->rps.min_freq); @@ -4994,8 +5070,8 @@ static void cherryview_enable_rps(struct drm_device *dev) I915_WRITE(RING_MAX_IDLE(ring->mmio_base), 10); I915_WRITE(GEN6_RC_SLEEP, 0); - /* TO threshold set to 1750 us ( 0x557 * 1.28 us) */ - I915_WRITE(GEN6_RC6_THRESHOLD, 0x557); + /* TO threshold set to 500 us ( 0x186 * 1.28 us) */ + I915_WRITE(GEN6_RC6_THRESHOLD, 0x186); /* allows RC6 residency counter to work */ I915_WRITE(VLV_COUNTER_CONTROL, @@ -5030,6 +5106,12 @@ static void cherryview_enable_rps(struct drm_device *dev) GEN6_RP_UP_BUSY_AVG | GEN6_RP_DOWN_IDLE_AVG); + /* Setting Fixed Bias */ + val = VLV_OVERRIDE_EN | + VLV_SOC_TDP_EN | + CHV_BIAS_CPU_50_SOC_50; + vlv_punit_write(dev_priv, VLV_TURBO_SOC_OVERRIDE, val); + val = vlv_punit_read(dev_priv, PUNIT_REG_GPU_FREQ_STS); /* RPS code assumes GPLL is used */ @@ -5114,6 +5196,12 @@ static void valleyview_enable_rps(struct drm_device *dev) I915_WRITE(GEN6_RC_CONTROL, rc6_mode); + /* Setting Fixed Bias */ + val = VLV_OVERRIDE_EN | + VLV_SOC_TDP_EN | + VLV_BIAS_CPU_125_SOC_875; + vlv_punit_write(dev_priv, VLV_TURBO_SOC_OVERRIDE, val); + val = vlv_punit_read(dev_priv, PUNIT_REG_GPU_FREQ_STS); /* RPS code assumes GPLL is used */ @@ -5796,13 +5884,15 @@ static void ibx_init_clock_gating(struct drm_device *dev) static void g4x_disable_trickle_feed(struct drm_device *dev) { struct drm_i915_private *dev_priv = dev->dev_private; - int pipe; + enum pipe pipe; for_each_pipe(dev_priv, pipe) { I915_WRITE(DSPCNTR(pipe), I915_READ(DSPCNTR(pipe)) | DISPPLANE_TRICKLE_FEED_DISABLE); - intel_flush_primary_plane(dev_priv, pipe); + + I915_WRITE(DSPSURF(pipe), I915_READ(DSPSURF(pipe))); + POSTING_READ(DSPSURF(pipe)); } } @@ -6092,10 +6182,9 @@ static void broadwell_init_clock_gating(struct drm_device *dev) { struct drm_i915_private *dev_priv = dev->dev_private; enum pipe pipe; + uint32_t misccpctl; - I915_WRITE(WM3_LP_ILK, 0); - I915_WRITE(WM2_LP_ILK, 0); - I915_WRITE(WM1_LP_ILK, 0); + ilk_init_lp_watermarks(dev); /* WaSwitchSolVfFArbitrationPriority:bdw */ I915_WRITE(GAM_ECOCHK, I915_READ(GAM_ECOCHK) | HSW_ECOCHK_ARB_PRIO_SOL); @@ -6124,6 +6213,22 @@ static void broadwell_init_clock_gating(struct drm_device *dev) I915_WRITE(GEN8_UCGCTL6, I915_READ(GEN8_UCGCTL6) | GEN8_SDEUNIT_CLOCK_GATE_DISABLE); + /* + * WaProgramL3SqcReg1Default:bdw + * WaTempDisableDOPClkGating:bdw + */ + misccpctl = I915_READ(GEN7_MISCCPCTL); + I915_WRITE(GEN7_MISCCPCTL, misccpctl & ~GEN7_DOP_CLOCK_GATE_ENABLE); + I915_WRITE(GEN8_L3SQCREG1, BDW_WA_L3SQCREG1_DEFAULT); + I915_WRITE(GEN7_MISCCPCTL, misccpctl); + + /* + * WaGttCachingOffByDefault:bdw + * GTT cache may not work with big pages, so if those + * are ever enabled GTT cache may need to be disabled. + */ + I915_WRITE(HSW_GTT_CACHE_EN, GTT_CACHE_EN_ALL); + lpt_init_clock_gating(dev); } @@ -6399,6 +6504,12 @@ static void cherryview_init_clock_gating(struct drm_device *dev) /* WaDisableSDEUnitClockGating:chv */ I915_WRITE(GEN8_UCGCTL6, I915_READ(GEN8_UCGCTL6) | GEN8_SDEUNIT_CLOCK_GATE_DISABLE); + + /* + * GTT cache may not work with big pages, so if those + * are ever enabled GTT cache may need to be disabled. + */ + I915_WRITE(HSW_GTT_CACHE_EN, GTT_CACHE_EN_ALL); } static void g4x_init_clock_gating(struct drm_device *dev) @@ -6542,7 +6653,12 @@ void intel_init_pm(struct drm_device *dev) if (INTEL_INFO(dev)->gen >= 9) { skl_setup_wm_latency(dev); - dev_priv->display.init_clock_gating = skl_init_clock_gating; + if (IS_BROXTON(dev)) + dev_priv->display.init_clock_gating = + bxt_init_clock_gating; + else if (IS_SKYLAKE(dev)) + dev_priv->display.init_clock_gating = + skl_init_clock_gating; dev_priv->display.update_wm = skl_update_wm; dev_priv->display.update_sprite_wm = skl_update_sprite_wm; } else if (HAS_PCH_SPLIT(dev)) { @@ -6760,14 +6876,58 @@ int intel_freq_opcode(struct drm_i915_private *dev_priv, int val) return val / GT_FREQUENCY_MULTIPLIER; } +struct request_boost { + struct work_struct work; + struct drm_i915_gem_request *req; +}; + +static void __intel_rps_boost_work(struct work_struct *work) +{ + struct request_boost *boost = container_of(work, struct request_boost, work); + struct drm_i915_gem_request *req = boost->req; + + if (!i915_gem_request_completed(req, true)) + gen6_rps_boost(to_i915(req->ring->dev), NULL, + req->emitted_jiffies); + + i915_gem_request_unreference__unlocked(req); + kfree(boost); +} + +void intel_queue_rps_boost_for_request(struct drm_device *dev, + struct drm_i915_gem_request *req) +{ + struct request_boost *boost; + + if (req == NULL || INTEL_INFO(dev)->gen < 6) + return; + + if (i915_gem_request_completed(req, true)) + return; + + boost = kmalloc(sizeof(*boost), GFP_ATOMIC); + if (boost == NULL) + return; + + i915_gem_request_reference(req); + boost->req = req; + + INIT_WORK(&boost->work, __intel_rps_boost_work); + queue_work(to_i915(dev)->wq, &boost->work); +} + void intel_pm_setup(struct drm_device *dev) { struct drm_i915_private *dev_priv = dev->dev_private; mutex_init(&dev_priv->rps.hw_lock); + spin_lock_init(&dev_priv->rps.client_lock); INIT_DELAYED_WORK(&dev_priv->rps.delayed_resume_work, intel_gen6_powersave_work); + INIT_LIST_HEAD(&dev_priv->rps.clients); + INIT_LIST_HEAD(&dev_priv->rps.semaphores.link); + INIT_LIST_HEAD(&dev_priv->rps.mmioflips.link); dev_priv->pm.suspended = false; } |