diff options
Diffstat (limited to 'drivers/gpu/drm/i915/intel_pm.c')
-rw-r--r-- | drivers/gpu/drm/i915/intel_pm.c | 508 |
1 files changed, 328 insertions, 180 deletions
diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index c8f744c418f0..45f71e6dc544 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -345,6 +345,16 @@ bool intel_fbc_enabled(struct drm_device *dev) return dev_priv->display.fbc_enabled(dev); } +void gen8_fbc_sw_flush(struct drm_device *dev, u32 value) +{ + struct drm_i915_private *dev_priv = dev->dev_private; + + if (!IS_GEN8(dev)) + return; + + I915_WRITE(MSG_FBC_REND_STATE, value); +} + static void intel_fbc_work_fn(struct work_struct *__work) { struct intel_fbc_work *work = @@ -581,6 +591,12 @@ void intel_update_fbc(struct drm_device *dev) DRM_DEBUG_KMS("framebuffer not tiled or fenced, disabling compression\n"); goto out_disable; } + if (INTEL_INFO(dev)->gen <= 4 && !IS_G4X(dev) && + to_intel_plane(crtc->primary)->rotation != BIT(DRM_ROTATE_0)) { + if (set_no_fbc_reason(dev_priv, FBC_UNSUPPORTED_MODE)) + DRM_DEBUG_KMS("Rotation unsupported, disabling\n"); + goto out_disable; + } /* If the kernel debugger is active, always disable compression */ if (in_dbg_master()) @@ -856,7 +872,7 @@ void intel_set_memory_cxsr(struct drm_i915_private *dev_priv, bool enable) * A value of 5us seems to be a good balance; safe for very low end * platforms but not overly aggressive on lower latency configs. */ -static const int latency_ns = 5000; +static const int pessimal_latency_ns = 5000; static int i9xx_get_fifo_size(struct drm_device *dev, int plane) { @@ -985,13 +1001,20 @@ static const struct intel_watermark_params i915_wm_info = { .guard_size = 2, .cacheline_size = I915_FIFO_LINE_SIZE, }; -static const struct intel_watermark_params i830_wm_info = { +static const struct intel_watermark_params i830_a_wm_info = { .fifo_size = I855GM_FIFO_SIZE, .max_wm = I915_MAX_WM, .default_wm = 1, .guard_size = 2, .cacheline_size = I830_FIFO_LINE_SIZE, }; +static const struct intel_watermark_params i830_bc_wm_info = { + .fifo_size = I855GM_FIFO_SIZE, + .max_wm = I915_MAX_WM/2, + .default_wm = 1, + .guard_size = 2, + .cacheline_size = I830_FIFO_LINE_SIZE, +}; static const struct intel_watermark_params i845_wm_info = { .fifo_size = I830_FIFO_SIZE, .max_wm = I915_MAX_WM, @@ -1364,14 +1387,14 @@ static void valleyview_update_wm(struct drm_crtc *crtc) vlv_update_drain_latency(crtc); if (g4x_compute_wm0(dev, PIPE_A, - &valleyview_wm_info, latency_ns, - &valleyview_cursor_wm_info, latency_ns, + &valleyview_wm_info, pessimal_latency_ns, + &valleyview_cursor_wm_info, pessimal_latency_ns, &planea_wm, &cursora_wm)) enabled |= 1 << PIPE_A; if (g4x_compute_wm0(dev, PIPE_B, - &valleyview_wm_info, latency_ns, - &valleyview_cursor_wm_info, latency_ns, + &valleyview_wm_info, pessimal_latency_ns, + &valleyview_cursor_wm_info, pessimal_latency_ns, &planeb_wm, &cursorb_wm)) enabled |= 1 << PIPE_B; @@ -1430,20 +1453,20 @@ static void cherryview_update_wm(struct drm_crtc *crtc) vlv_update_drain_latency(crtc); if (g4x_compute_wm0(dev, PIPE_A, - &valleyview_wm_info, latency_ns, - &valleyview_cursor_wm_info, latency_ns, + &valleyview_wm_info, pessimal_latency_ns, + &valleyview_cursor_wm_info, pessimal_latency_ns, &planea_wm, &cursora_wm)) enabled |= 1 << PIPE_A; if (g4x_compute_wm0(dev, PIPE_B, - &valleyview_wm_info, latency_ns, - &valleyview_cursor_wm_info, latency_ns, + &valleyview_wm_info, pessimal_latency_ns, + &valleyview_cursor_wm_info, pessimal_latency_ns, &planeb_wm, &cursorb_wm)) enabled |= 1 << PIPE_B; if (g4x_compute_wm0(dev, PIPE_C, - &valleyview_wm_info, latency_ns, - &valleyview_cursor_wm_info, latency_ns, + &valleyview_wm_info, pessimal_latency_ns, + &valleyview_cursor_wm_info, pessimal_latency_ns, &planec_wm, &cursorc_wm)) enabled |= 1 << PIPE_C; @@ -1536,14 +1559,14 @@ static void g4x_update_wm(struct drm_crtc *crtc) bool cxsr_enabled; if (g4x_compute_wm0(dev, PIPE_A, - &g4x_wm_info, latency_ns, - &g4x_cursor_wm_info, latency_ns, + &g4x_wm_info, pessimal_latency_ns, + &g4x_cursor_wm_info, pessimal_latency_ns, &planea_wm, &cursora_wm)) enabled |= 1 << PIPE_A; if (g4x_compute_wm0(dev, PIPE_B, - &g4x_wm_info, latency_ns, - &g4x_cursor_wm_info, latency_ns, + &g4x_wm_info, pessimal_latency_ns, + &g4x_cursor_wm_info, pessimal_latency_ns, &planeb_wm, &cursorb_wm)) enabled |= 1 << PIPE_B; @@ -1673,7 +1696,7 @@ static void i9xx_update_wm(struct drm_crtc *unused_crtc) else if (!IS_GEN2(dev)) wm_info = &i915_wm_info; else - wm_info = &i830_wm_info; + wm_info = &i830_a_wm_info; fifo_size = dev_priv->display.get_fifo_size(dev, 0); crtc = intel_get_crtc_for_plane(dev, 0); @@ -1686,10 +1709,16 @@ static void i9xx_update_wm(struct drm_crtc *unused_crtc) adjusted_mode = &to_intel_crtc(crtc)->config.adjusted_mode; planea_wm = intel_calculate_wm(adjusted_mode->crtc_clock, wm_info, fifo_size, cpp, - latency_ns); + pessimal_latency_ns); enabled = crtc; - } else + } else { planea_wm = fifo_size - wm_info->guard_size; + if (planea_wm > (long)wm_info->max_wm) + planea_wm = wm_info->max_wm; + } + + if (IS_GEN2(dev)) + wm_info = &i830_bc_wm_info; fifo_size = dev_priv->display.get_fifo_size(dev, 1); crtc = intel_get_crtc_for_plane(dev, 1); @@ -1702,13 +1731,16 @@ static void i9xx_update_wm(struct drm_crtc *unused_crtc) adjusted_mode = &to_intel_crtc(crtc)->config.adjusted_mode; planeb_wm = intel_calculate_wm(adjusted_mode->crtc_clock, wm_info, fifo_size, cpp, - latency_ns); + pessimal_latency_ns); if (enabled == NULL) enabled = crtc; else enabled = NULL; - } else + } else { planeb_wm = fifo_size - wm_info->guard_size; + if (planeb_wm > (long)wm_info->max_wm) + planeb_wm = wm_info->max_wm; + } DRM_DEBUG_KMS("FIFO watermarks - A: %d, B: %d\n", planea_wm, planeb_wm); @@ -1795,7 +1827,7 @@ static void i845_update_wm(struct drm_crtc *unused_crtc) planea_wm = intel_calculate_wm(adjusted_mode->crtc_clock, &i845_wm_info, dev_priv->display.get_fifo_size(dev, 0), - 4, latency_ns); + 4, pessimal_latency_ns); fwater_lo = I915_READ(FW_BLC) & ~0xfff; fwater_lo |= (3<<8) | planea_wm; @@ -2242,7 +2274,6 @@ int ilk_wm_max_level(const struct drm_device *dev) else return 2; } - static void intel_print_wm_latency(struct drm_device *dev, const char *name, const uint16_t wm[5]) @@ -2648,7 +2679,7 @@ static struct intel_pipe_wm *ilk_find_best_result(struct drm_device *dev, #define WM_DIRTY_FBC (1 << 24) #define WM_DIRTY_DDB (1 << 25) -static unsigned int ilk_compute_wm_dirty(struct drm_device *dev, +static unsigned int ilk_compute_wm_dirty(struct drm_i915_private *dev_priv, const struct ilk_wm_values *old, const struct ilk_wm_values *new) { @@ -2656,7 +2687,7 @@ static unsigned int ilk_compute_wm_dirty(struct drm_device *dev, enum pipe pipe; int wm_lp; - for_each_pipe(pipe) { + for_each_pipe(dev_priv, pipe) { if (old->wm_linetime[pipe] != new->wm_linetime[pipe]) { dirty |= WM_DIRTY_LINETIME(pipe); /* Must disable LP1+ watermarks too */ @@ -2742,7 +2773,7 @@ static void ilk_write_wm_values(struct drm_i915_private *dev_priv, unsigned int dirty; uint32_t val; - dirty = ilk_compute_wm_dirty(dev, previous, results); + dirty = ilk_compute_wm_dirty(dev_priv, previous, results); if (!dirty) return; @@ -3211,6 +3242,9 @@ static void gen6_set_rps_thresholds(struct drm_i915_private *dev_priv, u8 val) { int new_power; + if (dev_priv->rps.is_bdw_sw_turbo) + return; + new_power = dev_priv->rps.power; switch (dev_priv->rps.power) { case LOW_POWER: @@ -3418,8 +3452,11 @@ void gen6_rps_idle(struct drm_i915_private *dev_priv) valleyview_set_rps(dev_priv->dev, dev_priv->rps.min_freq_softlimit); else if (IS_VALLEYVIEW(dev)) vlv_set_rps_idle(dev_priv); - else + else if (!dev_priv->rps.is_bdw_sw_turbo + || atomic_read(&dev_priv->rps.sw_turbo.flip_received)){ gen6_set_rps(dev_priv->dev, dev_priv->rps.min_freq_softlimit); + } + dev_priv->rps.last_adj = 0; } mutex_unlock(&dev_priv->rps.hw_lock); @@ -3433,8 +3470,11 @@ void gen6_rps_boost(struct drm_i915_private *dev_priv) if (dev_priv->rps.enabled) { if (IS_VALLEYVIEW(dev)) valleyview_set_rps(dev_priv->dev, dev_priv->rps.max_freq_softlimit); - else + else if (!dev_priv->rps.is_bdw_sw_turbo + || atomic_read(&dev_priv->rps.sw_turbo.flip_received)){ gen6_set_rps(dev_priv->dev, dev_priv->rps.max_freq_softlimit); + } + dev_priv->rps.last_adj = 0; } mutex_unlock(&dev_priv->rps.hw_lock); @@ -3453,6 +3493,10 @@ void valleyview_set_rps(struct drm_device *dev, u8 val) dev_priv->rps.cur_freq, vlv_gpu_freq(dev_priv, val), val); + if (WARN_ONCE(IS_CHERRYVIEW(dev) && (val & 1), + "Odd GPU freq value\n")) + val &= ~1; + if (val != dev_priv->rps.cur_freq) vlv_punit_write(dev_priv, PUNIT_REG_GPU_FREQ_REQ, val); @@ -3465,21 +3509,26 @@ void valleyview_set_rps(struct drm_device *dev, u8 val) static void gen8_disable_rps_interrupts(struct drm_device *dev) { struct drm_i915_private *dev_priv = dev->dev_private; + if (IS_BROADWELL(dev) && dev_priv->rps.is_bdw_sw_turbo){ + if (atomic_read(&dev_priv->rps.sw_turbo.flip_received)) + del_timer(&dev_priv->rps.sw_turbo.flip_timer); + dev_priv-> rps.is_bdw_sw_turbo = false; + } else { + I915_WRITE(GEN6_PMINTRMSK, ~GEN8_PMINTR_REDIRECT_TO_NON_DISP); + I915_WRITE(GEN8_GT_IER(2), I915_READ(GEN8_GT_IER(2)) & + ~dev_priv->pm_rps_events); + /* Complete PM interrupt masking here doesn't race with the rps work + * item again unmasking PM interrupts because that is using a different + * register (GEN8_GT_IMR(2)) to mask PM interrupts. The only risk is in + * leaving stale bits in GEN8_GT_IIR(2) and GEN8_GT_IMR(2) which + * gen8_enable_rps will clean up. */ - I915_WRITE(GEN6_PMINTRMSK, ~GEN8_PMINTR_REDIRECT_TO_NON_DISP); - I915_WRITE(GEN8_GT_IER(2), I915_READ(GEN8_GT_IER(2)) & - ~dev_priv->pm_rps_events); - /* Complete PM interrupt masking here doesn't race with the rps work - * item again unmasking PM interrupts because that is using a different - * register (GEN8_GT_IMR(2)) to mask PM interrupts. The only risk is in - * leaving stale bits in GEN8_GT_IIR(2) and GEN8_GT_IMR(2) which - * gen8_enable_rps will clean up. */ - - spin_lock_irq(&dev_priv->irq_lock); - dev_priv->rps.pm_iir = 0; - spin_unlock_irq(&dev_priv->irq_lock); + spin_lock_irq(&dev_priv->irq_lock); + dev_priv->rps.pm_iir = 0; + spin_unlock_irq(&dev_priv->irq_lock); - I915_WRITE(GEN8_GT_IIR(2), dev_priv->pm_rps_events); + I915_WRITE(GEN8_GT_IIR(2), dev_priv->pm_rps_events); + } } static void gen6_disable_rps_interrupts(struct drm_device *dev) @@ -3527,8 +3576,14 @@ static void valleyview_disable_rps(struct drm_device *dev) { struct drm_i915_private *dev_priv = dev->dev_private; + /* we're doing forcewake before Disabling RC6, + * This what the BIOS expects when going into suspend */ + gen6_gt_force_wake_get(dev_priv, FORCEWAKE_ALL); + I915_WRITE(GEN6_RC_CONTROL, 0); + gen6_gt_force_wake_put(dev_priv, FORCEWAKE_ALL); + gen6_disable_rps_interrupts(dev); } @@ -3631,13 +3686,111 @@ static void parse_rp_state_cap(struct drm_i915_private *dev_priv, u32 rp_state_c dev_priv->rps.min_freq_softlimit = dev_priv->rps.min_freq; } +static void bdw_sw_calculate_freq(struct drm_device *dev, + struct intel_rps_bdw_cal *c, u32 *cur_time, u32 *c0) +{ + struct drm_i915_private *dev_priv = dev->dev_private; + u64 busy = 0; + u32 busyness_pct = 0; + u32 elapsed_time = 0; + u16 new_freq = 0; + + if (!c || !cur_time || !c0) + return; + + if (0 == c->last_c0) + goto out; + + /* Check Evaluation interval */ + elapsed_time = *cur_time - c->last_ts; + if (elapsed_time < c->eval_interval) + return; + + mutex_lock(&dev_priv->rps.hw_lock); + + /* + * c0 unit in 32*1.28 usec, elapsed_time unit in 1 usec. + * Whole busyness_pct calculation should be + * busy = ((u64)(*c0 - c->last_c0) << 5 << 7) / 100; + * busyness_pct = (u32)(busy * 100 / elapsed_time); + * The final formula is to simplify CPU calculation + */ + busy = (u64)(*c0 - c->last_c0) << 12; + do_div(busy, elapsed_time); + busyness_pct = (u32)busy; + + if (c->is_up && busyness_pct >= c->it_threshold_pct) + new_freq = (u16)dev_priv->rps.cur_freq + 3; + if (!c->is_up && busyness_pct <= c->it_threshold_pct) + new_freq = (u16)dev_priv->rps.cur_freq - 1; + + /* Adjust to new frequency busyness and compare with threshold */ + if (0 != new_freq) { + if (new_freq > dev_priv->rps.max_freq_softlimit) + new_freq = dev_priv->rps.max_freq_softlimit; + else if (new_freq < dev_priv->rps.min_freq_softlimit) + new_freq = dev_priv->rps.min_freq_softlimit; + + gen6_set_rps(dev, new_freq); + } + + mutex_unlock(&dev_priv->rps.hw_lock); + +out: + c->last_c0 = *c0; + c->last_ts = *cur_time; +} + +static void gen8_set_frequency_RP0(struct work_struct *work) +{ + struct intel_rps_bdw_turbo *p_bdw_turbo = + container_of(work, struct intel_rps_bdw_turbo, work_max_freq); + struct intel_gen6_power_mgmt *p_power_mgmt = + container_of(p_bdw_turbo, struct intel_gen6_power_mgmt, sw_turbo); + struct drm_i915_private *dev_priv = + container_of(p_power_mgmt, struct drm_i915_private, rps); + + mutex_lock(&dev_priv->rps.hw_lock); + gen6_set_rps(dev_priv->dev, dev_priv->rps.rp0_freq); + mutex_unlock(&dev_priv->rps.hw_lock); +} + +static void flip_active_timeout_handler(unsigned long var) +{ + struct drm_i915_private *dev_priv = (struct drm_i915_private *) var; + + del_timer(&dev_priv->rps.sw_turbo.flip_timer); + atomic_set(&dev_priv->rps.sw_turbo.flip_received, false); + + queue_work(dev_priv->wq, &dev_priv->rps.sw_turbo.work_max_freq); +} + +void bdw_software_turbo(struct drm_device *dev) +{ + struct drm_i915_private *dev_priv = dev->dev_private; + + u32 current_time = I915_READ(TIMESTAMP_CTR); /* unit in usec */ + u32 current_c0 = I915_READ(MCHBAR_PCU_C0); /* unit in 32*1.28 usec */ + + bdw_sw_calculate_freq(dev, &dev_priv->rps.sw_turbo.up, + ¤t_time, ¤t_c0); + bdw_sw_calculate_freq(dev, &dev_priv->rps.sw_turbo.down, + ¤t_time, ¤t_c0); +} + static void gen8_enable_rps(struct drm_device *dev) { struct drm_i915_private *dev_priv = dev->dev_private; struct intel_engine_cs *ring; uint32_t rc6_mask = 0, rp_state_cap; + uint32_t threshold_up_pct, threshold_down_pct; + uint32_t ei_up, ei_down; /* up and down evaluation interval */ + u32 rp_ctl_flag; int unused; + /* Use software Turbo for BDW */ + dev_priv->rps.is_bdw_sw_turbo = IS_BROADWELL(dev); + /* 1a: Software RC state - RC0 */ I915_WRITE(GEN6_RC_STATE, 0); @@ -3681,35 +3834,74 @@ static void gen8_enable_rps(struct drm_device *dev) HSW_FREQUENCY(dev_priv->rps.rp1_freq)); I915_WRITE(GEN6_RC_VIDEO_FREQ, HSW_FREQUENCY(dev_priv->rps.rp1_freq)); - /* NB: Docs say 1s, and 1000000 - which aren't equivalent */ - I915_WRITE(GEN6_RP_DOWN_TIMEOUT, 100000000 / 128); /* 1 second timeout */ + ei_up = 84480; /* 84.48ms */ + ei_down = 448000; + threshold_up_pct = 90; /* x percent busy */ + threshold_down_pct = 70; + + if (dev_priv->rps.is_bdw_sw_turbo) { + dev_priv->rps.sw_turbo.up.it_threshold_pct = threshold_up_pct; + dev_priv->rps.sw_turbo.up.eval_interval = ei_up; + dev_priv->rps.sw_turbo.up.is_up = true; + dev_priv->rps.sw_turbo.up.last_ts = 0; + dev_priv->rps.sw_turbo.up.last_c0 = 0; + + dev_priv->rps.sw_turbo.down.it_threshold_pct = threshold_down_pct; + dev_priv->rps.sw_turbo.down.eval_interval = ei_down; + dev_priv->rps.sw_turbo.down.is_up = false; + dev_priv->rps.sw_turbo.down.last_ts = 0; + dev_priv->rps.sw_turbo.down.last_c0 = 0; + + /* Start the timer to track if flip comes*/ + dev_priv->rps.sw_turbo.timeout = 200*1000; /* in us */ + + init_timer(&dev_priv->rps.sw_turbo.flip_timer); + dev_priv->rps.sw_turbo.flip_timer.function = flip_active_timeout_handler; + dev_priv->rps.sw_turbo.flip_timer.data = (unsigned long) dev_priv; + dev_priv->rps.sw_turbo.flip_timer.expires = + usecs_to_jiffies(dev_priv->rps.sw_turbo.timeout) + jiffies; + add_timer(&dev_priv->rps.sw_turbo.flip_timer); + INIT_WORK(&dev_priv->rps.sw_turbo.work_max_freq, gen8_set_frequency_RP0); + + atomic_set(&dev_priv->rps.sw_turbo.flip_received, true); + } else { + /* NB: Docs say 1s, and 1000000 - which aren't equivalent + * 1 second timeout*/ + I915_WRITE(GEN6_RP_DOWN_TIMEOUT, FREQ_1_28_US(1000000)); - /* Docs recommend 900MHz, and 300 MHz respectively */ - I915_WRITE(GEN6_RP_INTERRUPT_LIMITS, - dev_priv->rps.max_freq_softlimit << 24 | - dev_priv->rps.min_freq_softlimit << 16); + /* Docs recommend 900MHz, and 300 MHz respectively */ + I915_WRITE(GEN6_RP_INTERRUPT_LIMITS, + dev_priv->rps.max_freq_softlimit << 24 | + dev_priv->rps.min_freq_softlimit << 16); - I915_WRITE(GEN6_RP_UP_THRESHOLD, 7600000 / 128); /* 76ms busyness per EI, 90% */ - I915_WRITE(GEN6_RP_DOWN_THRESHOLD, 31300000 / 128); /* 313ms busyness per EI, 70%*/ - I915_WRITE(GEN6_RP_UP_EI, 66000); /* 84.48ms, XXX: random? */ - I915_WRITE(GEN6_RP_DOWN_EI, 350000); /* 448ms, XXX: random? */ + I915_WRITE(GEN6_RP_UP_THRESHOLD, + FREQ_1_28_US(ei_up * threshold_up_pct / 100)); + I915_WRITE(GEN6_RP_DOWN_THRESHOLD, + FREQ_1_28_US(ei_down * threshold_down_pct / 100)); + I915_WRITE(GEN6_RP_UP_EI, + FREQ_1_28_US(ei_up)); + I915_WRITE(GEN6_RP_DOWN_EI, + FREQ_1_28_US(ei_down)); - I915_WRITE(GEN6_RP_IDLE_HYSTERSIS, 10); + I915_WRITE(GEN6_RP_IDLE_HYSTERSIS, 10); + } /* 5: Enable RPS */ - I915_WRITE(GEN6_RP_CONTROL, - GEN6_RP_MEDIA_TURBO | - GEN6_RP_MEDIA_HW_NORMAL_MODE | - GEN6_RP_MEDIA_IS_GFX | - GEN6_RP_ENABLE | - GEN6_RP_UP_BUSY_AVG | - GEN6_RP_DOWN_IDLE_AVG); - - /* 6: Ring frequency + overclocking (our driver does this later */ - + rp_ctl_flag = GEN6_RP_MEDIA_TURBO | + GEN6_RP_MEDIA_HW_NORMAL_MODE | + GEN6_RP_MEDIA_IS_GFX | + GEN6_RP_UP_BUSY_AVG | + GEN6_RP_DOWN_IDLE_AVG; + if (!dev_priv->rps.is_bdw_sw_turbo) + rp_ctl_flag |= GEN6_RP_ENABLE; + + I915_WRITE(GEN6_RP_CONTROL, rp_ctl_flag); + + /* 6: Ring frequency + overclocking + * (our driver does this later */ gen6_set_rps(dev, (I915_READ(GEN6_GT_PERF_STATUS) & 0xff00) >> 8); - - gen8_enable_rps_interrupts(dev); + if (!dev_priv->rps.is_bdw_sw_turbo) + gen8_enable_rps_interrupts(dev); gen6_gt_force_wake_put(dev_priv, FORCEWAKE_ALL); } @@ -4084,11 +4276,27 @@ static void valleyview_cleanup_pctx(struct drm_device *dev) static void valleyview_init_gt_powersave(struct drm_device *dev) { struct drm_i915_private *dev_priv = dev->dev_private; + u32 val; valleyview_setup_pctx(dev); mutex_lock(&dev_priv->rps.hw_lock); + val = vlv_punit_read(dev_priv, PUNIT_REG_GPU_FREQ_STS); + switch ((val >> 6) & 3) { + case 0: + case 1: + dev_priv->mem_freq = 800; + break; + case 2: + dev_priv->mem_freq = 1066; + break; + case 3: + dev_priv->mem_freq = 1333; + break; + } + DRM_DEBUG_DRIVER("DDR speed: %d MHz", dev_priv->mem_freq); + dev_priv->rps.max_freq = valleyview_rps_max_freq(dev_priv); dev_priv->rps.rp0_freq = dev_priv->rps.max_freq; DRM_DEBUG_DRIVER("max GPU freq: %d MHz (%u)\n", @@ -4123,11 +4331,38 @@ static void valleyview_init_gt_powersave(struct drm_device *dev) static void cherryview_init_gt_powersave(struct drm_device *dev) { struct drm_i915_private *dev_priv = dev->dev_private; + u32 val; cherryview_setup_pctx(dev); mutex_lock(&dev_priv->rps.hw_lock); + val = vlv_punit_read(dev_priv, CCK_FUSE_REG); + switch ((val >> 2) & 0x7) { + case 0: + case 1: + dev_priv->rps.cz_freq = 200; + dev_priv->mem_freq = 1600; + break; + case 2: + dev_priv->rps.cz_freq = 267; + dev_priv->mem_freq = 1600; + break; + case 3: + dev_priv->rps.cz_freq = 333; + dev_priv->mem_freq = 2000; + break; + case 4: + dev_priv->rps.cz_freq = 320; + dev_priv->mem_freq = 1600; + break; + case 5: + dev_priv->rps.cz_freq = 400; + dev_priv->mem_freq = 1600; + break; + } + DRM_DEBUG_DRIVER("DDR speed: %d MHz", dev_priv->mem_freq); + dev_priv->rps.max_freq = cherryview_rps_max_freq(dev_priv); dev_priv->rps.rp0_freq = dev_priv->rps.max_freq; DRM_DEBUG_DRIVER("max GPU freq: %d MHz (%u)\n", @@ -4149,6 +4384,12 @@ static void cherryview_init_gt_powersave(struct drm_device *dev) vlv_gpu_freq(dev_priv, dev_priv->rps.min_freq), dev_priv->rps.min_freq); + WARN_ONCE((dev_priv->rps.max_freq | + dev_priv->rps.efficient_freq | + dev_priv->rps.rp1_freq | + dev_priv->rps.min_freq) & 1, + "Odd GPU freq values\n"); + /* Preserve min/max settings in case of re-init */ if (dev_priv->rps.max_freq_softlimit == 0) dev_priv->rps.max_freq_softlimit = dev_priv->rps.max_freq; @@ -5134,6 +5375,8 @@ static void intel_gen6_powersave_work(struct work_struct *work) rps.delayed_resume_work.work); struct drm_device *dev = dev_priv->dev; + dev_priv->rps.is_bdw_sw_turbo = false; + mutex_lock(&dev_priv->rps.hw_lock); if (IS_CHERRYVIEW(dev)) { @@ -5207,7 +5450,7 @@ static void g4x_disable_trickle_feed(struct drm_device *dev) struct drm_i915_private *dev_priv = dev->dev_private; int pipe; - for_each_pipe(pipe) { + for_each_pipe(dev_priv, pipe) { I915_WRITE(DSPCNTR(pipe), I915_READ(DSPCNTR(pipe)) | DISPPLANE_TRICKLE_FEED_DISABLE); @@ -5322,7 +5565,7 @@ static void cpt_init_clock_gating(struct drm_device *dev) /* The below fixes the weird display corruption, a few pixels shifted * downward, on (only) LVDS of some HP laptops with IVY. */ - for_each_pipe(pipe) { + for_each_pipe(dev_priv, pipe) { val = I915_READ(TRANS_CHICKEN2(pipe)); val |= TRANS_CHICKEN2_TIMING_OVERRIDE; val &= ~TRANS_CHICKEN2_FDI_POLARITY_REVERSED; @@ -5334,7 +5577,7 @@ static void cpt_init_clock_gating(struct drm_device *dev) I915_WRITE(TRANS_CHICKEN2(pipe), val); } /* WADP0ClockGatingDisable */ - for_each_pipe(pipe) { + for_each_pipe(dev_priv, pipe) { I915_WRITE(TRANS_CHICKEN1(pipe), TRANS_CHICKEN1_DP0UNIT_GC_DISABLE); } @@ -5502,7 +5745,7 @@ static void lpt_suspend_hw(struct drm_device *dev) } } -static void gen8_init_clock_gating(struct drm_device *dev) +static void broadwell_init_clock_gating(struct drm_device *dev) { struct drm_i915_private *dev_priv = dev->dev_private; enum pipe pipe; @@ -5514,37 +5757,12 @@ static void gen8_init_clock_gating(struct drm_device *dev) /* FIXME(BDW): Check all the w/a, some might only apply to * pre-production hw. */ - /* WaDisablePartialInstShootdown:bdw */ - I915_WRITE(GEN8_ROW_CHICKEN, - _MASKED_BIT_ENABLE(PARTIAL_INSTRUCTION_SHOOTDOWN_DISABLE)); - - /* WaDisableThreadStallDopClockGating:bdw */ - /* FIXME: Unclear whether we really need this on production bdw. */ - I915_WRITE(GEN8_ROW_CHICKEN, - _MASKED_BIT_ENABLE(STALL_DOP_GATING_DISABLE)); - /* - * This GEN8_CENTROID_PIXEL_OPT_DIS W/A is only needed for - * pre-production hardware - */ - I915_WRITE(HALF_SLICE_CHICKEN3, - _MASKED_BIT_ENABLE(GEN8_CENTROID_PIXEL_OPT_DIS)); - I915_WRITE(HALF_SLICE_CHICKEN3, - _MASKED_BIT_ENABLE(GEN8_SAMPLER_POWER_BYPASS_DIS)); I915_WRITE(GAMTARBMODE, _MASKED_BIT_ENABLE(ARB_MODE_BWGTLB_DISABLE)); I915_WRITE(_3D_CHICKEN3, _MASKED_BIT_ENABLE(_3D_CHICKEN_SDE_LIMIT_FIFO_POLY_DEPTH(2))); - I915_WRITE(COMMON_SLICE_CHICKEN2, - _MASKED_BIT_ENABLE(GEN8_CSC2_SBE_VUE_CACHE_CONSERVATIVE)); - - I915_WRITE(GEN7_HALF_SLICE_CHICKEN1, - _MASKED_BIT_ENABLE(GEN7_SINGLE_SUBSCAN_DISPATCH_ENABLE)); - - /* WaDisableDopClockGating:bdw May not be needed for production */ - I915_WRITE(GEN7_ROW_CHICKEN2, - _MASKED_BIT_ENABLE(DOP_CLOCK_GATING_DISABLE)); /* WaSwitchSolVfFArbitrationPriority:bdw */ I915_WRITE(GAM_ECOCHK, I915_READ(GAM_ECOCHK) | HSW_ECOCHK_ARB_PRIO_SOL); @@ -5554,37 +5772,18 @@ static void gen8_init_clock_gating(struct drm_device *dev) I915_READ(CHICKEN_PAR1_1) | DPA_MASK_VBLANK_SRD); /* WaPsrDPRSUnmaskVBlankInSRD:bdw */ - for_each_pipe(pipe) { + for_each_pipe(dev_priv, pipe) { I915_WRITE(CHICKEN_PIPESL_1(pipe), I915_READ(CHICKEN_PIPESL_1(pipe)) | BDW_DPRS_MASK_VBLANK_SRD); } - /* Use Force Non-Coherent whenever executing a 3D context. This is a - * workaround for for a possible hang in the unlikely event a TLB - * invalidation occurs during a PSD flush. - */ - I915_WRITE(HDC_CHICKEN0, - I915_READ(HDC_CHICKEN0) | - _MASKED_BIT_ENABLE(HDC_FORCE_NON_COHERENT)); - /* WaVSRefCountFullforceMissDisable:bdw */ /* WaDSRefCountFullforceMissDisable:bdw */ I915_WRITE(GEN7_FF_THREAD_MODE, I915_READ(GEN7_FF_THREAD_MODE) & ~(GEN8_FF_DS_REF_CNT_FFME | GEN7_FF_VS_REF_CNT_FFME)); - /* - * BSpec recommends 8x4 when MSAA is used, - * however in practice 16x4 seems fastest. - * - * Note that PS/WM thread counts depend on the WIZ hashing - * disable bit, which we don't touch here, but it's good - * to keep in mind (see 3DSTATE_PS and 3DSTATE_WM). - */ - I915_WRITE(GEN7_GT_MODE, - GEN6_WIZ_HASHING_MASK | GEN6_WIZ_HASHING_16x4); - I915_WRITE(GEN6_RC_SLEEP_PSMI_CONTROL, _MASKED_BIT_ENABLE(GEN8_RC_SEMA_IDLE_MSG_DISABLE)); @@ -5592,9 +5791,7 @@ static void gen8_init_clock_gating(struct drm_device *dev) I915_WRITE(GEN8_UCGCTL6, I915_READ(GEN8_UCGCTL6) | GEN8_SDEUNIT_CLOCK_GATE_DISABLE); - /* Wa4x4STCOptimizationDisable:bdw */ - I915_WRITE(CACHE_MODE_1, - _MASKED_BIT_ENABLE(GEN8_4x4_STC_OPTIMIZATION_DISABLE)); + lpt_init_clock_gating(dev); } static void haswell_init_clock_gating(struct drm_device *dev) @@ -5750,24 +5947,6 @@ static void ivybridge_init_clock_gating(struct drm_device *dev) static void valleyview_init_clock_gating(struct drm_device *dev) { struct drm_i915_private *dev_priv = dev->dev_private; - u32 val; - - mutex_lock(&dev_priv->rps.hw_lock); - val = vlv_punit_read(dev_priv, PUNIT_REG_GPU_FREQ_STS); - mutex_unlock(&dev_priv->rps.hw_lock); - switch ((val >> 6) & 3) { - case 0: - case 1: - dev_priv->mem_freq = 800; - break; - case 2: - dev_priv->mem_freq = 1066; - break; - case 3: - dev_priv->mem_freq = 1333; - break; - } - DRM_DEBUG_DRIVER("DDR speed: %d MHz", dev_priv->mem_freq); I915_WRITE(DSPCLK_GATE_D, VRHUNIT_CLOCK_GATE_DISABLE); @@ -5843,48 +6022,11 @@ static void valleyview_init_clock_gating(struct drm_device *dev) static void cherryview_init_clock_gating(struct drm_device *dev) { struct drm_i915_private *dev_priv = dev->dev_private; - u32 val; - - mutex_lock(&dev_priv->rps.hw_lock); - val = vlv_punit_read(dev_priv, CCK_FUSE_REG); - mutex_unlock(&dev_priv->rps.hw_lock); - switch ((val >> 2) & 0x7) { - case 0: - case 1: - dev_priv->rps.cz_freq = CHV_CZ_CLOCK_FREQ_MODE_200; - dev_priv->mem_freq = 1600; - break; - case 2: - dev_priv->rps.cz_freq = CHV_CZ_CLOCK_FREQ_MODE_267; - dev_priv->mem_freq = 1600; - break; - case 3: - dev_priv->rps.cz_freq = CHV_CZ_CLOCK_FREQ_MODE_333; - dev_priv->mem_freq = 2000; - break; - case 4: - dev_priv->rps.cz_freq = CHV_CZ_CLOCK_FREQ_MODE_320; - dev_priv->mem_freq = 1600; - break; - case 5: - dev_priv->rps.cz_freq = CHV_CZ_CLOCK_FREQ_MODE_400; - dev_priv->mem_freq = 1600; - break; - } - DRM_DEBUG_DRIVER("DDR speed: %d MHz", dev_priv->mem_freq); I915_WRITE(DSPCLK_GATE_D, VRHUNIT_CLOCK_GATE_DISABLE); I915_WRITE(MI_ARB_VLV, MI_ARB_DISPLAY_TRICKLE_FEED_DISABLE); - /* WaDisablePartialInstShootdown:chv */ - I915_WRITE(GEN8_ROW_CHICKEN, - _MASKED_BIT_ENABLE(PARTIAL_INSTRUCTION_SHOOTDOWN_DISABLE)); - - /* WaDisableThreadStallDopClockGating:chv */ - I915_WRITE(GEN8_ROW_CHICKEN, - _MASKED_BIT_ENABLE(STALL_DOP_GATING_DISABLE)); - /* WaVSRefCountFullforceMissDisable:chv */ /* WaDSRefCountFullforceMissDisable:chv */ I915_WRITE(GEN7_FF_THREAD_MODE, @@ -5903,10 +6045,6 @@ static void cherryview_init_clock_gating(struct drm_device *dev) I915_WRITE(GEN8_UCGCTL6, I915_READ(GEN8_UCGCTL6) | GEN8_SDEUNIT_CLOCK_GATE_DISABLE); - /* WaDisableSamplerPowerBypass:chv (pre-production hw) */ - I915_WRITE(HALF_SLICE_CHICKEN3, - _MASKED_BIT_ENABLE(GEN8_SAMPLER_POWER_BYPASS_DIS)); - /* WaDisableGunitClockGating:chv (pre-production hw) */ I915_WRITE(VLV_GUNIT_CLOCK_GATE, I915_READ(VLV_GUNIT_CLOCK_GATE) | GINT_DIS); @@ -5916,8 +6054,6 @@ static void cherryview_init_clock_gating(struct drm_device *dev) _MASKED_BIT_ENABLE(GEN8_FF_DOP_CLOCK_GATE_DISABLE)); /* WaDisableDopClockGating:chv (pre-production hw) */ - I915_WRITE(GEN7_ROW_CHICKEN2, - _MASKED_BIT_ENABLE(DOP_CLOCK_GATING_DISABLE)); I915_WRITE(GEN6_UCGCTL1, I915_READ(GEN6_UCGCTL1) | GEN6_EU_TCUNIT_CLOCK_GATE_DISABLE); } @@ -6002,6 +6138,9 @@ static void gen3_init_clock_gating(struct drm_device *dev) /* On GEN3 we really need to make sure the ARB C3 LP bit is set */ I915_WRITE(MI_ARB_STATE, _MASKED_BIT_ENABLE(MI_ARB_C3_LP_WRITE_ENABLE)); + + I915_WRITE(MI_ARB_STATE, + _MASKED_BIT_ENABLE(MI_ARB_DISPLAY_TRICKLE_FEED_DISABLE)); } static void i85x_init_clock_gating(struct drm_device *dev) @@ -6013,6 +6152,9 @@ static void i85x_init_clock_gating(struct drm_device *dev) /* interrupts should cause a wake up from C3 */ I915_WRITE(MI_STATE, _MASKED_BIT_ENABLE(MI_AGPBUSY_INT_EN) | _MASKED_BIT_DISABLE(MI_AGPBUSY_830_MODE)); + + I915_WRITE(MEM_MODE, + _MASKED_BIT_ENABLE(MEM_DISPLAY_TRICKLE_FEED_DISABLE)); } static void i830_init_clock_gating(struct drm_device *dev) @@ -6020,6 +6162,10 @@ static void i830_init_clock_gating(struct drm_device *dev) struct drm_i915_private *dev_priv = dev->dev_private; I915_WRITE(DSPCLK_GATE_D, OVRUNIT_CLOCK_GATE_DISABLE); + + I915_WRITE(MEM_MODE, + _MASKED_BIT_ENABLE(MEM_DISPLAY_A_TRICKLE_FEED_DISABLE) | + _MASKED_BIT_ENABLE(MEM_DISPLAY_B_TRICKLE_FEED_DISABLE)); } void intel_init_clock_gating(struct drm_device *dev) @@ -6322,6 +6468,8 @@ static void vlv_display_power_well_disable(struct drm_i915_private *dev_priv, spin_unlock_irq(&dev_priv->irq_lock); vlv_set_power_well(dev_priv, power_well, false); + + vlv_power_sequencer_reset(dev_priv); } static void vlv_dpio_cmn_power_well_enable(struct drm_i915_private *dev_priv, @@ -6357,12 +6505,11 @@ static void vlv_dpio_cmn_power_well_enable(struct drm_i915_private *dev_priv, static void vlv_dpio_cmn_power_well_disable(struct drm_i915_private *dev_priv, struct i915_power_well *power_well) { - struct drm_device *dev = dev_priv->dev; enum pipe pipe; WARN_ON_ONCE(power_well->data != PUNIT_POWER_WELL_DPIO_CMN_BC); - for_each_pipe(pipe) + for_each_pipe(dev_priv, pipe) assert_pll_disabled(dev_priv, pipe); /* Assert common reset */ @@ -7249,7 +7396,7 @@ void intel_init_pm(struct drm_device *dev) else if (IS_HASWELL(dev)) dev_priv->display.init_clock_gating = haswell_init_clock_gating; else if (INTEL_INFO(dev)->gen == 8) - dev_priv->display.init_clock_gating = gen8_init_clock_gating; + dev_priv->display.init_clock_gating = broadwell_init_clock_gating; } else if (IS_CHERRYVIEW(dev)) { dev_priv->display.update_wm = cherryview_update_wm; dev_priv->display.update_sprite_wm = valleyview_update_sprite_wm; @@ -7443,6 +7590,7 @@ static int chv_freq_opcode(struct drm_i915_private *dev_priv, int val) return -1; } + /* CHV needs even values */ opcode = (DIV_ROUND_CLOSEST((val * 2 * mul), dev_priv->rps.cz_freq) * 2); return opcode; |