diff options
Diffstat (limited to 'drivers/thermal')
-rw-r--r-- | drivers/thermal/Kconfig | 17 | ||||
-rw-r--r-- | drivers/thermal/devfreq_cooling.c | 152 | ||||
-rw-r--r-- | drivers/thermal/intel_soc_dts_thermal.c | 9 | ||||
-rw-r--r-- | drivers/thermal/thermal_core.c | 64 |
4 files changed, 197 insertions, 45 deletions
diff --git a/drivers/thermal/Kconfig b/drivers/thermal/Kconfig index f786ae433032..4edc011fe4a5 100644 --- a/drivers/thermal/Kconfig +++ b/drivers/thermal/Kconfig @@ -15,6 +15,23 @@ menuconfig THERMAL if THERMAL +config THERMAL_EMERGENCY_POWEROFF_DELAY_MS + int "Emergency poweroff delay in milli-seconds" + depends on THERMAL + default 0 + help + Thermal subsystem will issue a graceful shutdown when + critical temperatures are reached using orderly_poweroff(). In + case of failure of an orderly_poweroff(), the thermal emergency + poweroff kicks in after a delay has elapsed and shuts down the system. + This config is number of milliseconds to delay before emergency + poweroff kicks in. Similarly to the critical trip point, + the delay should be carefully profiled so as to give adequate + time for orderly_poweroff() to finish on regular execution. + If set to 0 emergency poweroff will not be supported. + + In doubt, leave as 0. + config THERMAL_HWMON bool prompt "Expose thermal sensors as hwmon device" diff --git a/drivers/thermal/devfreq_cooling.c b/drivers/thermal/devfreq_cooling.c index 4bf4ad58cffd..ef59256887ff 100644 --- a/drivers/thermal/devfreq_cooling.c +++ b/drivers/thermal/devfreq_cooling.c @@ -28,6 +28,8 @@ #include <trace/events/thermal.h> +#define SCALE_ERROR_MITIGATION 100 + static DEFINE_IDA(devfreq_ida); /** @@ -45,6 +47,12 @@ static DEFINE_IDA(devfreq_ida); * @freq_table_size: Size of the @freq_table and @power_table * @power_ops: Pointer to devfreq_cooling_power, used to generate the * @power_table. + * @res_util: Resource utilization scaling factor for the power. + * It is multiplied by 100 to minimize the error. It is used + * for estimation of the power budget instead of using + * 'utilization' (which is 'busy_time / 'total_time'). + * The 'res_util' range is from 100 to (power_table[state] * 100) + * for the corresponding 'state'. */ struct devfreq_cooling_device { int id; @@ -55,6 +63,8 @@ struct devfreq_cooling_device { u32 *freq_table; size_t freq_table_size; struct devfreq_cooling_power *power_ops; + u32 res_util; + int capped_state; }; /** @@ -164,27 +174,12 @@ freq_get_state(struct devfreq_cooling_device *dfc, unsigned long freq) return THERMAL_CSTATE_INVALID; } -/** - * get_static_power() - calculate the static power - * @dfc: Pointer to devfreq cooling device - * @freq: Frequency in Hz - * - * Calculate the static power in milliwatts using the supplied - * get_static_power(). The current voltage is calculated using the - * OPP library. If no get_static_power() was supplied, assume the - * static power is negligible. - */ -static unsigned long -get_static_power(struct devfreq_cooling_device *dfc, unsigned long freq) +static unsigned long get_voltage(struct devfreq *df, unsigned long freq) { - struct devfreq *df = dfc->devfreq; struct device *dev = df->dev.parent; unsigned long voltage; struct dev_pm_opp *opp; - if (!dfc->power_ops->get_static_power) - return 0; - opp = dev_pm_opp_find_freq_exact(dev, freq, true); if (PTR_ERR(opp) == -ERANGE) opp = dev_pm_opp_find_freq_exact(dev, freq, false); @@ -202,9 +197,35 @@ get_static_power(struct devfreq_cooling_device *dfc, unsigned long freq) dev_err_ratelimited(dev, "Failed to get voltage for frequency %lu\n", freq); - return 0; } + return voltage; +} + +/** + * get_static_power() - calculate the static power + * @dfc: Pointer to devfreq cooling device + * @freq: Frequency in Hz + * + * Calculate the static power in milliwatts using the supplied + * get_static_power(). The current voltage is calculated using the + * OPP library. If no get_static_power() was supplied, assume the + * static power is negligible. + */ +static unsigned long +get_static_power(struct devfreq_cooling_device *dfc, unsigned long freq) +{ + struct devfreq *df = dfc->devfreq; + unsigned long voltage; + + if (!dfc->power_ops->get_static_power) + return 0; + + voltage = get_voltage(df, freq); + + if (voltage == 0) + return 0; + return dfc->power_ops->get_static_power(df, voltage); } @@ -239,6 +260,16 @@ get_dynamic_power(struct devfreq_cooling_device *dfc, unsigned long freq, return power; } + +static inline unsigned long get_total_power(struct devfreq_cooling_device *dfc, + unsigned long freq, + unsigned long voltage) +{ + return get_static_power(dfc, freq) + get_dynamic_power(dfc, freq, + voltage); +} + + static int devfreq_cooling_get_requested_power(struct thermal_cooling_device *cdev, struct thermal_zone_device *tz, u32 *power) @@ -248,27 +279,55 @@ static int devfreq_cooling_get_requested_power(struct thermal_cooling_device *cd struct devfreq_dev_status *status = &df->last_status; unsigned long state; unsigned long freq = status->current_frequency; - u32 dyn_power, static_power; + unsigned long voltage; + u32 dyn_power = 0; + u32 static_power = 0; + int res; - /* Get dynamic power for state */ state = freq_get_state(dfc, freq); - if (state == THERMAL_CSTATE_INVALID) - return -EAGAIN; + if (state == THERMAL_CSTATE_INVALID) { + res = -EAGAIN; + goto fail; + } - dyn_power = dfc->power_table[state]; + if (dfc->power_ops->get_real_power) { + voltage = get_voltage(df, freq); + if (voltage == 0) { + res = -EINVAL; + goto fail; + } - /* Scale dynamic power for utilization */ - dyn_power = (dyn_power * status->busy_time) / status->total_time; + res = dfc->power_ops->get_real_power(df, power, freq, voltage); + if (!res) { + state = dfc->capped_state; + dfc->res_util = dfc->power_table[state]; + dfc->res_util *= SCALE_ERROR_MITIGATION; - /* Get static power */ - static_power = get_static_power(dfc, freq); + if (*power > 1) + dfc->res_util /= *power; + } else { + goto fail; + } + } else { + dyn_power = dfc->power_table[state]; - trace_thermal_power_devfreq_get_power(cdev, status, freq, dyn_power, - static_power); + /* Scale dynamic power for utilization */ + dyn_power *= status->busy_time; + dyn_power /= status->total_time; + /* Get static power */ + static_power = get_static_power(dfc, freq); - *power = dyn_power + static_power; + *power = dyn_power + static_power; + } + + trace_thermal_power_devfreq_get_power(cdev, status, freq, dyn_power, + static_power, *power); return 0; +fail: + /* It is safe to set max in this case */ + dfc->res_util = SCALE_ERROR_MITIGATION; + return res; } static int devfreq_cooling_state2power(struct thermal_cooling_device *cdev, @@ -301,26 +360,34 @@ static int devfreq_cooling_power2state(struct thermal_cooling_device *cdev, unsigned long busy_time; s32 dyn_power; u32 static_power; + s32 est_power; int i; - static_power = get_static_power(dfc, freq); + if (dfc->power_ops->get_real_power) { + /* Scale for resource utilization */ + est_power = power * dfc->res_util; + est_power /= SCALE_ERROR_MITIGATION; + } else { + static_power = get_static_power(dfc, freq); - dyn_power = power - static_power; - dyn_power = dyn_power > 0 ? dyn_power : 0; + dyn_power = power - static_power; + dyn_power = dyn_power > 0 ? dyn_power : 0; - /* Scale dynamic power for utilization */ - busy_time = status->busy_time ?: 1; - dyn_power = (dyn_power * status->total_time) / busy_time; + /* Scale dynamic power for utilization */ + busy_time = status->busy_time ?: 1; + est_power = (dyn_power * status->total_time) / busy_time; + } /* * Find the first cooling state that is within the power * budget for dynamic power. */ for (i = 0; i < dfc->freq_table_size - 1; i++) - if (dyn_power >= dfc->power_table[i]) + if (est_power >= dfc->power_table[i]) break; *state = i; + dfc->capped_state = i; trace_thermal_power_devfreq_limit(cdev, freq, *state, power); return 0; } @@ -376,7 +443,7 @@ static int devfreq_cooling_gen_tables(struct devfreq_cooling_device *dfc) } for (i = 0, freq = ULONG_MAX; i < num_opps; i++, freq--) { - unsigned long power_dyn, voltage; + unsigned long power, voltage; struct dev_pm_opp *opp; opp = dev_pm_opp_find_freq_floor(dev, &freq); @@ -389,12 +456,15 @@ static int devfreq_cooling_gen_tables(struct devfreq_cooling_device *dfc) dev_pm_opp_put(opp); if (dfc->power_ops) { - power_dyn = get_dynamic_power(dfc, freq, voltage); + if (dfc->power_ops->get_real_power) + power = get_total_power(dfc, freq, voltage); + else + power = get_dynamic_power(dfc, freq, voltage); - dev_dbg(dev, "Dynamic power table: %lu MHz @ %lu mV: %lu = %lu mW\n", - freq / 1000000, voltage, power_dyn, power_dyn); + dev_dbg(dev, "Power table: %lu MHz @ %lu mV: %lu = %lu mW\n", + freq / 1000000, voltage, power, power); - power_table[i] = power_dyn; + power_table[i] = power; } freq_table[i] = freq; diff --git a/drivers/thermal/intel_soc_dts_thermal.c b/drivers/thermal/intel_soc_dts_thermal.c index b2bbaa1c60b0..c27868b2c6af 100644 --- a/drivers/thermal/intel_soc_dts_thermal.c +++ b/drivers/thermal/intel_soc_dts_thermal.c @@ -73,8 +73,12 @@ static int __init intel_soc_thermal_init(void) IRQF_TRIGGER_RISING | IRQF_ONESHOT, "soc_dts", soc_dts); if (err) { - pr_err("request_threaded_irq ret %d\n", err); - goto error_irq; + /* + * Do not just error out because the user space thermal + * daemon such as DPTF may use polling instead of being + * interrupt driven. + */ + pr_warn("request_threaded_irq ret %d\n", err); } } @@ -88,7 +92,6 @@ static int __init intel_soc_thermal_init(void) error_trips: if (soc_dts_thres_irq) free_irq(soc_dts_thres_irq, soc_dts); -error_irq: intel_soc_dts_iosf_exit(soc_dts); return err; diff --git a/drivers/thermal/thermal_core.c b/drivers/thermal/thermal_core.c index 11f0675cb7e5..b21b9cc2c8d6 100644 --- a/drivers/thermal/thermal_core.c +++ b/drivers/thermal/thermal_core.c @@ -45,8 +45,10 @@ static LIST_HEAD(thermal_governor_list); static DEFINE_MUTEX(thermal_list_lock); static DEFINE_MUTEX(thermal_governor_lock); +static DEFINE_MUTEX(poweroff_lock); static atomic_t in_suspend; +static bool power_off_triggered; static struct thermal_governor *def_governor; @@ -322,6 +324,54 @@ static void handle_non_critical_trips(struct thermal_zone_device *tz, def_governor->throttle(tz, trip); } +/** + * thermal_emergency_poweroff_func - emergency poweroff work after a known delay + * @work: work_struct associated with the emergency poweroff function + * + * This function is called in very critical situations to force + * a kernel poweroff after a configurable timeout value. + */ +static void thermal_emergency_poweroff_func(struct work_struct *work) +{ + /* + * We have reached here after the emergency thermal shutdown + * Waiting period has expired. This means orderly_poweroff has + * not been able to shut off the system for some reason. + * Try to shut down the system immediately using kernel_power_off + * if populated + */ + WARN(1, "Attempting kernel_power_off: Temperature too high\n"); + kernel_power_off(); + + /* + * Worst of the worst case trigger emergency restart + */ + WARN(1, "Attempting emergency_restart: Temperature too high\n"); + emergency_restart(); +} + +static DECLARE_DELAYED_WORK(thermal_emergency_poweroff_work, + thermal_emergency_poweroff_func); + +/** + * thermal_emergency_poweroff - Trigger an emergency system poweroff + * + * This may be called from any critical situation to trigger a system shutdown + * after a known period of time. By default this is not scheduled. + */ +void thermal_emergency_poweroff(void) +{ + int poweroff_delay_ms = CONFIG_THERMAL_EMERGENCY_POWEROFF_DELAY_MS; + /* + * poweroff_delay_ms must be a carefully profiled positive value. + * Its a must for thermal_emergency_poweroff_work to be scheduled + */ + if (poweroff_delay_ms <= 0) + return; + schedule_delayed_work(&thermal_emergency_poweroff_work, + msecs_to_jiffies(poweroff_delay_ms)); +} + static void handle_critical_trips(struct thermal_zone_device *tz, int trip, enum thermal_trip_type trip_type) { @@ -342,7 +392,17 @@ static void handle_critical_trips(struct thermal_zone_device *tz, dev_emerg(&tz->device, "critical temperature reached(%d C),shutting down\n", tz->temperature / 1000); - orderly_poweroff(true); + mutex_lock(&poweroff_lock); + if (!power_off_triggered) { + /* + * Queue a backup emergency shutdown in the event of + * orderly_poweroff failure + */ + thermal_emergency_poweroff(); + orderly_poweroff(true); + power_off_triggered = true; + } + mutex_unlock(&poweroff_lock); } } @@ -1463,6 +1523,7 @@ static int __init thermal_init(void) { int result; + mutex_init(&poweroff_lock); result = thermal_register_governors(); if (result) goto error; @@ -1497,6 +1558,7 @@ error: ida_destroy(&thermal_cdev_ida); mutex_destroy(&thermal_list_lock); mutex_destroy(&thermal_governor_lock); + mutex_destroy(&poweroff_lock); return result; } |