summaryrefslogtreecommitdiffstats
path: root/drivers/thermal/x86_pkg_temp_thermal.c
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/thermal/x86_pkg_temp_thermal.c')
-rw-r--r--drivers/thermal/x86_pkg_temp_thermal.c587
1 files changed, 246 insertions, 341 deletions
diff --git a/drivers/thermal/x86_pkg_temp_thermal.c b/drivers/thermal/x86_pkg_temp_thermal.c
index 95f4c1bcdb4c..d93eee2f101b 100644
--- a/drivers/thermal/x86_pkg_temp_thermal.c
+++ b/drivers/thermal/x86_pkg_temp_thermal.c
@@ -54,37 +54,33 @@ MODULE_PARM_DESC(notify_delay_ms,
* is some wrong values returned by cpuid for number of thresholds.
*/
#define MAX_NUMBER_OF_TRIPS 2
-/* Limit number of package temp zones */
-#define MAX_PKG_TEMP_ZONE_IDS 256
-
-struct phy_dev_entry {
- struct list_head list;
- u16 phys_proc_id;
- u16 first_cpu;
- u32 tj_max;
- int ref_cnt;
- u32 start_pkg_therm_low;
- u32 start_pkg_therm_high;
- struct thermal_zone_device *tzone;
+
+struct pkg_device {
+ int cpu;
+ bool work_scheduled;
+ u32 tj_max;
+ u32 msr_pkg_therm_low;
+ u32 msr_pkg_therm_high;
+ struct delayed_work work;
+ struct thermal_zone_device *tzone;
+ struct cpumask cpumask;
};
static struct thermal_zone_params pkg_temp_tz_params = {
.no_hwmon = true,
};
-/* List maintaining number of package instances */
-static LIST_HEAD(phy_dev_list);
-static DEFINE_MUTEX(phy_dev_list_mutex);
-
-/* Interrupt to work function schedule queue */
-static DEFINE_PER_CPU(struct delayed_work, pkg_temp_thermal_threshold_work);
+/* Keep track of how many package pointers we allocated in init() */
+static int max_packages __read_mostly;
+/* Array of package pointers */
+static struct pkg_device **packages;
+/* Serializes interrupt notification, work and hotplug */
+static DEFINE_SPINLOCK(pkg_temp_lock);
+/* Protects zone operation in the work function against hotplug removal */
+static DEFINE_MUTEX(thermal_zone_mutex);
-/* To track if the work is already scheduled on a package */
-static u8 *pkg_work_scheduled;
-
-/* Spin lock to prevent races with pkg_work_scheduled */
-static spinlock_t pkg_work_lock;
-static u16 max_phy_id;
+/* The dynamically assigned cpu hotplug state for module_exit() */
+static enum cpuhp_state pkg_thermal_hp_state __read_mostly;
/* Debug counters to show using debugfs */
static struct dentry *debugfs;
@@ -116,22 +112,20 @@ err_out:
return -ENOENT;
}
-static struct phy_dev_entry
- *pkg_temp_thermal_get_phy_entry(unsigned int cpu)
+/*
+ * Protection:
+ *
+ * - cpu hotplug: Read serialized by cpu hotplug lock
+ * Write must hold pkg_temp_lock
+ *
+ * - Other callsites: Must hold pkg_temp_lock
+ */
+static struct pkg_device *pkg_temp_thermal_get_dev(unsigned int cpu)
{
- u16 phys_proc_id = topology_physical_package_id(cpu);
- struct phy_dev_entry *phy_ptr;
-
- mutex_lock(&phy_dev_list_mutex);
-
- list_for_each_entry(phy_ptr, &phy_dev_list, list)
- if (phy_ptr->phys_proc_id == phys_proc_id) {
- mutex_unlock(&phy_dev_list_mutex);
- return phy_ptr;
- }
-
- mutex_unlock(&phy_dev_list_mutex);
+ int pkgid = topology_logical_package_id(cpu);
+ if (pkgid >= 0 && pkgid < max_packages)
+ return packages[pkgid];
return NULL;
}
@@ -141,61 +135,44 @@ static struct phy_dev_entry
*/
static int get_tj_max(int cpu, u32 *tj_max)
{
- u32 eax, edx;
- u32 val;
+ u32 eax, edx, val;
int err;
err = rdmsr_safe_on_cpu(cpu, MSR_IA32_TEMPERATURE_TARGET, &eax, &edx);
if (err)
- goto err_ret;
- else {
- val = (eax >> 16) & 0xff;
- if (val)
- *tj_max = val * 1000;
- else {
- err = -EINVAL;
- goto err_ret;
- }
- }
+ return err;
- return 0;
-err_ret:
- *tj_max = 0;
- return err;
+ val = (eax >> 16) & 0xff;
+ *tj_max = val * 1000;
+
+ return val ? 0 : -EINVAL;
}
static int sys_get_curr_temp(struct thermal_zone_device *tzd, int *temp)
{
+ struct pkg_device *pkgdev = tzd->devdata;
u32 eax, edx;
- struct phy_dev_entry *phy_dev_entry;
- phy_dev_entry = tzd->devdata;
- rdmsr_on_cpu(phy_dev_entry->first_cpu, MSR_IA32_PACKAGE_THERM_STATUS,
- &eax, &edx);
+ rdmsr_on_cpu(pkgdev->cpu, MSR_IA32_PACKAGE_THERM_STATUS, &eax, &edx);
if (eax & 0x80000000) {
- *temp = phy_dev_entry->tj_max -
- ((eax >> 16) & 0x7f) * 1000;
+ *temp = pkgdev->tj_max - ((eax >> 16) & 0x7f) * 1000;
pr_debug("sys_get_curr_temp %d\n", *temp);
return 0;
}
-
return -EINVAL;
}
static int sys_get_trip_temp(struct thermal_zone_device *tzd,
- int trip, int *temp)
+ int trip, int *temp)
{
- u32 eax, edx;
- struct phy_dev_entry *phy_dev_entry;
- u32 mask, shift;
+ struct pkg_device *pkgdev = tzd->devdata;
unsigned long thres_reg_value;
+ u32 mask, shift, eax, edx;
int ret;
if (trip >= MAX_NUMBER_OF_TRIPS)
return -EINVAL;
- phy_dev_entry = tzd->devdata;
-
if (trip) {
mask = THERM_MASK_THRESHOLD1;
shift = THERM_SHIFT_THRESHOLD1;
@@ -204,14 +181,14 @@ static int sys_get_trip_temp(struct thermal_zone_device *tzd,
shift = THERM_SHIFT_THRESHOLD0;
}
- ret = rdmsr_on_cpu(phy_dev_entry->first_cpu,
- MSR_IA32_PACKAGE_THERM_INTERRUPT, &eax, &edx);
+ ret = rdmsr_on_cpu(pkgdev->cpu, MSR_IA32_PACKAGE_THERM_INTERRUPT,
+ &eax, &edx);
if (ret < 0)
- return -EINVAL;
+ return ret;
thres_reg_value = (eax & mask) >> shift;
if (thres_reg_value)
- *temp = phy_dev_entry->tj_max - thres_reg_value * 1000;
+ *temp = pkgdev->tj_max - thres_reg_value * 1000;
else
*temp = 0;
pr_debug("sys_get_trip_temp %d\n", *temp);
@@ -219,24 +196,20 @@ static int sys_get_trip_temp(struct thermal_zone_device *tzd,
return 0;
}
-static int sys_set_trip_temp(struct thermal_zone_device *tzd, int trip,
- int temp)
+static int
+sys_set_trip_temp(struct thermal_zone_device *tzd, int trip, int temp)
{
- u32 l, h;
- struct phy_dev_entry *phy_dev_entry;
- u32 mask, shift, intr;
+ struct pkg_device *pkgdev = tzd->devdata;
+ u32 l, h, mask, shift, intr;
int ret;
- phy_dev_entry = tzd->devdata;
-
- if (trip >= MAX_NUMBER_OF_TRIPS || temp >= phy_dev_entry->tj_max)
+ if (trip >= MAX_NUMBER_OF_TRIPS || temp >= pkgdev->tj_max)
return -EINVAL;
- ret = rdmsr_on_cpu(phy_dev_entry->first_cpu,
- MSR_IA32_PACKAGE_THERM_INTERRUPT,
- &l, &h);
+ ret = rdmsr_on_cpu(pkgdev->cpu, MSR_IA32_PACKAGE_THERM_INTERRUPT,
+ &l, &h);
if (ret < 0)
- return -EINVAL;
+ return ret;
if (trip) {
mask = THERM_MASK_THRESHOLD1;
@@ -252,24 +225,20 @@ static int sys_set_trip_temp(struct thermal_zone_device *tzd, int trip,
* When users space sets a trip temperature == 0, which is indication
* that, it is no longer interested in receiving notifications.
*/
- if (!temp)
+ if (!temp) {
l &= ~intr;
- else {
- l |= (phy_dev_entry->tj_max - temp)/1000 << shift;
+ } else {
+ l |= (pkgdev->tj_max - temp)/1000 << shift;
l |= intr;
}
- return wrmsr_on_cpu(phy_dev_entry->first_cpu,
- MSR_IA32_PACKAGE_THERM_INTERRUPT,
- l, h);
+ return wrmsr_on_cpu(pkgdev->cpu, MSR_IA32_PACKAGE_THERM_INTERRUPT, l, h);
}
-static int sys_get_trip_type(struct thermal_zone_device *thermal,
- int trip, enum thermal_trip_type *type)
+static int sys_get_trip_type(struct thermal_zone_device *thermal, int trip,
+ enum thermal_trip_type *type)
{
-
*type = THERMAL_TRIP_PASSIVE;
-
return 0;
}
@@ -281,7 +250,7 @@ static struct thermal_zone_device_ops tzone_ops = {
.set_trip_temp = sys_set_trip_temp,
};
-static bool pkg_temp_thermal_platform_thermal_rate_control(void)
+static bool pkg_thermal_rate_control(void)
{
return true;
}
@@ -289,8 +258,8 @@ static bool pkg_temp_thermal_platform_thermal_rate_control(void)
/* Enable threshold interrupt on local package/cpu */
static inline void enable_pkg_thres_interrupt(void)
{
- u32 l, h;
u8 thres_0, thres_1;
+ u32 l, h;
rdmsr(MSR_IA32_PACKAGE_THERM_INTERRUPT, l, h);
/* only enable/disable if it had valid threshold value */
@@ -307,271 +276,232 @@ static inline void enable_pkg_thres_interrupt(void)
static inline void disable_pkg_thres_interrupt(void)
{
u32 l, h;
+
rdmsr(MSR_IA32_PACKAGE_THERM_INTERRUPT, l, h);
- wrmsr(MSR_IA32_PACKAGE_THERM_INTERRUPT,
- l & (~THERM_INT_THRESHOLD0_ENABLE) &
- (~THERM_INT_THRESHOLD1_ENABLE), h);
+
+ l &= ~(THERM_INT_THRESHOLD0_ENABLE | THERM_INT_THRESHOLD1_ENABLE);
+ wrmsr(MSR_IA32_PACKAGE_THERM_INTERRUPT, l, h);
}
static void pkg_temp_thermal_threshold_work_fn(struct work_struct *work)
{
- __u64 msr_val;
+ struct thermal_zone_device *tzone = NULL;
int cpu = smp_processor_id();
- int phy_id = topology_physical_package_id(cpu);
- struct phy_dev_entry *phdev = pkg_temp_thermal_get_phy_entry(cpu);
- bool notify = false;
- unsigned long flags;
-
- if (!phdev)
- return;
+ struct pkg_device *pkgdev;
+ u64 msr_val, wr_val;
- spin_lock_irqsave(&pkg_work_lock, flags);
+ mutex_lock(&thermal_zone_mutex);
+ spin_lock_irq(&pkg_temp_lock);
++pkg_work_cnt;
- if (unlikely(phy_id > max_phy_id)) {
- spin_unlock_irqrestore(&pkg_work_lock, flags);
+
+ pkgdev = pkg_temp_thermal_get_dev(cpu);
+ if (!pkgdev) {
+ spin_unlock_irq(&pkg_temp_lock);
+ mutex_unlock(&thermal_zone_mutex);
return;
}
- pkg_work_scheduled[phy_id] = 0;
- spin_unlock_irqrestore(&pkg_work_lock, flags);
+ pkgdev->work_scheduled = false;
- enable_pkg_thres_interrupt();
rdmsrl(MSR_IA32_PACKAGE_THERM_STATUS, msr_val);
- if (msr_val & THERM_LOG_THRESHOLD0) {
- wrmsrl(MSR_IA32_PACKAGE_THERM_STATUS,
- msr_val & ~THERM_LOG_THRESHOLD0);
- notify = true;
- }
- if (msr_val & THERM_LOG_THRESHOLD1) {
- wrmsrl(MSR_IA32_PACKAGE_THERM_STATUS,
- msr_val & ~THERM_LOG_THRESHOLD1);
- notify = true;
- }
- if (notify) {
- pr_debug("thermal_zone_device_update\n");
- thermal_zone_device_update(phdev->tzone,
- THERMAL_EVENT_UNSPECIFIED);
+ wr_val = msr_val & ~(THERM_LOG_THRESHOLD0 | THERM_LOG_THRESHOLD1);
+ if (wr_val != msr_val) {
+ wrmsrl(MSR_IA32_PACKAGE_THERM_STATUS, wr_val);
+ tzone = pkgdev->tzone;
}
+
+ enable_pkg_thres_interrupt();
+ spin_unlock_irq(&pkg_temp_lock);
+
+ /*
+ * If tzone is not NULL, then thermal_zone_mutex will prevent the
+ * concurrent removal in the cpu offline callback.
+ */
+ if (tzone)
+ thermal_zone_device_update(tzone, THERMAL_EVENT_UNSPECIFIED);
+
+ mutex_unlock(&thermal_zone_mutex);
}
-static int pkg_temp_thermal_platform_thermal_notify(__u64 msr_val)
+static void pkg_thermal_schedule_work(int cpu, struct delayed_work *work)
+{
+ unsigned long ms = msecs_to_jiffies(notify_delay_ms);
+
+ schedule_delayed_work_on(cpu, work, ms);
+}
+
+static int pkg_thermal_notify(u64 msr_val)
{
- unsigned long flags;
int cpu = smp_processor_id();
- int phy_id = topology_physical_package_id(cpu);
+ struct pkg_device *pkgdev;
+ unsigned long flags;
- /*
- * When a package is in interrupted state, all CPU's in that package
- * are in the same interrupt state. So scheduling on any one CPU in
- * the package is enough and simply return for others.
- */
- spin_lock_irqsave(&pkg_work_lock, flags);
+ spin_lock_irqsave(&pkg_temp_lock, flags);
++pkg_interrupt_cnt;
- if (unlikely(phy_id > max_phy_id) || unlikely(!pkg_work_scheduled) ||
- pkg_work_scheduled[phy_id]) {
- disable_pkg_thres_interrupt();
- spin_unlock_irqrestore(&pkg_work_lock, flags);
- return -EINVAL;
- }
- pkg_work_scheduled[phy_id] = 1;
- spin_unlock_irqrestore(&pkg_work_lock, flags);
disable_pkg_thres_interrupt();
- schedule_delayed_work_on(cpu,
- &per_cpu(pkg_temp_thermal_threshold_work, cpu),
- msecs_to_jiffies(notify_delay_ms));
- return 0;
-}
-static int find_siblings_cpu(int cpu)
-{
- int i;
- int id = topology_physical_package_id(cpu);
-
- for_each_online_cpu(i)
- if (i != cpu && topology_physical_package_id(i) == id)
- return i;
+ /* Work is per package, so scheduling it once is enough. */
+ pkgdev = pkg_temp_thermal_get_dev(cpu);
+ if (pkgdev && !pkgdev->work_scheduled) {
+ pkgdev->work_scheduled = true;
+ pkg_thermal_schedule_work(pkgdev->cpu, &pkgdev->work);
+ }
+ spin_unlock_irqrestore(&pkg_temp_lock, flags);
return 0;
}
static int pkg_temp_thermal_device_add(unsigned int cpu)
{
- int err;
- u32 tj_max;
- struct phy_dev_entry *phy_dev_entry;
- int thres_count;
- u32 eax, ebx, ecx, edx;
- u8 *temp;
- unsigned long flags;
+ int pkgid = topology_logical_package_id(cpu);
+ u32 tj_max, eax, ebx, ecx, edx;
+ struct pkg_device *pkgdev;
+ int thres_count, err;
+
+ if (pkgid >= max_packages)
+ return -ENOMEM;
cpuid(6, &eax, &ebx, &ecx, &edx);
thres_count = ebx & 0x07;
if (!thres_count)
return -ENODEV;
- if (topology_physical_package_id(cpu) > MAX_PKG_TEMP_ZONE_IDS)
- return -ENODEV;
-
thres_count = clamp_val(thres_count, 0, MAX_NUMBER_OF_TRIPS);
err = get_tj_max(cpu, &tj_max);
if (err)
- goto err_ret;
-
- mutex_lock(&phy_dev_list_mutex);
+ return err;
- phy_dev_entry = kzalloc(sizeof(*phy_dev_entry), GFP_KERNEL);
- if (!phy_dev_entry) {
- err = -ENOMEM;
- goto err_ret_unlock;
- }
+ pkgdev = kzalloc(sizeof(*pkgdev), GFP_KERNEL);
+ if (!pkgdev)
+ return -ENOMEM;
- spin_lock_irqsave(&pkg_work_lock, flags);
- if (topology_physical_package_id(cpu) > max_phy_id)
- max_phy_id = topology_physical_package_id(cpu);
- temp = krealloc(pkg_work_scheduled,
- (max_phy_id+1) * sizeof(u8), GFP_ATOMIC);
- if (!temp) {
- spin_unlock_irqrestore(&pkg_work_lock, flags);
- err = -ENOMEM;
- goto err_ret_free;
- }
- pkg_work_scheduled = temp;
- pkg_work_scheduled[topology_physical_package_id(cpu)] = 0;
- spin_unlock_irqrestore(&pkg_work_lock, flags);
-
- phy_dev_entry->phys_proc_id = topology_physical_package_id(cpu);
- phy_dev_entry->first_cpu = cpu;
- phy_dev_entry->tj_max = tj_max;
- phy_dev_entry->ref_cnt = 1;
- phy_dev_entry->tzone = thermal_zone_device_register("x86_pkg_temp",
+ INIT_DELAYED_WORK(&pkgdev->work, pkg_temp_thermal_threshold_work_fn);
+ pkgdev->cpu = cpu;
+ pkgdev->tj_max = tj_max;
+ pkgdev->tzone = thermal_zone_device_register("x86_pkg_temp",
thres_count,
- (thres_count == MAX_NUMBER_OF_TRIPS) ?
- 0x03 : 0x01,
- phy_dev_entry, &tzone_ops, &pkg_temp_tz_params, 0, 0);
- if (IS_ERR(phy_dev_entry->tzone)) {
- err = PTR_ERR(phy_dev_entry->tzone);
- goto err_ret_free;
+ (thres_count == MAX_NUMBER_OF_TRIPS) ? 0x03 : 0x01,
+ pkgdev, &tzone_ops, &pkg_temp_tz_params, 0, 0);
+ if (IS_ERR(pkgdev->tzone)) {
+ err = PTR_ERR(pkgdev->tzone);
+ kfree(pkgdev);
+ return err;
}
/* Store MSR value for package thermal interrupt, to restore at exit */
- rdmsr_on_cpu(cpu, MSR_IA32_PACKAGE_THERM_INTERRUPT,
- &phy_dev_entry->start_pkg_therm_low,
- &phy_dev_entry->start_pkg_therm_high);
-
- list_add_tail(&phy_dev_entry->list, &phy_dev_list);
- pr_debug("pkg_temp_thermal_device_add :phy_id %d cpu %d\n",
- phy_dev_entry->phys_proc_id, cpu);
-
- mutex_unlock(&phy_dev_list_mutex);
+ rdmsr(MSR_IA32_PACKAGE_THERM_INTERRUPT, pkgdev->msr_pkg_therm_low,
+ pkgdev->msr_pkg_therm_high);
+ cpumask_set_cpu(cpu, &pkgdev->cpumask);
+ spin_lock_irq(&pkg_temp_lock);
+ packages[pkgid] = pkgdev;
+ spin_unlock_irq(&pkg_temp_lock);
return 0;
-
-err_ret_free:
- kfree(phy_dev_entry);
-err_ret_unlock:
- mutex_unlock(&phy_dev_list_mutex);
-
-err_ret:
- return err;
}
-static int pkg_temp_thermal_device_remove(unsigned int cpu)
+static int pkg_thermal_cpu_offline(unsigned int cpu)
{
- struct phy_dev_entry *n;
- u16 phys_proc_id = topology_physical_package_id(cpu);
- struct phy_dev_entry *phdev =
- pkg_temp_thermal_get_phy_entry(cpu);
+ struct pkg_device *pkgdev = pkg_temp_thermal_get_dev(cpu);
+ bool lastcpu, was_target;
+ int target;
- if (!phdev)
- return -ENODEV;
+ if (!pkgdev)
+ return 0;
- mutex_lock(&phy_dev_list_mutex);
- /* If we are loosing the first cpu for this package, we need change */
- if (phdev->first_cpu == cpu) {
- phdev->first_cpu = find_siblings_cpu(cpu);
- pr_debug("thermal_device_remove: first cpu switched %d\n",
- phdev->first_cpu);
+ target = cpumask_any_but(&pkgdev->cpumask, cpu);
+ cpumask_clear_cpu(cpu, &pkgdev->cpumask);
+ lastcpu = target >= nr_cpu_ids;
+ /*
+ * Remove the sysfs files, if this is the last cpu in the package
+ * before doing further cleanups.
+ */
+ if (lastcpu) {
+ struct thermal_zone_device *tzone = pkgdev->tzone;
+
+ /*
+ * We must protect against a work function calling
+ * thermal_zone_update, after/while unregister. We null out
+ * the pointer under the zone mutex, so the worker function
+ * won't try to call.
+ */
+ mutex_lock(&thermal_zone_mutex);
+ pkgdev->tzone = NULL;
+ mutex_unlock(&thermal_zone_mutex);
+
+ thermal_zone_device_unregister(tzone);
}
+
+ /* Protect against work and interrupts */
+ spin_lock_irq(&pkg_temp_lock);
+
/*
- * It is possible that no siblings left as this was the last cpu
- * going offline. We don't need to worry about this assignment
- * as the phydev entry will be removed in this case and
- * thermal zone is removed.
- */
- --phdev->ref_cnt;
- pr_debug("thermal_device_remove: pkg: %d cpu %d ref_cnt %d\n",
- phys_proc_id, cpu, phdev->ref_cnt);
- if (!phdev->ref_cnt)
- list_for_each_entry_safe(phdev, n, &phy_dev_list, list) {
- if (phdev->phys_proc_id == phys_proc_id) {
- thermal_zone_device_unregister(phdev->tzone);
- list_del(&phdev->list);
- kfree(phdev);
- break;
- }
- }
- mutex_unlock(&phy_dev_list_mutex);
+ * Check whether this cpu was the current target and store the new
+ * one. When we drop the lock, then the interrupt notify function
+ * will see the new target.
+ */
+ was_target = pkgdev->cpu == cpu;
+ pkgdev->cpu = target;
- return 0;
-}
+ /*
+ * If this is the last CPU in the package remove the package
+ * reference from the array and restore the interrupt MSR. When we
+ * drop the lock neither the interrupt notify function nor the
+ * worker will see the package anymore.
+ */
+ if (lastcpu) {
+ packages[topology_logical_package_id(cpu)] = NULL;
+ /* After this point nothing touches the MSR anymore. */
+ wrmsr(MSR_IA32_PACKAGE_THERM_INTERRUPT,
+ pkgdev->msr_pkg_therm_low, pkgdev->msr_pkg_therm_high);
+ }
-static int get_core_online(unsigned int cpu)
-{
- struct cpuinfo_x86 *c = &cpu_data(cpu);
- struct phy_dev_entry *phdev = pkg_temp_thermal_get_phy_entry(cpu);
-
- /* Check if there is already an instance for this package */
- if (!phdev) {
- if (!cpu_has(c, X86_FEATURE_DTHERM) ||
- !cpu_has(c, X86_FEATURE_PTS))
- return -ENODEV;
- if (pkg_temp_thermal_device_add(cpu))
- return -ENODEV;
- } else {
- mutex_lock(&phy_dev_list_mutex);
- ++phdev->ref_cnt;
- pr_debug("get_core_online: cpu %d ref_cnt %d\n",
- cpu, phdev->ref_cnt);
- mutex_unlock(&phy_dev_list_mutex);
+ /*
+ * Check whether there is work scheduled and whether the work is
+ * targeted at the outgoing CPU.
+ */
+ if (pkgdev->work_scheduled && was_target) {
+ /*
+ * To cancel the work we need to drop the lock, otherwise
+ * we might deadlock if the work needs to be flushed.
+ */
+ spin_unlock_irq(&pkg_temp_lock);
+ cancel_delayed_work_sync(&pkgdev->work);
+ spin_lock_irq(&pkg_temp_lock);
+ /*
+ * If this is not the last cpu in the package and the work
+ * did not run after we dropped the lock above, then we
+ * need to reschedule the work, otherwise the interrupt
+ * stays disabled forever.
+ */
+ if (!lastcpu && pkgdev->work_scheduled)
+ pkg_thermal_schedule_work(target, &pkgdev->work);
}
- INIT_DELAYED_WORK(&per_cpu(pkg_temp_thermal_threshold_work, cpu),
- pkg_temp_thermal_threshold_work_fn);
- pr_debug("get_core_online: cpu %d successful\n", cpu);
+ spin_unlock_irq(&pkg_temp_lock);
+ /* Final cleanup if this is the last cpu */
+ if (lastcpu)
+ kfree(pkgdev);
return 0;
}
-static void put_core_offline(unsigned int cpu)
+static int pkg_thermal_cpu_online(unsigned int cpu)
{
- if (!pkg_temp_thermal_device_remove(cpu))
- cancel_delayed_work_sync(
- &per_cpu(pkg_temp_thermal_threshold_work, cpu));
+ struct pkg_device *pkgdev = pkg_temp_thermal_get_dev(cpu);
+ struct cpuinfo_x86 *c = &cpu_data(cpu);
- pr_debug("put_core_offline: cpu %d\n", cpu);
-}
+ /* Paranoia check */
+ if (!cpu_has(c, X86_FEATURE_DTHERM) || !cpu_has(c, X86_FEATURE_PTS))
+ return -ENODEV;
-static int pkg_temp_thermal_cpu_callback(struct notifier_block *nfb,
- unsigned long action, void *hcpu)
-{
- unsigned int cpu = (unsigned long) hcpu;
-
- switch (action & ~CPU_TASKS_FROZEN) {
- case CPU_ONLINE:
- case CPU_DOWN_FAILED:
- get_core_online(cpu);
- break;
- case CPU_DOWN_PREPARE:
- put_core_offline(cpu);
- break;
+ /* If the package exists, nothing to do */
+ if (pkgdev) {
+ cpumask_set_cpu(cpu, &pkgdev->cpumask);
+ return 0;
}
- return NOTIFY_OK;
+ return pkg_temp_thermal_device_add(cpu);
}
-static struct notifier_block pkg_temp_thermal_notifier __refdata = {
- .notifier_call = pkg_temp_thermal_cpu_callback,
-};
-
static const struct x86_cpu_id __initconst pkg_temp_thermal_ids[] = {
{ X86_VENDOR_INTEL, X86_FAMILY_ANY, X86_MODEL_ANY, X86_FEATURE_PTS },
{}
@@ -580,71 +510,46 @@ MODULE_DEVICE_TABLE(x86cpu, pkg_temp_thermal_ids);
static int __init pkg_temp_thermal_init(void)
{
- int i;
+ int ret;
if (!x86_match_cpu(pkg_temp_thermal_ids))
return -ENODEV;
- spin_lock_init(&pkg_work_lock);
- platform_thermal_package_notify =
- pkg_temp_thermal_platform_thermal_notify;
- platform_thermal_package_rate_control =
- pkg_temp_thermal_platform_thermal_rate_control;
+ max_packages = topology_max_packages();
+ packages = kzalloc(max_packages * sizeof(struct pkg_device *), GFP_KERNEL);
+ if (!packages)
+ return -ENOMEM;
- cpu_notifier_register_begin();
- for_each_online_cpu(i)
- if (get_core_online(i))
- goto err_ret;
- __register_hotcpu_notifier(&pkg_temp_thermal_notifier);
- cpu_notifier_register_done();
+ ret = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "thermal/x86_pkg:online",
+ pkg_thermal_cpu_online, pkg_thermal_cpu_offline);
+ if (ret < 0)
+ goto err;
- pkg_temp_debugfs_init(); /* Don't care if fails */
+ /* Store the state for module exit */
+ pkg_thermal_hp_state = ret;
- return 0;
+ platform_thermal_package_notify = pkg_thermal_notify;
+ platform_thermal_package_rate_control = pkg_thermal_rate_control;
-err_ret:
- for_each_online_cpu(i)
- put_core_offline(i);
- cpu_notifier_register_done();
- kfree(pkg_work_scheduled);
- platform_thermal_package_notify = NULL;
- platform_thermal_package_rate_control = NULL;
+ /* Don't care if it fails */
+ pkg_temp_debugfs_init();
+ return 0;
- return -ENODEV;
+err:
+ kfree(packages);
+ return ret;
}
+module_init(pkg_temp_thermal_init)
static void __exit pkg_temp_thermal_exit(void)
{
- struct phy_dev_entry *phdev, *n;
- int i;
-
- cpu_notifier_register_begin();
- __unregister_hotcpu_notifier(&pkg_temp_thermal_notifier);
- mutex_lock(&phy_dev_list_mutex);
- list_for_each_entry_safe(phdev, n, &phy_dev_list, list) {
- /* Retore old MSR value for package thermal interrupt */
- wrmsr_on_cpu(phdev->first_cpu,
- MSR_IA32_PACKAGE_THERM_INTERRUPT,
- phdev->start_pkg_therm_low,
- phdev->start_pkg_therm_high);
- thermal_zone_device_unregister(phdev->tzone);
- list_del(&phdev->list);
- kfree(phdev);
- }
- mutex_unlock(&phy_dev_list_mutex);
platform_thermal_package_notify = NULL;
platform_thermal_package_rate_control = NULL;
- for_each_online_cpu(i)
- cancel_delayed_work_sync(
- &per_cpu(pkg_temp_thermal_threshold_work, i));
- cpu_notifier_register_done();
-
- kfree(pkg_work_scheduled);
+ cpuhp_remove_state(pkg_thermal_hp_state);
debugfs_remove_recursive(debugfs);
+ kfree(packages);
}
-
-module_init(pkg_temp_thermal_init)
module_exit(pkg_temp_thermal_exit)
MODULE_DESCRIPTION("X86 PKG TEMP Thermal Driver");
OpenPOWER on IntegriCloud