summaryrefslogtreecommitdiffstats
path: root/drivers/gpu/drm/i915/i915_pmu.c
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2018-04-02 07:59:23 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2018-04-02 07:59:23 -0700
commit320b164abb32db876866a4ff8c2cb710524ac6ea (patch)
tree1f79119cde6e24c9f1d01fb1e51252bca7c4cdd5 /drivers/gpu/drm/i915/i915_pmu.c
parent0adb32858b0bddf4ada5f364a84ed60b196dbcda (diff)
parent694f54f680f7fd8e9561928fbfc537d9afbc3d79 (diff)
downloadtalos-op-linux-320b164abb32db876866a4ff8c2cb710524ac6ea.tar.gz
talos-op-linux-320b164abb32db876866a4ff8c2cb710524ac6ea.zip
Merge tag 'drm-for-v4.17' of git://people.freedesktop.org/~airlied/linux
Pull drm updates from Dave Airlie: "Cannonlake and Vega12 support are probably the two major things. This pull lacks nouveau, Ben had some unforseen leave and a few other blockers so we'll see how things look or maybe leave it for this merge window. core: - Device links to handle sound/gpu pm dependency - Color encoding/range properties - Plane clipping into plane check helper - Backlight helpers - DP TP4 + HBR3 helper support amdgpu: - Vega12 support - Enable DC by default on all supported GPUs - Powerplay restructuring and cleanup - DC bandwidth calc updates - DC backlight on pre-DCE11 - TTM backing store dropping support - SR-IOV fixes - Adding "wattman" like functionality - DC crc support - Improved DC dual-link handling amdkfd: - GPUVM support for dGPU - KFD events for dGPU - Enable PCIe atomics for dGPUs - HSA process eviction support - Live-lock fixes for process eviction - VM page table allocation fix for large-bar systems panel: - Raydium RM68200 - AUO G104SN02 V2 - KEO TX31D200VM0BAA - ARM Versatile panels i915: - Cannonlake support enabled - AUX-F port support added - Icelake base enabling until internal milestone of forcewake support - Query uAPI interface (used for GPU topology information currently) - Compressed framebuffer support for sprites - kmem cache shrinking when GPU is idle - Avoid boosting GPU when waited item is being processed already - Avoid retraining LSPCON link unnecessarily - Decrease request signaling latency - Deprecation of I915_SET_COLORKEY_NONE - Kerneldoc and compiler warning cleanup for upcoming CI enforcements - Full range ycbcr toggling - HDCP support i915/gvt: - Big refactor for shadow ppgtt - KBL context save/restore via LRI cmd (Weinan) - Properly unmap dma for guest page (Changbin) vmwgfx: - Lots of various improvements etnaviv: - Use the drm gpu scheduler - prep work for GC7000L support vc4: - fix alpha blending - Expose perf counters to userspace pl111: - Bandwidth checking/limiting - Versatile panel support sun4i: - A83T HDMI support - A80 support - YUV plane support - H3/H5 HDMI support omapdrm: - HPD support for DVI connector - remove lots of static variables msm: - DSI updates from 10nm / SDM845 - fix for race condition with a3xx/a4xx fence completion irq - some refactoring/prep work for eventual a6xx support (ie. when we have a userspace) - a5xx debugfs enhancements - some mdp5 fixes/cleanups to prepare for eventually merging writeback - support (ie. when we have a userspace) tegra: - mmap() fixes for fbdev devices - Overlay plane for hw cursor fix - dma-buf cache maintenance support mali-dp: - YUV->RGB conversion support rockchip: - rk3399/chromebook fixes and improvements rcar-du: - LVDS support move to drm bridge - DT bindings for R8A77995 - Driver/DT support for R8A77970 tilcdc: - DRM panel support" * tag 'drm-for-v4.17' of git://people.freedesktop.org/~airlied/linux: (1646 commits) drm/i915: Fix hibernation with ACPI S0 target state drm/i915/execlists: Use a locked clear_bit() for synchronisation with interrupt drm/i915: Specify which engines to reset following semaphore/event lockups drm/i915/dp: Write to SET_POWER dpcd to enable MST hub. drm/amdkfd: Use ordered workqueue to restore processes drm/amdgpu: Fix acquiring VM on large-BAR systems drm/amd/pp: clean header file hwmgr.h drm/amd/pp: use mlck_table.count for array loop index limit drm: Fix uabi regression by allowing garbage mode->type from userspace drm/amdgpu: Add an ATPX quirk for hybrid laptop drm/amdgpu: fix spelling mistake: "asssert" -> "assert" drm/amd/pp: Add new asic support in pp_psm.c drm/amd/pp: Clean up powerplay code on Vega12 drm/amd/pp: Add smu irq handlers for legacy asics drm/amd/pp: Fix set wrong temperature range on smu7 drm/amdgpu: Don't change preferred domian when fallback GTT v5 drm/vmwgfx: Bump version patchlevel and date drm/vmwgfx: use monotonic event timestamps drm/vmwgfx: Unpin the screen object backup buffer when not used drm/vmwgfx: Stricter count of legacy surface device resources ...
Diffstat (limited to 'drivers/gpu/drm/i915/i915_pmu.c')
-rw-r--r--drivers/gpu/drm/i915/i915_pmu.c328
1 files changed, 240 insertions, 88 deletions
diff --git a/drivers/gpu/drm/i915/i915_pmu.c b/drivers/gpu/drm/i915/i915_pmu.c
index 0e9b98c32b62..d8feb9053e0c 100644
--- a/drivers/gpu/drm/i915/i915_pmu.c
+++ b/drivers/gpu/drm/i915/i915_pmu.c
@@ -329,6 +329,32 @@ engine_event_status(struct intel_engine_cs *engine,
return 0;
}
+static int
+config_status(struct drm_i915_private *i915, u64 config)
+{
+ switch (config) {
+ case I915_PMU_ACTUAL_FREQUENCY:
+ if (IS_VALLEYVIEW(i915) || IS_CHERRYVIEW(i915))
+ /* Requires a mutex for sampling! */
+ return -ENODEV;
+ /* Fall-through. */
+ case I915_PMU_REQUESTED_FREQUENCY:
+ if (INTEL_GEN(i915) < 6)
+ return -ENODEV;
+ break;
+ case I915_PMU_INTERRUPTS:
+ break;
+ case I915_PMU_RC6_RESIDENCY:
+ if (!HAS_RC6(i915))
+ return -ENODEV;
+ break;
+ default:
+ return -ENOENT;
+ }
+
+ return 0;
+}
+
static int engine_event_init(struct perf_event *event)
{
struct drm_i915_private *i915 =
@@ -376,30 +402,10 @@ static int i915_pmu_event_init(struct perf_event *event)
if (!cpumask_test_cpu(event->cpu, &i915_pmu_cpumask))
return -EINVAL;
- if (is_engine_event(event)) {
+ if (is_engine_event(event))
ret = engine_event_init(event);
- } else {
- ret = 0;
- switch (event->attr.config) {
- case I915_PMU_ACTUAL_FREQUENCY:
- if (IS_VALLEYVIEW(i915) || IS_CHERRYVIEW(i915))
- /* Requires a mutex for sampling! */
- ret = -ENODEV;
- case I915_PMU_REQUESTED_FREQUENCY:
- if (INTEL_GEN(i915) < 6)
- ret = -ENODEV;
- break;
- case I915_PMU_INTERRUPTS:
- break;
- case I915_PMU_RC6_RESIDENCY:
- if (!HAS_RC6(i915))
- ret = -ENODEV;
- break;
- default:
- ret = -ENOENT;
- break;
- }
- }
+ else
+ ret = config_status(i915, event->attr.config);
if (ret)
return ret;
@@ -427,7 +433,7 @@ static u64 __get_rc6(struct drm_i915_private *i915)
return val;
}
-static u64 get_rc6(struct drm_i915_private *i915, bool locked)
+static u64 get_rc6(struct drm_i915_private *i915)
{
#if IS_ENABLED(CONFIG_PM)
unsigned long flags;
@@ -443,8 +449,7 @@ static u64 get_rc6(struct drm_i915_private *i915, bool locked)
* previously.
*/
- if (!locked)
- spin_lock_irqsave(&i915->pmu.lock, flags);
+ spin_lock_irqsave(&i915->pmu.lock, flags);
if (val >= i915->pmu.sample[__I915_SAMPLE_RC6_ESTIMATED].cur) {
i915->pmu.sample[__I915_SAMPLE_RC6_ESTIMATED].cur = 0;
@@ -453,12 +458,10 @@ static u64 get_rc6(struct drm_i915_private *i915, bool locked)
val = i915->pmu.sample[__I915_SAMPLE_RC6_ESTIMATED].cur;
}
- if (!locked)
- spin_unlock_irqrestore(&i915->pmu.lock, flags);
+ spin_unlock_irqrestore(&i915->pmu.lock, flags);
} else {
struct pci_dev *pdev = i915->drm.pdev;
struct device *kdev = &pdev->dev;
- unsigned long flags2;
/*
* We are runtime suspended.
@@ -467,10 +470,8 @@ static u64 get_rc6(struct drm_i915_private *i915, bool locked)
* on top of the last known real value, as the approximated RC6
* counter value.
*/
- if (!locked)
- spin_lock_irqsave(&i915->pmu.lock, flags);
-
- spin_lock_irqsave(&kdev->power.lock, flags2);
+ spin_lock_irqsave(&i915->pmu.lock, flags);
+ spin_lock(&kdev->power.lock);
if (!i915->pmu.sample[__I915_SAMPLE_RC6_ESTIMATED].cur)
i915->pmu.suspended_jiffies_last =
@@ -480,14 +481,13 @@ static u64 get_rc6(struct drm_i915_private *i915, bool locked)
i915->pmu.suspended_jiffies_last;
val += jiffies - kdev->power.accounting_timestamp;
- spin_unlock_irqrestore(&kdev->power.lock, flags2);
+ spin_unlock(&kdev->power.lock);
val = jiffies_to_nsecs(val);
val += i915->pmu.sample[__I915_SAMPLE_RC6].cur;
i915->pmu.sample[__I915_SAMPLE_RC6_ESTIMATED].cur = val;
- if (!locked)
- spin_unlock_irqrestore(&i915->pmu.lock, flags);
+ spin_unlock_irqrestore(&i915->pmu.lock, flags);
}
return val;
@@ -496,7 +496,7 @@ static u64 get_rc6(struct drm_i915_private *i915, bool locked)
#endif
}
-static u64 __i915_pmu_event_read(struct perf_event *event, bool locked)
+static u64 __i915_pmu_event_read(struct perf_event *event)
{
struct drm_i915_private *i915 =
container_of(event->pmu, typeof(*i915), pmu.base);
@@ -534,7 +534,7 @@ static u64 __i915_pmu_event_read(struct perf_event *event, bool locked)
val = count_interrupts(i915);
break;
case I915_PMU_RC6_RESIDENCY:
- val = get_rc6(i915, locked);
+ val = get_rc6(i915);
break;
}
}
@@ -549,7 +549,7 @@ static void i915_pmu_event_read(struct perf_event *event)
again:
prev = local64_read(&hwc->prev_count);
- new = __i915_pmu_event_read(event, false);
+ new = __i915_pmu_event_read(event);
if (local64_cmpxchg(&hwc->prev_count, prev, new) != prev)
goto again;
@@ -599,14 +599,14 @@ static void i915_pmu_enable(struct perf_event *event)
engine->pmu.enable_count[sample]++;
}
+ spin_unlock_irqrestore(&i915->pmu.lock, flags);
+
/*
* Store the current counter value so we can report the correct delta
* for all listeners. Even when the event was already enabled and has
* an existing non-zero value.
*/
- local64_set(&event->hw.prev_count, __i915_pmu_event_read(event, true));
-
- spin_unlock_irqrestore(&i915->pmu.lock, flags);
+ local64_set(&event->hw.prev_count, __i915_pmu_event_read(event));
}
static void i915_pmu_disable(struct perf_event *event)
@@ -726,52 +726,9 @@ static ssize_t i915_pmu_event_show(struct device *dev,
return sprintf(buf, "config=0x%lx\n", eattr->val);
}
-#define I915_EVENT_ATTR(_name, _config) \
- (&((struct i915_ext_attribute[]) { \
- { .attr = __ATTR(_name, 0444, i915_pmu_event_show, NULL), \
- .val = _config, } \
- })[0].attr.attr)
-
-#define I915_EVENT_STR(_name, _str) \
- (&((struct perf_pmu_events_attr[]) { \
- { .attr = __ATTR(_name, 0444, perf_event_sysfs_show, NULL), \
- .id = 0, \
- .event_str = _str, } \
- })[0].attr.attr)
-
-#define I915_EVENT(_name, _config, _unit) \
- I915_EVENT_ATTR(_name, _config), \
- I915_EVENT_STR(_name.unit, _unit)
-
-#define I915_ENGINE_EVENT(_name, _class, _instance, _sample) \
- I915_EVENT_ATTR(_name, __I915_PMU_ENGINE(_class, _instance, _sample)), \
- I915_EVENT_STR(_name.unit, "ns")
-
-#define I915_ENGINE_EVENTS(_name, _class, _instance) \
- I915_ENGINE_EVENT(_name##_instance-busy, _class, _instance, I915_SAMPLE_BUSY), \
- I915_ENGINE_EVENT(_name##_instance-sema, _class, _instance, I915_SAMPLE_SEMA), \
- I915_ENGINE_EVENT(_name##_instance-wait, _class, _instance, I915_SAMPLE_WAIT)
-
-static struct attribute *i915_pmu_events_attrs[] = {
- I915_ENGINE_EVENTS(rcs, I915_ENGINE_CLASS_RENDER, 0),
- I915_ENGINE_EVENTS(bcs, I915_ENGINE_CLASS_COPY, 0),
- I915_ENGINE_EVENTS(vcs, I915_ENGINE_CLASS_VIDEO, 0),
- I915_ENGINE_EVENTS(vcs, I915_ENGINE_CLASS_VIDEO, 1),
- I915_ENGINE_EVENTS(vecs, I915_ENGINE_CLASS_VIDEO_ENHANCE, 0),
-
- I915_EVENT(actual-frequency, I915_PMU_ACTUAL_FREQUENCY, "MHz"),
- I915_EVENT(requested-frequency, I915_PMU_REQUESTED_FREQUENCY, "MHz"),
-
- I915_EVENT_ATTR(interrupts, I915_PMU_INTERRUPTS),
-
- I915_EVENT(rc6-residency, I915_PMU_RC6_RESIDENCY, "ns"),
-
- NULL,
-};
-
-static const struct attribute_group i915_pmu_events_attr_group = {
+static struct attribute_group i915_pmu_events_attr_group = {
.name = "events",
- .attrs = i915_pmu_events_attrs,
+ /* Patch in attrs at runtime. */
};
static ssize_t
@@ -789,7 +746,7 @@ static struct attribute *i915_cpumask_attrs[] = {
NULL,
};
-static struct attribute_group i915_pmu_cpumask_attr_group = {
+static const struct attribute_group i915_pmu_cpumask_attr_group = {
.attrs = i915_cpumask_attrs,
};
@@ -800,6 +757,193 @@ static const struct attribute_group *i915_pmu_attr_groups[] = {
NULL
};
+#define __event(__config, __name, __unit) \
+{ \
+ .config = (__config), \
+ .name = (__name), \
+ .unit = (__unit), \
+}
+
+#define __engine_event(__sample, __name) \
+{ \
+ .sample = (__sample), \
+ .name = (__name), \
+}
+
+static struct i915_ext_attribute *
+add_i915_attr(struct i915_ext_attribute *attr, const char *name, u64 config)
+{
+ sysfs_attr_init(&attr->attr.attr);
+ attr->attr.attr.name = name;
+ attr->attr.attr.mode = 0444;
+ attr->attr.show = i915_pmu_event_show;
+ attr->val = config;
+
+ return ++attr;
+}
+
+static struct perf_pmu_events_attr *
+add_pmu_attr(struct perf_pmu_events_attr *attr, const char *name,
+ const char *str)
+{
+ sysfs_attr_init(&attr->attr.attr);
+ attr->attr.attr.name = name;
+ attr->attr.attr.mode = 0444;
+ attr->attr.show = perf_event_sysfs_show;
+ attr->event_str = str;
+
+ return ++attr;
+}
+
+static struct attribute **
+create_event_attributes(struct drm_i915_private *i915)
+{
+ static const struct {
+ u64 config;
+ const char *name;
+ const char *unit;
+ } events[] = {
+ __event(I915_PMU_ACTUAL_FREQUENCY, "actual-frequency", "MHz"),
+ __event(I915_PMU_REQUESTED_FREQUENCY, "requested-frequency", "MHz"),
+ __event(I915_PMU_INTERRUPTS, "interrupts", NULL),
+ __event(I915_PMU_RC6_RESIDENCY, "rc6-residency", "ns"),
+ };
+ static const struct {
+ enum drm_i915_pmu_engine_sample sample;
+ char *name;
+ } engine_events[] = {
+ __engine_event(I915_SAMPLE_BUSY, "busy"),
+ __engine_event(I915_SAMPLE_SEMA, "sema"),
+ __engine_event(I915_SAMPLE_WAIT, "wait"),
+ };
+ unsigned int count = 0;
+ struct perf_pmu_events_attr *pmu_attr = NULL, *pmu_iter;
+ struct i915_ext_attribute *i915_attr = NULL, *i915_iter;
+ struct attribute **attr = NULL, **attr_iter;
+ struct intel_engine_cs *engine;
+ enum intel_engine_id id;
+ unsigned int i;
+
+ /* Count how many counters we will be exposing. */
+ for (i = 0; i < ARRAY_SIZE(events); i++) {
+ if (!config_status(i915, events[i].config))
+ count++;
+ }
+
+ for_each_engine(engine, i915, id) {
+ for (i = 0; i < ARRAY_SIZE(engine_events); i++) {
+ if (!engine_event_status(engine,
+ engine_events[i].sample))
+ count++;
+ }
+ }
+
+ /* Allocate attribute objects and table. */
+ i915_attr = kcalloc(count, sizeof(*i915_attr), GFP_KERNEL);
+ if (!i915_attr)
+ goto err_alloc;
+
+ pmu_attr = kcalloc(count, sizeof(*pmu_attr), GFP_KERNEL);
+ if (!pmu_attr)
+ goto err_alloc;
+
+ /* Max one pointer of each attribute type plus a termination entry. */
+ attr = kcalloc(count * 2 + 1, sizeof(*attr), GFP_KERNEL);
+ if (!attr)
+ goto err_alloc;
+
+ i915_iter = i915_attr;
+ pmu_iter = pmu_attr;
+ attr_iter = attr;
+
+ /* Initialize supported non-engine counters. */
+ for (i = 0; i < ARRAY_SIZE(events); i++) {
+ char *str;
+
+ if (config_status(i915, events[i].config))
+ continue;
+
+ str = kstrdup(events[i].name, GFP_KERNEL);
+ if (!str)
+ goto err;
+
+ *attr_iter++ = &i915_iter->attr.attr;
+ i915_iter = add_i915_attr(i915_iter, str, events[i].config);
+
+ if (events[i].unit) {
+ str = kasprintf(GFP_KERNEL, "%s.unit", events[i].name);
+ if (!str)
+ goto err;
+
+ *attr_iter++ = &pmu_iter->attr.attr;
+ pmu_iter = add_pmu_attr(pmu_iter, str, events[i].unit);
+ }
+ }
+
+ /* Initialize supported engine counters. */
+ for_each_engine(engine, i915, id) {
+ for (i = 0; i < ARRAY_SIZE(engine_events); i++) {
+ char *str;
+
+ if (engine_event_status(engine,
+ engine_events[i].sample))
+ continue;
+
+ str = kasprintf(GFP_KERNEL, "%s-%s",
+ engine->name, engine_events[i].name);
+ if (!str)
+ goto err;
+
+ *attr_iter++ = &i915_iter->attr.attr;
+ i915_iter =
+ add_i915_attr(i915_iter, str,
+ __I915_PMU_ENGINE(engine->uabi_class,
+ engine->instance,
+ engine_events[i].sample));
+
+ str = kasprintf(GFP_KERNEL, "%s-%s.unit",
+ engine->name, engine_events[i].name);
+ if (!str)
+ goto err;
+
+ *attr_iter++ = &pmu_iter->attr.attr;
+ pmu_iter = add_pmu_attr(pmu_iter, str, "ns");
+ }
+ }
+
+ i915->pmu.i915_attr = i915_attr;
+ i915->pmu.pmu_attr = pmu_attr;
+
+ return attr;
+
+err:;
+ for (attr_iter = attr; *attr_iter; attr_iter++)
+ kfree((*attr_iter)->name);
+
+err_alloc:
+ kfree(attr);
+ kfree(i915_attr);
+ kfree(pmu_attr);
+
+ return NULL;
+}
+
+static void free_event_attributes(struct drm_i915_private *i915)
+{
+ struct attribute **attr_iter = i915_pmu_events_attr_group.attrs;
+
+ for (; *attr_iter; attr_iter++)
+ kfree((*attr_iter)->name);
+
+ kfree(i915_pmu_events_attr_group.attrs);
+ kfree(i915->pmu.i915_attr);
+ kfree(i915->pmu.pmu_attr);
+
+ i915_pmu_events_attr_group.attrs = NULL;
+ i915->pmu.i915_attr = NULL;
+ i915->pmu.pmu_attr = NULL;
+}
+
static int i915_pmu_cpu_online(unsigned int cpu, struct hlist_node *node)
{
struct i915_pmu *pmu = hlist_entry_safe(node, typeof(*pmu), node);
@@ -873,6 +1017,12 @@ void i915_pmu_register(struct drm_i915_private *i915)
return;
}
+ i915_pmu_events_attr_group.attrs = create_event_attributes(i915);
+ if (!i915_pmu_events_attr_group.attrs) {
+ ret = -ENOMEM;
+ goto err;
+ }
+
i915->pmu.base.attr_groups = i915_pmu_attr_groups;
i915->pmu.base.task_ctx_nr = perf_invalid_context;
i915->pmu.base.event_init = i915_pmu_event_init;
@@ -901,6 +1051,7 @@ err_unreg:
perf_pmu_unregister(&i915->pmu.base);
err:
i915->pmu.base.event_init = NULL;
+ free_event_attributes(i915);
DRM_NOTE("Failed to register PMU! (err=%d)\n", ret);
}
@@ -917,4 +1068,5 @@ void i915_pmu_unregister(struct drm_i915_private *i915)
perf_pmu_unregister(&i915->pmu.base);
i915->pmu.base.event_init = NULL;
+ free_event_attributes(i915);
}
OpenPOWER on IntegriCloud