diff options
Diffstat (limited to 'arch')
-rw-r--r-- | arch/arm/Kconfig | 2 | ||||
-rw-r--r-- | arch/arm/include/asm/pmu.h | 2 | ||||
-rw-r--r-- | arch/arm/include/asm/ptrace.h | 8 | ||||
-rw-r--r-- | arch/arm/kernel/perf_event.c | 27 | ||||
-rw-r--r-- | arch/arm/kernel/perf_event_cpu.c | 113 | ||||
-rw-r--r-- | arch/arm/kernel/perf_event_v7.c | 717 | ||||
-rw-r--r-- | arch/arm/kernel/process.c | 9 |
7 files changed, 829 insertions, 49 deletions
diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig index 4d05bb93714a..fac6890b315d 100644 --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig @@ -2273,7 +2273,7 @@ source "kernel/power/Kconfig" config ARCH_SUSPEND_POSSIBLE depends on !ARCH_S5PC100 depends on CPU_ARM920T || CPU_ARM926T || CPU_FEROCEON || CPU_SA1100 || \ - CPU_V6 || CPU_V6K || CPU_V7 || CPU_XSC3 || CPU_XSCALE || CPU_MOHAWK + CPU_V6 || CPU_V6K || CPU_V7 || CPU_V7M || CPU_XSC3 || CPU_XSCALE || CPU_MOHAWK def_bool y config ARM_CPU_SUSPEND diff --git a/arch/arm/include/asm/pmu.h b/arch/arm/include/asm/pmu.h index f24edad26c70..ae1919be8f98 100644 --- a/arch/arm/include/asm/pmu.h +++ b/arch/arm/include/asm/pmu.h @@ -71,6 +71,8 @@ struct arm_pmu { void (*disable)(struct perf_event *event); int (*get_event_idx)(struct pmu_hw_events *hw_events, struct perf_event *event); + void (*clear_event_idx)(struct pmu_hw_events *hw_events, + struct perf_event *event); int (*set_event_filter)(struct hw_perf_event *evt, struct perf_event_attr *attr); u32 (*read_counter)(struct perf_event *event); diff --git a/arch/arm/include/asm/ptrace.h b/arch/arm/include/asm/ptrace.h index ee688b0a13c3..c877654fe3bf 100644 --- a/arch/arm/include/asm/ptrace.h +++ b/arch/arm/include/asm/ptrace.h @@ -27,9 +27,13 @@ struct pt_regs { #define thumb_mode(regs) (0) #endif +#ifndef CONFIG_CPU_V7M #define isa_mode(regs) \ - ((((regs)->ARM_cpsr & PSR_J_BIT) >> 23) | \ - (((regs)->ARM_cpsr & PSR_T_BIT) >> 5)) + ((((regs)->ARM_cpsr & PSR_J_BIT) >> (__ffs(PSR_J_BIT) - 1)) | \ + (((regs)->ARM_cpsr & PSR_T_BIT) >> (__ffs(PSR_T_BIT)))) +#else +#define isa_mode(regs) 1 /* Thumb */ +#endif #define processor_mode(regs) \ ((regs)->ARM_cpsr & MODE_MASK) diff --git a/arch/arm/kernel/perf_event.c b/arch/arm/kernel/perf_event.c index 789d846a9184..a6bc431cde70 100644 --- a/arch/arm/kernel/perf_event.c +++ b/arch/arm/kernel/perf_event.c @@ -16,6 +16,8 @@ #include <linux/platform_device.h> #include <linux/pm_runtime.h> #include <linux/uaccess.h> +#include <linux/irq.h> +#include <linux/irqdesc.h> #include <asm/irq_regs.h> #include <asm/pmu.h> @@ -205,6 +207,8 @@ armpmu_del(struct perf_event *event, int flags) armpmu_stop(event, PERF_EF_UPDATE); hw_events->events[idx] = NULL; clear_bit(idx, hw_events->used_mask); + if (armpmu->clear_event_idx) + armpmu->clear_event_idx(hw_events, event); perf_event_update_userpage(event); } @@ -295,14 +299,27 @@ validate_group(struct perf_event *event) static irqreturn_t armpmu_dispatch_irq(int irq, void *dev) { - struct arm_pmu *armpmu = (struct arm_pmu *) dev; - struct platform_device *plat_device = armpmu->plat_device; - struct arm_pmu_platdata *plat = dev_get_platdata(&plat_device->dev); + struct arm_pmu *armpmu; + struct platform_device *plat_device; + struct arm_pmu_platdata *plat; + int ret; + u64 start_clock, finish_clock; + if (irq_is_percpu(irq)) + dev = *(void **)dev; + armpmu = dev; + plat_device = armpmu->plat_device; + plat = dev_get_platdata(&plat_device->dev); + + start_clock = sched_clock(); if (plat && plat->handle_irq) - return plat->handle_irq(irq, dev, armpmu->handle_irq); + ret = plat->handle_irq(irq, dev, armpmu->handle_irq); else - return armpmu->handle_irq(irq, dev); + ret = armpmu->handle_irq(irq, dev); + finish_clock = sched_clock(); + + perf_sample_event_took(finish_clock - start_clock); + return ret; } static void diff --git a/arch/arm/kernel/perf_event_cpu.c b/arch/arm/kernel/perf_event_cpu.c index 20d553c9f5e2..51798d7854ac 100644 --- a/arch/arm/kernel/perf_event_cpu.c +++ b/arch/arm/kernel/perf_event_cpu.c @@ -25,6 +25,8 @@ #include <linux/platform_device.h> #include <linux/slab.h> #include <linux/spinlock.h> +#include <linux/irq.h> +#include <linux/irqdesc.h> #include <asm/cputype.h> #include <asm/irq_regs.h> @@ -33,6 +35,7 @@ /* Set at runtime when we know what CPU type we are. */ static struct arm_pmu *cpu_pmu; +static DEFINE_PER_CPU(struct arm_pmu *, percpu_pmu); static DEFINE_PER_CPU(struct perf_event * [ARMPMU_MAX_HWEVENTS], hw_events); static DEFINE_PER_CPU(unsigned long [BITS_TO_LONGS(ARMPMU_MAX_HWEVENTS)], used_mask); static DEFINE_PER_CPU(struct pmu_hw_events, cpu_hw_events); @@ -71,6 +74,26 @@ static struct pmu_hw_events *cpu_pmu_get_cpu_events(void) return this_cpu_ptr(&cpu_hw_events); } +static void cpu_pmu_enable_percpu_irq(void *data) +{ + struct arm_pmu *cpu_pmu = data; + struct platform_device *pmu_device = cpu_pmu->plat_device; + int irq = platform_get_irq(pmu_device, 0); + + enable_percpu_irq(irq, IRQ_TYPE_NONE); + cpumask_set_cpu(smp_processor_id(), &cpu_pmu->active_irqs); +} + +static void cpu_pmu_disable_percpu_irq(void *data) +{ + struct arm_pmu *cpu_pmu = data; + struct platform_device *pmu_device = cpu_pmu->plat_device; + int irq = platform_get_irq(pmu_device, 0); + + cpumask_clear_cpu(smp_processor_id(), &cpu_pmu->active_irqs); + disable_percpu_irq(irq); +} + static void cpu_pmu_free_irq(struct arm_pmu *cpu_pmu) { int i, irq, irqs; @@ -78,12 +101,18 @@ static void cpu_pmu_free_irq(struct arm_pmu *cpu_pmu) irqs = min(pmu_device->num_resources, num_possible_cpus()); - for (i = 0; i < irqs; ++i) { - if (!cpumask_test_and_clear_cpu(i, &cpu_pmu->active_irqs)) - continue; - irq = platform_get_irq(pmu_device, i); - if (irq >= 0) - free_irq(irq, cpu_pmu); + irq = platform_get_irq(pmu_device, 0); + if (irq >= 0 && irq_is_percpu(irq)) { + on_each_cpu(cpu_pmu_disable_percpu_irq, cpu_pmu, 1); + free_percpu_irq(irq, &percpu_pmu); + } else { + for (i = 0; i < irqs; ++i) { + if (!cpumask_test_and_clear_cpu(i, &cpu_pmu->active_irqs)) + continue; + irq = platform_get_irq(pmu_device, i); + if (irq >= 0) + free_irq(irq, cpu_pmu); + } } } @@ -101,33 +130,44 @@ static int cpu_pmu_request_irq(struct arm_pmu *cpu_pmu, irq_handler_t handler) return -ENODEV; } - for (i = 0; i < irqs; ++i) { - err = 0; - irq = platform_get_irq(pmu_device, i); - if (irq < 0) - continue; - - /* - * If we have a single PMU interrupt that we can't shift, - * assume that we're running on a uniprocessor machine and - * continue. Otherwise, continue without this interrupt. - */ - if (irq_set_affinity(irq, cpumask_of(i)) && irqs > 1) { - pr_warning("unable to set irq affinity (irq=%d, cpu=%u)\n", - irq, i); - continue; - } - - err = request_irq(irq, handler, - IRQF_NOBALANCING | IRQF_NO_THREAD, "arm-pmu", - cpu_pmu); + irq = platform_get_irq(pmu_device, 0); + if (irq >= 0 && irq_is_percpu(irq)) { + err = request_percpu_irq(irq, handler, "arm-pmu", &percpu_pmu); if (err) { pr_err("unable to request IRQ%d for ARM PMU counters\n", irq); return err; } - - cpumask_set_cpu(i, &cpu_pmu->active_irqs); + on_each_cpu(cpu_pmu_enable_percpu_irq, cpu_pmu, 1); + } else { + for (i = 0; i < irqs; ++i) { + err = 0; + irq = platform_get_irq(pmu_device, i); + if (irq < 0) + continue; + + /* + * If we have a single PMU interrupt that we can't shift, + * assume that we're running on a uniprocessor machine and + * continue. Otherwise, continue without this interrupt. + */ + if (irq_set_affinity(irq, cpumask_of(i)) && irqs > 1) { + pr_warning("unable to set irq affinity (irq=%d, cpu=%u)\n", + irq, i); + continue; + } + + err = request_irq(irq, handler, + IRQF_NOBALANCING | IRQF_NO_THREAD, "arm-pmu", + cpu_pmu); + if (err) { + pr_err("unable to request IRQ%d for ARM PMU counters\n", + irq); + return err; + } + + cpumask_set_cpu(i, &cpu_pmu->active_irqs); + } } return 0; @@ -141,6 +181,7 @@ static void cpu_pmu_init(struct arm_pmu *cpu_pmu) events->events = per_cpu(hw_events, cpu); events->used_mask = per_cpu(used_mask, cpu); raw_spin_lock_init(&events->pmu_lock); + per_cpu(percpu_pmu, cpu) = cpu_pmu; } cpu_pmu->get_hw_events = cpu_pmu_get_cpu_events; @@ -181,6 +222,7 @@ static struct notifier_block cpu_pmu_hotplug_notifier = { */ static struct of_device_id cpu_pmu_of_device_ids[] = { {.compatible = "arm,cortex-a15-pmu", .data = armv7_a15_pmu_init}, + {.compatible = "arm,cortex-a12-pmu", .data = armv7_a12_pmu_init}, {.compatible = "arm,cortex-a9-pmu", .data = armv7_a9_pmu_init}, {.compatible = "arm,cortex-a8-pmu", .data = armv7_a8_pmu_init}, {.compatible = "arm,cortex-a7-pmu", .data = armv7_a7_pmu_init}, @@ -188,6 +230,7 @@ static struct of_device_id cpu_pmu_of_device_ids[] = { {.compatible = "arm,arm11mpcore-pmu", .data = armv6mpcore_pmu_init}, {.compatible = "arm,arm1176-pmu", .data = armv6pmu_init}, {.compatible = "arm,arm1136-pmu", .data = armv6pmu_init}, + {.compatible = "qcom,krait-pmu", .data = krait_pmu_init}, {}, }; @@ -225,15 +268,6 @@ static int probe_current_pmu(struct arm_pmu *pmu) case ARM_CPU_PART_CORTEX_A9: ret = armv7_a9_pmu_init(pmu); break; - case ARM_CPU_PART_CORTEX_A5: - ret = armv7_a5_pmu_init(pmu); - break; - case ARM_CPU_PART_CORTEX_A15: - ret = armv7_a15_pmu_init(pmu); - break; - case ARM_CPU_PART_CORTEX_A7: - ret = armv7_a7_pmu_init(pmu); - break; } /* Intel CPUs [xscale]. */ } else if (implementor == ARM_CPU_IMP_INTEL) { @@ -270,6 +304,9 @@ static int cpu_pmu_device_probe(struct platform_device *pdev) return -ENOMEM; } + cpu_pmu = pmu; + cpu_pmu->plat_device = pdev; + if (node && (of_id = of_match_node(cpu_pmu_of_device_ids, pdev->dev.of_node))) { init_fn = of_id->data; ret = init_fn(pmu); @@ -282,8 +319,6 @@ static int cpu_pmu_device_probe(struct platform_device *pdev) goto out_free; } - cpu_pmu = pmu; - cpu_pmu->plat_device = pdev; cpu_pmu_init(cpu_pmu); ret = armpmu_register(cpu_pmu, PERF_TYPE_RAW); diff --git a/arch/arm/kernel/perf_event_v7.c b/arch/arm/kernel/perf_event_v7.c index 039cffb053a7..f4ef3981ed02 100644 --- a/arch/arm/kernel/perf_event_v7.c +++ b/arch/arm/kernel/perf_event_v7.c @@ -18,6 +18,10 @@ #ifdef CONFIG_CPU_V7 +#include <asm/cp15.h> +#include <asm/vfp.h> +#include "../vfp/vfpinstr.h" + /* * Common ARMv7 event types * @@ -109,6 +113,33 @@ enum armv7_a15_perf_types { ARMV7_A15_PERFCTR_PC_WRITE_SPEC = 0x76, }; +/* ARMv7 Cortex-A12 specific event types */ +enum armv7_a12_perf_types { + ARMV7_A12_PERFCTR_L1_DCACHE_ACCESS_READ = 0x40, + ARMV7_A12_PERFCTR_L1_DCACHE_ACCESS_WRITE = 0x41, + + ARMV7_A12_PERFCTR_L2_CACHE_ACCESS_READ = 0x50, + ARMV7_A12_PERFCTR_L2_CACHE_ACCESS_WRITE = 0x51, + + ARMV7_A12_PERFCTR_PC_WRITE_SPEC = 0x76, + + ARMV7_A12_PERFCTR_PF_TLB_REFILL = 0xe7, +}; + +/* ARMv7 Krait specific event types */ +enum krait_perf_types { + KRAIT_PMRESR0_GROUP0 = 0xcc, + KRAIT_PMRESR1_GROUP0 = 0xd0, + KRAIT_PMRESR2_GROUP0 = 0xd4, + KRAIT_VPMRESR0_GROUP0 = 0xd8, + + KRAIT_PERFCTR_L1_ICACHE_ACCESS = 0x10011, + KRAIT_PERFCTR_L1_ICACHE_MISS = 0x10010, + + KRAIT_PERFCTR_L1_ITLB_ACCESS = 0x12222, + KRAIT_PERFCTR_L1_DTLB_ACCESS = 0x12210, +}; + /* * Cortex-A8 HW events mapping * @@ -732,6 +763,262 @@ static const unsigned armv7_a7_perf_cache_map[PERF_COUNT_HW_CACHE_MAX] }; /* + * Cortex-A12 HW events mapping + */ +static const unsigned armv7_a12_perf_map[PERF_COUNT_HW_MAX] = { + [PERF_COUNT_HW_CPU_CYCLES] = ARMV7_PERFCTR_CPU_CYCLES, + [PERF_COUNT_HW_INSTRUCTIONS] = ARMV7_PERFCTR_INSTR_EXECUTED, + [PERF_COUNT_HW_CACHE_REFERENCES] = ARMV7_PERFCTR_L1_DCACHE_ACCESS, + [PERF_COUNT_HW_CACHE_MISSES] = ARMV7_PERFCTR_L1_DCACHE_REFILL, + [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = ARMV7_A12_PERFCTR_PC_WRITE_SPEC, + [PERF_COUNT_HW_BRANCH_MISSES] = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED, + [PERF_COUNT_HW_BUS_CYCLES] = ARMV7_PERFCTR_BUS_CYCLES, + [PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = HW_OP_UNSUPPORTED, + [PERF_COUNT_HW_STALLED_CYCLES_BACKEND] = HW_OP_UNSUPPORTED, +}; + +static const unsigned armv7_a12_perf_cache_map[PERF_COUNT_HW_CACHE_MAX] + [PERF_COUNT_HW_CACHE_OP_MAX] + [PERF_COUNT_HW_CACHE_RESULT_MAX] = { + [C(L1D)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = ARMV7_A12_PERFCTR_L1_DCACHE_ACCESS_READ, + [C(RESULT_MISS)] = ARMV7_PERFCTR_L1_DCACHE_REFILL, + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = ARMV7_A12_PERFCTR_L1_DCACHE_ACCESS_WRITE, + [C(RESULT_MISS)] = ARMV7_PERFCTR_L1_DCACHE_REFILL, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + }, + [C(L1I)] = { + /* + * Not all performance counters differentiate between read + * and write accesses/misses so we're not always strictly + * correct, but it's the best we can do. Writes and reads get + * combined in these cases. + */ + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = ARMV7_PERFCTR_L1_ICACHE_ACCESS, + [C(RESULT_MISS)] = ARMV7_PERFCTR_L1_ICACHE_REFILL, + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + }, + [C(LL)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = ARMV7_A12_PERFCTR_L2_CACHE_ACCESS_READ, + [C(RESULT_MISS)] = ARMV7_PERFCTR_L2_CACHE_REFILL, + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = ARMV7_A12_PERFCTR_L2_CACHE_ACCESS_WRITE, + [C(RESULT_MISS)] = ARMV7_PERFCTR_L2_CACHE_REFILL, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + }, + [C(DTLB)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = ARMV7_PERFCTR_DTLB_REFILL, + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = ARMV7_PERFCTR_DTLB_REFILL, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = ARMV7_A12_PERFCTR_PF_TLB_REFILL, + }, + }, + [C(ITLB)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = ARMV7_PERFCTR_ITLB_REFILL, + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = ARMV7_PERFCTR_ITLB_REFILL, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + }, + [C(BPU)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = ARMV7_PERFCTR_PC_BRANCH_PRED, + [C(RESULT_MISS)] = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED, + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = ARMV7_PERFCTR_PC_BRANCH_PRED, + [C(RESULT_MISS)] = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + }, + [C(NODE)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + }, +}; + +/* + * Krait HW events mapping + */ +static const unsigned krait_perf_map[PERF_COUNT_HW_MAX] = { + [PERF_COUNT_HW_CPU_CYCLES] = ARMV7_PERFCTR_CPU_CYCLES, + [PERF_COUNT_HW_INSTRUCTIONS] = ARMV7_PERFCTR_INSTR_EXECUTED, + [PERF_COUNT_HW_CACHE_REFERENCES] = HW_OP_UNSUPPORTED, + [PERF_COUNT_HW_CACHE_MISSES] = HW_OP_UNSUPPORTED, + [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = ARMV7_PERFCTR_PC_WRITE, + [PERF_COUNT_HW_BRANCH_MISSES] = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED, + [PERF_COUNT_HW_BUS_CYCLES] = ARMV7_PERFCTR_CLOCK_CYCLES, +}; + +static const unsigned krait_perf_map_no_branch[PERF_COUNT_HW_MAX] = { + [PERF_COUNT_HW_CPU_CYCLES] = ARMV7_PERFCTR_CPU_CYCLES, + [PERF_COUNT_HW_INSTRUCTIONS] = ARMV7_PERFCTR_INSTR_EXECUTED, + [PERF_COUNT_HW_CACHE_REFERENCES] = HW_OP_UNSUPPORTED, + [PERF_COUNT_HW_CACHE_MISSES] = HW_OP_UNSUPPORTED, + [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = HW_OP_UNSUPPORTED, + [PERF_COUNT_HW_BRANCH_MISSES] = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED, + [PERF_COUNT_HW_BUS_CYCLES] = ARMV7_PERFCTR_CLOCK_CYCLES, +}; + +static const unsigned krait_perf_cache_map[PERF_COUNT_HW_CACHE_MAX] + [PERF_COUNT_HW_CACHE_OP_MAX] + [PERF_COUNT_HW_CACHE_RESULT_MAX] = { + [C(L1D)] = { + /* + * The performance counters don't differentiate between read + * and write accesses/misses so this isn't strictly correct, + * but it's the best we can do. Writes and reads get + * combined. + */ + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = ARMV7_PERFCTR_L1_DCACHE_ACCESS, + [C(RESULT_MISS)] = ARMV7_PERFCTR_L1_DCACHE_REFILL, + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = ARMV7_PERFCTR_L1_DCACHE_ACCESS, + [C(RESULT_MISS)] = ARMV7_PERFCTR_L1_DCACHE_REFILL, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + }, + [C(L1I)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = KRAIT_PERFCTR_L1_ICACHE_ACCESS, + [C(RESULT_MISS)] = KRAIT_PERFCTR_L1_ICACHE_MISS, + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + }, + [C(LL)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + }, + [C(DTLB)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = KRAIT_PERFCTR_L1_DTLB_ACCESS, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = KRAIT_PERFCTR_L1_DTLB_ACCESS, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + }, + [C(ITLB)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = KRAIT_PERFCTR_L1_ITLB_ACCESS, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = KRAIT_PERFCTR_L1_ITLB_ACCESS, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + }, + [C(BPU)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = ARMV7_PERFCTR_PC_BRANCH_PRED, + [C(RESULT_MISS)] = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED, + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = ARMV7_PERFCTR_PC_BRANCH_PRED, + [C(RESULT_MISS)] = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + }, + [C(NODE)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + }, +}; + +/* * Perf Events' indices */ #define ARMV7_IDX_CYCLE_COUNTER 0 @@ -1212,6 +1499,24 @@ static int armv7_a7_map_event(struct perf_event *event) &armv7_a7_perf_cache_map, 0xFF); } +static int armv7_a12_map_event(struct perf_event *event) +{ + return armpmu_map_event(event, &armv7_a12_perf_map, + &armv7_a12_perf_cache_map, 0xFF); +} + +static int krait_map_event(struct perf_event *event) +{ + return armpmu_map_event(event, &krait_perf_map, + &krait_perf_cache_map, 0xFFFFF); +} + +static int krait_map_event_no_branch(struct perf_event *event) +{ + return armpmu_map_event(event, &krait_perf_map_no_branch, + &krait_perf_cache_map, 0xFFFFF); +} + static void armv7pmu_init(struct arm_pmu *cpu_pmu) { cpu_pmu->handle_irq = armv7pmu_handle_irq; @@ -1283,6 +1588,408 @@ static int armv7_a7_pmu_init(struct arm_pmu *cpu_pmu) cpu_pmu->set_event_filter = armv7pmu_set_event_filter; return 0; } + +static int armv7_a12_pmu_init(struct arm_pmu *cpu_pmu) +{ + armv7pmu_init(cpu_pmu); + cpu_pmu->name = "ARMv7 Cortex-A12"; + cpu_pmu->map_event = armv7_a12_map_event; + cpu_pmu->num_events = armv7_read_num_pmnc_events(); + cpu_pmu->set_event_filter = armv7pmu_set_event_filter; + return 0; +} + +/* + * Krait Performance Monitor Region Event Selection Register (PMRESRn) + * + * 31 30 24 16 8 0 + * +--------------------------------+ + * PMRESR0 | EN | CC | CC | CC | CC | N = 1, R = 0 + * +--------------------------------+ + * PMRESR1 | EN | CC | CC | CC | CC | N = 1, R = 1 + * +--------------------------------+ + * PMRESR2 | EN | CC | CC | CC | CC | N = 1, R = 2 + * +--------------------------------+ + * VPMRESR0 | EN | CC | CC | CC | CC | N = 2, R = ? + * +--------------------------------+ + * EN | G=3 | G=2 | G=1 | G=0 + * + * Event Encoding: + * + * hwc->config_base = 0xNRCCG + * + * N = prefix, 1 for Krait CPU (PMRESRn), 2 for Venum VFP (VPMRESR) + * R = region register + * CC = class of events the group G is choosing from + * G = group or particular event + * + * Example: 0x12021 is a Krait CPU event in PMRESR2's group 1 with code 2 + * + * A region (R) corresponds to a piece of the CPU (execution unit, instruction + * unit, etc.) while the event code (CC) corresponds to a particular class of + * events (interrupts for example). An event code is broken down into + * groups (G) that can be mapped into the PMU (irq, fiqs, and irq+fiqs for + * example). + */ + +#define KRAIT_EVENT (1 << 16) +#define VENUM_EVENT (2 << 16) +#define KRAIT_EVENT_MASK (KRAIT_EVENT | VENUM_EVENT) +#define PMRESRn_EN BIT(31) + +static u32 krait_read_pmresrn(int n) +{ + u32 val; + + switch (n) { + case 0: + asm volatile("mrc p15, 1, %0, c9, c15, 0" : "=r" (val)); + break; + case 1: + asm volatile("mrc p15, 1, %0, c9, c15, 1" : "=r" (val)); + break; + case 2: + asm volatile("mrc p15, 1, %0, c9, c15, 2" : "=r" (val)); + break; + default: + BUG(); /* Should be validated in krait_pmu_get_event_idx() */ + } + + return val; +} + +static void krait_write_pmresrn(int n, u32 val) +{ + switch (n) { + case 0: + asm volatile("mcr p15, 1, %0, c9, c15, 0" : : "r" (val)); + break; + case 1: + asm volatile("mcr p15, 1, %0, c9, c15, 1" : : "r" (val)); + break; + case 2: + asm volatile("mcr p15, 1, %0, c9, c15, 2" : : "r" (val)); + break; + default: + BUG(); /* Should be validated in krait_pmu_get_event_idx() */ + } +} + +static u32 krait_read_vpmresr0(void) +{ + u32 val; + asm volatile("mrc p10, 7, %0, c11, c0, 0" : "=r" (val)); + return val; +} + +static void krait_write_vpmresr0(u32 val) +{ + asm volatile("mcr p10, 7, %0, c11, c0, 0" : : "r" (val)); +} + +static void krait_pre_vpmresr0(u32 *venum_orig_val, u32 *fp_orig_val) +{ + u32 venum_new_val; + u32 fp_new_val; + + BUG_ON(preemptible()); + /* CPACR Enable CP10 and CP11 access */ + *venum_orig_val = get_copro_access(); + venum_new_val = *venum_orig_val | CPACC_SVC(10) | CPACC_SVC(11); + set_copro_access(venum_new_val); + + /* Enable FPEXC */ + *fp_orig_val = fmrx(FPEXC); + fp_new_val = *fp_orig_val | FPEXC_EN; + fmxr(FPEXC, fp_new_val); +} + +static void krait_post_vpmresr0(u32 venum_orig_val, u32 fp_orig_val) +{ + BUG_ON(preemptible()); + /* Restore FPEXC */ + fmxr(FPEXC, fp_orig_val); + isb(); + /* Restore CPACR */ + set_copro_access(venum_orig_val); +} + +static u32 krait_get_pmresrn_event(unsigned int region) +{ + static const u32 pmresrn_table[] = { KRAIT_PMRESR0_GROUP0, + KRAIT_PMRESR1_GROUP0, + KRAIT_PMRESR2_GROUP0 }; + return pmresrn_table[region]; +} + +static void krait_evt_setup(int idx, u32 config_base) +{ + u32 val; + u32 mask; + u32 vval, fval; + unsigned int region; + unsigned int group; + unsigned int code; + unsigned int group_shift; + bool venum_event; + + venum_event = !!(config_base & VENUM_EVENT); + region = (config_base >> 12) & 0xf; + code = (config_base >> 4) & 0xff; + group = (config_base >> 0) & 0xf; + + group_shift = group * 8; + mask = 0xff << group_shift; + + /* Configure evtsel for the region and group */ + if (venum_event) + val = KRAIT_VPMRESR0_GROUP0; + else + val = krait_get_pmresrn_event(region); + val += group; + /* Mix in mode-exclusion bits */ + val |= config_base & (ARMV7_EXCLUDE_USER | ARMV7_EXCLUDE_PL1); + armv7_pmnc_write_evtsel(idx, val); + + asm volatile("mcr p15, 0, %0, c9, c15, 0" : : "r" (0)); + + if (venum_event) { + krait_pre_vpmresr0(&vval, &fval); + val = krait_read_vpmresr0(); + val &= ~mask; + val |= code << group_shift; + val |= PMRESRn_EN; + krait_write_vpmresr0(val); + krait_post_vpmresr0(vval, fval); + } else { + val = krait_read_pmresrn(region); + val &= ~mask; + val |= code << group_shift; + val |= PMRESRn_EN; + krait_write_pmresrn(region, val); + } +} + +static u32 krait_clear_pmresrn_group(u32 val, int group) +{ + u32 mask; + int group_shift; + + group_shift = group * 8; + mask = 0xff << group_shift; + val &= ~mask; + + /* Don't clear enable bit if entire region isn't disabled */ + if (val & ~PMRESRn_EN) + return val |= PMRESRn_EN; + + return 0; +} + +static void krait_clearpmu(u32 config_base) +{ + u32 val; + u32 vval, fval; + unsigned int region; + unsigned int group; + bool venum_event; + + venum_event = !!(config_base & VENUM_EVENT); + region = (config_base >> 12) & 0xf; + group = (config_base >> 0) & 0xf; + + if (venum_event) { + krait_pre_vpmresr0(&vval, &fval); + val = krait_read_vpmresr0(); + val = krait_clear_pmresrn_group(val, group); + krait_write_vpmresr0(val); + krait_post_vpmresr0(vval, fval); + } else { + val = krait_read_pmresrn(region); + val = krait_clear_pmresrn_group(val, group); + krait_write_pmresrn(region, val); + } +} + +static void krait_pmu_disable_event(struct perf_event *event) +{ + unsigned long flags; + struct hw_perf_event *hwc = &event->hw; + int idx = hwc->idx; + struct pmu_hw_events *events = cpu_pmu->get_hw_events(); + + /* Disable counter and interrupt */ + raw_spin_lock_irqsave(&events->pmu_lock, flags); + + /* Disable counter */ + armv7_pmnc_disable_counter(idx); + + /* + * Clear pmresr code (if destined for PMNx counters) + */ + if (hwc->config_base & KRAIT_EVENT_MASK) + krait_clearpmu(hwc->config_base); + + /* Disable interrupt for this counter */ + armv7_pmnc_disable_intens(idx); + + raw_spin_unlock_irqrestore(&events->pmu_lock, flags); +} + +static void krait_pmu_enable_event(struct perf_event *event) +{ + unsigned long flags; + struct hw_perf_event *hwc = &event->hw; + int idx = hwc->idx; + struct pmu_hw_events *events = cpu_pmu->get_hw_events(); + + /* + * Enable counter and interrupt, and set the counter to count + * the event that we're interested in. + */ + raw_spin_lock_irqsave(&events->pmu_lock, flags); + + /* Disable counter */ + armv7_pmnc_disable_counter(idx); + + /* + * Set event (if destined for PMNx counters) + * We set the event for the cycle counter because we + * have the ability to perform event filtering. + */ + if (hwc->config_base & KRAIT_EVENT_MASK) + krait_evt_setup(idx, hwc->config_base); + else + armv7_pmnc_write_evtsel(idx, hwc->config_base); + + /* Enable interrupt for this counter */ + armv7_pmnc_enable_intens(idx); + + /* Enable counter */ + armv7_pmnc_enable_counter(idx); + + raw_spin_unlock_irqrestore(&events->pmu_lock, flags); +} + +static void krait_pmu_reset(void *info) +{ + u32 vval, fval; + + armv7pmu_reset(info); + + /* Clear all pmresrs */ + krait_write_pmresrn(0, 0); + krait_write_pmresrn(1, 0); + krait_write_pmresrn(2, 0); + + krait_pre_vpmresr0(&vval, &fval); + krait_write_vpmresr0(0); + krait_post_vpmresr0(vval, fval); +} + +static int krait_event_to_bit(struct perf_event *event, unsigned int region, + unsigned int group) +{ + int bit; + struct hw_perf_event *hwc = &event->hw; + struct arm_pmu *cpu_pmu = to_arm_pmu(event->pmu); + + if (hwc->config_base & VENUM_EVENT) + bit = KRAIT_VPMRESR0_GROUP0; + else + bit = krait_get_pmresrn_event(region); + bit -= krait_get_pmresrn_event(0); + bit += group; + /* + * Lower bits are reserved for use by the counters (see + * armv7pmu_get_event_idx() for more info) + */ + bit += ARMV7_IDX_COUNTER_LAST(cpu_pmu) + 1; + + return bit; +} + +/* + * We check for column exclusion constraints here. + * Two events cant use the same group within a pmresr register. + */ +static int krait_pmu_get_event_idx(struct pmu_hw_events *cpuc, + struct perf_event *event) +{ + int idx; + int bit; + unsigned int prefix; + unsigned int region; + unsigned int code; + unsigned int group; + bool krait_event; + struct hw_perf_event *hwc = &event->hw; + + region = (hwc->config_base >> 12) & 0xf; + code = (hwc->config_base >> 4) & 0xff; + group = (hwc->config_base >> 0) & 0xf; + krait_event = !!(hwc->config_base & KRAIT_EVENT_MASK); + + if (krait_event) { + /* Ignore invalid events */ + if (group > 3 || region > 2) + return -EINVAL; + prefix = hwc->config_base & KRAIT_EVENT_MASK; + if (prefix != KRAIT_EVENT && prefix != VENUM_EVENT) + return -EINVAL; + if (prefix == VENUM_EVENT && (code & 0xe0)) + return -EINVAL; + + bit = krait_event_to_bit(event, region, group); + if (test_and_set_bit(bit, cpuc->used_mask)) + return -EAGAIN; + } + + idx = armv7pmu_get_event_idx(cpuc, event); + if (idx < 0 && krait_event) + clear_bit(bit, cpuc->used_mask); + + return idx; +} + +static void krait_pmu_clear_event_idx(struct pmu_hw_events *cpuc, + struct perf_event *event) +{ + int bit; + struct hw_perf_event *hwc = &event->hw; + unsigned int region; + unsigned int group; + bool krait_event; + + region = (hwc->config_base >> 12) & 0xf; + group = (hwc->config_base >> 0) & 0xf; + krait_event = !!(hwc->config_base & KRAIT_EVENT_MASK); + + if (krait_event) { + bit = krait_event_to_bit(event, region, group); + clear_bit(bit, cpuc->used_mask); + } +} + +static int krait_pmu_init(struct arm_pmu *cpu_pmu) +{ + armv7pmu_init(cpu_pmu); + cpu_pmu->name = "ARMv7 Krait"; + /* Some early versions of Krait don't support PC write events */ + if (of_property_read_bool(cpu_pmu->plat_device->dev.of_node, + "qcom,no-pc-write")) + cpu_pmu->map_event = krait_map_event_no_branch; + else + cpu_pmu->map_event = krait_map_event; + cpu_pmu->num_events = armv7_read_num_pmnc_events(); + cpu_pmu->set_event_filter = armv7pmu_set_event_filter; + cpu_pmu->reset = krait_pmu_reset; + cpu_pmu->enable = krait_pmu_enable_event; + cpu_pmu->disable = krait_pmu_disable_event; + cpu_pmu->get_event_idx = krait_pmu_get_event_idx; + cpu_pmu->clear_event_idx = krait_pmu_clear_event_idx; + return 0; +} #else static inline int armv7_a8_pmu_init(struct arm_pmu *cpu_pmu) { @@ -1308,4 +2015,14 @@ static inline int armv7_a7_pmu_init(struct arm_pmu *cpu_pmu) { return -ENODEV; } + +static inline int armv7_a12_pmu_init(struct arm_pmu *cpu_pmu) +{ + return -ENODEV; +} + +static inline int krait_pmu_init(struct arm_pmu *cpu_pmu) +{ + return -ENODEV; +} #endif /* CONFIG_CPU_V7 */ diff --git a/arch/arm/kernel/process.c b/arch/arm/kernel/process.c index 92f7b15dd221..204f7d273319 100644 --- a/arch/arm/kernel/process.c +++ b/arch/arm/kernel/process.c @@ -48,14 +48,14 @@ unsigned long __stack_chk_guard __read_mostly; EXPORT_SYMBOL(__stack_chk_guard); #endif -static const char *processor_modes[] = { +static const char *processor_modes[] __maybe_unused = { "USER_26", "FIQ_26" , "IRQ_26" , "SVC_26" , "UK4_26" , "UK5_26" , "UK6_26" , "UK7_26" , "UK8_26" , "UK9_26" , "UK10_26", "UK11_26", "UK12_26", "UK13_26", "UK14_26", "UK15_26", "USER_32", "FIQ_32" , "IRQ_32" , "SVC_32" , "UK4_32" , "UK5_32" , "UK6_32" , "ABT_32" , "UK8_32" , "UK9_32" , "UK10_32", "UND_32" , "UK12_32", "UK13_32", "UK14_32", "SYS_32" }; -static const char *isa_modes[] = { +static const char *isa_modes[] __maybe_unused = { "ARM" , "Thumb" , "Jazelle", "ThumbEE" }; @@ -276,12 +276,17 @@ void __show_regs(struct pt_regs *regs) buf[3] = flags & PSR_V_BIT ? 'V' : 'v'; buf[4] = '\0'; +#ifndef CONFIG_CPU_V7M printk("Flags: %s IRQs o%s FIQs o%s Mode %s ISA %s Segment %s\n", buf, interrupts_enabled(regs) ? "n" : "ff", fast_interrupts_enabled(regs) ? "n" : "ff", processor_modes[processor_mode(regs)], isa_modes[isa_mode(regs)], get_fs() == get_ds() ? "kernel" : "user"); +#else + printk("xPSR: %08lx\n", regs->ARM_cpsr); +#endif + #ifdef CONFIG_CPU_CP15 { unsigned int ctrl; |