summaryrefslogtreecommitdiffstats
path: root/kernel
diff options
context:
space:
mode:
Diffstat (limited to 'kernel')
-rw-r--r--kernel/dma/Kconfig12
-rw-r--r--kernel/dma/coherent.c16
-rw-r--r--kernel/dma/contiguous.c9
-rw-r--r--kernel/dma/debug.c39
-rw-r--r--kernel/dma/direct.c177
-rw-r--r--kernel/dma/mapping.c45
-rw-r--r--kernel/dma/remap.c55
-rw-r--r--kernel/dma/swiotlb.c2
-rw-r--r--kernel/exit.c2
-rw-r--r--kernel/fork.c1
-rw-r--r--kernel/kexec_file.c4
-rw-r--r--kernel/livepatch/patch.c3
-rw-r--r--kernel/module.c6
-rw-r--r--kernel/power/snapshot.c9
-rw-r--r--kernel/sched/idle.c24
-rw-r--r--kernel/sched/wait.c37
-rw-r--r--kernel/trace/Kconfig27
-rw-r--r--kernel/trace/fgraph.c11
-rw-r--r--kernel/trace/ftrace.c613
-rw-r--r--kernel/trace/preemptirq_delay_test.c144
-rw-r--r--kernel/trace/ring_buffer_benchmark.c4
-rw-r--r--kernel/trace/trace.c214
-rw-r--r--kernel/trace/trace.h25
-rw-r--r--kernel/trace/trace_branch.c8
-rw-r--r--kernel/trace/trace_events.c29
-rw-r--r--kernel/trace/trace_events_hist.c2
-rw-r--r--kernel/trace/trace_export.c4
-rw-r--r--kernel/trace/trace_hwlat.c15
-rw-r--r--kernel/trace/trace_kprobe.c27
-rw-r--r--kernel/trace/trace_output.c15
-rw-r--r--kernel/trace/trace_seq.c30
-rw-r--r--kernel/trace/trace_stat.c6
-rw-r--r--kernel/trace/trace_stat.h2
-rw-r--r--kernel/trace/trace_syscalls.c32
34 files changed, 1307 insertions, 342 deletions
diff --git a/kernel/dma/Kconfig b/kernel/dma/Kconfig
index 73c5c2b8e824..4c103a24e380 100644
--- a/kernel/dma/Kconfig
+++ b/kernel/dma/Kconfig
@@ -51,9 +51,6 @@ config ARCH_HAS_SYNC_DMA_FOR_CPU_ALL
config ARCH_HAS_DMA_PREP_COHERENT
bool
-config ARCH_HAS_DMA_COHERENT_TO_PFN
- bool
-
config ARCH_HAS_FORCE_DMA_UNENCRYPTED
bool
@@ -68,9 +65,18 @@ config SWIOTLB
bool
select NEED_DMA_MAP_STATE
+#
+# Should be selected if we can mmap non-coherent mappings to userspace.
+# The only thing that is really required is a way to set an uncached bit
+# in the pagetables
+#
+config DMA_NONCOHERENT_MMAP
+ bool
+
config DMA_REMAP
depends on MMU
select GENERIC_ALLOCATOR
+ select DMA_NONCOHERENT_MMAP
bool
config DMA_DIRECT_REMAP
diff --git a/kernel/dma/coherent.c b/kernel/dma/coherent.c
index 545e3869b0e3..551b0eb7028a 100644
--- a/kernel/dma/coherent.c
+++ b/kernel/dma/coherent.c
@@ -123,8 +123,9 @@ int dma_declare_coherent_memory(struct device *dev, phys_addr_t phys_addr,
return ret;
}
-static void *__dma_alloc_from_coherent(struct dma_coherent_mem *mem,
- ssize_t size, dma_addr_t *dma_handle)
+static void *__dma_alloc_from_coherent(struct device *dev,
+ struct dma_coherent_mem *mem,
+ ssize_t size, dma_addr_t *dma_handle)
{
int order = get_order(size);
unsigned long flags;
@@ -143,7 +144,7 @@ static void *__dma_alloc_from_coherent(struct dma_coherent_mem *mem,
/*
* Memory was found in the coherent area.
*/
- *dma_handle = mem->device_base + (pageno << PAGE_SHIFT);
+ *dma_handle = dma_get_device_base(dev, mem) + (pageno << PAGE_SHIFT);
ret = mem->virt_base + (pageno << PAGE_SHIFT);
spin_unlock_irqrestore(&mem->spinlock, flags);
memset(ret, 0, size);
@@ -175,17 +176,18 @@ int dma_alloc_from_dev_coherent(struct device *dev, ssize_t size,
if (!mem)
return 0;
- *ret = __dma_alloc_from_coherent(mem, size, dma_handle);
+ *ret = __dma_alloc_from_coherent(dev, mem, size, dma_handle);
return 1;
}
-void *dma_alloc_from_global_coherent(ssize_t size, dma_addr_t *dma_handle)
+void *dma_alloc_from_global_coherent(struct device *dev, ssize_t size,
+ dma_addr_t *dma_handle)
{
if (!dma_coherent_default_memory)
return NULL;
- return __dma_alloc_from_coherent(dma_coherent_default_memory, size,
- dma_handle);
+ return __dma_alloc_from_coherent(dev, dma_coherent_default_memory, size,
+ dma_handle);
}
static int __dma_release_from_coherent(struct dma_coherent_mem *mem,
diff --git a/kernel/dma/contiguous.c b/kernel/dma/contiguous.c
index 69cfb4345388..daa4e6eefdde 100644
--- a/kernel/dma/contiguous.c
+++ b/kernel/dma/contiguous.c
@@ -42,10 +42,11 @@ struct cma *dma_contiguous_default_area;
* Users, who want to set the size of global CMA area for their system
* should use cma= kernel parameter.
*/
-static const phys_addr_t size_bytes = (phys_addr_t)CMA_SIZE_MBYTES * SZ_1M;
-static phys_addr_t size_cmdline = -1;
-static phys_addr_t base_cmdline;
-static phys_addr_t limit_cmdline;
+static const phys_addr_t size_bytes __initconst =
+ (phys_addr_t)CMA_SIZE_MBYTES * SZ_1M;
+static phys_addr_t size_cmdline __initdata = -1;
+static phys_addr_t base_cmdline __initdata;
+static phys_addr_t limit_cmdline __initdata;
static int __init early_cma(char *p)
{
diff --git a/kernel/dma/debug.c b/kernel/dma/debug.c
index a26170469543..2031ed1ad7fa 100644
--- a/kernel/dma/debug.c
+++ b/kernel/dma/debug.c
@@ -27,7 +27,7 @@
#include <asm/sections.h>
-#define HASH_SIZE 1024ULL
+#define HASH_SIZE 16384ULL
#define HASH_FN_SHIFT 13
#define HASH_FN_MASK (HASH_SIZE - 1)
@@ -54,40 +54,40 @@ enum map_err_types {
* struct dma_debug_entry - track a dma_map* or dma_alloc_coherent mapping
* @list: node on pre-allocated free_entries list
* @dev: 'dev' argument to dma_map_{page|single|sg} or dma_alloc_coherent
- * @type: single, page, sg, coherent
- * @pfn: page frame of the start address
- * @offset: offset of mapping relative to pfn
* @size: length of the mapping
+ * @type: single, page, sg, coherent
* @direction: enum dma_data_direction
* @sg_call_ents: 'nents' from dma_map_sg
* @sg_mapped_ents: 'mapped_ents' from dma_map_sg
+ * @pfn: page frame of the start address
+ * @offset: offset of mapping relative to pfn
* @map_err_type: track whether dma_mapping_error() was checked
* @stacktrace: support backtraces when a violation is detected
*/
struct dma_debug_entry {
struct list_head list;
struct device *dev;
- int type;
- unsigned long pfn;
- size_t offset;
u64 dev_addr;
u64 size;
+ int type;
int direction;
int sg_call_ents;
int sg_mapped_ents;
+ unsigned long pfn;
+ size_t offset;
enum map_err_types map_err_type;
#ifdef CONFIG_STACKTRACE
unsigned int stack_len;
unsigned long stack_entries[DMA_DEBUG_STACKTRACE_ENTRIES];
#endif
-};
+} ____cacheline_aligned_in_smp;
typedef bool (*match_fn)(struct dma_debug_entry *, struct dma_debug_entry *);
struct hash_bucket {
struct list_head list;
spinlock_t lock;
-} ____cacheline_aligned_in_smp;
+};
/* Hash list to save the allocated dma addresses */
static struct hash_bucket dma_entry_hash[HASH_SIZE];
@@ -255,12 +255,10 @@ static struct hash_bucket *get_hash_bucket(struct dma_debug_entry *entry,
* Give up exclusive access to the hash bucket
*/
static void put_hash_bucket(struct hash_bucket *bucket,
- unsigned long *flags)
+ unsigned long flags)
__releases(&bucket->lock)
{
- unsigned long __flags = *flags;
-
- spin_unlock_irqrestore(&bucket->lock, __flags);
+ spin_unlock_irqrestore(&bucket->lock, flags);
}
static bool exact_match(struct dma_debug_entry *a, struct dma_debug_entry *b)
@@ -359,7 +357,7 @@ static struct dma_debug_entry *bucket_find_contain(struct hash_bucket **bucket,
/*
* Nothing found, go back a hash bucket
*/
- put_hash_bucket(*bucket, flags);
+ put_hash_bucket(*bucket, *flags);
range += (1 << HASH_FN_SHIFT);
index.dev_addr -= (1 << HASH_FN_SHIFT);
*bucket = get_hash_bucket(&index, flags);
@@ -420,6 +418,7 @@ void debug_dma_dump_mappings(struct device *dev)
}
spin_unlock_irqrestore(&bucket->lock, flags);
+ cond_resched();
}
}
@@ -608,7 +607,7 @@ static void add_dma_entry(struct dma_debug_entry *entry)
bucket = get_hash_bucket(entry, &flags);
hash_bucket_add(bucket, entry);
- put_hash_bucket(bucket, &flags);
+ put_hash_bucket(bucket, flags);
rc = active_cacheline_insert(entry);
if (rc == -ENOMEM) {
@@ -1001,7 +1000,7 @@ static void check_unmap(struct dma_debug_entry *ref)
if (!entry) {
/* must drop lock before calling dma_mapping_error */
- put_hash_bucket(bucket, &flags);
+ put_hash_bucket(bucket, flags);
if (dma_mapping_error(ref->dev, ref->dev_addr)) {
err_printk(ref->dev, NULL,
@@ -1083,7 +1082,7 @@ static void check_unmap(struct dma_debug_entry *ref)
hash_bucket_del(entry);
dma_entry_free(entry);
- put_hash_bucket(bucket, &flags);
+ put_hash_bucket(bucket, flags);
}
static void check_for_stack(struct device *dev,
@@ -1203,7 +1202,7 @@ static void check_sync(struct device *dev,
}
out:
- put_hash_bucket(bucket, &flags);
+ put_hash_bucket(bucket, flags);
}
static void check_sg_segment(struct device *dev, struct scatterlist *sg)
@@ -1318,7 +1317,7 @@ void debug_dma_mapping_error(struct device *dev, dma_addr_t dma_addr)
}
}
- put_hash_bucket(bucket, &flags);
+ put_hash_bucket(bucket, flags);
}
EXPORT_SYMBOL(debug_dma_mapping_error);
@@ -1391,7 +1390,7 @@ static int get_nr_mapped_entries(struct device *dev,
if (entry)
mapped_ents = entry->sg_mapped_ents;
- put_hash_bucket(bucket, &flags);
+ put_hash_bucket(bucket, flags);
return mapped_ents;
}
diff --git a/kernel/dma/direct.c b/kernel/dma/direct.c
index 0b67c04e531b..6af7ae83c4ad 100644
--- a/kernel/dma/direct.c
+++ b/kernel/dma/direct.c
@@ -12,6 +12,7 @@
#include <linux/dma-contiguous.h>
#include <linux/dma-noncoherent.h>
#include <linux/pfn.h>
+#include <linux/vmalloc.h>
#include <linux/set_memory.h>
#include <linux/swiotlb.h>
@@ -26,10 +27,10 @@ static void report_addr(struct device *dev, dma_addr_t dma_addr, size_t size)
{
if (!dev->dma_mask) {
dev_err_once(dev, "DMA map on device without dma_mask\n");
- } else if (*dev->dma_mask >= DMA_BIT_MASK(32) || dev->bus_dma_mask) {
+ } else if (*dev->dma_mask >= DMA_BIT_MASK(32) || dev->bus_dma_limit) {
dev_err_once(dev,
- "overflow %pad+%zu of DMA mask %llx bus mask %llx\n",
- &dma_addr, size, *dev->dma_mask, dev->bus_dma_mask);
+ "overflow %pad+%zu of DMA mask %llx bus limit %llx\n",
+ &dma_addr, size, *dev->dma_mask, dev->bus_dma_limit);
}
WARN_ON_ONCE(1);
}
@@ -42,6 +43,12 @@ static inline dma_addr_t phys_to_dma_direct(struct device *dev,
return phys_to_dma(dev, phys);
}
+static inline struct page *dma_direct_to_page(struct device *dev,
+ dma_addr_t dma_addr)
+{
+ return pfn_to_page(PHYS_PFN(dma_to_phys(dev, dma_addr)));
+}
+
u64 dma_direct_get_required_mask(struct device *dev)
{
u64 max_dma = phys_to_dma_direct(dev, (max_pfn - 1) << PAGE_SHIFT);
@@ -50,15 +57,14 @@ u64 dma_direct_get_required_mask(struct device *dev)
}
static gfp_t __dma_direct_optimal_gfp_mask(struct device *dev, u64 dma_mask,
- u64 *phys_mask)
+ u64 *phys_limit)
{
- if (dev->bus_dma_mask && dev->bus_dma_mask < dma_mask)
- dma_mask = dev->bus_dma_mask;
+ u64 dma_limit = min_not_zero(dma_mask, dev->bus_dma_limit);
if (force_dma_unencrypted(dev))
- *phys_mask = __dma_to_phys(dev, dma_mask);
+ *phys_limit = __dma_to_phys(dev, dma_limit);
else
- *phys_mask = dma_to_phys(dev, dma_mask);
+ *phys_limit = dma_to_phys(dev, dma_limit);
/*
* Optimistically try the zone that the physical address mask falls
@@ -68,9 +74,9 @@ static gfp_t __dma_direct_optimal_gfp_mask(struct device *dev, u64 dma_mask,
* Note that GFP_DMA32 and GFP_DMA are no ops without the corresponding
* zones.
*/
- if (*phys_mask <= DMA_BIT_MASK(zone_dma_bits))
+ if (*phys_limit <= DMA_BIT_MASK(zone_dma_bits))
return GFP_DMA;
- if (*phys_mask <= DMA_BIT_MASK(32))
+ if (*phys_limit <= DMA_BIT_MASK(32))
return GFP_DMA32;
return 0;
}
@@ -78,16 +84,16 @@ static gfp_t __dma_direct_optimal_gfp_mask(struct device *dev, u64 dma_mask,
static bool dma_coherent_ok(struct device *dev, phys_addr_t phys, size_t size)
{
return phys_to_dma_direct(dev, phys) + size - 1 <=
- min_not_zero(dev->coherent_dma_mask, dev->bus_dma_mask);
+ min_not_zero(dev->coherent_dma_mask, dev->bus_dma_limit);
}
struct page *__dma_direct_alloc_pages(struct device *dev, size_t size,
- dma_addr_t *dma_handle, gfp_t gfp, unsigned long attrs)
+ gfp_t gfp, unsigned long attrs)
{
size_t alloc_size = PAGE_ALIGN(size);
int node = dev_to_node(dev);
struct page *page = NULL;
- u64 phys_mask;
+ u64 phys_limit;
if (attrs & DMA_ATTR_NO_WARN)
gfp |= __GFP_NOWARN;
@@ -95,7 +101,7 @@ struct page *__dma_direct_alloc_pages(struct device *dev, size_t size,
/* we always manually zero the memory once we are done: */
gfp &= ~__GFP_ZERO;
gfp |= __dma_direct_optimal_gfp_mask(dev, dev->coherent_dma_mask,
- &phys_mask);
+ &phys_limit);
page = dma_alloc_contiguous(dev, alloc_size, gfp);
if (page && !dma_coherent_ok(dev, page_to_phys(page), size)) {
dma_free_contiguous(dev, page, alloc_size);
@@ -109,7 +115,7 @@ again:
page = NULL;
if (IS_ENABLED(CONFIG_ZONE_DMA32) &&
- phys_mask < DMA_BIT_MASK(64) &&
+ phys_limit < DMA_BIT_MASK(64) &&
!(gfp & (GFP_DMA32 | GFP_DMA))) {
gfp |= GFP_DMA32;
goto again;
@@ -130,7 +136,16 @@ void *dma_direct_alloc_pages(struct device *dev, size_t size,
struct page *page;
void *ret;
- page = __dma_direct_alloc_pages(dev, size, dma_handle, gfp, attrs);
+ if (IS_ENABLED(CONFIG_DMA_DIRECT_REMAP) &&
+ dma_alloc_need_uncached(dev, attrs) &&
+ !gfpflags_allow_blocking(gfp)) {
+ ret = dma_alloc_from_pool(PAGE_ALIGN(size), &page, gfp);
+ if (!ret)
+ return NULL;
+ goto done;
+ }
+
+ page = __dma_direct_alloc_pages(dev, size, gfp, attrs);
if (!page)
return NULL;
@@ -139,9 +154,28 @@ void *dma_direct_alloc_pages(struct device *dev, size_t size,
/* remove any dirty cache lines on the kernel alias */
if (!PageHighMem(page))
arch_dma_prep_coherent(page, size);
- *dma_handle = phys_to_dma(dev, page_to_phys(page));
/* return the page pointer as the opaque cookie */
- return page;
+ ret = page;
+ goto done;
+ }
+
+ if ((IS_ENABLED(CONFIG_DMA_DIRECT_REMAP) &&
+ dma_alloc_need_uncached(dev, attrs)) ||
+ (IS_ENABLED(CONFIG_DMA_REMAP) && PageHighMem(page))) {
+ /* remove any dirty cache lines on the kernel alias */
+ arch_dma_prep_coherent(page, PAGE_ALIGN(size));
+
+ /* create a coherent mapping */
+ ret = dma_common_contiguous_remap(page, PAGE_ALIGN(size),
+ dma_pgprot(dev, PAGE_KERNEL, attrs),
+ __builtin_return_address(0));
+ if (!ret) {
+ dma_free_contiguous(dev, page, size);
+ return ret;
+ }
+
+ memset(ret, 0, size);
+ goto done;
}
if (PageHighMem(page)) {
@@ -152,17 +186,14 @@ void *dma_direct_alloc_pages(struct device *dev, size_t size,
* so log an error and fail.
*/
dev_info(dev, "Rejecting highmem page from CMA.\n");
- __dma_direct_free_pages(dev, size, page);
+ dma_free_contiguous(dev, page, size);
return NULL;
}
ret = page_address(page);
- if (force_dma_unencrypted(dev)) {
+ if (force_dma_unencrypted(dev))
set_memory_decrypted((unsigned long)ret, 1 << get_order(size));
- *dma_handle = __phys_to_dma(dev, page_to_phys(page));
- } else {
- *dma_handle = phys_to_dma(dev, page_to_phys(page));
- }
+
memset(ret, 0, size);
if (IS_ENABLED(CONFIG_ARCH_HAS_UNCACHED_SEGMENT) &&
@@ -170,15 +201,14 @@ void *dma_direct_alloc_pages(struct device *dev, size_t size,
arch_dma_prep_coherent(page, size);
ret = uncached_kernel_address(ret);
}
-
+done:
+ if (force_dma_unencrypted(dev))
+ *dma_handle = __phys_to_dma(dev, page_to_phys(page));
+ else
+ *dma_handle = phys_to_dma(dev, page_to_phys(page));
return ret;
}
-void __dma_direct_free_pages(struct device *dev, size_t size, struct page *page)
-{
- dma_free_contiguous(dev, page, size);
-}
-
void dma_direct_free_pages(struct device *dev, size_t size, void *cpu_addr,
dma_addr_t dma_addr, unsigned long attrs)
{
@@ -187,23 +217,28 @@ void dma_direct_free_pages(struct device *dev, size_t size, void *cpu_addr,
if ((attrs & DMA_ATTR_NO_KERNEL_MAPPING) &&
!force_dma_unencrypted(dev)) {
/* cpu_addr is a struct page cookie, not a kernel address */
- __dma_direct_free_pages(dev, size, cpu_addr);
+ dma_free_contiguous(dev, cpu_addr, size);
return;
}
+ if (IS_ENABLED(CONFIG_DMA_DIRECT_REMAP) &&
+ dma_free_from_pool(cpu_addr, PAGE_ALIGN(size)))
+ return;
+
if (force_dma_unencrypted(dev))
set_memory_encrypted((unsigned long)cpu_addr, 1 << page_order);
- if (IS_ENABLED(CONFIG_ARCH_HAS_UNCACHED_SEGMENT) &&
- dma_alloc_need_uncached(dev, attrs))
- cpu_addr = cached_kernel_address(cpu_addr);
- __dma_direct_free_pages(dev, size, virt_to_page(cpu_addr));
+ if (IS_ENABLED(CONFIG_DMA_REMAP) && is_vmalloc_addr(cpu_addr))
+ vunmap(cpu_addr);
+
+ dma_free_contiguous(dev, dma_direct_to_page(dev, dma_addr), size);
}
void *dma_direct_alloc(struct device *dev, size_t size,
dma_addr_t *dma_handle, gfp_t gfp, unsigned long attrs)
{
if (!IS_ENABLED(CONFIG_ARCH_HAS_UNCACHED_SEGMENT) &&
+ !IS_ENABLED(CONFIG_DMA_DIRECT_REMAP) &&
dma_alloc_need_uncached(dev, attrs))
return arch_dma_alloc(dev, size, dma_handle, gfp, attrs);
return dma_direct_alloc_pages(dev, size, dma_handle, gfp, attrs);
@@ -213,6 +248,7 @@ void dma_direct_free(struct device *dev, size_t size,
void *cpu_addr, dma_addr_t dma_addr, unsigned long attrs)
{
if (!IS_ENABLED(CONFIG_ARCH_HAS_UNCACHED_SEGMENT) &&
+ !IS_ENABLED(CONFIG_DMA_DIRECT_REMAP) &&
dma_alloc_need_uncached(dev, attrs))
arch_dma_free(dev, size, cpu_addr, dma_addr, attrs);
else
@@ -230,7 +266,7 @@ void dma_direct_sync_single_for_device(struct device *dev,
swiotlb_tbl_sync_single(dev, paddr, size, dir, SYNC_FOR_DEVICE);
if (!dev_is_dma_coherent(dev))
- arch_sync_dma_for_device(dev, paddr, size, dir);
+ arch_sync_dma_for_device(paddr, size, dir);
}
EXPORT_SYMBOL(dma_direct_sync_single_for_device);
@@ -248,7 +284,7 @@ void dma_direct_sync_sg_for_device(struct device *dev,
dir, SYNC_FOR_DEVICE);
if (!dev_is_dma_coherent(dev))
- arch_sync_dma_for_device(dev, paddr, sg->length,
+ arch_sync_dma_for_device(paddr, sg->length,
dir);
}
}
@@ -264,8 +300,8 @@ void dma_direct_sync_single_for_cpu(struct device *dev,
phys_addr_t paddr = dma_to_phys(dev, addr);
if (!dev_is_dma_coherent(dev)) {
- arch_sync_dma_for_cpu(dev, paddr, size, dir);
- arch_sync_dma_for_cpu_all(dev);
+ arch_sync_dma_for_cpu(paddr, size, dir);
+ arch_sync_dma_for_cpu_all();
}
if (unlikely(is_swiotlb_buffer(paddr)))
@@ -283,7 +319,7 @@ void dma_direct_sync_sg_for_cpu(struct device *dev,
phys_addr_t paddr = dma_to_phys(dev, sg_dma_address(sg));
if (!dev_is_dma_coherent(dev))
- arch_sync_dma_for_cpu(dev, paddr, sg->length, dir);
+ arch_sync_dma_for_cpu(paddr, sg->length, dir);
if (unlikely(is_swiotlb_buffer(paddr)))
swiotlb_tbl_sync_single(dev, paddr, sg->length, dir,
@@ -291,7 +327,7 @@ void dma_direct_sync_sg_for_cpu(struct device *dev,
}
if (!dev_is_dma_coherent(dev))
- arch_sync_dma_for_cpu_all(dev);
+ arch_sync_dma_for_cpu_all();
}
EXPORT_SYMBOL(dma_direct_sync_sg_for_cpu);
@@ -325,7 +361,7 @@ static inline bool dma_direct_possible(struct device *dev, dma_addr_t dma_addr,
size_t size)
{
return swiotlb_force != SWIOTLB_FORCE &&
- dma_capable(dev, dma_addr, size);
+ dma_capable(dev, dma_addr, size, true);
}
dma_addr_t dma_direct_map_page(struct device *dev, struct page *page,
@@ -342,7 +378,7 @@ dma_addr_t dma_direct_map_page(struct device *dev, struct page *page,
}
if (!dev_is_dma_coherent(dev) && !(attrs & DMA_ATTR_SKIP_CPU_SYNC))
- arch_sync_dma_for_device(dev, phys, size, dir);
+ arch_sync_dma_for_device(phys, size, dir);
return dma_addr;
}
EXPORT_SYMBOL(dma_direct_map_page);
@@ -374,7 +410,7 @@ dma_addr_t dma_direct_map_resource(struct device *dev, phys_addr_t paddr,
{
dma_addr_t dma_addr = paddr;
- if (unlikely(!dma_direct_possible(dev, dma_addr, size))) {
+ if (unlikely(!dma_capable(dev, dma_addr, size, false))) {
report_addr(dev, dma_addr, size);
return DMA_MAPPING_ERROR;
}
@@ -383,6 +419,59 @@ dma_addr_t dma_direct_map_resource(struct device *dev, phys_addr_t paddr,
}
EXPORT_SYMBOL(dma_direct_map_resource);
+int dma_direct_get_sgtable(struct device *dev, struct sg_table *sgt,
+ void *cpu_addr, dma_addr_t dma_addr, size_t size,
+ unsigned long attrs)
+{
+ struct page *page = dma_direct_to_page(dev, dma_addr);
+ int ret;
+
+ ret = sg_alloc_table(sgt, 1, GFP_KERNEL);
+ if (!ret)
+ sg_set_page(sgt->sgl, page, PAGE_ALIGN(size), 0);
+ return ret;
+}
+
+#ifdef CONFIG_MMU
+bool dma_direct_can_mmap(struct device *dev)
+{
+ return dev_is_dma_coherent(dev) ||
+ IS_ENABLED(CONFIG_DMA_NONCOHERENT_MMAP);
+}
+
+int dma_direct_mmap(struct device *dev, struct vm_area_struct *vma,
+ void *cpu_addr, dma_addr_t dma_addr, size_t size,
+ unsigned long attrs)
+{
+ unsigned long user_count = vma_pages(vma);
+ unsigned long count = PAGE_ALIGN(size) >> PAGE_SHIFT;
+ unsigned long pfn = PHYS_PFN(dma_to_phys(dev, dma_addr));
+ int ret = -ENXIO;
+
+ vma->vm_page_prot = dma_pgprot(dev, vma->vm_page_prot, attrs);
+
+ if (dma_mmap_from_dev_coherent(dev, vma, cpu_addr, size, &ret))
+ return ret;
+
+ if (vma->vm_pgoff >= count || user_count > count - vma->vm_pgoff)
+ return -ENXIO;
+ return remap_pfn_range(vma, vma->vm_start, pfn + vma->vm_pgoff,
+ user_count << PAGE_SHIFT, vma->vm_page_prot);
+}
+#else /* CONFIG_MMU */
+bool dma_direct_can_mmap(struct device *dev)
+{
+ return false;
+}
+
+int dma_direct_mmap(struct device *dev, struct vm_area_struct *vma,
+ void *cpu_addr, dma_addr_t dma_addr, size_t size,
+ unsigned long attrs)
+{
+ return -ENXIO;
+}
+#endif /* CONFIG_MMU */
+
/*
* Because 32-bit DMA masks are so common we expect every architecture to be
* able to satisfy them - either by not supporting more physical memory, or by
diff --git a/kernel/dma/mapping.c b/kernel/dma/mapping.c
index d9334f31a5af..12ff766ec1fa 100644
--- a/kernel/dma/mapping.c
+++ b/kernel/dma/mapping.c
@@ -112,24 +112,9 @@ int dma_common_get_sgtable(struct device *dev, struct sg_table *sgt,
void *cpu_addr, dma_addr_t dma_addr, size_t size,
unsigned long attrs)
{
- struct page *page;
+ struct page *page = virt_to_page(cpu_addr);
int ret;
- if (!dev_is_dma_coherent(dev)) {
- unsigned long pfn;
-
- if (!IS_ENABLED(CONFIG_ARCH_HAS_DMA_COHERENT_TO_PFN))
- return -ENXIO;
-
- /* If the PFN is not valid, we do not have a struct page */
- pfn = arch_dma_coherent_to_pfn(dev, cpu_addr, dma_addr);
- if (!pfn_valid(pfn))
- return -ENXIO;
- page = pfn_to_page(pfn);
- } else {
- page = virt_to_page(cpu_addr);
- }
-
ret = sg_alloc_table(sgt, 1, GFP_KERNEL);
if (!ret)
sg_set_page(sgt->sgl, page, PAGE_ALIGN(size), 0);
@@ -154,7 +139,7 @@ int dma_get_sgtable_attrs(struct device *dev, struct sg_table *sgt,
const struct dma_map_ops *ops = get_dma_ops(dev);
if (dma_is_direct(ops))
- return dma_common_get_sgtable(dev, sgt, cpu_addr, dma_addr,
+ return dma_direct_get_sgtable(dev, sgt, cpu_addr, dma_addr,
size, attrs);
if (!ops->get_sgtable)
return -ENXIO;
@@ -192,7 +177,6 @@ int dma_common_mmap(struct device *dev, struct vm_area_struct *vma,
unsigned long user_count = vma_pages(vma);
unsigned long count = PAGE_ALIGN(size) >> PAGE_SHIFT;
unsigned long off = vma->vm_pgoff;
- unsigned long pfn;
int ret = -ENXIO;
vma->vm_page_prot = dma_pgprot(dev, vma->vm_page_prot, attrs);
@@ -203,19 +187,8 @@ int dma_common_mmap(struct device *dev, struct vm_area_struct *vma,
if (off >= count || user_count > count - off)
return -ENXIO;
- if (!dev_is_dma_coherent(dev)) {
- if (!IS_ENABLED(CONFIG_ARCH_HAS_DMA_COHERENT_TO_PFN))
- return -ENXIO;
-
- /* If the PFN is not valid, we do not have a struct page */
- pfn = arch_dma_coherent_to_pfn(dev, cpu_addr, dma_addr);
- if (!pfn_valid(pfn))
- return -ENXIO;
- } else {
- pfn = page_to_pfn(virt_to_page(cpu_addr));
- }
-
- return remap_pfn_range(vma, vma->vm_start, pfn + vma->vm_pgoff,
+ return remap_pfn_range(vma, vma->vm_start,
+ page_to_pfn(virt_to_page(cpu_addr)) + vma->vm_pgoff,
user_count << PAGE_SHIFT, vma->vm_page_prot);
#else
return -ENXIO;
@@ -233,12 +206,8 @@ bool dma_can_mmap(struct device *dev)
{
const struct dma_map_ops *ops = get_dma_ops(dev);
- if (dma_is_direct(ops)) {
- return IS_ENABLED(CONFIG_MMU) &&
- (dev_is_dma_coherent(dev) ||
- IS_ENABLED(CONFIG_ARCH_HAS_DMA_COHERENT_TO_PFN));
- }
-
+ if (dma_is_direct(ops))
+ return dma_direct_can_mmap(dev);
return ops->mmap != NULL;
}
EXPORT_SYMBOL_GPL(dma_can_mmap);
@@ -263,7 +232,7 @@ int dma_mmap_attrs(struct device *dev, struct vm_area_struct *vma,
const struct dma_map_ops *ops = get_dma_ops(dev);
if (dma_is_direct(ops))
- return dma_common_mmap(dev, vma, cpu_addr, dma_addr, size,
+ return dma_direct_mmap(dev, vma, cpu_addr, dma_addr, size,
attrs);
if (!ops->mmap)
return -ENXIO;
diff --git a/kernel/dma/remap.c b/kernel/dma/remap.c
index c00b9258fa6a..d47bd40fc0f5 100644
--- a/kernel/dma/remap.c
+++ b/kernel/dma/remap.c
@@ -210,59 +210,4 @@ bool dma_free_from_pool(void *start, size_t size)
gen_pool_free(atomic_pool, (unsigned long)start, size);
return true;
}
-
-void *arch_dma_alloc(struct device *dev, size_t size, dma_addr_t *dma_handle,
- gfp_t flags, unsigned long attrs)
-{
- struct page *page = NULL;
- void *ret;
-
- size = PAGE_ALIGN(size);
-
- if (!gfpflags_allow_blocking(flags)) {
- ret = dma_alloc_from_pool(size, &page, flags);
- if (!ret)
- return NULL;
- goto done;
- }
-
- page = __dma_direct_alloc_pages(dev, size, dma_handle, flags, attrs);
- if (!page)
- return NULL;
-
- /* remove any dirty cache lines on the kernel alias */
- arch_dma_prep_coherent(page, size);
-
- /* create a coherent mapping */
- ret = dma_common_contiguous_remap(page, size,
- dma_pgprot(dev, PAGE_KERNEL, attrs),
- __builtin_return_address(0));
- if (!ret) {
- __dma_direct_free_pages(dev, size, page);
- return ret;
- }
-
- memset(ret, 0, size);
-done:
- *dma_handle = phys_to_dma(dev, page_to_phys(page));
- return ret;
-}
-
-void arch_dma_free(struct device *dev, size_t size, void *vaddr,
- dma_addr_t dma_handle, unsigned long attrs)
-{
- if (!dma_free_from_pool(vaddr, PAGE_ALIGN(size))) {
- phys_addr_t phys = dma_to_phys(dev, dma_handle);
- struct page *page = pfn_to_page(__phys_to_pfn(phys));
-
- vunmap(vaddr);
- __dma_direct_free_pages(dev, size, page);
- }
-}
-
-long arch_dma_coherent_to_pfn(struct device *dev, void *cpu_addr,
- dma_addr_t dma_addr)
-{
- return __phys_to_pfn(dma_to_phys(dev, dma_addr));
-}
#endif /* CONFIG_DMA_DIRECT_REMAP */
diff --git a/kernel/dma/swiotlb.c b/kernel/dma/swiotlb.c
index 673a2cdb2656..9280d6f8271e 100644
--- a/kernel/dma/swiotlb.c
+++ b/kernel/dma/swiotlb.c
@@ -678,7 +678,7 @@ bool swiotlb_map(struct device *dev, phys_addr_t *phys, dma_addr_t *dma_addr,
/* Ensure that the address returned is DMA'ble */
*dma_addr = __phys_to_dma(dev, *phys);
- if (unlikely(!dma_capable(dev, *dma_addr, size))) {
+ if (unlikely(!dma_capable(dev, *dma_addr, size, true))) {
swiotlb_tbl_unmap_single(dev, *phys, size, size, dir,
attrs | DMA_ATTR_SKIP_CPU_SYNC);
return false;
diff --git a/kernel/exit.c b/kernel/exit.c
index 0bac4b60d5f3..bcbd59888e67 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -1409,7 +1409,7 @@ static int child_wait_callback(wait_queue_entry_t *wait, unsigned mode,
void __wake_up_parent(struct task_struct *p, struct task_struct *parent)
{
__wake_up_sync_key(&parent->signal->wait_chldexit,
- TASK_INTERRUPTIBLE, 1, p);
+ TASK_INTERRUPTIBLE, p);
}
static long do_wait(struct wait_opts *wo)
diff --git a/kernel/fork.c b/kernel/fork.c
index 00b64f41c2b4..0f0bac8318dd 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -40,7 +40,6 @@
#include <linux/binfmts.h>
#include <linux/mman.h>
#include <linux/mmu_notifier.h>
-#include <linux/hmm.h>
#include <linux/fs.h>
#include <linux/mm.h>
#include <linux/vmacache.h>
diff --git a/kernel/kexec_file.c b/kernel/kexec_file.c
index 79f252af7dee..a2df93948665 100644
--- a/kernel/kexec_file.c
+++ b/kernel/kexec_file.c
@@ -1304,7 +1304,7 @@ int crash_prepare_elf64_headers(struct crash_mem *mem, int kernel_map,
if (kernel_map) {
phdr->p_type = PT_LOAD;
phdr->p_flags = PF_R|PF_W|PF_X;
- phdr->p_vaddr = (Elf64_Addr)_text;
+ phdr->p_vaddr = (unsigned long) _text;
phdr->p_filesz = phdr->p_memsz = _end - _text;
phdr->p_offset = phdr->p_paddr = __pa_symbol(_text);
ehdr->e_phnum++;
@@ -1321,7 +1321,7 @@ int crash_prepare_elf64_headers(struct crash_mem *mem, int kernel_map,
phdr->p_offset = mstart;
phdr->p_paddr = mstart;
- phdr->p_vaddr = (unsigned long long) __va(mstart);
+ phdr->p_vaddr = (unsigned long) __va(mstart);
phdr->p_filesz = phdr->p_memsz = mend - mstart + 1;
phdr->p_align = 0;
ehdr->e_phnum++;
diff --git a/kernel/livepatch/patch.c b/kernel/livepatch/patch.c
index bd43537702bd..b552cf2d85f8 100644
--- a/kernel/livepatch/patch.c
+++ b/kernel/livepatch/patch.c
@@ -196,7 +196,8 @@ static int klp_patch_func(struct klp_func *func)
ops->fops.func = klp_ftrace_handler;
ops->fops.flags = FTRACE_OPS_FL_SAVE_REGS |
FTRACE_OPS_FL_DYNAMIC |
- FTRACE_OPS_FL_IPMODIFY;
+ FTRACE_OPS_FL_IPMODIFY |
+ FTRACE_OPS_FL_PERMANENT;
list_add(&ops->node, &klp_ops);
diff --git a/kernel/module.c b/kernel/module.c
index acf7962936c4..052a40212b8e 100644
--- a/kernel/module.c
+++ b/kernel/module.c
@@ -3728,7 +3728,6 @@ static int complete_formation(struct module *mod, struct load_info *info)
module_enable_ro(mod, false);
module_enable_nx(mod);
- module_enable_x(mod);
/* Mark state as coming so strong_try_module_get() ignores us,
* but kallsyms etc. can see us. */
@@ -3751,6 +3750,11 @@ static int prepare_coming_module(struct module *mod)
if (err)
return err;
+ /* Make module executable after ftrace is enabled */
+ mutex_lock(&module_mutex);
+ module_enable_x(mod);
+ mutex_unlock(&module_mutex);
+
blocking_notifier_call_chain(&module_notify_list,
MODULE_STATE_COMING, mod);
return 0;
diff --git a/kernel/power/snapshot.c b/kernel/power/snapshot.c
index 83105874f255..26b9168321e7 100644
--- a/kernel/power/snapshot.c
+++ b/kernel/power/snapshot.c
@@ -734,8 +734,15 @@ zone_found:
* We have found the zone. Now walk the radix tree to find the leaf node
* for our PFN.
*/
+
+ /*
+ * If the zone we wish to scan is the the current zone and the
+ * pfn falls into the current node then we do not need to walk
+ * the tree.
+ */
node = bm->cur.node;
- if (((pfn - zone->start_pfn) & ~BM_BLOCK_MASK) == bm->cur.node_pfn)
+ if (zone == bm->cur.zone &&
+ ((pfn - zone->start_pfn) & ~BM_BLOCK_MASK) == bm->cur.node_pfn)
goto node_found;
node = zone->rtree;
diff --git a/kernel/sched/idle.c b/kernel/sched/idle.c
index 428cd05c0b5d..ffa959e91227 100644
--- a/kernel/sched/idle.c
+++ b/kernel/sched/idle.c
@@ -104,7 +104,7 @@ static int call_cpuidle(struct cpuidle_driver *drv, struct cpuidle_device *dev,
* update no idle residency and return.
*/
if (current_clr_polling_and_test()) {
- dev->last_residency = 0;
+ dev->last_residency_ns = 0;
local_irq_enable();
return -EBUSY;
}
@@ -165,7 +165,9 @@ static void cpuidle_idle_call(void)
* until a proper wakeup interrupt happens.
*/
- if (idle_should_enter_s2idle() || dev->use_deepest_state) {
+ if (idle_should_enter_s2idle() || dev->forced_idle_latency_limit_ns) {
+ u64 max_latency_ns;
+
if (idle_should_enter_s2idle()) {
rcu_idle_enter();
@@ -176,12 +178,16 @@ static void cpuidle_idle_call(void)
}
rcu_idle_exit();
+
+ max_latency_ns = U64_MAX;
+ } else {
+ max_latency_ns = dev->forced_idle_latency_limit_ns;
}
tick_nohz_idle_stop_tick();
rcu_idle_enter();
- next_state = cpuidle_find_deepest_state(drv, dev);
+ next_state = cpuidle_find_deepest_state(drv, dev, max_latency_ns);
call_cpuidle(drv, dev, next_state);
} else {
bool stop_tick = true;
@@ -311,7 +317,7 @@ static enum hrtimer_restart idle_inject_timer_fn(struct hrtimer *timer)
return HRTIMER_NORESTART;
}
-void play_idle(unsigned long duration_us)
+void play_idle_precise(u64 duration_ns, u64 latency_ns)
{
struct idle_timer it;
@@ -323,29 +329,29 @@ void play_idle(unsigned long duration_us)
WARN_ON_ONCE(current->nr_cpus_allowed != 1);
WARN_ON_ONCE(!(current->flags & PF_KTHREAD));
WARN_ON_ONCE(!(current->flags & PF_NO_SETAFFINITY));
- WARN_ON_ONCE(!duration_us);
+ WARN_ON_ONCE(!duration_ns);
rcu_sleep_check();
preempt_disable();
current->flags |= PF_IDLE;
- cpuidle_use_deepest_state(true);
+ cpuidle_use_deepest_state(latency_ns);
it.done = 0;
hrtimer_init_on_stack(&it.timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
it.timer.function = idle_inject_timer_fn;
- hrtimer_start(&it.timer, ns_to_ktime(duration_us * NSEC_PER_USEC),
+ hrtimer_start(&it.timer, ns_to_ktime(duration_ns),
HRTIMER_MODE_REL_PINNED);
while (!READ_ONCE(it.done))
do_idle();
- cpuidle_use_deepest_state(false);
+ cpuidle_use_deepest_state(0);
current->flags &= ~PF_IDLE;
preempt_fold_need_resched();
preempt_enable();
}
-EXPORT_SYMBOL_GPL(play_idle);
+EXPORT_SYMBOL_GPL(play_idle_precise);
void cpu_startup_entry(enum cpuhp_state state)
{
diff --git a/kernel/sched/wait.c b/kernel/sched/wait.c
index c1e566a114ca..ba059fbfc53a 100644
--- a/kernel/sched/wait.c
+++ b/kernel/sched/wait.c
@@ -169,7 +169,6 @@ EXPORT_SYMBOL_GPL(__wake_up_locked_key_bookmark);
* __wake_up_sync_key - wake up threads blocked on a waitqueue.
* @wq_head: the waitqueue
* @mode: which threads
- * @nr_exclusive: how many wake-one or wake-many threads to wake up
* @key: opaque value to be passed to wakeup targets
*
* The sync wakeup differs that the waker knows that it will schedule
@@ -183,26 +182,44 @@ EXPORT_SYMBOL_GPL(__wake_up_locked_key_bookmark);
* accessing the task state.
*/
void __wake_up_sync_key(struct wait_queue_head *wq_head, unsigned int mode,
- int nr_exclusive, void *key)
+ void *key)
{
- int wake_flags = 1; /* XXX WF_SYNC */
-
if (unlikely(!wq_head))
return;
- if (unlikely(nr_exclusive != 1))
- wake_flags = 0;
-
- __wake_up_common_lock(wq_head, mode, nr_exclusive, wake_flags, key);
+ __wake_up_common_lock(wq_head, mode, 1, WF_SYNC, key);
}
EXPORT_SYMBOL_GPL(__wake_up_sync_key);
+/**
+ * __wake_up_locked_sync_key - wake up a thread blocked on a locked waitqueue.
+ * @wq_head: the waitqueue
+ * @mode: which threads
+ * @key: opaque value to be passed to wakeup targets
+ *
+ * The sync wakeup differs in that the waker knows that it will schedule
+ * away soon, so while the target thread will be woken up, it will not
+ * be migrated to another CPU - ie. the two threads are 'synchronized'
+ * with each other. This can prevent needless bouncing between CPUs.
+ *
+ * On UP it can prevent extra preemption.
+ *
+ * If this function wakes up a task, it executes a full memory barrier before
+ * accessing the task state.
+ */
+void __wake_up_locked_sync_key(struct wait_queue_head *wq_head,
+ unsigned int mode, void *key)
+{
+ __wake_up_common(wq_head, mode, 1, WF_SYNC, key, NULL);
+}
+EXPORT_SYMBOL_GPL(__wake_up_locked_sync_key);
+
/*
* __wake_up_sync - see __wake_up_sync_key()
*/
-void __wake_up_sync(struct wait_queue_head *wq_head, unsigned int mode, int nr_exclusive)
+void __wake_up_sync(struct wait_queue_head *wq_head, unsigned int mode)
{
- __wake_up_sync_key(wq_head, mode, nr_exclusive, NULL);
+ __wake_up_sync_key(wq_head, mode, NULL);
}
EXPORT_SYMBOL_GPL(__wake_up_sync); /* For internal use only */
diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig
index e08527f50d2a..cdf5afa87f65 100644
--- a/kernel/trace/Kconfig
+++ b/kernel/trace/Kconfig
@@ -33,6 +33,9 @@ config HAVE_DYNAMIC_FTRACE
config HAVE_DYNAMIC_FTRACE_WITH_REGS
bool
+config HAVE_DYNAMIC_FTRACE_WITH_DIRECT_CALLS
+ bool
+
config HAVE_FTRACE_MCOUNT_RECORD
bool
help
@@ -76,7 +79,7 @@ config FTRACE_NMI_ENTER
config EVENT_TRACING
select CONTEXT_SWITCH_TRACER
- select GLOB
+ select GLOB
bool
config CONTEXT_SWITCH_TRACER
@@ -106,7 +109,6 @@ config PREEMPTIRQ_TRACEPOINTS
config TRACING
bool
- select DEBUG_FS
select RING_BUFFER
select STACKTRACE if STACKTRACE_SUPPORT
select TRACEPOINTS
@@ -308,7 +310,7 @@ config TRACER_SNAPSHOT
cat snapshot
config TRACER_SNAPSHOT_PER_CPU_SWAP
- bool "Allow snapshot to swap per CPU"
+ bool "Allow snapshot to swap per CPU"
depends on TRACER_SNAPSHOT
select RING_BUFFER_ALLOW_SWAP
help
@@ -557,6 +559,11 @@ config DYNAMIC_FTRACE_WITH_REGS
depends on DYNAMIC_FTRACE
depends on HAVE_DYNAMIC_FTRACE_WITH_REGS
+config DYNAMIC_FTRACE_WITH_DIRECT_CALLS
+ def_bool y
+ depends on DYNAMIC_FTRACE
+ depends on HAVE_DYNAMIC_FTRACE_WITH_DIRECT_CALLS
+
config FUNCTION_PROFILER
bool "Kernel function profiler"
depends on FUNCTION_TRACER
@@ -675,7 +682,7 @@ config MMIOTRACE_TEST
Say N, unless you absolutely know what you are doing.
config TRACEPOINT_BENCHMARK
- bool "Add tracepoint that benchmarks tracepoints"
+ bool "Add tracepoint that benchmarks tracepoints"
help
This option creates the tracepoint "benchmark:benchmark_event".
When the tracepoint is enabled, it kicks off a kernel thread that
@@ -724,7 +731,7 @@ config RING_BUFFER_STARTUP_TEST
bool "Ring buffer startup self test"
depends on RING_BUFFER
help
- Run a simple self test on the ring buffer on boot up. Late in the
+ Run a simple self test on the ring buffer on boot up. Late in the
kernel boot sequence, the test will start that kicks off
a thread per cpu. Each thread will write various size events
into the ring buffer. Another thread is created to send IPIs
@@ -752,9 +759,9 @@ config PREEMPTIRQ_DELAY_TEST
configurable delay. The module busy waits for the duration of the
critical section.
- For example, the following invocation forces a one-time irq-disabled
- critical section for 500us:
- modprobe preemptirq_delay_test test_mode=irq delay=500000
+ For example, the following invocation generates a burst of three
+ irq-disabled critical sections for 500us:
+ modprobe preemptirq_delay_test test_mode=irq delay=500 burst_size=3
If unsure, say N
@@ -763,7 +770,7 @@ config TRACE_EVAL_MAP_FILE
depends on TRACING
help
The "print fmt" of the trace events will show the enum/sizeof names
- instead of their values. This can cause problems for user space tools
+ instead of their values. This can cause problems for user space tools
that use this string to parse the raw data as user space does not know
how to convert the string to its value.
@@ -784,7 +791,7 @@ config TRACE_EVAL_MAP_FILE
they are needed for the "eval_map" file. Enabling this option will
increase the memory footprint of the running kernel.
- If unsure, say N
+ If unsure, say N.
config GCOV_PROFILE_FTRACE
bool "Enable GCOV profiling on ftrace subsystem"
diff --git a/kernel/trace/fgraph.c b/kernel/trace/fgraph.c
index 7950a0356042..67e0c462b059 100644
--- a/kernel/trace/fgraph.c
+++ b/kernel/trace/fgraph.c
@@ -332,9 +332,14 @@ int ftrace_graph_entry_stub(struct ftrace_graph_ent *trace)
return 0;
}
+/*
+ * Simply points to ftrace_stub, but with the proper protocol.
+ * Defined by the linker script in linux/vmlinux.lds.h
+ */
+extern void ftrace_stub_graph(struct ftrace_graph_ret *);
+
/* The callbacks that hook a function */
-trace_func_graph_ret_t ftrace_graph_return =
- (trace_func_graph_ret_t)ftrace_stub;
+trace_func_graph_ret_t ftrace_graph_return = ftrace_stub_graph;
trace_func_graph_ent_t ftrace_graph_entry = ftrace_graph_entry_stub;
static trace_func_graph_ent_t __ftrace_graph_entry = ftrace_graph_entry_stub;
@@ -614,7 +619,7 @@ void unregister_ftrace_graph(struct fgraph_ops *gops)
goto out;
ftrace_graph_active--;
- ftrace_graph_return = (trace_func_graph_ret_t)ftrace_stub;
+ ftrace_graph_return = ftrace_stub_graph;
ftrace_graph_entry = ftrace_graph_entry_stub;
__ftrace_graph_entry = ftrace_graph_entry_stub;
ftrace_shutdown(&graph_ops, FTRACE_STOP_FUNC_RET);
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index 5259d4dea675..74439ab5c2b6 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -326,6 +326,8 @@ int __register_ftrace_function(struct ftrace_ops *ops)
if (ops->flags & FTRACE_OPS_FL_SAVE_REGS_IF_SUPPORTED)
ops->flags |= FTRACE_OPS_FL_SAVE_REGS;
#endif
+ if (!ftrace_enabled && (ops->flags & FTRACE_OPS_FL_PERMANENT))
+ return -EBUSY;
if (!core_kernel_data((unsigned long)ops))
ops->flags |= FTRACE_OPS_FL_DYNAMIC;
@@ -463,10 +465,10 @@ static void *function_stat_start(struct tracer_stat *trace)
#ifdef CONFIG_FUNCTION_GRAPH_TRACER
/* function graph compares on total time */
-static int function_stat_cmp(void *p1, void *p2)
+static int function_stat_cmp(const void *p1, const void *p2)
{
- struct ftrace_profile *a = p1;
- struct ftrace_profile *b = p2;
+ const struct ftrace_profile *a = p1;
+ const struct ftrace_profile *b = p2;
if (a->time < b->time)
return -1;
@@ -477,10 +479,10 @@ static int function_stat_cmp(void *p1, void *p2)
}
#else
/* not function graph compares against hits */
-static int function_stat_cmp(void *p1, void *p2)
+static int function_stat_cmp(const void *p1, const void *p2)
{
- struct ftrace_profile *a = p1;
- struct ftrace_profile *b = p2;
+ const struct ftrace_profile *a = p1;
+ const struct ftrace_profile *b = p2;
if (a->counter < b->counter)
return -1;
@@ -1018,11 +1020,6 @@ static bool update_all_ops;
# error Dynamic ftrace depends on MCOUNT_RECORD
#endif
-struct ftrace_func_entry {
- struct hlist_node hlist;
- unsigned long ip;
-};
-
struct ftrace_func_probe {
struct ftrace_probe_ops *probe_ops;
struct ftrace_ops ops;
@@ -1370,24 +1367,16 @@ ftrace_hash_rec_enable_modify(struct ftrace_ops *ops, int filter_hash);
static int ftrace_hash_ipmodify_update(struct ftrace_ops *ops,
struct ftrace_hash *new_hash);
-static struct ftrace_hash *
-__ftrace_hash_move(struct ftrace_hash *src)
+static struct ftrace_hash *dup_hash(struct ftrace_hash *src, int size)
{
struct ftrace_func_entry *entry;
- struct hlist_node *tn;
- struct hlist_head *hhd;
struct ftrace_hash *new_hash;
- int size = src->count;
+ struct hlist_head *hhd;
+ struct hlist_node *tn;
int bits = 0;
int i;
/*
- * If the new source is empty, just return the empty_hash.
- */
- if (ftrace_hash_empty(src))
- return EMPTY_HASH;
-
- /*
* Make the hash size about 1/2 the # found
*/
for (size /= 2; size; size >>= 1)
@@ -1411,10 +1400,23 @@ __ftrace_hash_move(struct ftrace_hash *src)
__add_hash_entry(new_hash, entry);
}
}
-
return new_hash;
}
+static struct ftrace_hash *
+__ftrace_hash_move(struct ftrace_hash *src)
+{
+ int size = src->count;
+
+ /*
+ * If the new source is empty, just return the empty_hash.
+ */
+ if (ftrace_hash_empty(src))
+ return EMPTY_HASH;
+
+ return dup_hash(src, size);
+}
+
static int
ftrace_hash_move(struct ftrace_ops *ops, int enable,
struct ftrace_hash **dst, struct ftrace_hash *src)
@@ -1534,6 +1536,26 @@ static int ftrace_cmp_recs(const void *a, const void *b)
return 0;
}
+static struct dyn_ftrace *lookup_rec(unsigned long start, unsigned long end)
+{
+ struct ftrace_page *pg;
+ struct dyn_ftrace *rec = NULL;
+ struct dyn_ftrace key;
+
+ key.ip = start;
+ key.flags = end; /* overload flags, as it is unsigned long */
+
+ for (pg = ftrace_pages_start; pg; pg = pg->next) {
+ if (end < pg->records[0].ip ||
+ start >= (pg->records[pg->index - 1].ip + MCOUNT_INSN_SIZE))
+ continue;
+ rec = bsearch(&key, pg->records, pg->index,
+ sizeof(struct dyn_ftrace),
+ ftrace_cmp_recs);
+ }
+ return rec;
+}
+
/**
* ftrace_location_range - return the first address of a traced location
* if it touches the given ip range
@@ -1548,23 +1570,11 @@ static int ftrace_cmp_recs(const void *a, const void *b)
*/
unsigned long ftrace_location_range(unsigned long start, unsigned long end)
{
- struct ftrace_page *pg;
struct dyn_ftrace *rec;
- struct dyn_ftrace key;
-
- key.ip = start;
- key.flags = end; /* overload flags, as it is unsigned long */
- for (pg = ftrace_pages_start; pg; pg = pg->next) {
- if (end < pg->records[0].ip ||
- start >= (pg->records[pg->index - 1].ip + MCOUNT_INSN_SIZE))
- continue;
- rec = bsearch(&key, pg->records, pg->index,
- sizeof(struct dyn_ftrace),
- ftrace_cmp_recs);
- if (rec)
- return rec->ip;
- }
+ rec = lookup_rec(start, end);
+ if (rec)
+ return rec->ip;
return 0;
}
@@ -1715,6 +1725,9 @@ static bool __ftrace_hash_rec_update(struct ftrace_ops *ops,
if (FTRACE_WARN_ON(ftrace_rec_count(rec) == FTRACE_REF_MAX))
return false;
+ if (ops->flags & FTRACE_OPS_FL_DIRECT)
+ rec->flags |= FTRACE_FL_DIRECT;
+
/*
* If there's only a single callback registered to a
* function, and the ops has a trampoline registered
@@ -1743,6 +1756,15 @@ static bool __ftrace_hash_rec_update(struct ftrace_ops *ops,
rec->flags--;
/*
+ * Only the internal direct_ops should have the
+ * DIRECT flag set. Thus, if it is removing a
+ * function, then that function should no longer
+ * be direct.
+ */
+ if (ops->flags & FTRACE_OPS_FL_DIRECT)
+ rec->flags &= ~FTRACE_FL_DIRECT;
+
+ /*
* If the rec had REGS enabled and the ops that is
* being removed had REGS set, then see if there is
* still any ops for this record that wants regs.
@@ -2077,15 +2099,34 @@ static int ftrace_check_record(struct dyn_ftrace *rec, bool enable, bool update)
* If enabling and the REGS flag does not match the REGS_EN, or
* the TRAMP flag doesn't match the TRAMP_EN, then do not ignore
* this record. Set flags to fail the compare against ENABLED.
+ * Same for direct calls.
*/
if (flag) {
- if (!(rec->flags & FTRACE_FL_REGS) !=
+ if (!(rec->flags & FTRACE_FL_REGS) !=
!(rec->flags & FTRACE_FL_REGS_EN))
flag |= FTRACE_FL_REGS;
- if (!(rec->flags & FTRACE_FL_TRAMP) !=
+ if (!(rec->flags & FTRACE_FL_TRAMP) !=
!(rec->flags & FTRACE_FL_TRAMP_EN))
flag |= FTRACE_FL_TRAMP;
+
+ /*
+ * Direct calls are special, as count matters.
+ * We must test the record for direct, if the
+ * DIRECT and DIRECT_EN do not match, but only
+ * if the count is 1. That's because, if the
+ * count is something other than one, we do not
+ * want the direct enabled (it will be done via the
+ * direct helper). But if DIRECT_EN is set, and
+ * the count is not one, we need to clear it.
+ */
+ if (ftrace_rec_count(rec) == 1) {
+ if (!(rec->flags & FTRACE_FL_DIRECT) !=
+ !(rec->flags & FTRACE_FL_DIRECT_EN))
+ flag |= FTRACE_FL_DIRECT;
+ } else if (rec->flags & FTRACE_FL_DIRECT_EN) {
+ flag |= FTRACE_FL_DIRECT;
+ }
}
/* If the state of this record hasn't changed, then do nothing */
@@ -2110,6 +2151,25 @@ static int ftrace_check_record(struct dyn_ftrace *rec, bool enable, bool update)
else
rec->flags &= ~FTRACE_FL_TRAMP_EN;
}
+ if (flag & FTRACE_FL_DIRECT) {
+ /*
+ * If there's only one user (direct_ops helper)
+ * then we can call the direct function
+ * directly (no ftrace trampoline).
+ */
+ if (ftrace_rec_count(rec) == 1) {
+ if (rec->flags & FTRACE_FL_DIRECT)
+ rec->flags |= FTRACE_FL_DIRECT_EN;
+ else
+ rec->flags &= ~FTRACE_FL_DIRECT_EN;
+ } else {
+ /*
+ * Can only call directly if there's
+ * only one callback to the function.
+ */
+ rec->flags &= ~FTRACE_FL_DIRECT_EN;
+ }
+ }
}
/*
@@ -2139,7 +2199,7 @@ static int ftrace_check_record(struct dyn_ftrace *rec, bool enable, bool update)
* and REGS states. The _EN flags must be disabled though.
*/
rec->flags &= ~(FTRACE_FL_ENABLED | FTRACE_FL_TRAMP_EN |
- FTRACE_FL_REGS_EN);
+ FTRACE_FL_REGS_EN | FTRACE_FL_DIRECT_EN);
}
ftrace_bug_type = FTRACE_BUG_NOP;
@@ -2294,6 +2354,52 @@ ftrace_find_tramp_ops_new(struct dyn_ftrace *rec)
return NULL;
}
+#ifdef CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS
+/* Protected by rcu_tasks for reading, and direct_mutex for writing */
+static struct ftrace_hash *direct_functions = EMPTY_HASH;
+static DEFINE_MUTEX(direct_mutex);
+int ftrace_direct_func_count;
+
+/*
+ * Search the direct_functions hash to see if the given instruction pointer
+ * has a direct caller attached to it.
+ */
+static unsigned long find_rec_direct(unsigned long ip)
+{
+ struct ftrace_func_entry *entry;
+
+ entry = __ftrace_lookup_ip(direct_functions, ip);
+ if (!entry)
+ return 0;
+
+ return entry->direct;
+}
+
+static void call_direct_funcs(unsigned long ip, unsigned long pip,
+ struct ftrace_ops *ops, struct pt_regs *regs)
+{
+ unsigned long addr;
+
+ addr = find_rec_direct(ip);
+ if (!addr)
+ return;
+
+ arch_ftrace_set_direct_caller(regs, addr);
+}
+
+struct ftrace_ops direct_ops = {
+ .func = call_direct_funcs,
+ .flags = FTRACE_OPS_FL_IPMODIFY | FTRACE_OPS_FL_RECURSION_SAFE
+ | FTRACE_OPS_FL_DIRECT | FTRACE_OPS_FL_SAVE_REGS
+ | FTRACE_OPS_FL_PERMANENT,
+};
+#else
+static inline unsigned long find_rec_direct(unsigned long ip)
+{
+ return 0;
+}
+#endif /* CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS */
+
/**
* ftrace_get_addr_new - Get the call address to set to
* @rec: The ftrace record descriptor
@@ -2307,6 +2413,15 @@ ftrace_find_tramp_ops_new(struct dyn_ftrace *rec)
unsigned long ftrace_get_addr_new(struct dyn_ftrace *rec)
{
struct ftrace_ops *ops;
+ unsigned long addr;
+
+ if ((rec->flags & FTRACE_FL_DIRECT) &&
+ (ftrace_rec_count(rec) == 1)) {
+ addr = find_rec_direct(rec->ip);
+ if (addr)
+ return addr;
+ WARN_ON_ONCE(1);
+ }
/* Trampolines take precedence over regs */
if (rec->flags & FTRACE_FL_TRAMP) {
@@ -2339,6 +2454,15 @@ unsigned long ftrace_get_addr_new(struct dyn_ftrace *rec)
unsigned long ftrace_get_addr_curr(struct dyn_ftrace *rec)
{
struct ftrace_ops *ops;
+ unsigned long addr;
+
+ /* Direct calls take precedence over trampolines */
+ if (rec->flags & FTRACE_FL_DIRECT_EN) {
+ addr = find_rec_direct(rec->ip);
+ if (addr)
+ return addr;
+ WARN_ON_ONCE(1);
+ }
/* Trampolines take precedence over regs */
if (rec->flags & FTRACE_FL_TRAMP_EN) {
@@ -2861,6 +2985,8 @@ static void ftrace_shutdown_sysctl(void)
static u64 ftrace_update_time;
unsigned long ftrace_update_tot_cnt;
+unsigned long ftrace_number_of_pages;
+unsigned long ftrace_number_of_groups;
static inline int ops_traces_mod(struct ftrace_ops *ops)
{
@@ -2985,6 +3111,9 @@ static int ftrace_allocate_records(struct ftrace_page *pg, int count)
goto again;
}
+ ftrace_number_of_pages += 1 << order;
+ ftrace_number_of_groups++;
+
cnt = (PAGE_SIZE << order) / ENTRY_SIZE;
pg->size = cnt;
@@ -3040,6 +3169,8 @@ ftrace_allocate_pages(unsigned long num_to_init)
start_pg = pg->next;
kfree(pg);
pg = start_pg;
+ ftrace_number_of_pages -= 1 << order;
+ ftrace_number_of_groups--;
}
pr_info("ftrace: FAILED to allocate memory for functions\n");
return NULL;
@@ -3450,10 +3581,11 @@ static int t_show(struct seq_file *m, void *v)
if (iter->flags & FTRACE_ITER_ENABLED) {
struct ftrace_ops *ops;
- seq_printf(m, " (%ld)%s%s",
+ seq_printf(m, " (%ld)%s%s%s",
ftrace_rec_count(rec),
rec->flags & FTRACE_FL_REGS ? " R" : " ",
- rec->flags & FTRACE_FL_IPMODIFY ? " I" : " ");
+ rec->flags & FTRACE_FL_IPMODIFY ? " I" : " ",
+ rec->flags & FTRACE_FL_DIRECT ? " D" : " ");
if (rec->flags & FTRACE_FL_TRAMP_EN) {
ops = ftrace_find_tramp_ops_any(rec);
if (ops) {
@@ -3469,6 +3601,13 @@ static int t_show(struct seq_file *m, void *v)
} else {
add_trampoline_func(m, NULL, rec);
}
+ if (rec->flags & FTRACE_FL_DIRECT) {
+ unsigned long direct;
+
+ direct = find_rec_direct(rec->ip);
+ if (direct)
+ seq_printf(m, "\n\tdirect-->%pS", (void *)direct);
+ }
}
seq_putc(m, '\n');
@@ -4800,6 +4939,366 @@ ftrace_set_addr(struct ftrace_ops *ops, unsigned long ip, int remove,
return ftrace_set_hash(ops, NULL, 0, ip, remove, reset, enable);
}
+#ifdef CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS
+
+struct ftrace_direct_func {
+ struct list_head next;
+ unsigned long addr;
+ int count;
+};
+
+static LIST_HEAD(ftrace_direct_funcs);
+
+/**
+ * ftrace_find_direct_func - test an address if it is a registered direct caller
+ * @addr: The address of a registered direct caller
+ *
+ * This searches to see if a ftrace direct caller has been registered
+ * at a specific address, and if so, it returns a descriptor for it.
+ *
+ * This can be used by architecture code to see if an address is
+ * a direct caller (trampoline) attached to a fentry/mcount location.
+ * This is useful for the function_graph tracer, as it may need to
+ * do adjustments if it traced a location that also has a direct
+ * trampoline attached to it.
+ */
+struct ftrace_direct_func *ftrace_find_direct_func(unsigned long addr)
+{
+ struct ftrace_direct_func *entry;
+ bool found = false;
+
+ /* May be called by fgraph trampoline (protected by rcu tasks) */
+ list_for_each_entry_rcu(entry, &ftrace_direct_funcs, next) {
+ if (entry->addr == addr) {
+ found = true;
+ break;
+ }
+ }
+ if (found)
+ return entry;
+
+ return NULL;
+}
+
+/**
+ * register_ftrace_direct - Call a custom trampoline directly
+ * @ip: The address of the nop at the beginning of a function
+ * @addr: The address of the trampoline to call at @ip
+ *
+ * This is used to connect a direct call from the nop location (@ip)
+ * at the start of ftrace traced functions. The location that it calls
+ * (@addr) must be able to handle a direct call, and save the parameters
+ * of the function being traced, and restore them (or inject new ones
+ * if needed), before returning.
+ *
+ * Returns:
+ * 0 on success
+ * -EBUSY - Another direct function is already attached (there can be only one)
+ * -ENODEV - @ip does not point to a ftrace nop location (or not supported)
+ * -ENOMEM - There was an allocation failure.
+ */
+int register_ftrace_direct(unsigned long ip, unsigned long addr)
+{
+ struct ftrace_direct_func *direct;
+ struct ftrace_func_entry *entry;
+ struct ftrace_hash *free_hash = NULL;
+ struct dyn_ftrace *rec;
+ int ret = -EBUSY;
+
+ mutex_lock(&direct_mutex);
+
+ /* See if there's a direct function at @ip already */
+ if (find_rec_direct(ip))
+ goto out_unlock;
+
+ ret = -ENODEV;
+ rec = lookup_rec(ip, ip);
+ if (!rec)
+ goto out_unlock;
+
+ /*
+ * Check if the rec says it has a direct call but we didn't
+ * find one earlier?
+ */
+ if (WARN_ON(rec->flags & FTRACE_FL_DIRECT))
+ goto out_unlock;
+
+ /* Make sure the ip points to the exact record */
+ if (ip != rec->ip) {
+ ip = rec->ip;
+ /* Need to check this ip for a direct. */
+ if (find_rec_direct(ip))
+ goto out_unlock;
+ }
+
+ ret = -ENOMEM;
+ if (ftrace_hash_empty(direct_functions) ||
+ direct_functions->count > 2 * (1 << direct_functions->size_bits)) {
+ struct ftrace_hash *new_hash;
+ int size = ftrace_hash_empty(direct_functions) ? 0 :
+ direct_functions->count + 1;
+
+ if (size < 32)
+ size = 32;
+
+ new_hash = dup_hash(direct_functions, size);
+ if (!new_hash)
+ goto out_unlock;
+
+ free_hash = direct_functions;
+ direct_functions = new_hash;
+ }
+
+ entry = kmalloc(sizeof(*entry), GFP_KERNEL);
+ if (!entry)
+ goto out_unlock;
+
+ direct = ftrace_find_direct_func(addr);
+ if (!direct) {
+ direct = kmalloc(sizeof(*direct), GFP_KERNEL);
+ if (!direct) {
+ kfree(entry);
+ goto out_unlock;
+ }
+ direct->addr = addr;
+ direct->count = 0;
+ list_add_rcu(&direct->next, &ftrace_direct_funcs);
+ ftrace_direct_func_count++;
+ }
+
+ entry->ip = ip;
+ entry->direct = addr;
+ __add_hash_entry(direct_functions, entry);
+
+ ret = ftrace_set_filter_ip(&direct_ops, ip, 0, 0);
+ if (ret)
+ remove_hash_entry(direct_functions, entry);
+
+ if (!ret && !(direct_ops.flags & FTRACE_OPS_FL_ENABLED)) {
+ ret = register_ftrace_function(&direct_ops);
+ if (ret)
+ ftrace_set_filter_ip(&direct_ops, ip, 1, 0);
+ }
+
+ if (ret) {
+ kfree(entry);
+ if (!direct->count) {
+ list_del_rcu(&direct->next);
+ synchronize_rcu_tasks();
+ kfree(direct);
+ if (free_hash)
+ free_ftrace_hash(free_hash);
+ free_hash = NULL;
+ ftrace_direct_func_count--;
+ }
+ } else {
+ direct->count++;
+ }
+ out_unlock:
+ mutex_unlock(&direct_mutex);
+
+ if (free_hash) {
+ synchronize_rcu_tasks();
+ free_ftrace_hash(free_hash);
+ }
+
+ return ret;
+}
+EXPORT_SYMBOL_GPL(register_ftrace_direct);
+
+static struct ftrace_func_entry *find_direct_entry(unsigned long *ip,
+ struct dyn_ftrace **recp)
+{
+ struct ftrace_func_entry *entry;
+ struct dyn_ftrace *rec;
+
+ rec = lookup_rec(*ip, *ip);
+ if (!rec)
+ return NULL;
+
+ entry = __ftrace_lookup_ip(direct_functions, rec->ip);
+ if (!entry) {
+ WARN_ON(rec->flags & FTRACE_FL_DIRECT);
+ return NULL;
+ }
+
+ WARN_ON(!(rec->flags & FTRACE_FL_DIRECT));
+
+ /* Passed in ip just needs to be on the call site */
+ *ip = rec->ip;
+
+ if (recp)
+ *recp = rec;
+
+ return entry;
+}
+
+int unregister_ftrace_direct(unsigned long ip, unsigned long addr)
+{
+ struct ftrace_direct_func *direct;
+ struct ftrace_func_entry *entry;
+ int ret = -ENODEV;
+
+ mutex_lock(&direct_mutex);
+
+ entry = find_direct_entry(&ip, NULL);
+ if (!entry)
+ goto out_unlock;
+
+ if (direct_functions->count == 1)
+ unregister_ftrace_function(&direct_ops);
+
+ ret = ftrace_set_filter_ip(&direct_ops, ip, 1, 0);
+
+ WARN_ON(ret);
+
+ remove_hash_entry(direct_functions, entry);
+
+ direct = ftrace_find_direct_func(addr);
+ if (!WARN_ON(!direct)) {
+ /* This is the good path (see the ! before WARN) */
+ direct->count--;
+ WARN_ON(direct->count < 0);
+ if (!direct->count) {
+ list_del_rcu(&direct->next);
+ synchronize_rcu_tasks();
+ kfree(direct);
+ ftrace_direct_func_count--;
+ }
+ }
+ out_unlock:
+ mutex_unlock(&direct_mutex);
+
+ return ret;
+}
+EXPORT_SYMBOL_GPL(unregister_ftrace_direct);
+
+static struct ftrace_ops stub_ops = {
+ .func = ftrace_stub,
+};
+
+/**
+ * ftrace_modify_direct_caller - modify ftrace nop directly
+ * @entry: The ftrace hash entry of the direct helper for @rec
+ * @rec: The record representing the function site to patch
+ * @old_addr: The location that the site at @rec->ip currently calls
+ * @new_addr: The location that the site at @rec->ip should call
+ *
+ * An architecture may overwrite this function to optimize the
+ * changing of the direct callback on an ftrace nop location.
+ * This is called with the ftrace_lock mutex held, and no other
+ * ftrace callbacks are on the associated record (@rec). Thus,
+ * it is safe to modify the ftrace record, where it should be
+ * currently calling @old_addr directly, to call @new_addr.
+ *
+ * Safety checks should be made to make sure that the code at
+ * @rec->ip is currently calling @old_addr. And this must
+ * also update entry->direct to @new_addr.
+ */
+int __weak ftrace_modify_direct_caller(struct ftrace_func_entry *entry,
+ struct dyn_ftrace *rec,
+ unsigned long old_addr,
+ unsigned long new_addr)
+{
+ unsigned long ip = rec->ip;
+ int ret;
+
+ /*
+ * The ftrace_lock was used to determine if the record
+ * had more than one registered user to it. If it did,
+ * we needed to prevent that from changing to do the quick
+ * switch. But if it did not (only a direct caller was attached)
+ * then this function is called. But this function can deal
+ * with attached callers to the rec that we care about, and
+ * since this function uses standard ftrace calls that take
+ * the ftrace_lock mutex, we need to release it.
+ */
+ mutex_unlock(&ftrace_lock);
+
+ /*
+ * By setting a stub function at the same address, we force
+ * the code to call the iterator and the direct_ops helper.
+ * This means that @ip does not call the direct call, and
+ * we can simply modify it.
+ */
+ ret = ftrace_set_filter_ip(&stub_ops, ip, 0, 0);
+ if (ret)
+ goto out_lock;
+
+ ret = register_ftrace_function(&stub_ops);
+ if (ret) {
+ ftrace_set_filter_ip(&stub_ops, ip, 1, 0);
+ goto out_lock;
+ }
+
+ entry->direct = new_addr;
+
+ /*
+ * By removing the stub, we put back the direct call, calling
+ * the @new_addr.
+ */
+ unregister_ftrace_function(&stub_ops);
+ ftrace_set_filter_ip(&stub_ops, ip, 1, 0);
+
+ out_lock:
+ mutex_lock(&ftrace_lock);
+
+ return ret;
+}
+
+/**
+ * modify_ftrace_direct - Modify an existing direct call to call something else
+ * @ip: The instruction pointer to modify
+ * @old_addr: The address that the current @ip calls directly
+ * @new_addr: The address that the @ip should call
+ *
+ * This modifies a ftrace direct caller at an instruction pointer without
+ * having to disable it first. The direct call will switch over to the
+ * @new_addr without missing anything.
+ *
+ * Returns: zero on success. Non zero on error, which includes:
+ * -ENODEV : the @ip given has no direct caller attached
+ * -EINVAL : the @old_addr does not match the current direct caller
+ */
+int modify_ftrace_direct(unsigned long ip,
+ unsigned long old_addr, unsigned long new_addr)
+{
+ struct ftrace_func_entry *entry;
+ struct dyn_ftrace *rec;
+ int ret = -ENODEV;
+
+ mutex_lock(&direct_mutex);
+
+ mutex_lock(&ftrace_lock);
+ entry = find_direct_entry(&ip, &rec);
+ if (!entry)
+ goto out_unlock;
+
+ ret = -EINVAL;
+ if (entry->direct != old_addr)
+ goto out_unlock;
+
+ /*
+ * If there's no other ftrace callback on the rec->ip location,
+ * then it can be changed directly by the architecture.
+ * If there is another caller, then we just need to change the
+ * direct caller helper to point to @new_addr.
+ */
+ if (ftrace_rec_count(rec) == 1) {
+ ret = ftrace_modify_direct_caller(entry, rec, old_addr, new_addr);
+ } else {
+ entry->direct = new_addr;
+ ret = 0;
+ }
+
+ out_unlock:
+ mutex_unlock(&ftrace_lock);
+ mutex_unlock(&direct_mutex);
+ return ret;
+}
+EXPORT_SYMBOL_GPL(modify_ftrace_direct);
+#endif /* CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS */
+
/**
* ftrace_set_filter_ip - set a function to filter on in ftrace by address
* @ops - the ops to set the filter with
@@ -5818,6 +6317,8 @@ void ftrace_release_mod(struct module *mod)
free_pages((unsigned long)pg->records, order);
tmp_page = pg->next;
kfree(pg);
+ ftrace_number_of_pages -= 1 << order;
+ ftrace_number_of_groups--;
}
}
@@ -6159,6 +6660,8 @@ void ftrace_free_mem(struct module *mod, void *start_ptr, void *end_ptr)
*last_pg = pg->next;
order = get_count_order(pg->size / ENTRIES_PER_PAGE);
free_pages((unsigned long)pg->records, order);
+ ftrace_number_of_pages -= 1 << order;
+ ftrace_number_of_groups--;
kfree(pg);
pg = container_of(last_pg, struct ftrace_page, next);
if (!(*last_pg))
@@ -6214,6 +6717,9 @@ void __init ftrace_init(void)
__start_mcount_loc,
__stop_mcount_loc);
+ pr_info("ftrace: allocated %ld pages with %ld groups\n",
+ ftrace_number_of_pages, ftrace_number_of_groups);
+
set_ftrace_early_filters();
return;
@@ -6754,6 +7260,18 @@ int unregister_ftrace_function(struct ftrace_ops *ops)
}
EXPORT_SYMBOL_GPL(unregister_ftrace_function);
+static bool is_permanent_ops_registered(void)
+{
+ struct ftrace_ops *op;
+
+ do_for_each_ftrace_op(op, ftrace_ops_list) {
+ if (op->flags & FTRACE_OPS_FL_PERMANENT)
+ return true;
+ } while_for_each_ftrace_op(op);
+
+ return false;
+}
+
int
ftrace_enable_sysctl(struct ctl_table *table, int write,
void __user *buffer, size_t *lenp,
@@ -6771,8 +7289,6 @@ ftrace_enable_sysctl(struct ctl_table *table, int write,
if (ret || !write || (last_ftrace_enabled == !!ftrace_enabled))
goto out;
- last_ftrace_enabled = !!ftrace_enabled;
-
if (ftrace_enabled) {
/* we are starting ftrace again */
@@ -6783,12 +7299,19 @@ ftrace_enable_sysctl(struct ctl_table *table, int write,
ftrace_startup_sysctl();
} else {
+ if (is_permanent_ops_registered()) {
+ ftrace_enabled = true;
+ ret = -EBUSY;
+ goto out;
+ }
+
/* stopping ftrace calls (just send to ftrace_stub) */
ftrace_trace_function = ftrace_stub;
ftrace_shutdown_sysctl();
}
+ last_ftrace_enabled = !!ftrace_enabled;
out:
mutex_unlock(&ftrace_lock);
return ret;
diff --git a/kernel/trace/preemptirq_delay_test.c b/kernel/trace/preemptirq_delay_test.c
index d8765c952fab..31c0fad4cb9e 100644
--- a/kernel/trace/preemptirq_delay_test.c
+++ b/kernel/trace/preemptirq_delay_test.c
@@ -10,18 +10,25 @@
#include <linux/interrupt.h>
#include <linux/irq.h>
#include <linux/kernel.h>
+#include <linux/kobject.h>
#include <linux/kthread.h>
#include <linux/module.h>
#include <linux/printk.h>
#include <linux/string.h>
+#include <linux/sysfs.h>
static ulong delay = 100;
-static char test_mode[10] = "irq";
+static char test_mode[12] = "irq";
+static uint burst_size = 1;
-module_param_named(delay, delay, ulong, S_IRUGO);
-module_param_string(test_mode, test_mode, 10, S_IRUGO);
-MODULE_PARM_DESC(delay, "Period in microseconds (100 uS default)");
-MODULE_PARM_DESC(test_mode, "Mode of the test such as preempt or irq (default irq)");
+module_param_named(delay, delay, ulong, 0444);
+module_param_string(test_mode, test_mode, 12, 0444);
+module_param_named(burst_size, burst_size, uint, 0444);
+MODULE_PARM_DESC(delay, "Period in microseconds (100 us default)");
+MODULE_PARM_DESC(test_mode, "Mode of the test such as preempt, irq, or alternate (default irq)");
+MODULE_PARM_DESC(burst_size, "The size of a burst (default 1)");
+
+#define MIN(x, y) ((x) < (y) ? (x) : (y))
static void busy_wait(ulong time)
{
@@ -34,37 +41,136 @@ static void busy_wait(ulong time)
} while ((end - start) < (time * 1000));
}
-static int preemptirq_delay_run(void *data)
+static __always_inline void irqoff_test(void)
{
unsigned long flags;
+ local_irq_save(flags);
+ busy_wait(delay);
+ local_irq_restore(flags);
+}
- if (!strcmp(test_mode, "irq")) {
- local_irq_save(flags);
- busy_wait(delay);
- local_irq_restore(flags);
- } else if (!strcmp(test_mode, "preempt")) {
- preempt_disable();
- busy_wait(delay);
- preempt_enable();
+static __always_inline void preemptoff_test(void)
+{
+ preempt_disable();
+ busy_wait(delay);
+ preempt_enable();
+}
+
+static void execute_preemptirqtest(int idx)
+{
+ if (!strcmp(test_mode, "irq"))
+ irqoff_test();
+ else if (!strcmp(test_mode, "preempt"))
+ preemptoff_test();
+ else if (!strcmp(test_mode, "alternate")) {
+ if (idx % 2 == 0)
+ irqoff_test();
+ else
+ preemptoff_test();
}
+}
+
+#define DECLARE_TESTFN(POSTFIX) \
+ static void preemptirqtest_##POSTFIX(int idx) \
+ { \
+ execute_preemptirqtest(idx); \
+ } \
+/*
+ * We create 10 different functions, so that we can get 10 different
+ * backtraces.
+ */
+DECLARE_TESTFN(0)
+DECLARE_TESTFN(1)
+DECLARE_TESTFN(2)
+DECLARE_TESTFN(3)
+DECLARE_TESTFN(4)
+DECLARE_TESTFN(5)
+DECLARE_TESTFN(6)
+DECLARE_TESTFN(7)
+DECLARE_TESTFN(8)
+DECLARE_TESTFN(9)
+
+static void (*testfuncs[])(int) = {
+ preemptirqtest_0,
+ preemptirqtest_1,
+ preemptirqtest_2,
+ preemptirqtest_3,
+ preemptirqtest_4,
+ preemptirqtest_5,
+ preemptirqtest_6,
+ preemptirqtest_7,
+ preemptirqtest_8,
+ preemptirqtest_9,
+};
+
+#define NR_TEST_FUNCS ARRAY_SIZE(testfuncs)
+
+static int preemptirq_delay_run(void *data)
+{
+ int i;
+ int s = MIN(burst_size, NR_TEST_FUNCS);
+
+ for (i = 0; i < s; i++)
+ (testfuncs[i])(i);
return 0;
}
-static int __init preemptirq_delay_init(void)
+static struct task_struct *preemptirq_start_test(void)
{
char task_name[50];
- struct task_struct *test_task;
snprintf(task_name, sizeof(task_name), "%s_test", test_mode);
+ return kthread_run(preemptirq_delay_run, NULL, task_name);
+}
+
+
+static ssize_t trigger_store(struct kobject *kobj, struct kobj_attribute *attr,
+ const char *buf, size_t count)
+{
+ preemptirq_start_test();
+ return count;
+}
+
+static struct kobj_attribute trigger_attribute =
+ __ATTR(trigger, 0200, NULL, trigger_store);
+
+static struct attribute *attrs[] = {
+ &trigger_attribute.attr,
+ NULL,
+};
+
+static struct attribute_group attr_group = {
+ .attrs = attrs,
+};
+
+static struct kobject *preemptirq_delay_kobj;
+
+static int __init preemptirq_delay_init(void)
+{
+ struct task_struct *test_task;
+ int retval;
+
+ test_task = preemptirq_start_test();
+ retval = PTR_ERR_OR_ZERO(test_task);
+ if (retval != 0)
+ return retval;
+
+ preemptirq_delay_kobj = kobject_create_and_add("preemptirq_delay_test",
+ kernel_kobj);
+ if (!preemptirq_delay_kobj)
+ return -ENOMEM;
+
+ retval = sysfs_create_group(preemptirq_delay_kobj, &attr_group);
+ if (retval)
+ kobject_put(preemptirq_delay_kobj);
- test_task = kthread_run(preemptirq_delay_run, NULL, task_name);
- return PTR_ERR_OR_ZERO(test_task);
+ return retval;
}
static void __exit preemptirq_delay_exit(void)
{
- return;
+ kobject_put(preemptirq_delay_kobj);
}
module_init(preemptirq_delay_init)
diff --git a/kernel/trace/ring_buffer_benchmark.c b/kernel/trace/ring_buffer_benchmark.c
index 09b0b49f346e..32149e46551c 100644
--- a/kernel/trace/ring_buffer_benchmark.c
+++ b/kernel/trace/ring_buffer_benchmark.c
@@ -269,10 +269,10 @@ static void ring_buffer_producer(void)
#ifndef CONFIG_PREEMPTION
/*
- * If we are a non preempt kernel, the 10 second run will
+ * If we are a non preempt kernel, the 10 seconds run will
* stop everything while it runs. Instead, we will call
* cond_resched and also add any time that was lost by a
- * rescedule.
+ * reschedule.
*
* Do a cond resched at the same frequency we would wake up
* the reader.
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 6a0ee9178365..02a23a6e5e00 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -45,6 +45,9 @@
#include <linux/trace.h>
#include <linux/sched/clock.h>
#include <linux/sched/rt.h>
+#include <linux/fsnotify.h>
+#include <linux/irq_work.h>
+#include <linux/workqueue.h>
#include "trace.h"
#include "trace_output.h"
@@ -298,12 +301,24 @@ static void __trace_array_put(struct trace_array *this_tr)
this_tr->ref--;
}
+/**
+ * trace_array_put - Decrement the reference counter for this trace array.
+ *
+ * NOTE: Use this when we no longer need the trace array returned by
+ * trace_array_get_by_name(). This ensures the trace array can be later
+ * destroyed.
+ *
+ */
void trace_array_put(struct trace_array *this_tr)
{
+ if (!this_tr)
+ return;
+
mutex_lock(&trace_types_lock);
__trace_array_put(this_tr);
mutex_unlock(&trace_types_lock);
}
+EXPORT_SYMBOL_GPL(trace_array_put);
int tracing_check_open_get_tr(struct trace_array *tr)
{
@@ -1497,6 +1512,74 @@ static ssize_t trace_seq_to_buffer(struct trace_seq *s, void *buf, size_t cnt)
}
unsigned long __read_mostly tracing_thresh;
+static const struct file_operations tracing_max_lat_fops;
+
+#if (defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)) && \
+ defined(CONFIG_FSNOTIFY)
+
+static struct workqueue_struct *fsnotify_wq;
+
+static void latency_fsnotify_workfn(struct work_struct *work)
+{
+ struct trace_array *tr = container_of(work, struct trace_array,
+ fsnotify_work);
+ fsnotify(tr->d_max_latency->d_inode, FS_MODIFY,
+ tr->d_max_latency->d_inode, FSNOTIFY_EVENT_INODE, NULL, 0);
+}
+
+static void latency_fsnotify_workfn_irq(struct irq_work *iwork)
+{
+ struct trace_array *tr = container_of(iwork, struct trace_array,
+ fsnotify_irqwork);
+ queue_work(fsnotify_wq, &tr->fsnotify_work);
+}
+
+static void trace_create_maxlat_file(struct trace_array *tr,
+ struct dentry *d_tracer)
+{
+ INIT_WORK(&tr->fsnotify_work, latency_fsnotify_workfn);
+ init_irq_work(&tr->fsnotify_irqwork, latency_fsnotify_workfn_irq);
+ tr->d_max_latency = trace_create_file("tracing_max_latency", 0644,
+ d_tracer, &tr->max_latency,
+ &tracing_max_lat_fops);
+}
+
+__init static int latency_fsnotify_init(void)
+{
+ fsnotify_wq = alloc_workqueue("tr_max_lat_wq",
+ WQ_UNBOUND | WQ_HIGHPRI, 0);
+ if (!fsnotify_wq) {
+ pr_err("Unable to allocate tr_max_lat_wq\n");
+ return -ENOMEM;
+ }
+ return 0;
+}
+
+late_initcall_sync(latency_fsnotify_init);
+
+void latency_fsnotify(struct trace_array *tr)
+{
+ if (!fsnotify_wq)
+ return;
+ /*
+ * We cannot call queue_work(&tr->fsnotify_work) from here because it's
+ * possible that we are called from __schedule() or do_idle(), which
+ * could cause a deadlock.
+ */
+ irq_work_queue(&tr->fsnotify_irqwork);
+}
+
+/*
+ * (defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)) && \
+ * defined(CONFIG_FSNOTIFY)
+ */
+#else
+
+#define trace_create_maxlat_file(tr, d_tracer) \
+ trace_create_file("tracing_max_latency", 0644, d_tracer, \
+ &tr->max_latency, &tracing_max_lat_fops)
+
+#endif
#ifdef CONFIG_TRACER_MAX_TRACE
/*
@@ -1536,6 +1619,7 @@ __update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
/* record this tasks comm */
tracing_record_cmdline(tsk);
+ latency_fsnotify(tr);
}
/**
@@ -3225,6 +3309,9 @@ int trace_array_printk(struct trace_array *tr,
if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
return 0;
+ if (!tr)
+ return -ENOENT;
+
va_start(ap, fmt);
ret = trace_array_vprintk(tr, ip, fmt, ap);
va_end(ap);
@@ -3654,6 +3741,8 @@ print_trace_header(struct seq_file *m, struct trace_iterator *iter)
"desktop",
#elif defined(CONFIG_PREEMPT)
"preempt",
+#elif defined(CONFIG_PREEMPT_RT)
+ "preempt_rt",
#else
"unknown",
#endif
@@ -4609,7 +4698,7 @@ int set_tracer_flag(struct trace_array *tr, unsigned int mask, int enabled)
if (mask == TRACE_ITER_RECORD_TGID) {
if (!tgid_map)
- tgid_map = kcalloc(PID_MAX_DEFAULT + 1,
+ tgid_map = kvcalloc(PID_MAX_DEFAULT + 1,
sizeof(*tgid_map),
GFP_KERNEL);
if (!tgid_map) {
@@ -7583,14 +7672,23 @@ static ssize_t
tracing_read_dyn_info(struct file *filp, char __user *ubuf,
size_t cnt, loff_t *ppos)
{
- unsigned long *p = filp->private_data;
- char buf[64]; /* Not too big for a shallow stack */
+ ssize_t ret;
+ char *buf;
int r;
- r = scnprintf(buf, 63, "%ld", *p);
- buf[r++] = '\n';
+ /* 256 should be plenty to hold the amount needed */
+ buf = kmalloc(256, GFP_KERNEL);
+ if (!buf)
+ return -ENOMEM;
- return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
+ r = scnprintf(buf, 256, "%ld pages:%ld groups: %ld\n",
+ ftrace_update_tot_cnt,
+ ftrace_number_of_pages,
+ ftrace_number_of_groups);
+
+ ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
+ kfree(buf);
+ return ret;
}
static const struct file_operations tracing_dyn_info_fops = {
@@ -8351,24 +8449,15 @@ static void update_tracer_options(struct trace_array *tr)
mutex_unlock(&trace_types_lock);
}
-struct trace_array *trace_array_create(const char *name)
+static struct trace_array *trace_array_create(const char *name)
{
struct trace_array *tr;
int ret;
- mutex_lock(&event_mutex);
- mutex_lock(&trace_types_lock);
-
- ret = -EEXIST;
- list_for_each_entry(tr, &ftrace_trace_arrays, list) {
- if (tr->name && strcmp(tr->name, name) == 0)
- goto out_unlock;
- }
-
ret = -ENOMEM;
tr = kzalloc(sizeof(*tr), GFP_KERNEL);
if (!tr)
- goto out_unlock;
+ return ERR_PTR(ret);
tr->name = kstrdup(name, GFP_KERNEL);
if (!tr->name)
@@ -8413,8 +8502,8 @@ struct trace_array *trace_array_create(const char *name)
list_add(&tr->list, &ftrace_trace_arrays);
- mutex_unlock(&trace_types_lock);
- mutex_unlock(&event_mutex);
+ tr->ref++;
+
return tr;
@@ -8424,24 +8513,77 @@ struct trace_array *trace_array_create(const char *name)
kfree(tr->name);
kfree(tr);
- out_unlock:
- mutex_unlock(&trace_types_lock);
- mutex_unlock(&event_mutex);
-
return ERR_PTR(ret);
}
-EXPORT_SYMBOL_GPL(trace_array_create);
static int instance_mkdir(const char *name)
{
- return PTR_ERR_OR_ZERO(trace_array_create(name));
+ struct trace_array *tr;
+ int ret;
+
+ mutex_lock(&event_mutex);
+ mutex_lock(&trace_types_lock);
+
+ ret = -EEXIST;
+ list_for_each_entry(tr, &ftrace_trace_arrays, list) {
+ if (tr->name && strcmp(tr->name, name) == 0)
+ goto out_unlock;
+ }
+
+ tr = trace_array_create(name);
+
+ ret = PTR_ERR_OR_ZERO(tr);
+
+out_unlock:
+ mutex_unlock(&trace_types_lock);
+ mutex_unlock(&event_mutex);
+ return ret;
+}
+
+/**
+ * trace_array_get_by_name - Create/Lookup a trace array, given its name.
+ * @name: The name of the trace array to be looked up/created.
+ *
+ * Returns pointer to trace array with given name.
+ * NULL, if it cannot be created.
+ *
+ * NOTE: This function increments the reference counter associated with the
+ * trace array returned. This makes sure it cannot be freed while in use.
+ * Use trace_array_put() once the trace array is no longer needed.
+ *
+ */
+struct trace_array *trace_array_get_by_name(const char *name)
+{
+ struct trace_array *tr;
+
+ mutex_lock(&event_mutex);
+ mutex_lock(&trace_types_lock);
+
+ list_for_each_entry(tr, &ftrace_trace_arrays, list) {
+ if (tr->name && strcmp(tr->name, name) == 0)
+ goto out_unlock;
+ }
+
+ tr = trace_array_create(name);
+
+ if (IS_ERR(tr))
+ tr = NULL;
+out_unlock:
+ if (tr)
+ tr->ref++;
+
+ mutex_unlock(&trace_types_lock);
+ mutex_unlock(&event_mutex);
+ return tr;
}
+EXPORT_SYMBOL_GPL(trace_array_get_by_name);
static int __remove_instance(struct trace_array *tr)
{
int i;
- if (tr->ref || (tr->current_trace && tr->current_trace->ref))
+ /* Reference counter for a newly created trace array = 1. */
+ if (tr->ref > 1 || (tr->current_trace && tr->current_trace->ref))
return -EBUSY;
list_del(&tr->list);
@@ -8473,17 +8615,26 @@ static int __remove_instance(struct trace_array *tr)
return 0;
}
-int trace_array_destroy(struct trace_array *tr)
+int trace_array_destroy(struct trace_array *this_tr)
{
+ struct trace_array *tr;
int ret;
- if (!tr)
+ if (!this_tr)
return -EINVAL;
mutex_lock(&event_mutex);
mutex_lock(&trace_types_lock);
- ret = __remove_instance(tr);
+ ret = -ENODEV;
+
+ /* Making sure trace array exists before destroying it. */
+ list_for_each_entry(tr, &ftrace_trace_arrays, list) {
+ if (tr == this_tr) {
+ ret = __remove_instance(tr);
+ break;
+ }
+ }
mutex_unlock(&trace_types_lock);
mutex_unlock(&event_mutex);
@@ -8585,8 +8736,7 @@ init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer)
create_trace_options_dir(tr);
#if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
- trace_create_file("tracing_max_latency", 0644, d_tracer,
- &tr->max_latency, &tracing_max_lat_fops);
+ trace_create_maxlat_file(tr, d_tracer);
#endif
if (ftrace_create_function_files(tr, d_tracer))
@@ -8782,7 +8932,7 @@ static __init int tracer_init_tracefs(void)
#ifdef CONFIG_DYNAMIC_FTRACE
trace_create_file("dyn_ftrace_total_info", 0444, d_tracer,
- &ftrace_update_tot_cnt, &tracing_dyn_info_fops);
+ NULL, &tracing_dyn_info_fops);
#endif
create_trace_instances(d_tracer);
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index d685c61085c0..ca7fccafbcbb 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -11,11 +11,14 @@
#include <linux/mmiotrace.h>
#include <linux/tracepoint.h>
#include <linux/ftrace.h>
+#include <linux/trace.h>
#include <linux/hw_breakpoint.h>
#include <linux/trace_seq.h>
#include <linux/trace_events.h>
#include <linux/compiler.h>
#include <linux/glob.h>
+#include <linux/irq_work.h>
+#include <linux/workqueue.h>
#ifdef CONFIG_FTRACE_SYSCALLS
#include <asm/unistd.h> /* For NR_SYSCALLS */
@@ -264,6 +267,11 @@ struct trace_array {
#endif
#if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
unsigned long max_latency;
+#ifdef CONFIG_FSNOTIFY
+ struct dentry *d_max_latency;
+ struct work_struct fsnotify_work;
+ struct irq_work fsnotify_irqwork;
+#endif
#endif
struct trace_pid_list __rcu *filtered_pids;
/*
@@ -337,7 +345,6 @@ extern struct list_head ftrace_trace_arrays;
extern struct mutex trace_types_lock;
extern int trace_array_get(struct trace_array *tr);
-extern void trace_array_put(struct trace_array *tr);
extern int tracing_check_open_get_tr(struct trace_array *tr);
extern int tracing_set_time_stamp_abs(struct trace_array *tr, bool abs);
@@ -786,6 +793,17 @@ void update_max_tr_single(struct trace_array *tr,
struct task_struct *tsk, int cpu);
#endif /* CONFIG_TRACER_MAX_TRACE */
+#if (defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)) && \
+ defined(CONFIG_FSNOTIFY)
+
+void latency_fsnotify(struct trace_array *tr);
+
+#else
+
+static inline void latency_fsnotify(struct trace_array *tr) { }
+
+#endif
+
#ifdef CONFIG_STACKTRACE
void __trace_stack(struct trace_array *tr, unsigned long flags, int skip,
int pc);
@@ -804,6 +822,8 @@ extern void trace_event_follow_fork(struct trace_array *tr, bool enable);
#ifdef CONFIG_DYNAMIC_FTRACE
extern unsigned long ftrace_update_tot_cnt;
+extern unsigned long ftrace_number_of_pages;
+extern unsigned long ftrace_number_of_groups;
void ftrace_init_trace_array(struct trace_array *tr);
#else
static inline void ftrace_init_trace_array(struct trace_array *tr) { }
@@ -853,8 +873,6 @@ trace_vprintk(unsigned long ip, const char *fmt, va_list args);
extern int
trace_array_vprintk(struct trace_array *tr,
unsigned long ip, const char *fmt, va_list args);
-int trace_array_printk(struct trace_array *tr,
- unsigned long ip, const char *fmt, ...);
int trace_array_printk_buf(struct ring_buffer *buffer,
unsigned long ip, const char *fmt, ...);
void trace_printk_seq(struct trace_seq *s);
@@ -1870,7 +1888,6 @@ extern const char *__start___tracepoint_str[];
extern const char *__stop___tracepoint_str[];
void trace_printk_control(bool enabled);
-void trace_printk_init_buffers(void);
void trace_printk_start_comm(void);
int trace_keep_overwrite(struct tracer *tracer, u32 mask, int set);
int set_tracer_flag(struct trace_array *tr, unsigned int mask, int enabled);
diff --git a/kernel/trace/trace_branch.c b/kernel/trace/trace_branch.c
index 3ea65cdff30d..88e158d27965 100644
--- a/kernel/trace/trace_branch.c
+++ b/kernel/trace/trace_branch.c
@@ -244,7 +244,7 @@ static int annotated_branch_stat_headers(struct seq_file *m)
return 0;
}
-static inline long get_incorrect_percent(struct ftrace_branch_data *p)
+static inline long get_incorrect_percent(const struct ftrace_branch_data *p)
{
long percent;
@@ -332,10 +332,10 @@ annotated_branch_stat_next(void *v, int idx)
return p;
}
-static int annotated_branch_stat_cmp(void *p1, void *p2)
+static int annotated_branch_stat_cmp(const void *p1, const void *p2)
{
- struct ftrace_branch_data *a = p1;
- struct ftrace_branch_data *b = p2;
+ const struct ftrace_branch_data *a = p1;
+ const struct ftrace_branch_data *b = p2;
long percent_a, percent_b;
diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c
index fba87d10f0c1..6b3a69e9aa6a 100644
--- a/kernel/trace/trace_events.c
+++ b/kernel/trace/trace_events.c
@@ -793,6 +793,8 @@ int ftrace_set_clr_event(struct trace_array *tr, char *buf, int set)
char *event = NULL, *sub = NULL, *match;
int ret;
+ if (!tr)
+ return -ENOENT;
/*
* The buf format can be <subsystem>:<event-name>
* *:<event-name> means any event by that name.
@@ -825,7 +827,6 @@ int ftrace_set_clr_event(struct trace_array *tr, char *buf, int set)
return ret;
}
-EXPORT_SYMBOL_GPL(ftrace_set_clr_event);
/**
* trace_set_clr_event - enable or disable an event
@@ -850,6 +851,32 @@ int trace_set_clr_event(const char *system, const char *event, int set)
}
EXPORT_SYMBOL_GPL(trace_set_clr_event);
+/**
+ * trace_array_set_clr_event - enable or disable an event for a trace array.
+ * @tr: concerned trace array.
+ * @system: system name to match (NULL for any system)
+ * @event: event name to match (NULL for all events, within system)
+ * @enable: true to enable, false to disable
+ *
+ * This is a way for other parts of the kernel to enable or disable
+ * event recording.
+ *
+ * Returns 0 on success, -EINVAL if the parameters do not match any
+ * registered events.
+ */
+int trace_array_set_clr_event(struct trace_array *tr, const char *system,
+ const char *event, bool enable)
+{
+ int set;
+
+ if (!tr)
+ return -ENOENT;
+
+ set = (enable == true) ? 1 : 0;
+ return __ftrace_set_clr_event(tr, NULL, system, event, set);
+}
+EXPORT_SYMBOL_GPL(trace_array_set_clr_event);
+
/* 128 should be much more than enough */
#define EVENT_BUF_SIZE 127
diff --git a/kernel/trace/trace_events_hist.c b/kernel/trace/trace_events_hist.c
index 7482a1466ebf..f49d1a36d3ae 100644
--- a/kernel/trace/trace_events_hist.c
+++ b/kernel/trace/trace_events_hist.c
@@ -23,7 +23,7 @@
#include "trace_dynevent.h"
#define SYNTH_SYSTEM "synthetic"
-#define SYNTH_FIELDS_MAX 16
+#define SYNTH_FIELDS_MAX 32
#define STR_VAR_LEN_MAX 32 /* must be multiple of sizeof(u64) */
diff --git a/kernel/trace/trace_export.c b/kernel/trace/trace_export.c
index 45630a76ed3a..2e6d2e9741cc 100644
--- a/kernel/trace/trace_export.c
+++ b/kernel/trace/trace_export.c
@@ -171,7 +171,7 @@ ftrace_define_fields_##name(struct trace_event_call *event_call) \
#define FTRACE_ENTRY_REG(call, struct_name, etype, tstruct, print, filter,\
regfn) \
\
-struct trace_event_class __refdata event_class_ftrace_##call = { \
+static struct trace_event_class __refdata event_class_ftrace_##call = { \
.system = __stringify(TRACE_SYSTEM), \
.define_fields = ftrace_define_fields_##call, \
.fields = LIST_HEAD_INIT(event_class_ftrace_##call.fields),\
@@ -187,7 +187,7 @@ struct trace_event_call __used event_##call = { \
.print_fmt = print, \
.flags = TRACE_EVENT_FL_IGNORE_ENABLE, \
}; \
-struct trace_event_call __used \
+static struct trace_event_call __used \
__attribute__((section("_ftrace_events"))) *__event_##call = &event_##call;
#undef FTRACE_ENTRY
diff --git a/kernel/trace/trace_hwlat.c b/kernel/trace/trace_hwlat.c
index 862f4b0139fc..6638d63f0921 100644
--- a/kernel/trace/trace_hwlat.c
+++ b/kernel/trace/trace_hwlat.c
@@ -1,6 +1,6 @@
// SPDX-License-Identifier: GPL-2.0
/*
- * trace_hwlatdetect.c - A simple Hardware Latency detector.
+ * trace_hwlat.c - A simple Hardware Latency detector.
*
* Use this tracer to detect large system latencies induced by the behavior of
* certain underlying system hardware or firmware, independent of Linux itself.
@@ -237,6 +237,7 @@ static int get_sample(void)
/* If we exceed the threshold value, we have found a hardware latency */
if (sample > thresh || outer_sample > thresh) {
struct hwlat_sample s;
+ u64 latency;
ret = 1;
@@ -253,11 +254,13 @@ static int get_sample(void)
s.nmi_count = nmi_count;
trace_hwlat_sample(&s);
+ latency = max(sample, outer_sample);
+
/* Keep a running maximum ever recorded hardware latency */
- if (sample > tr->max_latency)
- tr->max_latency = sample;
- if (outer_sample > tr->max_latency)
- tr->max_latency = outer_sample;
+ if (latency > tr->max_latency) {
+ tr->max_latency = latency;
+ latency_fsnotify(tr);
+ }
}
out:
@@ -276,7 +279,7 @@ static void move_to_next_cpu(void)
return;
/*
* If for some reason the user modifies the CPU affinity
- * of this thread, than stop migrating for the duration
+ * of this thread, then stop migrating for the duration
* of the current test.
*/
if (!cpumask_equal(current_mask, current->cpus_ptr))
diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c
index 1552a95c743b..7f890262c8a3 100644
--- a/kernel/trace/trace_kprobe.c
+++ b/kernel/trace/trace_kprobe.c
@@ -435,11 +435,10 @@ static int disable_trace_kprobe(struct trace_event_call *call,
#if defined(CONFIG_KPROBES_ON_FTRACE) && \
!defined(CONFIG_KPROBE_EVENTS_ON_NOTRACE)
-static bool within_notrace_func(struct trace_kprobe *tk)
+static bool __within_notrace_func(unsigned long addr)
{
- unsigned long offset, size, addr;
+ unsigned long offset, size;
- addr = trace_kprobe_address(tk);
if (!addr || !kallsyms_lookup_size_offset(addr, &size, &offset))
return false;
@@ -452,6 +451,28 @@ static bool within_notrace_func(struct trace_kprobe *tk)
*/
return !ftrace_location_range(addr, addr + size - 1);
}
+
+static bool within_notrace_func(struct trace_kprobe *tk)
+{
+ unsigned long addr = addr = trace_kprobe_address(tk);
+ char symname[KSYM_NAME_LEN], *p;
+
+ if (!__within_notrace_func(addr))
+ return false;
+
+ /* Check if the address is on a suffixed-symbol */
+ if (!lookup_symbol_name(addr, symname)) {
+ p = strchr(symname, '.');
+ if (!p)
+ return true;
+ *p = '\0';
+ addr = (unsigned long)kprobe_lookup_name(symname, 0);
+ if (addr)
+ return __within_notrace_func(addr);
+ }
+
+ return true;
+}
#else
#define within_notrace_func(tk) (false)
#endif
diff --git a/kernel/trace/trace_output.c b/kernel/trace/trace_output.c
index d54ce252b05a..d9b4b7c22db4 100644
--- a/kernel/trace/trace_output.c
+++ b/kernel/trace/trace_output.c
@@ -274,6 +274,21 @@ trace_print_array_seq(struct trace_seq *p, const void *buf, int count,
}
EXPORT_SYMBOL(trace_print_array_seq);
+const char *
+trace_print_hex_dump_seq(struct trace_seq *p, const char *prefix_str,
+ int prefix_type, int rowsize, int groupsize,
+ const void *buf, size_t len, bool ascii)
+{
+ const char *ret = trace_seq_buffer_ptr(p);
+
+ trace_seq_putc(p, '\n');
+ trace_seq_hex_dump(p, prefix_str, prefix_type,
+ rowsize, groupsize, buf, len, ascii);
+ trace_seq_putc(p, 0);
+ return ret;
+}
+EXPORT_SYMBOL(trace_print_hex_dump_seq);
+
int trace_raw_output_prep(struct trace_iterator *iter,
struct trace_event *trace_event)
{
diff --git a/kernel/trace/trace_seq.c b/kernel/trace/trace_seq.c
index 6b1c562ffdaf..344e4c1aa09c 100644
--- a/kernel/trace/trace_seq.c
+++ b/kernel/trace/trace_seq.c
@@ -376,3 +376,33 @@ int trace_seq_to_user(struct trace_seq *s, char __user *ubuf, int cnt)
return seq_buf_to_user(&s->seq, ubuf, cnt);
}
EXPORT_SYMBOL_GPL(trace_seq_to_user);
+
+int trace_seq_hex_dump(struct trace_seq *s, const char *prefix_str,
+ int prefix_type, int rowsize, int groupsize,
+ const void *buf, size_t len, bool ascii)
+{
+ unsigned int save_len = s->seq.len;
+
+ if (s->full)
+ return 0;
+
+ __trace_seq_init(s);
+
+ if (TRACE_SEQ_BUF_LEFT(s) < 1) {
+ s->full = 1;
+ return 0;
+ }
+
+ seq_buf_hex_dump(&(s->seq), prefix_str,
+ prefix_type, rowsize, groupsize,
+ buf, len, ascii);
+
+ if (unlikely(seq_buf_has_overflowed(&s->seq))) {
+ s->seq.len = save_len;
+ s->full = 1;
+ return 0;
+ }
+
+ return 1;
+}
+EXPORT_SYMBOL(trace_seq_hex_dump);
diff --git a/kernel/trace/trace_stat.c b/kernel/trace/trace_stat.c
index 9ab0a1a7ad5e..874f1274cf99 100644
--- a/kernel/trace/trace_stat.c
+++ b/kernel/trace/trace_stat.c
@@ -72,9 +72,7 @@ static void destroy_session(struct stat_session *session)
kfree(session);
}
-typedef int (*cmp_stat_t)(void *, void *);
-
-static int insert_stat(struct rb_root *root, void *stat, cmp_stat_t cmp)
+static int insert_stat(struct rb_root *root, void *stat, cmp_func_t cmp)
{
struct rb_node **new = &(root->rb_node), *parent = NULL;
struct stat_node *data;
@@ -112,7 +110,7 @@ static int insert_stat(struct rb_root *root, void *stat, cmp_stat_t cmp)
* This one will force an insertion as right-most node
* in the rbtree.
*/
-static int dummy_cmp(void *p1, void *p2)
+static int dummy_cmp(const void *p1, const void *p2)
{
return -1;
}
diff --git a/kernel/trace/trace_stat.h b/kernel/trace/trace_stat.h
index 8786d17caf49..31d7dc5bf1db 100644
--- a/kernel/trace/trace_stat.h
+++ b/kernel/trace/trace_stat.h
@@ -16,7 +16,7 @@ struct tracer_stat {
void *(*stat_start)(struct tracer_stat *trace);
void *(*stat_next)(void *prev, int idx);
/* Compare two entries for stats sorting */
- int (*stat_cmp)(void *p1, void *p2);
+ cmp_func_t stat_cmp;
/* Print a stat entry */
int (*stat_show)(struct seq_file *s, void *p);
/* Release an entry */
diff --git a/kernel/trace/trace_syscalls.c b/kernel/trace/trace_syscalls.c
index fa8fbff736d6..16fa218556fa 100644
--- a/kernel/trace/trace_syscalls.c
+++ b/kernel/trace/trace_syscalls.c
@@ -7,6 +7,7 @@
#include <linux/module.h> /* for MODULE_NAME_LEN via KSYM_SYMBOL_LEN */
#include <linux/ftrace.h>
#include <linux/perf_event.h>
+#include <linux/xarray.h>
#include <asm/syscall.h>
#include "trace_output.h"
@@ -30,6 +31,7 @@ syscall_get_enter_fields(struct trace_event_call *call)
extern struct syscall_metadata *__start_syscalls_metadata[];
extern struct syscall_metadata *__stop_syscalls_metadata[];
+static DEFINE_XARRAY(syscalls_metadata_sparse);
static struct syscall_metadata **syscalls_metadata;
#ifndef ARCH_HAS_SYSCALL_MATCH_SYM_NAME
@@ -101,6 +103,9 @@ find_syscall_meta(unsigned long syscall)
static struct syscall_metadata *syscall_nr_to_meta(int nr)
{
+ if (IS_ENABLED(CONFIG_HAVE_SPARSE_SYSCALL_NR))
+ return xa_load(&syscalls_metadata_sparse, (unsigned long)nr);
+
if (!syscalls_metadata || nr >= NR_syscalls || nr < 0)
return NULL;
@@ -536,12 +541,16 @@ void __init init_ftrace_syscalls(void)
struct syscall_metadata *meta;
unsigned long addr;
int i;
-
- syscalls_metadata = kcalloc(NR_syscalls, sizeof(*syscalls_metadata),
- GFP_KERNEL);
- if (!syscalls_metadata) {
- WARN_ON(1);
- return;
+ void *ret;
+
+ if (!IS_ENABLED(CONFIG_HAVE_SPARSE_SYSCALL_NR)) {
+ syscalls_metadata = kcalloc(NR_syscalls,
+ sizeof(*syscalls_metadata),
+ GFP_KERNEL);
+ if (!syscalls_metadata) {
+ WARN_ON(1);
+ return;
+ }
}
for (i = 0; i < NR_syscalls; i++) {
@@ -551,7 +560,16 @@ void __init init_ftrace_syscalls(void)
continue;
meta->syscall_nr = i;
- syscalls_metadata[i] = meta;
+
+ if (!IS_ENABLED(CONFIG_HAVE_SPARSE_SYSCALL_NR)) {
+ syscalls_metadata[i] = meta;
+ } else {
+ ret = xa_store(&syscalls_metadata_sparse, i, meta,
+ GFP_KERNEL);
+ WARN(xa_is_err(ret),
+ "Syscall memory allocation failed\n");
+ }
+
}
}
OpenPOWER on IntegriCloud