From d5a3b1f691865be576c2bffa708549b8cdccda19 Mon Sep 17 00:00:00 2001
From: Alexei Starovoitov <ast@fb.com>
Date: Wed, 17 Feb 2016 19:58:58 -0800
Subject: bpf: introduce BPF_MAP_TYPE_STACK_TRACE

add new map type to store stack traces and corresponding helper
bpf_get_stackid(ctx, map, flags) - walk user or kernel stack and return id
@ctx: struct pt_regs*
@map: pointer to stack_trace map
@flags: bits 0-7 - numer of stack frames to skip
        bit 8 - collect user stack instead of kernel
        bit 9 - compare stacks by hash only
        bit 10 - if two different stacks hash into the same stackid
                 discard old
        other bits - reserved
Return: >= 0 stackid on success or negative error

stackid is a 32-bit integer handle that can be further combined with
other data (including other stackid) and used as a key into maps.

Userspace will access stackmap using standard lookup/delete syscall commands to
retrieve full stack trace for given stackid.

Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 kernel/trace/bpf_trace.c | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'kernel/trace')

diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
index 326a75e884db..4b8caa392b86 100644
--- a/kernel/trace/bpf_trace.c
+++ b/kernel/trace/bpf_trace.c
@@ -299,6 +299,8 @@ static const struct bpf_func_proto *kprobe_prog_func_proto(enum bpf_func_id func
 		return &bpf_perf_event_read_proto;
 	case BPF_FUNC_perf_event_output:
 		return &bpf_perf_event_output_proto;
+	case BPF_FUNC_get_stackid:
+		return &bpf_get_stackid_proto;
 	default:
 		return NULL;
 	}
-- 
cgit v1.2.3


From b121d1e74d1f24654bdc3165d3db1ca149501356 Mon Sep 17 00:00:00 2001
From: Alexei Starovoitov <ast@fb.com>
Date: Mon, 7 Mar 2016 21:57:13 -0800
Subject: bpf: prevent kprobe+bpf deadlocks

if kprobe is placed within update or delete hash map helpers
that hold bucket spin lock and triggered bpf program is trying to
grab the spinlock for the same bucket on the same cpu, it will
deadlock.
Fix it by extending existing recursion prevention mechanism.

Note, map_lookup and other tracing helpers don't have this problem,
since they don't hold any locks and don't modify global data.
bpf_trace_printk has its own recursive check and ok as well.

Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/bpf.h      |  3 +++
 kernel/bpf/syscall.c     | 13 +++++++++++++
 kernel/trace/bpf_trace.c |  2 --
 3 files changed, 16 insertions(+), 2 deletions(-)

(limited to 'kernel/trace')

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 51e498e5470e..4b070827200d 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -10,6 +10,7 @@
 #include <uapi/linux/bpf.h>
 #include <linux/workqueue.h>
 #include <linux/file.h>
+#include <linux/percpu.h>
 
 struct bpf_map;
 
@@ -163,6 +164,8 @@ bool bpf_prog_array_compatible(struct bpf_array *array, const struct bpf_prog *f
 const struct bpf_func_proto *bpf_get_trace_printk_proto(void);
 
 #ifdef CONFIG_BPF_SYSCALL
+DECLARE_PER_CPU(int, bpf_prog_active);
+
 void bpf_register_prog_type(struct bpf_prog_type_list *tl);
 void bpf_register_map_type(struct bpf_map_type_list *tl);
 
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index c95a753c2007..dc99f6a000f5 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -18,6 +18,8 @@
 #include <linux/filter.h>
 #include <linux/version.h>
 
+DEFINE_PER_CPU(int, bpf_prog_active);
+
 int sysctl_unprivileged_bpf_disabled __read_mostly;
 
 static LIST_HEAD(bpf_map_types);
@@ -347,6 +349,11 @@ static int map_update_elem(union bpf_attr *attr)
 	if (copy_from_user(value, uvalue, value_size) != 0)
 		goto free_value;
 
+	/* must increment bpf_prog_active to avoid kprobe+bpf triggering from
+	 * inside bpf map update or delete otherwise deadlocks are possible
+	 */
+	preempt_disable();
+	__this_cpu_inc(bpf_prog_active);
 	if (map->map_type == BPF_MAP_TYPE_PERCPU_HASH) {
 		err = bpf_percpu_hash_update(map, key, value, attr->flags);
 	} else if (map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY) {
@@ -356,6 +363,8 @@ static int map_update_elem(union bpf_attr *attr)
 		err = map->ops->map_update_elem(map, key, value, attr->flags);
 		rcu_read_unlock();
 	}
+	__this_cpu_dec(bpf_prog_active);
+	preempt_enable();
 
 free_value:
 	kfree(value);
@@ -394,9 +403,13 @@ static int map_delete_elem(union bpf_attr *attr)
 	if (copy_from_user(key, ukey, map->key_size) != 0)
 		goto free_key;
 
+	preempt_disable();
+	__this_cpu_inc(bpf_prog_active);
 	rcu_read_lock();
 	err = map->ops->map_delete_elem(map, key);
 	rcu_read_unlock();
+	__this_cpu_dec(bpf_prog_active);
+	preempt_enable();
 
 free_key:
 	kfree(key);
diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
index 4b8caa392b86..3e4ffb3ace5f 100644
--- a/kernel/trace/bpf_trace.c
+++ b/kernel/trace/bpf_trace.c
@@ -13,8 +13,6 @@
 #include <linux/ctype.h>
 #include "trace.h"
 
-static DEFINE_PER_CPU(int, bpf_prog_active);
-
 /**
  * trace_call_bpf - invoke BPF program
  * @prog: BPF program
-- 
cgit v1.2.3