From 250bbd12c2fe1221ec96d8087d63e982d4f2180a Mon Sep 17 00:00:00 2001
From: Denys Vlasenko <dvlasenk@redhat.com>
Date: Thu, 24 Apr 2014 19:08:24 +0200
Subject: uprobes/x86: Refuse to attach uprobe to "word-sized" branch insns

All branch insns on x86 can be prefixed with the operand-size
override prefix, 0x66. It was only ever useful for performing
jumps to 32-bit offsets in 16-bit code segments.

In 32-bit code, such instructions are useless since
they cause IP truncation to 16 bits, and in case of call insns,
they save only 16 bits of return address and misalign
the stack pointer as a "bonus".

In 64-bit code, such instructions are treated differently by Intel
and AMD CPUs: Intel ignores the prefix altogether,
AMD treats them the same as in 32-bit mode.

Before this patch, the emulation code would execute
the instructions as if they have no 0x66 prefix.

With this patch, we refuse to attach uprobes to such insns.

Signed-off-by: Denys Vlasenko <dvlasenk@redhat.com>
Acked-by: Jim Keniston <jkenisto@us.ibm.com>
Acked-by: Masami Hiramatsu <masami.hiramatsu.pt@hitachi.com>
Signed-off-by: Oleg Nesterov <oleg@redhat.com>
---
 arch/x86/kernel/uprobes.c | 11 +++++++++++
 1 file changed, 11 insertions(+)

(limited to 'arch/x86/kernel')

diff --git a/arch/x86/kernel/uprobes.c b/arch/x86/kernel/uprobes.c
index ace22916ade3..3cf24a218196 100644
--- a/arch/x86/kernel/uprobes.c
+++ b/arch/x86/kernel/uprobes.c
@@ -583,6 +583,7 @@ static struct uprobe_xol_ops branch_xol_ops = {
 static int branch_setup_xol_ops(struct arch_uprobe *auprobe, struct insn *insn)
 {
 	u8 opc1 = OPCODE1(insn);
+	int i;
 
 	/* has the side-effect of processing the entire instruction */
 	insn_get_length(insn);
@@ -612,6 +613,16 @@ static int branch_setup_xol_ops(struct arch_uprobe *auprobe, struct insn *insn)
 			return -ENOSYS;
 	}
 
+	/*
+	 * 16-bit overrides such as CALLW (66 e8 nn nn) are not supported.
+	 * Intel and AMD behavior differ in 64-bit mode: Intel ignores 66 prefix.
+	 * No one uses these insns, reject any branch insns with such prefix.
+	 */
+	for (i = 0; i < insn->prefixes.nbytes; i++) {
+		if (insn->prefixes.bytes[i] == 0x66)
+			return -ENOTSUPP;
+	}
+
 	auprobe->branch.opc1 = opc1;
 	auprobe->branch.ilen = insn->length;
 	auprobe->branch.offs = insn->immediate.value;
-- 
cgit v1.2.1


From 73175d0d19657ec132cc24e8cf0e341e73c54868 Mon Sep 17 00:00:00 2001
From: Oleg Nesterov <oleg@redhat.com>
Date: Sat, 19 Apr 2014 12:34:02 +0200
Subject: uprobes/x86: Add uprobe_init_insn(), kill validate_insn_{32,64}bits()

validate_insn_32bits() and validate_insn_64bits() are very similar,
turn them into the single uprobe_init_insn() which has the additional
"bool x86_64" argument which can be passed to insn_init() and used to
choose between good_insns_64/good_insns_32.

Also kill UPROBE_FIX_NONE, it has no users.

Note: the current code doesn't use ifdef's consistently, good_insns_64
depends on CONFIG_X86_64 but good_insns_32 is unconditional. This patch
removes ifdef around good_insns_64, we will add it back later along with
the similar one for good_insns_32.

Signed-off-by: Oleg Nesterov <oleg@redhat.com>
Reviewed-by: Jim Keniston <jkenisto@us.ibm.com>
Acked-by: Srikar Dronamraju <srikar@linux.vnet.ibm.com>
---
 arch/x86/kernel/uprobes.c | 45 +++++++++++++--------------------------------
 1 file changed, 13 insertions(+), 32 deletions(-)

(limited to 'arch/x86/kernel')

diff --git a/arch/x86/kernel/uprobes.c b/arch/x86/kernel/uprobes.c
index 3cf24a218196..b4aff6a70f4d 100644
--- a/arch/x86/kernel/uprobes.c
+++ b/arch/x86/kernel/uprobes.c
@@ -32,9 +32,6 @@
 
 /* Post-execution fixups. */
 
-/* No fixup needed */
-#define UPROBE_FIX_NONE		0x0
-
 /* Adjust IP back to vicinity of actual insn */
 #define UPROBE_FIX_IP		0x1
 
@@ -114,7 +111,6 @@ static volatile u32 good_2byte_insns[256 / 32] = {
 	/*      0  1  2  3  4  5  6  7  8  9  a  b  c  d  e  f         */
 };
 
-#ifdef CONFIG_X86_64
 /* Good-instruction tables for 64-bit apps */
 static volatile u32 good_insns_64[256 / 32] = {
 	/*      0  1  2  3  4  5  6  7  8  9  a  b  c  d  e  f         */
@@ -138,7 +134,6 @@ static volatile u32 good_insns_64[256 / 32] = {
 	/*      ----------------------------------------------         */
 	/*      0  1  2  3  4  5  6  7  8  9  a  b  c  d  e  f         */
 };
-#endif
 #undef W
 
 /*
@@ -209,16 +204,22 @@ static bool is_prefix_bad(struct insn *insn)
 	return false;
 }
 
-static int validate_insn_32bits(struct arch_uprobe *auprobe, struct insn *insn)
+static int uprobe_init_insn(struct arch_uprobe *auprobe, struct insn *insn, bool x86_64)
 {
-	insn_init(insn, auprobe->insn, false);
+	u32 volatile *good_insns;
+
+	insn_init(insn, auprobe->insn, x86_64);
 
-	/* Skip good instruction prefixes; reject "bad" ones. */
 	insn_get_opcode(insn);
 	if (is_prefix_bad(insn))
 		return -ENOTSUPP;
 
-	if (test_bit(OPCODE1(insn), (unsigned long *)good_insns_32))
+	if (x86_64)
+		good_insns = good_insns_64;
+	else
+		good_insns = good_insns_32;
+
+	if (test_bit(OPCODE1(insn), (unsigned long *)good_insns))
 		return 0;
 
 	if (insn->opcode.nbytes == 2) {
@@ -355,30 +356,10 @@ handle_riprel_post_xol(struct arch_uprobe *auprobe, struct pt_regs *regs, long *
 	}
 }
 
-static int validate_insn_64bits(struct arch_uprobe *auprobe, struct insn *insn)
-{
-	insn_init(insn, auprobe->insn, true);
-
-	/* Skip good instruction prefixes; reject "bad" ones. */
-	insn_get_opcode(insn);
-	if (is_prefix_bad(insn))
-		return -ENOTSUPP;
-
-	if (test_bit(OPCODE1(insn), (unsigned long *)good_insns_64))
-		return 0;
-
-	if (insn->opcode.nbytes == 2) {
-		if (test_bit(OPCODE2(insn), (unsigned long *)good_2byte_insns))
-			return 0;
-	}
-	return -ENOTSUPP;
-}
-
 static int validate_insn_bits(struct arch_uprobe *auprobe, struct mm_struct *mm, struct insn *insn)
 {
-	if (mm->context.ia32_compat)
-		return validate_insn_32bits(auprobe, insn);
-	return validate_insn_64bits(auprobe, insn);
+	bool x86_64 = !mm->context.ia32_compat;
+	return uprobe_init_insn(auprobe, insn, x86_64);
 }
 #else /* 32-bit: */
 /*
@@ -398,7 +379,7 @@ static void handle_riprel_post_xol(struct arch_uprobe *auprobe, struct pt_regs *
 
 static int validate_insn_bits(struct arch_uprobe *auprobe, struct mm_struct *mm,  struct insn *insn)
 {
-	return validate_insn_32bits(auprobe, insn);
+	return uprobe_init_insn(auprobe, insn, false);
 }
 #endif /* CONFIG_X86_64 */
 
-- 
cgit v1.2.1


From 2ae1f49ae1978fedb6ad607e1f8b084aa9752f95 Mon Sep 17 00:00:00 2001
From: Oleg Nesterov <oleg@redhat.com>
Date: Sat, 19 Apr 2014 14:03:05 +0200
Subject: uprobes/x86: Add is_64bit_mm(), kill validate_insn_bits()

1. Extract the ->ia32_compat check from 64bit validate_insn_bits()
   into the new helper, is_64bit_mm(), it will have more users.

   TODO: this checks is actually wrong if mm owner is X32 task,
   we need another fix which changes set_personality_ia32().

   TODO: even worse, the whole 64-or-32-bit logic is very broken
   and the fix is not simple, we need the nontrivial changes in
   the core uprobes code.

2. Kill validate_insn_bits() and change its single caller to use
   uprobe_init_insn(is_64bit_mm(mm).

Signed-off-by: Oleg Nesterov <oleg@redhat.com>
Reviewed-by: Jim Keniston <jkenisto@us.ibm.com>
Acked-by: Srikar Dronamraju <srikar@linux.vnet.ibm.com>
---
 arch/x86/kernel/uprobes.c | 20 +++++++++-----------
 1 file changed, 9 insertions(+), 11 deletions(-)

(limited to 'arch/x86/kernel')

diff --git a/arch/x86/kernel/uprobes.c b/arch/x86/kernel/uprobes.c
index b4aff6a70f4d..b3b25ddc04fb 100644
--- a/arch/x86/kernel/uprobes.c
+++ b/arch/x86/kernel/uprobes.c
@@ -231,6 +231,11 @@ static int uprobe_init_insn(struct arch_uprobe *auprobe, struct insn *insn, bool
 }
 
 #ifdef CONFIG_X86_64
+static inline bool is_64bit_mm(struct mm_struct *mm)
+{
+	return	!config_enabled(CONFIG_IA32_EMULATION) ||
+		!mm->context.ia32_compat;
+}
 /*
  * If arch_uprobe->insn doesn't use rip-relative addressing, return
  * immediately.  Otherwise, rewrite the instruction so that it accesses
@@ -355,13 +360,11 @@ handle_riprel_post_xol(struct arch_uprobe *auprobe, struct pt_regs *regs, long *
 			*correction += 4;
 	}
 }
-
-static int validate_insn_bits(struct arch_uprobe *auprobe, struct mm_struct *mm, struct insn *insn)
+#else /* 32-bit: */
+static inline bool is_64bit_mm(struct mm_struct *mm)
 {
-	bool x86_64 = !mm->context.ia32_compat;
-	return uprobe_init_insn(auprobe, insn, x86_64);
+	return false;
 }
-#else /* 32-bit: */
 /*
  * No RIP-relative addressing on 32-bit
  */
@@ -376,11 +379,6 @@ static void handle_riprel_post_xol(struct arch_uprobe *auprobe, struct pt_regs *
 					long *correction)
 {
 }
-
-static int validate_insn_bits(struct arch_uprobe *auprobe, struct mm_struct *mm,  struct insn *insn)
-{
-	return uprobe_init_insn(auprobe, insn, false);
-}
 #endif /* CONFIG_X86_64 */
 
 struct uprobe_xol_ops {
@@ -625,7 +623,7 @@ int arch_uprobe_analyze_insn(struct arch_uprobe *auprobe, struct mm_struct *mm,
 	bool fix_ip = true, fix_call = false;
 	int ret;
 
-	ret = validate_insn_bits(auprobe, mm, &insn);
+	ret = uprobe_init_insn(auprobe, &insn, is_64bit_mm(mm));
 	if (ret)
 		return ret;
 
-- 
cgit v1.2.1


From ff261964cfcfe49d73690ca29b0ba2853d9497e3 Mon Sep 17 00:00:00 2001
From: Oleg Nesterov <oleg@redhat.com>
Date: Sat, 19 Apr 2014 14:15:27 +0200
Subject: uprobes/x86: Shift "insn_complete" from branch_setup_xol_ops() to
 uprobe_init_insn()

Change uprobe_init_insn() to make insn_complete() == T, this makes
other insn_get_*() calls unnecessary.

Signed-off-by: Oleg Nesterov <oleg@redhat.com>
Reviewed-by: Jim Keniston <jkenisto@us.ibm.com>
Acked-by: Srikar Dronamraju <srikar@linux.vnet.ibm.com>
---
 arch/x86/kernel/uprobes.c | 13 ++++---------
 1 file changed, 4 insertions(+), 9 deletions(-)

(limited to 'arch/x86/kernel')

diff --git a/arch/x86/kernel/uprobes.c b/arch/x86/kernel/uprobes.c
index b3b25ddc04fb..98d7db50f425 100644
--- a/arch/x86/kernel/uprobes.c
+++ b/arch/x86/kernel/uprobes.c
@@ -209,8 +209,11 @@ static int uprobe_init_insn(struct arch_uprobe *auprobe, struct insn *insn, bool
 	u32 volatile *good_insns;
 
 	insn_init(insn, auprobe->insn, x86_64);
+	/* has the side-effect of processing the entire instruction */
+	insn_get_length(insn);
+	if (WARN_ON_ONCE(!insn_complete(insn)))
+		return -ENOEXEC;
 
-	insn_get_opcode(insn);
 	if (is_prefix_bad(insn))
 		return -ENOTSUPP;
 
@@ -283,8 +286,6 @@ handle_riprel_insn(struct arch_uprobe *auprobe, struct insn *insn)
 	 * is the immediate operand.
 	 */
 	cursor = auprobe->insn + insn_offset_modrm(insn);
-	insn_get_length(insn);
-
 	/*
 	 * Convert from rip-relative addressing to indirect addressing
 	 * via a scratch register.  Change the r/m field from 0x5 (%rip)
@@ -564,11 +565,6 @@ static int branch_setup_xol_ops(struct arch_uprobe *auprobe, struct insn *insn)
 	u8 opc1 = OPCODE1(insn);
 	int i;
 
-	/* has the side-effect of processing the entire instruction */
-	insn_get_length(insn);
-	if (WARN_ON_ONCE(!insn_complete(insn)))
-		return -ENOEXEC;
-
 	switch (opc1) {
 	case 0xeb:	/* jmp 8 */
 	case 0xe9:	/* jmp 32 */
@@ -654,7 +650,6 @@ int arch_uprobe_analyze_insn(struct arch_uprobe *auprobe, struct mm_struct *mm,
 		fix_ip = false;
 		break;
 	case 0xff:
-		insn_get_modrm(&insn);
 		switch (MODRM_REG(&insn)) {
 		case 2: case 3:			/* call or lcall, indirect */
 			fix_call = true;
-- 
cgit v1.2.1


From 8dbacad93a2a12adebcc717e6055b1bcc1739ab8 Mon Sep 17 00:00:00 2001
From: Oleg Nesterov <oleg@redhat.com>
Date: Sat, 19 Apr 2014 16:07:15 +0200
Subject: uprobes/x86: Make good_insns_* depend on CONFIG_X86_*

Add the suitable ifdef's around good_insns_* arrays. We do not want
to add the ugly ifdef's into their only user, uprobe_init_insn(), so
the "#else" branch simply defines them as NULL. This doesn't generate
the extra code, gcc is smart enough, although the code is fine even if
it could not detect that (without CONFIG_IA32_EMULATION) is_64bit_mm()
is __builtin_constant_p().

The patch looks more complicated because it also moves good_insns_64
up close to good_insns_32.

Signed-off-by: Oleg Nesterov <oleg@redhat.com>
Reviewed-by: Jim Keniston <jkenisto@us.ibm.com>
Acked-by: Srikar Dronamraju <srikar@linux.vnet.ibm.com>
---
 arch/x86/kernel/uprobes.c | 56 +++++++++++++++++++++++++++--------------------
 1 file changed, 32 insertions(+), 24 deletions(-)

(limited to 'arch/x86/kernel')

diff --git a/arch/x86/kernel/uprobes.c b/arch/x86/kernel/uprobes.c
index 98d7db50f425..892975b3c99c 100644
--- a/arch/x86/kernel/uprobes.c
+++ b/arch/x86/kernel/uprobes.c
@@ -64,6 +64,7 @@
  * to keep gcc from statically optimizing it out, as variable_test_bit makes
  * some versions of gcc to think only *(unsigned long*) is used.
  */
+#if defined(CONFIG_X86_32) || defined(CONFIG_IA32_EMULATION)
 static volatile u32 good_insns_32[256 / 32] = {
 	/*      0  1  2  3  4  5  6  7  8  9  a  b  c  d  e  f         */
 	/*      ----------------------------------------------         */
@@ -86,32 +87,12 @@ static volatile u32 good_insns_32[256 / 32] = {
 	/*      ----------------------------------------------         */
 	/*      0  1  2  3  4  5  6  7  8  9  a  b  c  d  e  f         */
 };
-
-/* Using this for both 64-bit and 32-bit apps */
-static volatile u32 good_2byte_insns[256 / 32] = {
-	/*      0  1  2  3  4  5  6  7  8  9  a  b  c  d  e  f         */
-	/*      ----------------------------------------------         */
-	W(0x00, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1) | /* 00 */
-	W(0x10, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1) , /* 10 */
-	W(0x20, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1) | /* 20 */
-	W(0x30, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) , /* 30 */
-	W(0x40, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* 40 */
-	W(0x50, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* 50 */
-	W(0x60, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* 60 */
-	W(0x70, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1, 1) , /* 70 */
-	W(0x80, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* 80 */
-	W(0x90, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* 90 */
-	W(0xa0, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 0, 1) | /* a0 */
-	W(0xb0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1) , /* b0 */
-	W(0xc0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* c0 */
-	W(0xd0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* d0 */
-	W(0xe0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* e0 */
-	W(0xf0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0)   /* f0 */
-	/*      ----------------------------------------------         */
-	/*      0  1  2  3  4  5  6  7  8  9  a  b  c  d  e  f         */
-};
+#else
+#define good_insns_32	NULL
+#endif
 
 /* Good-instruction tables for 64-bit apps */
+#if defined(CONFIG_X86_64)
 static volatile u32 good_insns_64[256 / 32] = {
 	/*      0  1  2  3  4  5  6  7  8  9  a  b  c  d  e  f         */
 	/*      ----------------------------------------------         */
@@ -134,6 +115,33 @@ static volatile u32 good_insns_64[256 / 32] = {
 	/*      ----------------------------------------------         */
 	/*      0  1  2  3  4  5  6  7  8  9  a  b  c  d  e  f         */
 };
+#else
+#define good_insns_64	NULL
+#endif
+
+/* Using this for both 64-bit and 32-bit apps */
+static volatile u32 good_2byte_insns[256 / 32] = {
+	/*      0  1  2  3  4  5  6  7  8  9  a  b  c  d  e  f         */
+	/*      ----------------------------------------------         */
+	W(0x00, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1) | /* 00 */
+	W(0x10, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1) , /* 10 */
+	W(0x20, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1) | /* 20 */
+	W(0x30, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) , /* 30 */
+	W(0x40, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* 40 */
+	W(0x50, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* 50 */
+	W(0x60, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* 60 */
+	W(0x70, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1, 1) , /* 70 */
+	W(0x80, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* 80 */
+	W(0x90, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* 90 */
+	W(0xa0, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 0, 1) | /* a0 */
+	W(0xb0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1) , /* b0 */
+	W(0xc0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* c0 */
+	W(0xd0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* d0 */
+	W(0xe0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* e0 */
+	W(0xf0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0)   /* f0 */
+	/*      ----------------------------------------------         */
+	/*      0  1  2  3  4  5  6  7  8  9  a  b  c  d  e  f         */
+};
 #undef W
 
 /*
-- 
cgit v1.2.1


From b24dc8dace74708fd849312722090169c5da97d3 Mon Sep 17 00:00:00 2001
From: Oleg Nesterov <oleg@redhat.com>
Date: Sat, 19 Apr 2014 18:10:09 +0200
Subject: uprobes/x86: Fix is_64bit_mm() with CONFIG_X86_X32

is_64bit_mm() assumes that mm->context.ia32_compat means the 32-bit
instruction set, this is not true if the task is TIF_X32.

Change set_personality_ia32() to initialize mm->context.ia32_compat
by TIF_X32 or TIF_IA32 instead of 1. This allows to fix is_64bit_mm()
without affecting other users, they all treat ia32_compat as "bool".

TIF_ in ->ia32_compat looks a bit strange, but this is grep-friendly
and avoids the new define's.

Signed-off-by: Oleg Nesterov <oleg@redhat.com>
Reviewed-by: Jim Keniston <jkenisto@us.ibm.com>
Acked-by: Srikar Dronamraju <srikar@linux.vnet.ibm.com>
---
 arch/x86/kernel/process_64.c | 7 ++++---
 arch/x86/kernel/uprobes.c    | 2 +-
 2 files changed, 5 insertions(+), 4 deletions(-)

(limited to 'arch/x86/kernel')

diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index 9c0280f93d05..9b53940981b7 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -413,12 +413,11 @@ void set_personality_ia32(bool x32)
 	set_thread_flag(TIF_ADDR32);
 
 	/* Mark the associated mm as containing 32-bit tasks. */
-	if (current->mm)
-		current->mm->context.ia32_compat = 1;
-
 	if (x32) {
 		clear_thread_flag(TIF_IA32);
 		set_thread_flag(TIF_X32);
+		if (current->mm)
+			current->mm->context.ia32_compat = TIF_X32;
 		current->personality &= ~READ_IMPLIES_EXEC;
 		/* is_compat_task() uses the presence of the x32
 		   syscall bit flag to determine compat status */
@@ -426,6 +425,8 @@ void set_personality_ia32(bool x32)
 	} else {
 		set_thread_flag(TIF_IA32);
 		clear_thread_flag(TIF_X32);
+		if (current->mm)
+			current->mm->context.ia32_compat = TIF_IA32;
 		current->personality |= force_personality32;
 		/* Prepare the first "return" to user space */
 		current_thread_info()->status |= TS_COMPAT;
diff --git a/arch/x86/kernel/uprobes.c b/arch/x86/kernel/uprobes.c
index 892975b3c99c..ecbffd16d090 100644
--- a/arch/x86/kernel/uprobes.c
+++ b/arch/x86/kernel/uprobes.c
@@ -245,7 +245,7 @@ static int uprobe_init_insn(struct arch_uprobe *auprobe, struct insn *insn, bool
 static inline bool is_64bit_mm(struct mm_struct *mm)
 {
 	return	!config_enabled(CONFIG_IA32_EMULATION) ||
-		!mm->context.ia32_compat;
+		!(mm->context.ia32_compat == TIF_IA32);
 }
 /*
  * If arch_uprobe->insn doesn't use rip-relative addressing, return
-- 
cgit v1.2.1


From dd91016dfc9ba9236cb0149984da3f0434278b49 Mon Sep 17 00:00:00 2001
From: Oleg Nesterov <oleg@redhat.com>
Date: Tue, 22 Apr 2014 15:20:07 +0200
Subject: uprobes/x86: Don't change the task's state if ->pre_xol() fails

Currently this doesn't matter, the only ->pre_xol() hook can't fail,
but we need to fix arch_uprobe_pre_xol() anyway. If ->pre_xol() fails
we should not change regs->ip/flags, we should just return the error
to make restart actually possible.

Signed-off-by: Oleg Nesterov <oleg@redhat.com>
Reviewed-by: Jim Keniston <jkenisto@us.ibm.com>
---
 arch/x86/kernel/uprobes.c | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

(limited to 'arch/x86/kernel')

diff --git a/arch/x86/kernel/uprobes.c b/arch/x86/kernel/uprobes.c
index ecbffd16d090..f4464b1b9435 100644
--- a/arch/x86/kernel/uprobes.c
+++ b/arch/x86/kernel/uprobes.c
@@ -687,6 +687,12 @@ int arch_uprobe_pre_xol(struct arch_uprobe *auprobe, struct pt_regs *regs)
 {
 	struct uprobe_task *utask = current->utask;
 
+	if (auprobe->ops->pre_xol) {
+		int err = auprobe->ops->pre_xol(auprobe, regs);
+		if (err)
+			return err;
+	}
+
 	regs->ip = utask->xol_vaddr;
 	utask->autask.saved_trap_nr = current->thread.trap_nr;
 	current->thread.trap_nr = UPROBE_TRAP_NR;
@@ -696,8 +702,6 @@ int arch_uprobe_pre_xol(struct arch_uprobe *auprobe, struct pt_regs *regs)
 	if (test_tsk_thread_flag(current, TIF_BLOCKSTEP))
 		set_task_blockstep(current, false);
 
-	if (auprobe->ops->pre_xol)
-		return auprobe->ops->pre_xol(auprobe, regs);
 	return 0;
 }
 
-- 
cgit v1.2.1


From 588fbd613c3d8fa73e96720761d49f1d40d34d4c Mon Sep 17 00:00:00 2001
From: Oleg Nesterov <oleg@redhat.com>
Date: Mon, 21 Apr 2014 16:58:17 +0200
Subject: uprobes/x86: Introduce uprobe_xol_ops->abort() and default_abort_op()

arch_uprobe_abort_xol() calls handle_riprel_post_xol() even if
auprobe->ops != default_xol_ops. This is fine correctness wise, only
default_pre_xol_op() can set UPROBE_FIX_RIP_AX|UPROBE_FIX_RIP_CX and
otherwise handle_riprel_post_xol() is nop.

But this doesn't look clean and this doesn't allow us to move ->fixups
into the union in arch_uprobe. Move this handle_riprel_post_xol() call
into the new default_abort_op() hook and change arch_uprobe_abort_xol()
accordingly.

Signed-off-by: Oleg Nesterov <oleg@redhat.com>
Reviewed-by: Jim Keniston <jkenisto@us.ibm.com>
---
 arch/x86/kernel/uprobes.c | 14 +++++++++++---
 1 file changed, 11 insertions(+), 3 deletions(-)

(limited to 'arch/x86/kernel')

diff --git a/arch/x86/kernel/uprobes.c b/arch/x86/kernel/uprobes.c
index f4464b1b9435..b3c2a92cce6c 100644
--- a/arch/x86/kernel/uprobes.c
+++ b/arch/x86/kernel/uprobes.c
@@ -394,6 +394,7 @@ struct uprobe_xol_ops {
 	bool	(*emulate)(struct arch_uprobe *, struct pt_regs *);
 	int	(*pre_xol)(struct arch_uprobe *, struct pt_regs *);
 	int	(*post_xol)(struct arch_uprobe *, struct pt_regs *);
+	void	(*abort)(struct arch_uprobe *, struct pt_regs *);
 };
 
 static inline int sizeof_long(void)
@@ -444,9 +445,15 @@ static int default_post_xol_op(struct arch_uprobe *auprobe, struct pt_regs *regs
 	return 0;
 }
 
+static void default_abort_op(struct arch_uprobe *auprobe, struct pt_regs *regs)
+{
+	handle_riprel_post_xol(auprobe, regs, NULL);
+}
+
 static struct uprobe_xol_ops default_xol_ops = {
 	.pre_xol  = default_pre_xol_op,
 	.post_xol = default_post_xol_op,
+	.abort	  = default_abort_op,
 };
 
 static bool branch_is_call(struct arch_uprobe *auprobe)
@@ -820,10 +827,11 @@ void arch_uprobe_abort_xol(struct arch_uprobe *auprobe, struct pt_regs *regs)
 {
 	struct uprobe_task *utask = current->utask;
 
-	current->thread.trap_nr = utask->autask.saved_trap_nr;
-	handle_riprel_post_xol(auprobe, regs, NULL);
-	instruction_pointer_set(regs, utask->vaddr);
+	if (auprobe->ops->abort)
+		auprobe->ops->abort(auprobe, regs);
 
+	current->thread.trap_nr = utask->autask.saved_trap_nr;
+	regs->ip = utask->vaddr;
 	/* clear TF if it was set by us in arch_uprobe_pre_xol() */
 	if (!utask->autask.saved_tf)
 		regs->flags &= ~X86_EFLAGS_TF;
-- 
cgit v1.2.1


From 6ded5f3848bfd3227ee208aa38f8bf8d7209d4e3 Mon Sep 17 00:00:00 2001
From: Oleg Nesterov <oleg@redhat.com>
Date: Mon, 21 Apr 2014 18:28:02 +0200
Subject: uprobes/x86: Don't use arch_uprobe_abort_xol() in
 arch_uprobe_post_xol()

014940bad8e4 "uprobes/x86: Send SIGILL if arch_uprobe_post_xol() fails"
changed arch_uprobe_post_xol() to use arch_uprobe_abort_xol() if ->post_xol
fails. This was correct and helped to avoid the additional complications,
we need to clear X86_EFLAGS_TF in this case.

However, now that we have uprobe_xol_ops->abort() hook it would be better
to avoid arch_uprobe_abort_xol() here. ->post_xol() should likely do what
->abort() does anyway, we should not do the same work twice. Currently only
handle_riprel_post_xol() can be called twice, this is unnecessary but safe.
Still this is not clean and can lead to the problems in future.

Change arch_uprobe_post_xol() to clear X86_EFLAGS_TF and restore ->ip by
hand and avoid arch_uprobe_abort_xol(). This temporary uglifies the usage
of autask.saved_tf, we will cleanup this later.

Signed-off-by: Oleg Nesterov <oleg@redhat.com>
Reviewed-by: Jim Keniston <jkenisto@us.ibm.com>
---
 arch/x86/kernel/uprobes.c | 17 +++++++++--------
 1 file changed, 9 insertions(+), 8 deletions(-)

(limited to 'arch/x86/kernel')

diff --git a/arch/x86/kernel/uprobes.c b/arch/x86/kernel/uprobes.c
index b3c2a92cce6c..2efb93f96030 100644
--- a/arch/x86/kernel/uprobes.c
+++ b/arch/x86/kernel/uprobes.c
@@ -759,22 +759,24 @@ int arch_uprobe_post_xol(struct arch_uprobe *auprobe, struct pt_regs *regs)
 	struct uprobe_task *utask = current->utask;
 
 	WARN_ON_ONCE(current->thread.trap_nr != UPROBE_TRAP_NR);
+	current->thread.trap_nr = utask->autask.saved_trap_nr;
 
 	if (auprobe->ops->post_xol) {
 		int err = auprobe->ops->post_xol(auprobe, regs);
 		if (err) {
-			arch_uprobe_abort_xol(auprobe, regs);
+			if (!utask->autask.saved_tf)
+				regs->flags &= ~X86_EFLAGS_TF;
 			/*
-			 * Restart the probed insn. ->post_xol() must ensure
-			 * this is really possible if it returns -ERESTART.
+			 * Restore ->ip for restart or post mortem analysis.
+			 * ->post_xol() must not return -ERESTART unless this
+			 * is really possible.
 			 */
+			regs->ip = utask->vaddr;
 			if (err == -ERESTART)
 				return 0;
 			return err;
 		}
 	}
-
-	current->thread.trap_nr = utask->autask.saved_trap_nr;
 	/*
 	 * arch_uprobe_pre_xol() doesn't save the state of TIF_BLOCKSTEP
 	 * so we can get an extra SIGTRAP if we do not clear TF. We need
@@ -819,9 +821,8 @@ int arch_uprobe_exception_notify(struct notifier_block *self, unsigned long val,
 
 /*
  * This function gets called when XOL instruction either gets trapped or
- * the thread has a fatal signal, or if arch_uprobe_post_xol() failed.
- * Reset the instruction pointer to its probed address for the potential
- * restart or for post mortem analysis.
+ * the thread has a fatal signal. Reset the instruction pointer to its
+ * probed address for the potential restart or for post mortem analysis.
  */
 void arch_uprobe_abort_xol(struct arch_uprobe *auprobe, struct pt_regs *regs)
 {
-- 
cgit v1.2.1


From 220ef8dc9a7a63fe202aacd3fc61e5104f6dd98c Mon Sep 17 00:00:00 2001
From: Oleg Nesterov <oleg@redhat.com>
Date: Mon, 21 Apr 2014 20:39:56 +0200
Subject: uprobes/x86: Move UPROBE_FIX_SETF logic from arch_uprobe_post_xol()
 to default_post_xol_op()

UPROBE_FIX_SETF is only needed to handle "popf" correctly but it is
processed by the generic arch_uprobe_post_xol() code. This doesn't
allows us to make ->fixups private for default_xol_ops.

1 Change default_post_xol_op(UPROBE_FIX_SETF) to set ->saved_tf = T.

   "popf" always reads the flags from stack, it doesn't matter if TF
   was set or not before single-step. Ignoring the naming, this is
   even more logical, "saved_tf" means "owned by application" and we
   do not own this flag after "popf".

2. Change arch_uprobe_post_xol() to save ->saved_tf into the local
   "bool send_sigtrap" before ->post_xol().

3. Change arch_uprobe_post_xol() to ignore UPROBE_FIX_SETF and just
   check ->saved_tf after ->post_xol().

With this patch ->fixups and ->rip_rela_target_address are only used
by default_xol_ops hooks, we are ready to remove them from the common
part of arch_uprobe.

Signed-off-by: Oleg Nesterov <oleg@redhat.com>
Reviewed-by: Jim Keniston <jkenisto@us.ibm.com>
---
 arch/x86/kernel/uprobes.c | 20 ++++++++++++--------
 1 file changed, 12 insertions(+), 8 deletions(-)

(limited to 'arch/x86/kernel')

diff --git a/arch/x86/kernel/uprobes.c b/arch/x86/kernel/uprobes.c
index 2efb93f96030..b2bca293fc57 100644
--- a/arch/x86/kernel/uprobes.c
+++ b/arch/x86/kernel/uprobes.c
@@ -441,6 +441,9 @@ static int default_post_xol_op(struct arch_uprobe *auprobe, struct pt_regs *regs
 			return -ERESTART;
 		}
 	}
+	/* popf; tell the caller to not touch TF */
+	if (auprobe->fixups & UPROBE_FIX_SETF)
+		utask->autask.saved_tf = true;
 
 	return 0;
 }
@@ -757,15 +760,15 @@ bool arch_uprobe_xol_was_trapped(struct task_struct *t)
 int arch_uprobe_post_xol(struct arch_uprobe *auprobe, struct pt_regs *regs)
 {
 	struct uprobe_task *utask = current->utask;
+	bool send_sigtrap = utask->autask.saved_tf;
+	int err = 0;
 
 	WARN_ON_ONCE(current->thread.trap_nr != UPROBE_TRAP_NR);
 	current->thread.trap_nr = utask->autask.saved_trap_nr;
 
 	if (auprobe->ops->post_xol) {
-		int err = auprobe->ops->post_xol(auprobe, regs);
+		err = auprobe->ops->post_xol(auprobe, regs);
 		if (err) {
-			if (!utask->autask.saved_tf)
-				regs->flags &= ~X86_EFLAGS_TF;
 			/*
 			 * Restore ->ip for restart or post mortem analysis.
 			 * ->post_xol() must not return -ERESTART unless this
@@ -773,8 +776,8 @@ int arch_uprobe_post_xol(struct arch_uprobe *auprobe, struct pt_regs *regs)
 			 */
 			regs->ip = utask->vaddr;
 			if (err == -ERESTART)
-				return 0;
-			return err;
+				err = 0;
+			send_sigtrap = false;
 		}
 	}
 	/*
@@ -782,12 +785,13 @@ int arch_uprobe_post_xol(struct arch_uprobe *auprobe, struct pt_regs *regs)
 	 * so we can get an extra SIGTRAP if we do not clear TF. We need
 	 * to examine the opcode to make it right.
 	 */
-	if (utask->autask.saved_tf)
+	if (send_sigtrap)
 		send_sig(SIGTRAP, current, 0);
-	else if (!(auprobe->fixups & UPROBE_FIX_SETF))
+
+	if (!utask->autask.saved_tf)
 		regs->flags &= ~X86_EFLAGS_TF;
 
-	return 0;
+	return err;
 }
 
 /* callback routine for handling exceptions. */
-- 
cgit v1.2.1


From 97aa5cddbe9e01521137f337624469374e3cbde5 Mon Sep 17 00:00:00 2001
From: Oleg Nesterov <oleg@redhat.com>
Date: Tue, 22 Apr 2014 16:20:55 +0200
Subject: uprobes/x86: Move default_xol_ops's data into arch_uprobe->def

Finally we can move arch_uprobe->fixups/rip_rela_target_address
into the new "def" struct and place this struct in the union, they
are only used by default_xol_ops paths.

The patch also renames rip_rela_target_address to riprel_target just
to make this name shorter.

Signed-off-by: Oleg Nesterov <oleg@redhat.com>
Reviewed-by: Jim Keniston <jkenisto@us.ibm.com>
---
 arch/x86/kernel/uprobes.c | 43 +++++++++++++++++++++----------------------
 1 file changed, 21 insertions(+), 22 deletions(-)

(limited to 'arch/x86/kernel')

diff --git a/arch/x86/kernel/uprobes.c b/arch/x86/kernel/uprobes.c
index b2bca293fc57..7824ce248f8f 100644
--- a/arch/x86/kernel/uprobes.c
+++ b/arch/x86/kernel/uprobes.c
@@ -251,10 +251,9 @@ static inline bool is_64bit_mm(struct mm_struct *mm)
  * If arch_uprobe->insn doesn't use rip-relative addressing, return
  * immediately.  Otherwise, rewrite the instruction so that it accesses
  * its memory operand indirectly through a scratch register.  Set
- * arch_uprobe->fixups and arch_uprobe->rip_rela_target_address
- * accordingly.  (The contents of the scratch register will be saved
- * before we single-step the modified instruction, and restored
- * afterward.)
+ * def->fixups and def->riprel_target accordingly. (The contents of the
+ * scratch register will be saved before we single-step the modified
+ * instruction, and restored afterward).
  *
  * We do this because a rip-relative instruction can access only a
  * relatively small area (+/- 2 GB from the instruction), and the XOL
@@ -308,18 +307,18 @@ handle_riprel_insn(struct arch_uprobe *auprobe, struct insn *insn)
 		 * is NOT the register operand, so we use %rcx (register
 		 * #1) for the scratch register.
 		 */
-		auprobe->fixups = UPROBE_FIX_RIP_CX;
+		auprobe->def.fixups = UPROBE_FIX_RIP_CX;
 		/* Change modrm from 00 000 101 to 00 000 001. */
 		*cursor = 0x1;
 	} else {
 		/* Use %rax (register #0) for the scratch register. */
-		auprobe->fixups = UPROBE_FIX_RIP_AX;
+		auprobe->def.fixups = UPROBE_FIX_RIP_AX;
 		/* Change modrm from 00 xxx 101 to 00 xxx 000 */
 		*cursor = (reg << 3);
 	}
 
 	/* Target address = address of next instruction + (signed) offset */
-	auprobe->rip_rela_target_address = (long)insn->length + insn->displacement.value;
+	auprobe->def.riprel_target = (long)insn->length + insn->displacement.value;
 
 	/* Displacement field is gone; slide immediate field (if any) over. */
 	if (insn->immediate.nbytes) {
@@ -336,25 +335,25 @@ static void
 pre_xol_rip_insn(struct arch_uprobe *auprobe, struct pt_regs *regs,
 				struct arch_uprobe_task *autask)
 {
-	if (auprobe->fixups & UPROBE_FIX_RIP_AX) {
+	if (auprobe->def.fixups & UPROBE_FIX_RIP_AX) {
 		autask->saved_scratch_register = regs->ax;
 		regs->ax = current->utask->vaddr;
-		regs->ax += auprobe->rip_rela_target_address;
-	} else if (auprobe->fixups & UPROBE_FIX_RIP_CX) {
+		regs->ax += auprobe->def.riprel_target;
+	} else if (auprobe->def.fixups & UPROBE_FIX_RIP_CX) {
 		autask->saved_scratch_register = regs->cx;
 		regs->cx = current->utask->vaddr;
-		regs->cx += auprobe->rip_rela_target_address;
+		regs->cx += auprobe->def.riprel_target;
 	}
 }
 
 static void
 handle_riprel_post_xol(struct arch_uprobe *auprobe, struct pt_regs *regs, long *correction)
 {
-	if (auprobe->fixups & (UPROBE_FIX_RIP_AX | UPROBE_FIX_RIP_CX)) {
+	if (auprobe->def.fixups & (UPROBE_FIX_RIP_AX | UPROBE_FIX_RIP_CX)) {
 		struct arch_uprobe_task *autask;
 
 		autask = &current->utask->autask;
-		if (auprobe->fixups & UPROBE_FIX_RIP_AX)
+		if (auprobe->def.fixups & UPROBE_FIX_RIP_AX)
 			regs->ax = autask->saved_scratch_register;
 		else
 			regs->cx = autask->saved_scratch_register;
@@ -432,17 +431,17 @@ static int default_post_xol_op(struct arch_uprobe *auprobe, struct pt_regs *regs
 	long correction = (long)(utask->vaddr - utask->xol_vaddr);
 
 	handle_riprel_post_xol(auprobe, regs, &correction);
-	if (auprobe->fixups & UPROBE_FIX_IP)
+	if (auprobe->def.fixups & UPROBE_FIX_IP)
 		regs->ip += correction;
 
-	if (auprobe->fixups & UPROBE_FIX_CALL) {
+	if (auprobe->def.fixups & UPROBE_FIX_CALL) {
 		if (adjust_ret_addr(regs->sp, correction)) {
 			regs->sp += sizeof_long();
 			return -ERESTART;
 		}
 	}
 	/* popf; tell the caller to not touch TF */
-	if (auprobe->fixups & UPROBE_FIX_SETF)
+	if (auprobe->def.fixups & UPROBE_FIX_SETF)
 		utask->autask.saved_tf = true;
 
 	return 0;
@@ -646,13 +645,13 @@ int arch_uprobe_analyze_insn(struct arch_uprobe *auprobe, struct mm_struct *mm,
 		return ret;
 
 	/*
-	 * Figure out which fixups arch_uprobe_post_xol() will need to perform,
-	 * and annotate arch_uprobe->fixups accordingly. To start with, ->fixups
-	 * is either zero or it reflects rip-related fixups.
+	 * Figure out which fixups default_post_xol_op() will need to perform,
+	 * and annotate def->fixups accordingly. To start with, ->fixups is
+	 * either zero or it reflects rip-related fixups.
 	 */
 	switch (OPCODE1(&insn)) {
 	case 0x9d:		/* popf */
-		auprobe->fixups |= UPROBE_FIX_SETF;
+		auprobe->def.fixups |= UPROBE_FIX_SETF;
 		break;
 	case 0xc3:		/* ret or lret -- ip is correct */
 	case 0xcb:
@@ -680,9 +679,9 @@ int arch_uprobe_analyze_insn(struct arch_uprobe *auprobe, struct mm_struct *mm,
 	}
 
 	if (fix_ip)
-		auprobe->fixups |= UPROBE_FIX_IP;
+		auprobe->def.fixups |= UPROBE_FIX_IP;
 	if (fix_call)
-		auprobe->fixups |= UPROBE_FIX_CALL;
+		auprobe->def.fixups |= UPROBE_FIX_CALL;
 
 	auprobe->ops = &default_xol_ops;
 	return 0;
-- 
cgit v1.2.1


From 78d9af4cd375880a574327210eb9dab572618364 Mon Sep 17 00:00:00 2001
From: Oleg Nesterov <oleg@redhat.com>
Date: Thu, 24 Apr 2014 18:52:37 +0200
Subject: uprobes/x86: Cleanup the usage of arch_uprobe->def.fixups, make it u8

handle_riprel_insn() assumes that nobody else could modify ->fixups
before. This is correct but fragile, change it to use "|=".

Also make ->fixups u8, we are going to add the new members into the
union. It is not clear why UPROBE_FIX_RIP_.X lived in the upper byte,
redefine them so that they can fit into u8.

Signed-off-by: Oleg Nesterov <oleg@redhat.com>
---
 arch/x86/kernel/uprobes.c | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

(limited to 'arch/x86/kernel')

diff --git a/arch/x86/kernel/uprobes.c b/arch/x86/kernel/uprobes.c
index 7824ce248f8f..a8e1d7e47001 100644
--- a/arch/x86/kernel/uprobes.c
+++ b/arch/x86/kernel/uprobes.c
@@ -33,16 +33,16 @@
 /* Post-execution fixups. */
 
 /* Adjust IP back to vicinity of actual insn */
-#define UPROBE_FIX_IP		0x1
+#define UPROBE_FIX_IP		0x01
 
 /* Adjust the return address of a call insn */
-#define UPROBE_FIX_CALL	0x2
+#define UPROBE_FIX_CALL		0x02
 
 /* Instruction will modify TF, don't change it */
-#define UPROBE_FIX_SETF	0x4
+#define UPROBE_FIX_SETF		0x04
 
-#define UPROBE_FIX_RIP_AX	0x8000
-#define UPROBE_FIX_RIP_CX	0x4000
+#define UPROBE_FIX_RIP_AX	0x08
+#define UPROBE_FIX_RIP_CX	0x10
 
 #define	UPROBE_TRAP_NR		UINT_MAX
 
@@ -307,12 +307,12 @@ handle_riprel_insn(struct arch_uprobe *auprobe, struct insn *insn)
 		 * is NOT the register operand, so we use %rcx (register
 		 * #1) for the scratch register.
 		 */
-		auprobe->def.fixups = UPROBE_FIX_RIP_CX;
+		auprobe->def.fixups |= UPROBE_FIX_RIP_CX;
 		/* Change modrm from 00 000 101 to 00 000 001. */
 		*cursor = 0x1;
 	} else {
 		/* Use %rax (register #0) for the scratch register. */
-		auprobe->def.fixups = UPROBE_FIX_RIP_AX;
+		auprobe->def.fixups |= UPROBE_FIX_RIP_AX;
 		/* Change modrm from 00 xxx 101 to 00 xxx 000 */
 		*cursor = (reg << 3);
 	}
-- 
cgit v1.2.1


From 2b82cadffc4154a25c25d88a63c7fb3397cda9d6 Mon Sep 17 00:00:00 2001
From: Oleg Nesterov <oleg@redhat.com>
Date: Thu, 24 Apr 2014 19:21:38 +0200
Subject: uprobes/x86: Introduce push_ret_address()

Extract the "push return address" code from branch_emulate_op() into
the new simple helper, push_ret_address(). It will have more users.

Signed-off-by: Oleg Nesterov <oleg@redhat.com>
---
 arch/x86/kernel/uprobes.c | 15 ++++++++++++---
 1 file changed, 12 insertions(+), 3 deletions(-)

(limited to 'arch/x86/kernel')

diff --git a/arch/x86/kernel/uprobes.c b/arch/x86/kernel/uprobes.c
index a8e1d7e47001..df75913acfc0 100644
--- a/arch/x86/kernel/uprobes.c
+++ b/arch/x86/kernel/uprobes.c
@@ -407,6 +407,17 @@ static int default_pre_xol_op(struct arch_uprobe *auprobe, struct pt_regs *regs)
 	return 0;
 }
 
+static int push_ret_address(struct pt_regs *regs, unsigned long ip)
+{
+	unsigned long new_sp = regs->sp - sizeof_long();
+
+	if (copy_to_user((void __user *)new_sp, &ip, sizeof_long()))
+		return -EFAULT;
+
+	regs->sp = new_sp;
+	return 0;
+}
+
 /*
  * Adjust the return address pushed by a call insn executed out of line.
  */
@@ -517,7 +528,6 @@ static bool branch_emulate_op(struct arch_uprobe *auprobe, struct pt_regs *regs)
 	unsigned long offs = (long)auprobe->branch.offs;
 
 	if (branch_is_call(auprobe)) {
-		unsigned long new_sp = regs->sp - sizeof_long();
 		/*
 		 * If it fails we execute this (mangled, see the comment in
 		 * branch_clear_offset) insn out-of-line. In the likely case
@@ -527,9 +537,8 @@ static bool branch_emulate_op(struct arch_uprobe *auprobe, struct pt_regs *regs)
 		 *
 		 * But there is corner case, see the comment in ->post_xol().
 		 */
-		if (copy_to_user((void __user *)new_sp, &new_ip, sizeof_long()))
+		if (push_ret_address(regs, new_ip))
 			return false;
-		regs->sp = new_sp;
 	} else if (!check_jmp_cond(auprobe, regs)) {
 		offs = 0;
 	}
-- 
cgit v1.2.1


From 1dc76e6eacef271230d9ff6fd0f91824bda03f44 Mon Sep 17 00:00:00 2001
From: Oleg Nesterov <oleg@redhat.com>
Date: Fri, 25 Apr 2014 18:06:19 +0200
Subject: uprobes/x86: Kill adjust_ret_addr(), simplify UPROBE_FIX_CALL logic

The only insn which could have both UPROBE_FIX_IP and UPROBE_FIX_CALL
was 0xe8 "call relative", and now it is handled by branch_xol_ops.

So we can change default_post_xol_op(UPROBE_FIX_CALL) to simply push
the address of next insn == utask->vaddr + insn.length, just we need
to record insn.length into the new auprobe->def.ilen member.

Note: if/when we teach branch_xol_ops to support jcxz/loopz we can
remove the "correction" logic, UPROBE_FIX_IP can use the same address.

Signed-off-by: Oleg Nesterov <oleg@redhat.com>
---
 arch/x86/kernel/uprobes.c | 24 +++---------------------
 1 file changed, 3 insertions(+), 21 deletions(-)

(limited to 'arch/x86/kernel')

diff --git a/arch/x86/kernel/uprobes.c b/arch/x86/kernel/uprobes.c
index df75913acfc0..5bcce852628a 100644
--- a/arch/x86/kernel/uprobes.c
+++ b/arch/x86/kernel/uprobes.c
@@ -418,24 +418,6 @@ static int push_ret_address(struct pt_regs *regs, unsigned long ip)
 	return 0;
 }
 
-/*
- * Adjust the return address pushed by a call insn executed out of line.
- */
-static int adjust_ret_addr(unsigned long sp, long correction)
-{
-	int rasize = sizeof_long();
-	long ra;
-
-	if (copy_from_user(&ra, (void __user *)sp, rasize))
-		return -EFAULT;
-
-	ra += correction;
-	if (copy_to_user((void __user *)sp, &ra, rasize))
-		return -EFAULT;
-
-	return 0;
-}
-
 static int default_post_xol_op(struct arch_uprobe *auprobe, struct pt_regs *regs)
 {
 	struct uprobe_task *utask = current->utask;
@@ -446,10 +428,9 @@ static int default_post_xol_op(struct arch_uprobe *auprobe, struct pt_regs *regs
 		regs->ip += correction;
 
 	if (auprobe->def.fixups & UPROBE_FIX_CALL) {
-		if (adjust_ret_addr(regs->sp, correction)) {
-			regs->sp += sizeof_long();
+		regs->sp += sizeof_long();
+		if (push_ret_address(regs, utask->vaddr + auprobe->def.ilen))
 			return -ERESTART;
-		}
 	}
 	/* popf; tell the caller to not touch TF */
 	if (auprobe->def.fixups & UPROBE_FIX_SETF)
@@ -687,6 +668,7 @@ int arch_uprobe_analyze_insn(struct arch_uprobe *auprobe, struct mm_struct *mm,
 		handle_riprel_insn(auprobe, &insn);
 	}
 
+	auprobe->def.ilen = insn.length;
 	if (fix_ip)
 		auprobe->def.fixups |= UPROBE_FIX_IP;
 	if (fix_call)
-- 
cgit v1.2.1


From 83cd591485e558ab70aed45ce7261ce3f5ee8746 Mon Sep 17 00:00:00 2001
From: Oleg Nesterov <oleg@redhat.com>
Date: Fri, 25 Apr 2014 18:53:32 +0200
Subject: uprobes/x86: Cleanup the usage of UPROBE_FIX_IP/UPROBE_FIX_CALL

Now that UPROBE_FIX_IP/UPROBE_FIX_CALL are mutually exclusive we can
use a single "fix_ip_or_call" enum instead of 2 fix_* booleans. This
way the logic looks more understandable and clean to me.

While at it, join "case 0xea" with other "ip is correct" ret/lret cases.
Also change default_post_xol_op() to use "else if" for the same reason.

Signed-off-by: Oleg Nesterov <oleg@redhat.com>
---
 arch/x86/kernel/uprobes.c | 27 +++++++++++----------------
 1 file changed, 11 insertions(+), 16 deletions(-)

(limited to 'arch/x86/kernel')

diff --git a/arch/x86/kernel/uprobes.c b/arch/x86/kernel/uprobes.c
index 5bcce852628a..d2792e884d54 100644
--- a/arch/x86/kernel/uprobes.c
+++ b/arch/x86/kernel/uprobes.c
@@ -424,10 +424,9 @@ static int default_post_xol_op(struct arch_uprobe *auprobe, struct pt_regs *regs
 	long correction = (long)(utask->vaddr - utask->xol_vaddr);
 
 	handle_riprel_post_xol(auprobe, regs, &correction);
-	if (auprobe->def.fixups & UPROBE_FIX_IP)
+	if (auprobe->def.fixups & UPROBE_FIX_IP) {
 		regs->ip += correction;
-
-	if (auprobe->def.fixups & UPROBE_FIX_CALL) {
+	} else if (auprobe->def.fixups & UPROBE_FIX_CALL) {
 		regs->sp += sizeof_long();
 		if (push_ret_address(regs, utask->vaddr + auprobe->def.ilen))
 			return -ERESTART;
@@ -623,7 +622,7 @@ static int branch_setup_xol_ops(struct arch_uprobe *auprobe, struct insn *insn)
 int arch_uprobe_analyze_insn(struct arch_uprobe *auprobe, struct mm_struct *mm, unsigned long addr)
 {
 	struct insn insn;
-	bool fix_ip = true, fix_call = false;
+	u8 fix_ip_or_call = UPROBE_FIX_IP;
 	int ret;
 
 	ret = uprobe_init_insn(auprobe, &insn, is_64bit_mm(mm));
@@ -647,21 +646,20 @@ int arch_uprobe_analyze_insn(struct arch_uprobe *auprobe, struct mm_struct *mm,
 	case 0xcb:
 	case 0xc2:
 	case 0xca:
-		fix_ip = false;
+	case 0xea:		/* jmp absolute -- ip is correct */
+		fix_ip_or_call = 0;
 		break;
 	case 0x9a:		/* call absolute - Fix return addr, not ip */
-		fix_call = true;
-		fix_ip = false;
-		break;
-	case 0xea:		/* jmp absolute -- ip is correct */
-		fix_ip = false;
+		fix_ip_or_call = UPROBE_FIX_CALL;
 		break;
 	case 0xff:
 		switch (MODRM_REG(&insn)) {
 		case 2: case 3:			/* call or lcall, indirect */
-			fix_call = true;
+			fix_ip_or_call = UPROBE_FIX_CALL;
+			break;
 		case 4: case 5:			/* jmp or ljmp, indirect */
-			fix_ip = false;
+			fix_ip_or_call = 0;
+			break;
 		}
 		/* fall through */
 	default:
@@ -669,10 +667,7 @@ int arch_uprobe_analyze_insn(struct arch_uprobe *auprobe, struct mm_struct *mm,
 	}
 
 	auprobe->def.ilen = insn.length;
-	if (fix_ip)
-		auprobe->def.fixups |= UPROBE_FIX_IP;
-	if (fix_call)
-		auprobe->def.fixups |= UPROBE_FIX_CALL;
+	auprobe->def.fixups |= fix_ip_or_call;
 
 	auprobe->ops = &default_xol_ops;
 	return 0;
-- 
cgit v1.2.1


From 1475ee7fadafc6d0c194f2f4cbdae10ed04b9580 Mon Sep 17 00:00:00 2001
From: Oleg Nesterov <oleg@redhat.com>
Date: Sun, 27 Apr 2014 16:31:59 +0200
Subject: uprobes/x86: Rename *riprel* helpers to make the naming consistent

handle_riprel_insn(), pre_xol_rip_insn() and handle_riprel_post_xol()
look confusing and inconsistent. Rename them into riprel_analyze(),
riprel_pre_xol(), and riprel_post_xol() respectively.

No changes in compiled code.

Signed-off-by: Oleg Nesterov <oleg@redhat.com>
Acked-by: Srikar Dronamraju <srikar@linux.vnet.ibm.com>
---
 arch/x86/kernel/uprobes.c | 24 +++++++++++-------------
 1 file changed, 11 insertions(+), 13 deletions(-)

(limited to 'arch/x86/kernel')

diff --git a/arch/x86/kernel/uprobes.c b/arch/x86/kernel/uprobes.c
index d2792e884d54..187be0e15e1d 100644
--- a/arch/x86/kernel/uprobes.c
+++ b/arch/x86/kernel/uprobes.c
@@ -268,8 +268,7 @@ static inline bool is_64bit_mm(struct mm_struct *mm)
  *  - There's never a SIB byte.
  *  - The displacement is always 4 bytes.
  */
-static void
-handle_riprel_insn(struct arch_uprobe *auprobe, struct insn *insn)
+static void riprel_analyze(struct arch_uprobe *auprobe, struct insn *insn)
 {
 	u8 *cursor;
 	u8 reg;
@@ -331,8 +330,7 @@ handle_riprel_insn(struct arch_uprobe *auprobe, struct insn *insn)
  * If we're emulating a rip-relative instruction, save the contents
  * of the scratch register and store the target address in that register.
  */
-static void
-pre_xol_rip_insn(struct arch_uprobe *auprobe, struct pt_regs *regs,
+static void riprel_pre_xol(struct arch_uprobe *auprobe, struct pt_regs *regs,
 				struct arch_uprobe_task *autask)
 {
 	if (auprobe->def.fixups & UPROBE_FIX_RIP_AX) {
@@ -346,8 +344,8 @@ pre_xol_rip_insn(struct arch_uprobe *auprobe, struct pt_regs *regs,
 	}
 }
 
-static void
-handle_riprel_post_xol(struct arch_uprobe *auprobe, struct pt_regs *regs, long *correction)
+static void riprel_post_xol(struct arch_uprobe *auprobe, struct pt_regs *regs,
+				long *correction)
 {
 	if (auprobe->def.fixups & (UPROBE_FIX_RIP_AX | UPROBE_FIX_RIP_CX)) {
 		struct arch_uprobe_task *autask;
@@ -376,14 +374,14 @@ static inline bool is_64bit_mm(struct mm_struct *mm)
 /*
  * No RIP-relative addressing on 32-bit
  */
-static void handle_riprel_insn(struct arch_uprobe *auprobe, struct insn *insn)
+static void riprel_analyze(struct arch_uprobe *auprobe, struct insn *insn)
 {
 }
-static void pre_xol_rip_insn(struct arch_uprobe *auprobe, struct pt_regs *regs,
+static void riprel_pre_xol(struct arch_uprobe *auprobe, struct pt_regs *regs,
 				struct arch_uprobe_task *autask)
 {
 }
-static void handle_riprel_post_xol(struct arch_uprobe *auprobe, struct pt_regs *regs,
+static void riprel_post_xol(struct arch_uprobe *auprobe, struct pt_regs *regs,
 					long *correction)
 {
 }
@@ -403,7 +401,7 @@ static inline int sizeof_long(void)
 
 static int default_pre_xol_op(struct arch_uprobe *auprobe, struct pt_regs *regs)
 {
-	pre_xol_rip_insn(auprobe, regs, &current->utask->autask);
+	riprel_pre_xol(auprobe, regs, &current->utask->autask);
 	return 0;
 }
 
@@ -423,7 +421,7 @@ static int default_post_xol_op(struct arch_uprobe *auprobe, struct pt_regs *regs
 	struct uprobe_task *utask = current->utask;
 	long correction = (long)(utask->vaddr - utask->xol_vaddr);
 
-	handle_riprel_post_xol(auprobe, regs, &correction);
+	riprel_post_xol(auprobe, regs, &correction);
 	if (auprobe->def.fixups & UPROBE_FIX_IP) {
 		regs->ip += correction;
 	} else if (auprobe->def.fixups & UPROBE_FIX_CALL) {
@@ -440,7 +438,7 @@ static int default_post_xol_op(struct arch_uprobe *auprobe, struct pt_regs *regs
 
 static void default_abort_op(struct arch_uprobe *auprobe, struct pt_regs *regs)
 {
-	handle_riprel_post_xol(auprobe, regs, NULL);
+	riprel_post_xol(auprobe, regs, NULL);
 }
 
 static struct uprobe_xol_ops default_xol_ops = {
@@ -663,7 +661,7 @@ int arch_uprobe_analyze_insn(struct arch_uprobe *auprobe, struct mm_struct *mm,
 		}
 		/* fall through */
 	default:
-		handle_riprel_insn(auprobe, &insn);
+		riprel_analyze(auprobe, &insn);
 	}
 
 	auprobe->def.ilen = insn.length;
-- 
cgit v1.2.1


From 7f55e82bacaaa2c41b8e14d6bc78129b096b67b8 Mon Sep 17 00:00:00 2001
From: Oleg Nesterov <oleg@redhat.com>
Date: Sun, 27 Apr 2014 17:00:46 +0200
Subject: uprobes/x86: Kill the "autask" arg of riprel_pre_xol()

default_pre_xol_op() passes &current->utask->autask to riprel_pre_xol()
and this is just ugly because it still needs to load current->utask to
read ->vaddr.

Remove this argument, change riprel_pre_xol() to use current->utask.

Signed-off-by: Oleg Nesterov <oleg@redhat.com>
Acked-by: Srikar Dronamraju <srikar@linux.vnet.ibm.com>
---
 arch/x86/kernel/uprobes.c | 18 +++++++++---------
 1 file changed, 9 insertions(+), 9 deletions(-)

(limited to 'arch/x86/kernel')

diff --git a/arch/x86/kernel/uprobes.c b/arch/x86/kernel/uprobes.c
index 187be0e15e1d..5df1bca7c2bc 100644
--- a/arch/x86/kernel/uprobes.c
+++ b/arch/x86/kernel/uprobes.c
@@ -330,16 +330,17 @@ static void riprel_analyze(struct arch_uprobe *auprobe, struct insn *insn)
  * If we're emulating a rip-relative instruction, save the contents
  * of the scratch register and store the target address in that register.
  */
-static void riprel_pre_xol(struct arch_uprobe *auprobe, struct pt_regs *regs,
-				struct arch_uprobe_task *autask)
+static void riprel_pre_xol(struct arch_uprobe *auprobe, struct pt_regs *regs)
 {
+	struct uprobe_task *utask = current->utask;
+
 	if (auprobe->def.fixups & UPROBE_FIX_RIP_AX) {
-		autask->saved_scratch_register = regs->ax;
-		regs->ax = current->utask->vaddr;
+		utask->autask.saved_scratch_register = regs->ax;
+		regs->ax = utask->vaddr;
 		regs->ax += auprobe->def.riprel_target;
 	} else if (auprobe->def.fixups & UPROBE_FIX_RIP_CX) {
-		autask->saved_scratch_register = regs->cx;
-		regs->cx = current->utask->vaddr;
+		utask->autask.saved_scratch_register = regs->cx;
+		regs->cx = utask->vaddr;
 		regs->cx += auprobe->def.riprel_target;
 	}
 }
@@ -377,8 +378,7 @@ static inline bool is_64bit_mm(struct mm_struct *mm)
 static void riprel_analyze(struct arch_uprobe *auprobe, struct insn *insn)
 {
 }
-static void riprel_pre_xol(struct arch_uprobe *auprobe, struct pt_regs *regs,
-				struct arch_uprobe_task *autask)
+static void riprel_pre_xol(struct arch_uprobe *auprobe, struct pt_regs *regs)
 {
 }
 static void riprel_post_xol(struct arch_uprobe *auprobe, struct pt_regs *regs,
@@ -401,7 +401,7 @@ static inline int sizeof_long(void)
 
 static int default_pre_xol_op(struct arch_uprobe *auprobe, struct pt_regs *regs)
 {
-	riprel_pre_xol(auprobe, regs, &current->utask->autask);
+	riprel_pre_xol(auprobe, regs);
 	return 0;
 }
 
-- 
cgit v1.2.1


From c90a6950120a7e45f31a22653fe6543507ae64d0 Mon Sep 17 00:00:00 2001
From: Oleg Nesterov <oleg@redhat.com>
Date: Sun, 27 Apr 2014 18:13:31 +0200
Subject: uprobes/x86: Simplify riprel_{pre,post}_xol() and make them similar

Ignoring the "correction" logic riprel_pre_xol() and riprel_post_xol()
are very similar but look quite differently.

1. Add the "UPROBE_FIX_RIP_AX | UPROBE_FIX_RIP_CX" check at the start
   of riprel_pre_xol(), like the same check in riprel_post_xol().

2. Add the trivial scratch_reg() helper which returns the address of
   scratch register pre_xol/post_xol need to change.

3. Change these functions to use the new helper and avoid copy-and-paste
   under if/else branches.

Signed-off-by: Oleg Nesterov <oleg@redhat.com>
Acked-by: Srikar Dronamraju <srikar@linux.vnet.ibm.com>
---
 arch/x86/kernel/uprobes.c | 30 ++++++++++++++----------------
 1 file changed, 14 insertions(+), 16 deletions(-)

(limited to 'arch/x86/kernel')

diff --git a/arch/x86/kernel/uprobes.c b/arch/x86/kernel/uprobes.c
index 5df1bca7c2bc..2ebadb252093 100644
--- a/arch/x86/kernel/uprobes.c
+++ b/arch/x86/kernel/uprobes.c
@@ -326,22 +326,24 @@ static void riprel_analyze(struct arch_uprobe *auprobe, struct insn *insn)
 	}
 }
 
+static inline unsigned long *
+scratch_reg(struct arch_uprobe *auprobe, struct pt_regs *regs)
+{
+	return (auprobe->def.fixups & UPROBE_FIX_RIP_AX) ? &regs->ax : &regs->cx;
+}
+
 /*
  * If we're emulating a rip-relative instruction, save the contents
  * of the scratch register and store the target address in that register.
  */
 static void riprel_pre_xol(struct arch_uprobe *auprobe, struct pt_regs *regs)
 {
-	struct uprobe_task *utask = current->utask;
+	if (auprobe->def.fixups & (UPROBE_FIX_RIP_AX | UPROBE_FIX_RIP_CX)) {
+		struct uprobe_task *utask = current->utask;
+		unsigned long *sr = scratch_reg(auprobe, regs);
 
-	if (auprobe->def.fixups & UPROBE_FIX_RIP_AX) {
-		utask->autask.saved_scratch_register = regs->ax;
-		regs->ax = utask->vaddr;
-		regs->ax += auprobe->def.riprel_target;
-	} else if (auprobe->def.fixups & UPROBE_FIX_RIP_CX) {
-		utask->autask.saved_scratch_register = regs->cx;
-		regs->cx = utask->vaddr;
-		regs->cx += auprobe->def.riprel_target;
+		utask->autask.saved_scratch_register = *sr;
+		*sr = utask->vaddr + auprobe->def.riprel_target;
 	}
 }
 
@@ -349,14 +351,10 @@ static void riprel_post_xol(struct arch_uprobe *auprobe, struct pt_regs *regs,
 				long *correction)
 {
 	if (auprobe->def.fixups & (UPROBE_FIX_RIP_AX | UPROBE_FIX_RIP_CX)) {
-		struct arch_uprobe_task *autask;
-
-		autask = &current->utask->autask;
-		if (auprobe->def.fixups & UPROBE_FIX_RIP_AX)
-			regs->ax = autask->saved_scratch_register;
-		else
-			regs->cx = autask->saved_scratch_register;
+		struct uprobe_task *utask = current->utask;
+		unsigned long *sr = scratch_reg(auprobe, regs);
 
+		*sr = utask->autask.saved_scratch_register;
 		/*
 		 * The original instruction includes a displacement, and so
 		 * is 4 bytes longer than what we've just single-stepped.
-- 
cgit v1.2.1


From 50204c6f6dd01b5bce1b53e0b003d01849455512 Mon Sep 17 00:00:00 2001
From: Denys Vlasenko <dvlasenk@redhat.com>
Date: Thu, 1 May 2014 16:52:46 +0200
Subject: uprobes/x86: Simplify rip-relative handling

It is possible to replace rip-relative addressing mode with addressing
mode of the same length: (reg+disp32). This eliminates the need to fix
up immediate and correct for changing instruction length.

And we can kill arch_uprobe->def.riprel_target.

Signed-off-by: Denys Vlasenko <dvlasenk@redhat.com>
Reviewed-by: Jim Keniston <jkenisto@us.ibm.com>
Signed-off-by: Oleg Nesterov <oleg@redhat.com>
---
 arch/x86/kernel/uprobes.c | 71 ++++++++++++++++++++---------------------------
 1 file changed, 30 insertions(+), 41 deletions(-)

(limited to 'arch/x86/kernel')

diff --git a/arch/x86/kernel/uprobes.c b/arch/x86/kernel/uprobes.c
index 2ebadb252093..31dcb4d5ea46 100644
--- a/arch/x86/kernel/uprobes.c
+++ b/arch/x86/kernel/uprobes.c
@@ -251,9 +251,9 @@ static inline bool is_64bit_mm(struct mm_struct *mm)
  * If arch_uprobe->insn doesn't use rip-relative addressing, return
  * immediately.  Otherwise, rewrite the instruction so that it accesses
  * its memory operand indirectly through a scratch register.  Set
- * def->fixups and def->riprel_target accordingly. (The contents of the
- * scratch register will be saved before we single-step the modified
- * instruction, and restored afterward).
+ * def->fixups accordingly. (The contents of the scratch register
+ * will be saved before we single-step the modified instruction,
+ * and restored afterward).
  *
  * We do this because a rip-relative instruction can access only a
  * relatively small area (+/- 2 GB from the instruction), and the XOL
@@ -264,9 +264,12 @@ static inline bool is_64bit_mm(struct mm_struct *mm)
  *
  * Some useful facts about rip-relative instructions:
  *
- *  - There's always a modrm byte.
+ *  - There's always a modrm byte with bit layout "00 reg 101".
  *  - There's never a SIB byte.
  *  - The displacement is always 4 bytes.
+ *  - REX.B=1 bit in REX prefix, which normally extends r/m field,
+ *    has no effect on rip-relative mode. It doesn't make modrm byte
+ *    with r/m=101 refer to register 1101 = R13.
  */
 static void riprel_analyze(struct arch_uprobe *auprobe, struct insn *insn)
 {
@@ -293,9 +296,8 @@ static void riprel_analyze(struct arch_uprobe *auprobe, struct insn *insn)
 	 */
 	cursor = auprobe->insn + insn_offset_modrm(insn);
 	/*
-	 * Convert from rip-relative addressing to indirect addressing
-	 * via a scratch register.  Change the r/m field from 0x5 (%rip)
-	 * to 0x0 (%rax) or 0x1 (%rcx), and squeeze out the offset field.
+	 * Convert from rip-relative addressing
+	 * to register-relative addressing via a scratch register.
 	 */
 	reg = MODRM_REG(insn);
 	if (reg == 0) {
@@ -307,22 +309,21 @@ static void riprel_analyze(struct arch_uprobe *auprobe, struct insn *insn)
 		 * #1) for the scratch register.
 		 */
 		auprobe->def.fixups |= UPROBE_FIX_RIP_CX;
-		/* Change modrm from 00 000 101 to 00 000 001. */
-		*cursor = 0x1;
+		/*
+		 * Change modrm from "00 000 101" to "10 000 001". Example:
+		 * 89 05 disp32  mov %eax,disp32(%rip) becomes
+		 * 89 81 disp32  mov %eax,disp32(%rcx)
+		 */
+		*cursor = 0x81;
 	} else {
 		/* Use %rax (register #0) for the scratch register. */
 		auprobe->def.fixups |= UPROBE_FIX_RIP_AX;
-		/* Change modrm from 00 xxx 101 to 00 xxx 000 */
-		*cursor = (reg << 3);
-	}
-
-	/* Target address = address of next instruction + (signed) offset */
-	auprobe->def.riprel_target = (long)insn->length + insn->displacement.value;
-
-	/* Displacement field is gone; slide immediate field (if any) over. */
-	if (insn->immediate.nbytes) {
-		cursor++;
-		memmove(cursor, cursor + insn->displacement.nbytes, insn->immediate.nbytes);
+		/*
+		 * Change modrm from "00 reg 101" to "10 reg 000". Example:
+		 * 89 1d disp32  mov %edx,disp32(%rip) becomes
+		 * 89 98 disp32  mov %edx,disp32(%rax)
+		 */
+		*cursor = (reg << 3) | 0x80;
 	}
 }
 
@@ -343,26 +344,17 @@ static void riprel_pre_xol(struct arch_uprobe *auprobe, struct pt_regs *regs)
 		unsigned long *sr = scratch_reg(auprobe, regs);
 
 		utask->autask.saved_scratch_register = *sr;
-		*sr = utask->vaddr + auprobe->def.riprel_target;
+		*sr = utask->vaddr + auprobe->def.ilen;
 	}
 }
 
-static void riprel_post_xol(struct arch_uprobe *auprobe, struct pt_regs *regs,
-				long *correction)
+static void riprel_post_xol(struct arch_uprobe *auprobe, struct pt_regs *regs)
 {
 	if (auprobe->def.fixups & (UPROBE_FIX_RIP_AX | UPROBE_FIX_RIP_CX)) {
 		struct uprobe_task *utask = current->utask;
 		unsigned long *sr = scratch_reg(auprobe, regs);
 
 		*sr = utask->autask.saved_scratch_register;
-		/*
-		 * The original instruction includes a displacement, and so
-		 * is 4 bytes longer than what we've just single-stepped.
-		 * Caller may need to apply other fixups to handle stuff
-		 * like "jmpq *...(%rip)" and "callq *...(%rip)".
-		 */
-		if (correction)
-			*correction += 4;
 	}
 }
 #else /* 32-bit: */
@@ -379,8 +371,7 @@ static void riprel_analyze(struct arch_uprobe *auprobe, struct insn *insn)
 static void riprel_pre_xol(struct arch_uprobe *auprobe, struct pt_regs *regs)
 {
 }
-static void riprel_post_xol(struct arch_uprobe *auprobe, struct pt_regs *regs,
-					long *correction)
+static void riprel_post_xol(struct arch_uprobe *auprobe, struct pt_regs *regs)
 {
 }
 #endif /* CONFIG_X86_64 */
@@ -417,10 +408,10 @@ static int push_ret_address(struct pt_regs *regs, unsigned long ip)
 static int default_post_xol_op(struct arch_uprobe *auprobe, struct pt_regs *regs)
 {
 	struct uprobe_task *utask = current->utask;
-	long correction = (long)(utask->vaddr - utask->xol_vaddr);
 
-	riprel_post_xol(auprobe, regs, &correction);
+	riprel_post_xol(auprobe, regs);
 	if (auprobe->def.fixups & UPROBE_FIX_IP) {
+		long correction = utask->vaddr - utask->xol_vaddr;
 		regs->ip += correction;
 	} else if (auprobe->def.fixups & UPROBE_FIX_CALL) {
 		regs->sp += sizeof_long();
@@ -436,7 +427,7 @@ static int default_post_xol_op(struct arch_uprobe *auprobe, struct pt_regs *regs
 
 static void default_abort_op(struct arch_uprobe *auprobe, struct pt_regs *regs)
 {
-	riprel_post_xol(auprobe, regs, NULL);
+	riprel_post_xol(auprobe, regs);
 }
 
 static struct uprobe_xol_ops default_xol_ops = {
@@ -732,11 +723,9 @@ bool arch_uprobe_xol_was_trapped(struct task_struct *t)
  *
  * If the original instruction was a rip-relative instruction such as
  * "movl %edx,0xnnnn(%rip)", we have instead executed an equivalent
- * instruction using a scratch register -- e.g., "movl %edx,(%rax)".
- * We need to restore the contents of the scratch register and adjust
- * the ip, keeping in mind that the instruction we executed is 4 bytes
- * shorter than the original instruction (since we squeezed out the offset
- * field).  (FIX_RIP_AX or FIX_RIP_CX)
+ * instruction using a scratch register -- e.g., "movl %edx,0xnnnn(%rax)".
+ * We need to restore the contents of the scratch register
+ * (FIX_RIP_AX or FIX_RIP_CX).
  */
 int arch_uprobe_post_xol(struct arch_uprobe *auprobe, struct pt_regs *regs)
 {
-- 
cgit v1.2.1


From 1ea30fb64598bd3a6ba43d874bb53c55878eaef5 Mon Sep 17 00:00:00 2001
From: Denys Vlasenko <dvlasenk@redhat.com>
Date: Fri, 2 May 2014 17:04:00 +0200
Subject: uprobes/x86: Fix scratch register selection for rip-relative fixups

Before this patch, instructions such as div, mul, shifts with count
in CL, cmpxchg are mishandled.

This patch adds vex prefix handling. In particular, it avoids colliding
with register operand encoded in vex.vvvv field.

Since we need to avoid two possible register operands, the selection of
scratch register needs to be from at least three registers.

After looking through a lot of CPU docs, it looks like the safest choice
is SI,DI,BX. Selecting BX needs care to not collide with implicit use of
BX by cmpxchg8b.

Test-case:

	#include <stdio.h>

	static const char *const pass[] = { "FAIL", "pass" };

	long two = 2;
	void test1(void)
	{
		long ax = 0, dx = 0;
		asm volatile("\n"
	"			xor	%%edx,%%edx\n"
	"			lea	2(%%edx),%%eax\n"
	// We divide 2 by 2. Result (in eax) should be 1:
	"	probe1:		.globl	probe1\n"
	"			divl	two(%%rip)\n"
	// If we have a bug (eax mangled on entry) the result will be 2,
	// because eax gets restored by probe machinery.
		: "=a" (ax), "=d" (dx) /*out*/
		: "0" (ax), "1" (dx) /*in*/
		: "memory" /*clobber*/
		);
		dprintf(2, "%s: %s\n", __func__,
			pass[ax == 1]
		);
	}

	long val2 = 0;
	void test2(void)
	{
		long old_val = val2;
		long ax = 0, dx = 0;
		asm volatile("\n"
	"			mov	val2,%%eax\n"     // eax := val2
	"			lea	1(%%eax),%%edx\n" // edx := eax+1
	// eax is equal to val2. cmpxchg should store edx to val2:
	"	probe2:		.globl  probe2\n"
	"			cmpxchg %%edx,val2(%%rip)\n"
	// If we have a bug (eax mangled on entry), val2 will stay unchanged
		: "=a" (ax), "=d" (dx) /*out*/
		: "0" (ax), "1" (dx) /*in*/
		: "memory" /*clobber*/
		);
		dprintf(2, "%s: %s\n", __func__,
			pass[val2 == old_val + 1]
		);
	}

	long val3[2] = {0,0};
	void test3(void)
	{
		long old_val = val3[0];
		long ax = 0, dx = 0;
		asm volatile("\n"
	"			mov	val3,%%eax\n"  // edx:eax := val3
	"			mov	val3+4,%%edx\n"
	"			mov	%%eax,%%ebx\n" // ecx:ebx := edx:eax + 1
	"			mov	%%edx,%%ecx\n"
	"			add	$1,%%ebx\n"
	"			adc	$0,%%ecx\n"
	// edx:eax is equal to val3. cmpxchg8b should store ecx:ebx to val3:
	"	probe3:		.globl  probe3\n"
	"			cmpxchg8b val3(%%rip)\n"
	// If we have a bug (edx:eax mangled on entry), val3 will stay unchanged.
	// If ecx:edx in mangled, val3 will get wrong value.
		: "=a" (ax), "=d" (dx) /*out*/
		: "0" (ax), "1" (dx) /*in*/
		: "cx", "bx", "memory" /*clobber*/
		);
		dprintf(2, "%s: %s\n", __func__,
			pass[val3[0] == old_val + 1 && val3[1] == 0]
		);
	}

	int main(int argc, char **argv)
	{
		test1();
		test2();
		test3();
		return 0;
	}

Before this change all tests fail if probe{1,2,3} are probed.

Signed-off-by: Denys Vlasenko <dvlasenk@redhat.com>
Reviewed-by: Jim Keniston <jkenisto@us.ibm.com>
Signed-off-by: Oleg Nesterov <oleg@redhat.com>
---
 arch/x86/kernel/uprobes.c | 176 ++++++++++++++++++++++++++++++++--------------
 1 file changed, 125 insertions(+), 51 deletions(-)

(limited to 'arch/x86/kernel')

diff --git a/arch/x86/kernel/uprobes.c b/arch/x86/kernel/uprobes.c
index 31dcb4d5ea46..159ca520ef5b 100644
--- a/arch/x86/kernel/uprobes.c
+++ b/arch/x86/kernel/uprobes.c
@@ -41,8 +41,11 @@
 /* Instruction will modify TF, don't change it */
 #define UPROBE_FIX_SETF		0x04
 
-#define UPROBE_FIX_RIP_AX	0x08
-#define UPROBE_FIX_RIP_CX	0x10
+#define UPROBE_FIX_RIP_SI	0x08
+#define UPROBE_FIX_RIP_DI	0x10
+#define UPROBE_FIX_RIP_BX	0x20
+#define UPROBE_FIX_RIP_MASK	\
+	(UPROBE_FIX_RIP_SI | UPROBE_FIX_RIP_DI | UPROBE_FIX_RIP_BX)
 
 #define	UPROBE_TRAP_NR		UINT_MAX
 
@@ -275,20 +278,109 @@ static void riprel_analyze(struct arch_uprobe *auprobe, struct insn *insn)
 {
 	u8 *cursor;
 	u8 reg;
+	u8 reg2;
 
 	if (!insn_rip_relative(insn))
 		return;
 
 	/*
-	 * insn_rip_relative() would have decoded rex_prefix, modrm.
+	 * insn_rip_relative() would have decoded rex_prefix, vex_prefix, modrm.
 	 * Clear REX.b bit (extension of MODRM.rm field):
-	 * we want to encode rax/rcx, not r8/r9.
+	 * we want to encode low numbered reg, not r8+.
 	 */
 	if (insn->rex_prefix.nbytes) {
 		cursor = auprobe->insn + insn_offset_rex_prefix(insn);
-		*cursor &= 0xfe;	/* Clearing REX.B bit */
+		/* REX byte has 0100wrxb layout, clearing REX.b bit */
+		*cursor &= 0xfe;
 	}
+	/*
+	 * Similar treatment for VEX3 prefix.
+	 * TODO: add XOP/EVEX treatment when insn decoder supports them
+	 */
+	if (insn->vex_prefix.nbytes == 3) {
+		/*
+		 * vex2:     c5    rvvvvLpp   (has no b bit)
+		 * vex3/xop: c4/8f rxbmmmmm wvvvvLpp
+		 * evex:     62    rxbR00mm wvvvv1pp zllBVaaa
+		 *   (evex will need setting of both b and x since
+		 *   in non-sib encoding evex.x is 4th bit of MODRM.rm)
+		 * Setting VEX3.b (setting because it has inverted meaning):
+		 */
+		cursor = auprobe->insn + insn_offset_vex_prefix(insn) + 1;
+		*cursor |= 0x20;
+	}
+
+	/*
+	 * Convert from rip-relative addressing to register-relative addressing
+	 * via a scratch register.
+	 *
+	 * This is tricky since there are insns with modrm byte
+	 * which also use registers not encoded in modrm byte:
+	 * [i]div/[i]mul: implicitly use dx:ax
+	 * shift ops: implicitly use cx
+	 * cmpxchg: implicitly uses ax
+	 * cmpxchg8/16b: implicitly uses dx:ax and bx:cx
+	 *   Encoding: 0f c7/1 modrm
+	 *   The code below thinks that reg=1 (cx), chooses si as scratch.
+	 * mulx: implicitly uses dx: mulx r/m,r1,r2 does r1:r2 = dx * r/m.
+	 *   First appeared in Haswell (BMI2 insn). It is vex-encoded.
+	 *   Example where none of bx,cx,dx can be used as scratch reg:
+	 *   c4 e2 63 f6 0d disp32   mulx disp32(%rip),%ebx,%ecx
+	 * [v]pcmpistri: implicitly uses cx, xmm0
+	 * [v]pcmpistrm: implicitly uses xmm0
+	 * [v]pcmpestri: implicitly uses ax, dx, cx, xmm0
+	 * [v]pcmpestrm: implicitly uses ax, dx, xmm0
+	 *   Evil SSE4.2 string comparison ops from hell.
+	 * maskmovq/[v]maskmovdqu: implicitly uses (ds:rdi) as destination.
+	 *   Encoding: 0f f7 modrm, 66 0f f7 modrm, vex-encoded: c5 f9 f7 modrm.
+	 *   Store op1, byte-masked by op2 msb's in each byte, to (ds:rdi).
+	 *   AMD says it has no 3-operand form (vex.vvvv must be 1111)
+	 *   and that it can have only register operands, not mem
+	 *   (its modrm byte must have mode=11).
+	 *   If these restrictions will ever be lifted,
+	 *   we'll need code to prevent selection of di as scratch reg!
+	 *
+	 * Summary: I don't know any insns with modrm byte which
+	 * use SI register implicitly. DI register is used only
+	 * by one insn (maskmovq) and BX register is used
+	 * only by one too (cmpxchg8b).
+	 * BP is stack-segment based (may be a problem?).
+	 * AX, DX, CX are off-limits (many implicit users).
+	 * SP is unusable (it's stack pointer - think about "pop mem";
+	 * also, rsp+disp32 needs sib encoding -> insn length change).
+	 */
 
+	reg = MODRM_REG(insn);	/* Fetch modrm.reg */
+	reg2 = 0xff;		/* Fetch vex.vvvv */
+	if (insn->vex_prefix.nbytes == 2)
+		reg2 = insn->vex_prefix.bytes[1];
+	else if (insn->vex_prefix.nbytes == 3)
+		reg2 = insn->vex_prefix.bytes[2];
+	/*
+	 * TODO: add XOP, EXEV vvvv reading.
+	 *
+	 * vex.vvvv field is in bits 6-3, bits are inverted.
+	 * But in 32-bit mode, high-order bit may be ignored.
+	 * Therefore, let's consider only 3 low-order bits.
+	 */
+	reg2 = ((reg2 >> 3) & 0x7) ^ 0x7;
+	/*
+	 * Register numbering is ax,cx,dx,bx, sp,bp,si,di, r8..r15.
+	 *
+	 * Choose scratch reg. Order is important: must not select bx
+	 * if we can use si (cmpxchg8b case!)
+	 */
+	if (reg != 6 && reg2 != 6) {
+		reg2 = 6;
+		auprobe->def.fixups |= UPROBE_FIX_RIP_SI;
+	} else if (reg != 7 && reg2 != 7) {
+		reg2 = 7;
+		auprobe->def.fixups |= UPROBE_FIX_RIP_DI;
+		/* TODO (paranoia): force maskmovq to not use di */
+	} else {
+		reg2 = 3;
+		auprobe->def.fixups |= UPROBE_FIX_RIP_BX;
+	}
 	/*
 	 * Point cursor at the modrm byte.  The next 4 bytes are the
 	 * displacement.  Beyond the displacement, for some instructions,
@@ -296,41 +388,21 @@ static void riprel_analyze(struct arch_uprobe *auprobe, struct insn *insn)
 	 */
 	cursor = auprobe->insn + insn_offset_modrm(insn);
 	/*
-	 * Convert from rip-relative addressing
-	 * to register-relative addressing via a scratch register.
+	 * Change modrm from "00 reg 101" to "10 reg reg2". Example:
+	 * 89 05 disp32  mov %eax,disp32(%rip) becomes
+	 * 89 86 disp32  mov %eax,disp32(%rsi)
 	 */
-	reg = MODRM_REG(insn);
-	if (reg == 0) {
-		/*
-		 * The register operand (if any) is either the A register
-		 * (%rax, %eax, etc.) or (if the 0x4 bit is set in the
-		 * REX prefix) %r8.  In any case, we know the C register
-		 * is NOT the register operand, so we use %rcx (register
-		 * #1) for the scratch register.
-		 */
-		auprobe->def.fixups |= UPROBE_FIX_RIP_CX;
-		/*
-		 * Change modrm from "00 000 101" to "10 000 001". Example:
-		 * 89 05 disp32  mov %eax,disp32(%rip) becomes
-		 * 89 81 disp32  mov %eax,disp32(%rcx)
-		 */
-		*cursor = 0x81;
-	} else {
-		/* Use %rax (register #0) for the scratch register. */
-		auprobe->def.fixups |= UPROBE_FIX_RIP_AX;
-		/*
-		 * Change modrm from "00 reg 101" to "10 reg 000". Example:
-		 * 89 1d disp32  mov %edx,disp32(%rip) becomes
-		 * 89 98 disp32  mov %edx,disp32(%rax)
-		 */
-		*cursor = (reg << 3) | 0x80;
-	}
+	*cursor = 0x80 | (reg << 3) | reg2;
 }
 
 static inline unsigned long *
 scratch_reg(struct arch_uprobe *auprobe, struct pt_regs *regs)
 {
-	return (auprobe->def.fixups & UPROBE_FIX_RIP_AX) ? &regs->ax : &regs->cx;
+	if (auprobe->def.fixups & UPROBE_FIX_RIP_SI)
+		return &regs->si;
+	if (auprobe->def.fixups & UPROBE_FIX_RIP_DI)
+		return &regs->di;
+	return &regs->bx;
 }
 
 /*
@@ -339,7 +411,7 @@ scratch_reg(struct arch_uprobe *auprobe, struct pt_regs *regs)
  */
 static void riprel_pre_xol(struct arch_uprobe *auprobe, struct pt_regs *regs)
 {
-	if (auprobe->def.fixups & (UPROBE_FIX_RIP_AX | UPROBE_FIX_RIP_CX)) {
+	if (auprobe->def.fixups & UPROBE_FIX_RIP_MASK) {
 		struct uprobe_task *utask = current->utask;
 		unsigned long *sr = scratch_reg(auprobe, regs);
 
@@ -350,7 +422,7 @@ static void riprel_pre_xol(struct arch_uprobe *auprobe, struct pt_regs *regs)
 
 static void riprel_post_xol(struct arch_uprobe *auprobe, struct pt_regs *regs)
 {
-	if (auprobe->def.fixups & (UPROBE_FIX_RIP_AX | UPROBE_FIX_RIP_CX)) {
+	if (auprobe->def.fixups & UPROBE_FIX_RIP_MASK) {
 		struct uprobe_task *utask = current->utask;
 		unsigned long *sr = scratch_reg(auprobe, regs);
 
@@ -405,6 +477,23 @@ static int push_ret_address(struct pt_regs *regs, unsigned long ip)
 	return 0;
 }
 
+/*
+ * We have to fix things up as follows:
+ *
+ * Typically, the new ip is relative to the copied instruction.  We need
+ * to make it relative to the original instruction (FIX_IP).  Exceptions
+ * are return instructions and absolute or indirect jump or call instructions.
+ *
+ * If the single-stepped instruction was a call, the return address that
+ * is atop the stack is the address following the copied instruction.  We
+ * need to make it the address following the original instruction (FIX_CALL).
+ *
+ * If the original instruction was a rip-relative instruction such as
+ * "movl %edx,0xnnnn(%rip)", we have instead executed an equivalent
+ * instruction using a scratch register -- e.g., "movl %edx,0xnnnn(%rsi)".
+ * We need to restore the contents of the scratch register
+ * (FIX_RIP_reg).
+ */
 static int default_post_xol_op(struct arch_uprobe *auprobe, struct pt_regs *regs)
 {
 	struct uprobe_task *utask = current->utask;
@@ -711,21 +800,6 @@ bool arch_uprobe_xol_was_trapped(struct task_struct *t)
  * single-step, we single-stepped a copy of the instruction.
  *
  * This function prepares to resume execution after the single-step.
- * We have to fix things up as follows:
- *
- * Typically, the new ip is relative to the copied instruction.  We need
- * to make it relative to the original instruction (FIX_IP).  Exceptions
- * are return instructions and absolute or indirect jump or call instructions.
- *
- * If the single-stepped instruction was a call, the return address that
- * is atop the stack is the address following the copied instruction.  We
- * need to make it the address following the original instruction (FIX_CALL).
- *
- * If the original instruction was a rip-relative instruction such as
- * "movl %edx,0xnnnn(%rip)", we have instead executed an equivalent
- * instruction using a scratch register -- e.g., "movl %edx,0xnnnn(%rax)".
- * We need to restore the contents of the scratch register
- * (FIX_RIP_AX or FIX_RIP_CX).
  */
 int arch_uprobe_post_xol(struct arch_uprobe *auprobe, struct pt_regs *regs)
 {
-- 
cgit v1.2.1


From 5e1b05beeca8139204324581a5b1ffb53d057f96 Mon Sep 17 00:00:00 2001
From: Oleg Nesterov <oleg@redhat.com>
Date: Thu, 8 May 2014 20:34:00 +0200
Subject: x86/traps: Make math_error() static

Trivial, make math_error() static.

Signed-off-by: Oleg Nesterov <oleg@redhat.com>
---
 arch/x86/kernel/traps.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch/x86/kernel')

diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index 57409f6b8c62..8eddd628326e 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -488,7 +488,7 @@ exit:
  * the correct behaviour even in the presence of the asynchronous
  * IRQ13 behaviour
  */
-void math_error(struct pt_regs *regs, int error_code, int trapnr)
+static void math_error(struct pt_regs *regs, int error_code, int trapnr)
 {
 	struct task_struct *task = current;
 	siginfo_t info;
-- 
cgit v1.2.1


From 38cad57be9800e46c52a3612fb9d963eee4fd9c3 Mon Sep 17 00:00:00 2001
From: Oleg Nesterov <oleg@redhat.com>
Date: Wed, 7 May 2014 16:47:09 +0200
Subject: x86/traps: Use SEND_SIG_PRIV instead of force_sig()

force_sig() is just force_sig_info(SEND_SIG_PRIV). Imho it should die,
we have too many ugly "send signal" helpers.

And do_trap() looks just ugly because it uses force_sig_info() or
force_sig() depending on info != NULL.

Signed-off-by: Oleg Nesterov <oleg@redhat.com>
---
 arch/x86/kernel/traps.c | 9 +++------
 1 file changed, 3 insertions(+), 6 deletions(-)

(limited to 'arch/x86/kernel')

diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index 8eddd628326e..2cd429117a41 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -168,10 +168,7 @@ do_trap(int trapnr, int signr, char *str, struct pt_regs *regs,
 	}
 #endif
 
-	if (info)
-		force_sig_info(signr, info, tsk);
-	else
-		force_sig(signr, tsk);
+	force_sig_info(signr, info ?: SEND_SIG_PRIV, tsk);
 }
 
 #define DO_ERROR(trapnr, signr, str, name)				\
@@ -305,7 +302,7 @@ do_general_protection(struct pt_regs *regs, long error_code)
 		pr_cont("\n");
 	}
 
-	force_sig(SIGSEGV, tsk);
+	force_sig_info(SIGSEGV, SEND_SIG_PRIV, tsk);
 exit:
 	exception_exit(prev_state);
 }
@@ -645,7 +642,7 @@ void math_state_restore(void)
 	 */
 	if (unlikely(restore_fpu_checking(tsk))) {
 		drop_init_fpu(tsk);
-		force_sig(SIGSEGV, tsk);
+		force_sig_info(SIGSEGV, SEND_SIG_PRIV, tsk);
 		return;
 	}
 
-- 
cgit v1.2.1


From dff0796e53c29147c9bd1f5567a261dcf0e528bc Mon Sep 17 00:00:00 2001
From: Oleg Nesterov <oleg@redhat.com>
Date: Wed, 7 May 2014 17:21:34 +0200
Subject: x86/traps: Introduce do_error_trap()

Move the common code from DO_ERROR() and DO_ERROR_INFO() into the new
helper, do_error_trap(). This simplifies define's and shaves 527 bytes
from traps.o.

Signed-off-by: Oleg Nesterov <oleg@redhat.com>
---
 arch/x86/kernel/traps.c | 38 +++++++++++++++++---------------------
 1 file changed, 17 insertions(+), 21 deletions(-)

(limited to 'arch/x86/kernel')

diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index 2cd429117a41..ab8dad719b9e 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -171,41 +171,37 @@ do_trap(int trapnr, int signr, char *str, struct pt_regs *regs,
 	force_sig_info(signr, info ?: SEND_SIG_PRIV, tsk);
 }
 
+static void do_error_trap(struct pt_regs *regs, long error_code, char *str,
+			  unsigned long trapnr, int signr, siginfo_t *info)
+{
+	enum ctx_state prev_state = exception_enter();
+
+	if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, signr) !=
+			NOTIFY_STOP) {
+		conditional_sti(regs);
+		do_trap(trapnr, signr, str, regs, error_code, info);
+	}
+
+	exception_exit(prev_state);
+}
+
 #define DO_ERROR(trapnr, signr, str, name)				\
 dotraplinkage void do_##name(struct pt_regs *regs, long error_code)	\
 {									\
-	enum ctx_state prev_state;					\
-									\
-	prev_state = exception_enter();					\
-	if (notify_die(DIE_TRAP, str, regs, error_code,			\
-			trapnr, signr) == NOTIFY_STOP) {		\
-		exception_exit(prev_state);				\
-		return;							\
-	}								\
-	conditional_sti(regs);						\
-	do_trap(trapnr, signr, str, regs, error_code, NULL);		\
-	exception_exit(prev_state);					\
+	do_error_trap(regs, error_code, str, trapnr, signr, NULL);	\
 }
 
 #define DO_ERROR_INFO(trapnr, signr, str, name, sicode, siaddr)		\
 dotraplinkage void do_##name(struct pt_regs *regs, long error_code)	\
 {									\
 	siginfo_t info;							\
-	enum ctx_state prev_state;					\
 									\
 	info.si_signo = signr;						\
 	info.si_errno = 0;						\
 	info.si_code = sicode;						\
 	info.si_addr = (void __user *)siaddr;				\
-	prev_state = exception_enter();					\
-	if (notify_die(DIE_TRAP, str, regs, error_code,			\
-			trapnr, signr) == NOTIFY_STOP) {		\
-		exception_exit(prev_state);				\
-		return;							\
-	}								\
-	conditional_sti(regs);						\
-	do_trap(trapnr, signr, str, regs, error_code, &info);		\
-	exception_exit(prev_state);					\
+									\
+	do_error_trap(regs, error_code, str, trapnr, signr, &info);	\
 }
 
 DO_ERROR_INFO(X86_TRAP_DE,     SIGFPE,  "divide error",			divide_error,		     FPE_INTDIV, regs->ip )
-- 
cgit v1.2.1


From 958d3d729802f7d741cbe8400e69b89baae580ee Mon Sep 17 00:00:00 2001
From: Oleg Nesterov <oleg@redhat.com>
Date: Wed, 7 May 2014 17:59:39 +0200
Subject: x86/traps: Introduce fill_trap_info(), simplify DO_ERROR_INFO()

Extract the fill-siginfo code from DO_ERROR_INFO() into the new helper,
fill_trap_info().

It can calculate si_code and si_addr looking at trapnr, so we can remove
these arguments from DO_ERROR_INFO() and simplify the source code. The
generated code is the same, __builtin_constant_p(trapnr) == T.

Signed-off-by: Oleg Nesterov <oleg@redhat.com>
---
 arch/x86/kernel/traps.c | 53 +++++++++++++++++++++++++++++++++++--------------
 1 file changed, 38 insertions(+), 15 deletions(-)

(limited to 'arch/x86/kernel')

diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index ab8dad719b9e..1cf8a4c409b6 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -136,6 +136,33 @@ do_trap_no_signal(struct task_struct *tsk, int trapnr, char *str,
 	return -1;
 }
 
+static void fill_trap_info(struct pt_regs *regs, int signr, int trapnr,
+			   siginfo_t *info)
+{
+	unsigned long siaddr;
+	int sicode;
+
+	switch (trapnr) {
+	case X86_TRAP_DE:
+		sicode = FPE_INTDIV;
+		siaddr = regs->ip;
+		break;
+	case X86_TRAP_UD:
+		sicode = ILL_ILLOPN;
+		siaddr = regs->ip;
+		break;
+	case X86_TRAP_AC:
+		sicode = BUS_ADRALN;
+		siaddr = 0;
+		break;
+	}
+
+	info->si_signo = signr;
+	info->si_errno = 0;
+	info->si_code = sicode;
+	info->si_addr = (void __user *)siaddr;
+}
+
 static void __kprobes
 do_trap(int trapnr, int signr, char *str, struct pt_regs *regs,
 	long error_code, siginfo_t *info)
@@ -191,30 +218,26 @@ dotraplinkage void do_##name(struct pt_regs *regs, long error_code)	\
 	do_error_trap(regs, error_code, str, trapnr, signr, NULL);	\
 }
 
-#define DO_ERROR_INFO(trapnr, signr, str, name, sicode, siaddr)		\
+#define DO_ERROR_INFO(trapnr, signr, str, name)				\
 dotraplinkage void do_##name(struct pt_regs *regs, long error_code)	\
 {									\
 	siginfo_t info;							\
 									\
-	info.si_signo = signr;						\
-	info.si_errno = 0;						\
-	info.si_code = sicode;						\
-	info.si_addr = (void __user *)siaddr;				\
-									\
+	fill_trap_info(regs, signr, trapnr, &info);			\
 	do_error_trap(regs, error_code, str, trapnr, signr, &info);	\
 }
 
-DO_ERROR_INFO(X86_TRAP_DE,     SIGFPE,  "divide error",			divide_error,		     FPE_INTDIV, regs->ip )
-DO_ERROR     (X86_TRAP_OF,     SIGSEGV, "overflow",			overflow					  )
-DO_ERROR     (X86_TRAP_BR,     SIGSEGV, "bounds",			bounds						  )
-DO_ERROR_INFO(X86_TRAP_UD,     SIGILL,  "invalid opcode",		invalid_op,		     ILL_ILLOPN, regs->ip )
-DO_ERROR     (X86_TRAP_OLD_MF, SIGFPE,  "coprocessor segment overrun",	coprocessor_segment_overrun			  )
-DO_ERROR     (X86_TRAP_TS,     SIGSEGV, "invalid TSS",			invalid_TSS					  )
-DO_ERROR     (X86_TRAP_NP,     SIGBUS,  "segment not present",		segment_not_present				  )
+DO_ERROR_INFO(X86_TRAP_DE,     SIGFPE,  "divide error",			divide_error)
+DO_ERROR     (X86_TRAP_OF,     SIGSEGV, "overflow",			overflow)
+DO_ERROR     (X86_TRAP_BR,     SIGSEGV, "bounds",			bounds)
+DO_ERROR_INFO(X86_TRAP_UD,     SIGILL,  "invalid opcode",		invalid_op)
+DO_ERROR     (X86_TRAP_OLD_MF, SIGFPE,  "coprocessor segment overrun",	coprocessor_segment_overrun)
+DO_ERROR     (X86_TRAP_TS,     SIGSEGV, "invalid TSS",			invalid_TSS)
+DO_ERROR     (X86_TRAP_NP,     SIGBUS,  "segment not present",		segment_not_present)
 #ifdef CONFIG_X86_32
-DO_ERROR     (X86_TRAP_SS,     SIGBUS,  "stack segment",		stack_segment					  )
+DO_ERROR     (X86_TRAP_SS,     SIGBUS,  "stack segment",		stack_segment)
 #endif
-DO_ERROR_INFO(X86_TRAP_AC,     SIGBUS,  "alignment check",		alignment_check,	     BUS_ADRALN, 0	  )
+DO_ERROR_INFO(X86_TRAP_AC,     SIGBUS,  "alignment check",		alignment_check)
 
 #ifdef CONFIG_X86_64
 /* Runs on IST stack */
-- 
cgit v1.2.1


From 1c326c4dfe182a1c4c1e39f2c00f04c380d11692 Mon Sep 17 00:00:00 2001
From: Oleg Nesterov <oleg@redhat.com>
Date: Thu, 8 May 2014 20:04:11 +0200
Subject: x86/traps: Shift fill_trap_info() from DO_ERROR_INFO() to
 do_error_trap()

Move the callsite of fill_trap_info() into do_error_trap() and remove
the "siginfo_t *info" argument.

This obviously breaks DO_ERROR() which passed info == NULL, we simply
change fill_trap_info() to return "siginfo_t *" and add the "default"
case which returns SEND_SIG_PRIV.

Signed-off-by: Oleg Nesterov <oleg@redhat.com>
---
 arch/x86/kernel/traps.c | 21 ++++++++++++---------
 1 file changed, 12 insertions(+), 9 deletions(-)

(limited to 'arch/x86/kernel')

diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index 1cf8a4c409b6..c9dd74124038 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -136,13 +136,16 @@ do_trap_no_signal(struct task_struct *tsk, int trapnr, char *str,
 	return -1;
 }
 
-static void fill_trap_info(struct pt_regs *regs, int signr, int trapnr,
-			   siginfo_t *info)
+static siginfo_t *fill_trap_info(struct pt_regs *regs, int signr, int trapnr,
+				siginfo_t *info)
 {
 	unsigned long siaddr;
 	int sicode;
 
 	switch (trapnr) {
+	default:
+		return SEND_SIG_PRIV;
+
 	case X86_TRAP_DE:
 		sicode = FPE_INTDIV;
 		siaddr = regs->ip;
@@ -161,6 +164,7 @@ static void fill_trap_info(struct pt_regs *regs, int signr, int trapnr,
 	info->si_errno = 0;
 	info->si_code = sicode;
 	info->si_addr = (void __user *)siaddr;
+	return info;
 }
 
 static void __kprobes
@@ -199,14 +203,16 @@ do_trap(int trapnr, int signr, char *str, struct pt_regs *regs,
 }
 
 static void do_error_trap(struct pt_regs *regs, long error_code, char *str,
-			  unsigned long trapnr, int signr, siginfo_t *info)
+			  unsigned long trapnr, int signr)
 {
 	enum ctx_state prev_state = exception_enter();
+	siginfo_t info;
 
 	if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, signr) !=
 			NOTIFY_STOP) {
 		conditional_sti(regs);
-		do_trap(trapnr, signr, str, regs, error_code, info);
+		do_trap(trapnr, signr, str, regs, error_code,
+			fill_trap_info(regs, signr, trapnr, &info));
 	}
 
 	exception_exit(prev_state);
@@ -215,16 +221,13 @@ static void do_error_trap(struct pt_regs *regs, long error_code, char *str,
 #define DO_ERROR(trapnr, signr, str, name)				\
 dotraplinkage void do_##name(struct pt_regs *regs, long error_code)	\
 {									\
-	do_error_trap(regs, error_code, str, trapnr, signr, NULL);	\
+	do_error_trap(regs, error_code, str, trapnr, signr);		\
 }
 
 #define DO_ERROR_INFO(trapnr, signr, str, name)				\
 dotraplinkage void do_##name(struct pt_regs *regs, long error_code)	\
 {									\
-	siginfo_t info;							\
-									\
-	fill_trap_info(regs, signr, trapnr, &info);			\
-	do_error_trap(regs, error_code, str, trapnr, signr, &info);	\
+	do_error_trap(regs, error_code, str, trapnr, signr);		\
 }
 
 DO_ERROR_INFO(X86_TRAP_DE,     SIGFPE,  "divide error",			divide_error)
-- 
cgit v1.2.1


From 0eb14833d5b1ea1accfeffb71be5de5929f85da9 Mon Sep 17 00:00:00 2001
From: Oleg Nesterov <oleg@redhat.com>
Date: Thu, 8 May 2014 20:12:24 +0200
Subject: x86/traps: Kill DO_ERROR_INFO()

Now that DO_ERROR_INFO() doesn't differ from DO_ERROR() we can remove
it and use DO_ERROR() instead.

Signed-off-by: Oleg Nesterov <oleg@redhat.com>
---
 arch/x86/kernel/traps.c | 24 +++++++++---------------
 1 file changed, 9 insertions(+), 15 deletions(-)

(limited to 'arch/x86/kernel')

diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index c9dd74124038..73b3ea32245a 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -224,23 +224,17 @@ dotraplinkage void do_##name(struct pt_regs *regs, long error_code)	\
 	do_error_trap(regs, error_code, str, trapnr, signr);		\
 }
 
-#define DO_ERROR_INFO(trapnr, signr, str, name)				\
-dotraplinkage void do_##name(struct pt_regs *regs, long error_code)	\
-{									\
-	do_error_trap(regs, error_code, str, trapnr, signr);		\
-}
-
-DO_ERROR_INFO(X86_TRAP_DE,     SIGFPE,  "divide error",			divide_error)
-DO_ERROR     (X86_TRAP_OF,     SIGSEGV, "overflow",			overflow)
-DO_ERROR     (X86_TRAP_BR,     SIGSEGV, "bounds",			bounds)
-DO_ERROR_INFO(X86_TRAP_UD,     SIGILL,  "invalid opcode",		invalid_op)
-DO_ERROR     (X86_TRAP_OLD_MF, SIGFPE,  "coprocessor segment overrun",	coprocessor_segment_overrun)
-DO_ERROR     (X86_TRAP_TS,     SIGSEGV, "invalid TSS",			invalid_TSS)
-DO_ERROR     (X86_TRAP_NP,     SIGBUS,  "segment not present",		segment_not_present)
+DO_ERROR(X86_TRAP_DE,     SIGFPE,  "divide error",		divide_error)
+DO_ERROR(X86_TRAP_OF,     SIGSEGV, "overflow",			overflow)
+DO_ERROR(X86_TRAP_BR,     SIGSEGV, "bounds",			bounds)
+DO_ERROR(X86_TRAP_UD,     SIGILL,  "invalid opcode",		invalid_op)
+DO_ERROR(X86_TRAP_OLD_MF, SIGFPE,  "coprocessor segment overrun",coprocessor_segment_overrun)
+DO_ERROR(X86_TRAP_TS,     SIGSEGV, "invalid TSS",		invalid_TSS)
+DO_ERROR(X86_TRAP_NP,     SIGBUS,  "segment not present",	segment_not_present)
 #ifdef CONFIG_X86_32
-DO_ERROR     (X86_TRAP_SS,     SIGBUS,  "stack segment",		stack_segment)
+DO_ERROR(X86_TRAP_SS,     SIGBUS,  "stack segment",		stack_segment)
 #endif
-DO_ERROR_INFO(X86_TRAP_AC,     SIGBUS,  "alignment check",		alignment_check)
+DO_ERROR(X86_TRAP_AC,     SIGBUS,  "alignment check",		alignment_check)
 
 #ifdef CONFIG_X86_64
 /* Runs on IST stack */
-- 
cgit v1.2.1


From b02ef20a9fba08948e643d3eec0efadf1da01a44 Mon Sep 17 00:00:00 2001
From: Oleg Nesterov <oleg@redhat.com>
Date: Mon, 12 May 2014 18:24:45 +0200
Subject: uprobes/x86: Fix the wrong ->si_addr when xol triggers a trap

If the probed insn triggers a trap, ->si_addr = regs->ip is technically
correct, but this is not what the signal handler wants; we need to pass
the address of the probed insn, not the address of xol slot.

Add the new arch-agnostic helper, uprobe_get_trap_addr(), and change
fill_trap_info() and math_error() to use it. !CONFIG_UPROBES case in
uprobes.h uses a macro to avoid include hell and ensure that it can be
compiled even if an architecture doesn't define instruction_pointer().

Test-case:

	#include <signal.h>
	#include <stdio.h>
	#include <unistd.h>

	extern void probe_div(void);

	void sigh(int sig, siginfo_t *info, void *c)
	{
		int passed = (info->si_addr == probe_div);
		printf(passed ? "PASS\n" : "FAIL\n");
		_exit(!passed);
	}

	int main(void)
	{
		struct sigaction sa = {
			.sa_sigaction	= sigh,
			.sa_flags	= SA_SIGINFO,
		};

		sigaction(SIGFPE, &sa, NULL);

		asm (
			"xor %ecx,%ecx\n"
			".globl probe_div; probe_div:\n"
			"idiv %ecx\n"
		);

		return 0;
	}

it fails if probe_div() is probed.

Note: show_unhandled_signals users should probably use this helper too,
but we need to cleanup them first.

Signed-off-by: Oleg Nesterov <oleg@redhat.com>
Reviewed-by: Masami Hiramatsu <masami.hiramatsu.pt@hitachi.com>
---
 arch/x86/kernel/traps.c | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

(limited to 'arch/x86/kernel')

diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index 73b3ea32245a..3fdb20548c4b 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -23,6 +23,7 @@
 #include <linux/kernel.h>
 #include <linux/module.h>
 #include <linux/ptrace.h>
+#include <linux/uprobes.h>
 #include <linux/string.h>
 #include <linux/delay.h>
 #include <linux/errno.h>
@@ -148,11 +149,11 @@ static siginfo_t *fill_trap_info(struct pt_regs *regs, int signr, int trapnr,
 
 	case X86_TRAP_DE:
 		sicode = FPE_INTDIV;
-		siaddr = regs->ip;
+		siaddr = uprobe_get_trap_addr(regs);
 		break;
 	case X86_TRAP_UD:
 		sicode = ILL_ILLOPN;
-		siaddr = regs->ip;
+		siaddr = uprobe_get_trap_addr(regs);
 		break;
 	case X86_TRAP_AC:
 		sicode = BUS_ADRALN;
@@ -531,7 +532,7 @@ static void math_error(struct pt_regs *regs, int error_code, int trapnr)
 	task->thread.error_code = error_code;
 	info.si_signo = SIGFPE;
 	info.si_errno = 0;
-	info.si_addr = (void __user *)regs->ip;
+	info.si_addr = (void __user *)uprobe_get_trap_addr(regs);
 	if (trapnr == X86_TRAP_MF) {
 		unsigned short cwd, swd;
 		/*
-- 
cgit v1.2.1