author     Matthias Braun <matze@braunis.de>   2016-03-02 19:20:00 +0000
committer  Matthias Braun <matze@braunis.de>   2016-03-02 19:20:00 +0000
commit     f290912d2248687e688779d1d89999df56c14a09 (patch)
tree       3fcf0e6854ae16ba357b814c0a0a16f1a88f313c /llvm/test/CodeGen/Thumb2
parent     578787ad3055d0a874bb76b148a9d5dc7bd18db5 (diff)
ARM: Introduce conservative load/store optimization mode
Most of the time ARM has the CCR.UNALIGN_TRP bit set to false, which means that unaligned loads/stores do not trap and even extensive testing will not catch these bugs. However, the multi/double variants are not affected by this bit and will still trap. In effect, a more aggressive load/store optimization will break existing (bad) code.

These bugs do not necessarily manifest in the broken code where the misaligned pointer is formed, but often later in perfectly legal code where it is accessed. This means recompiling system libraries (which have no alignment bugs) with a newer compiler will break existing applications (with alignment bugs) that worked before.

So (under protest) I implemented this safe mode, which limits the formation of multi/double operations to cases that are not affected by user code (stack operations like spills/reloads) or cases where the normal operations trap anyway (floating-point loads/stores). It is disabled by default.

Differential Revision: http://reviews.llvm.org/D17015

llvm-svn: 262504
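The failure mode described above is easiest to see from the C side. The following is a minimal illustrative sketch, not part of this commit; the function name and buffer layout are made up. Plain LDR/STR tolerate unaligned addresses while CCR.UNALIGN_TRP is clear, but the multi/double encodings always require natural alignment, so merging the two loads below can turn silently working (but buggy) code into a runtime fault:

    #include <stdint.h>

    /* Hypothetical "existing (bad) code": the cast forms a misaligned
       int32_t pointer, which is undefined behavior in C. */
    int32_t sum_pair(const char *buf)
    {
        /* buf + 2 is only 2-byte aligned, yet it is used as an int32_t*. */
        const int32_t *p = (const int32_t *)(buf + 2);

        /* Emitted as two separate LDRs this "works" when CCR.UNALIGN_TRP
           is clear. If the load/store optimizer merges the accesses into
           a single LDRD or LDM, the same source traps at run time because
           those encodings are not affected by the bit and require
           alignment. */
        return p[0] + p[1];
    }

The conservative mode exercised below (the -arm-assume-misaligned-load-store option in the second RUN line) keeps LDRD/LDM formation for spills/reloads and floating-point accesses, where user pointers are not involved or the instruction would trap anyway, and suppresses it for accesses like the one above.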
Diffstat (limited to 'llvm/test/CodeGen/Thumb2')
-rw-r--r--  llvm/test/CodeGen/Thumb2/thumb2-ldm.ll  55
1 file changed, 45 insertions, 10 deletions
diff --git a/llvm/test/CodeGen/Thumb2/thumb2-ldm.ll b/llvm/test/CodeGen/Thumb2/thumb2-ldm.ll
index 28903aca326..a5b47411124 100644
--- a/llvm/test/CodeGen/Thumb2/thumb2-ldm.ll
+++ b/llvm/test/CodeGen/Thumb2/thumb2-ldm.ll
@@ -1,12 +1,15 @@
-; RUN: llc < %s -mtriple=thumbv7-apple-ios -mattr=+thumb2 | FileCheck %s
+; RUN: llc < %s -mtriple=thumbv7-apple-ios -mattr=+thumb2 | FileCheck %s -check-prefix=ALL -check-prefix=CHECK
+; RUN: llc < %s -mtriple=thumbv7-apple-ios -mattr=+thumb2 -arm-assume-misaligned-load-store | FileCheck %s -check-prefix=ALL -check-prefix=CONSERVATIVE
@X = external global [0 x i32] ; <[0 x i32]*> [#uses=5]
define i32 @t1() {
-; CHECK-LABEL: t1:
-; CHECK: push {r7, lr}
+; ALL-LABEL: t1:
+; ALL: push {r7, lr}
; CHECK: ldrd
-; CHECK: pop {r7, pc}
+; CONSERVATIVE-NOT: ldrd
+; CONSERVATIVE-NOT: ldm
+; ALL: pop {r7, pc}
%tmp = load i32, i32* getelementptr ([0 x i32], [0 x i32]* @X, i32 0, i32 0) ; <i32> [#uses=1]
%tmp3 = load i32, i32* getelementptr ([0 x i32], [0 x i32]* @X, i32 0, i32 1) ; <i32> [#uses=1]
%tmp4 = call i32 @f1( i32 %tmp, i32 %tmp3 ) ; <i32> [#uses=1]
@@ -14,10 +17,12 @@ define i32 @t1() {
}
define i32 @t2() {
-; CHECK-LABEL: t2:
-; CHECK: push {r7, lr}
+; ALL-LABEL: t2:
+; ALL: push {r7, lr}
; CHECK: ldm
-; CHECK: pop {r7, pc}
+; CONSERVATIVE-NOT: ldrd
+; CONSERVATIVE-NOT: ldm
+; ALL: pop {r7, pc}
%tmp = load i32, i32* getelementptr ([0 x i32], [0 x i32]* @X, i32 0, i32 2) ; <i32> [#uses=1]
%tmp3 = load i32, i32* getelementptr ([0 x i32], [0 x i32]* @X, i32 0, i32 3) ; <i32> [#uses=1]
%tmp5 = load i32, i32* getelementptr ([0 x i32], [0 x i32]* @X, i32 0, i32 4) ; <i32> [#uses=1]
@@ -26,10 +31,12 @@ define i32 @t2() {
}
define i32 @t3() {
-; CHECK-LABEL: t3:
-; CHECK: push {r7, lr}
+; ALL-LABEL: t3:
+; ALL: push {r7, lr}
; CHECK: ldm
-; CHECK: pop {r7, pc}
+; CONSERVATIVE-NOT: ldrd
+; CONSERVATIVE-NOT: ldm
+; ALL: pop {r7, pc}
%tmp = load i32, i32* getelementptr ([0 x i32], [0 x i32]* @X, i32 0, i32 1) ; <i32> [#uses=1]
%tmp3 = load i32, i32* getelementptr ([0 x i32], [0 x i32]* @X, i32 0, i32 2) ; <i32> [#uses=1]
%tmp5 = load i32, i32* getelementptr ([0 x i32], [0 x i32]* @X, i32 0, i32 3) ; <i32> [#uses=1]
@@ -37,6 +44,34 @@ define i32 @t3() {
ret i32 %tmp6
}
+@g = common global i32* null
+
+define void @t4(i32 %a0, i32 %a1, i32 %a2) {
+; ALL-LABEL: t4:
+; ALL: stm.w sp, {r0, r1, r2}
+; ALL: blx _ext
+; ALL: ldm.w sp, {r0, r1, r2}
+; ALL: blx _f2
+ %arr = alloca [4 x i32], align 4
+ %p0 = getelementptr inbounds [4 x i32], [4 x i32]* %arr, i64 0, i64 0
+ %p1 = getelementptr inbounds [4 x i32], [4 x i32]* %arr, i64 0, i64 1
+ %p2 = getelementptr inbounds [4 x i32], [4 x i32]* %arr, i64 0, i64 2
+ store i32* %p0, i32** @g, align 8
+
+ store i32 %a0, i32* %p0, align 4
+ store i32 %a1, i32* %p1, align 4
+ store i32 %a2, i32* %p2, align 4
+ call void @ext()
+
+ %v0 = load i32, i32* %p0, align 4
+ %v1 = load i32, i32* %p1, align 4
+ %v2 = load i32, i32* %p2, align 4
+ call i32 @f2(i32 %v0, i32 %v1, i32 %v2)
+ ret void
+}
+
declare i32 @f1(i32, i32)
declare i32 @f2(i32, i32, i32)
+
+declare void @ext()