author     Renato Golin <renato.golin@linaro.org>    2015-02-25 14:41:06 +0000
committer  Renato Golin <renato.golin@linaro.org>    2015-02-25 14:41:06 +0000
commit     b9887ef32a5d06108dfabbbe181bd8e4ea7abbfe (patch)
tree       e470f2ae641ff070823cdb61580fd2a777e60b22 /llvm/test
parent     a9b01eb77641cce46dcc69bce27635a72187a89c (diff)
Improve handling of stack accesses in Thumb-1
Thumb-1 only allows SP-based LDR and STR to be word-sized, and SP-based LDR, STR, and ADD only allow offsets that are a multiple of 4. Make some changes to better make use of these instructions:

* Use word loads for anyext byte and halfword loads from the stack.
* Enforce 4-byte alignment on objects accessed in this way, to ensure that the offset is valid.
* Do the same for objects whose frame index is used, in order to avoid having to use more than one ADD to generate the frame index.
* Correct how many bits of offset we think AddrModeT1_s has.

Patch by John Brawn.

llvm-svn: 230496
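As a sketch of the first two bullets in action (the function name is hypothetical; the pattern mirrors the new Thumb/stack-access.ll test below): both byte loads here are anyext, since only their low 8 bits survive to the final zext, so each can be widened to a word-sized SP-relative load once its slot is forced to 4-byte alignment:

define i32 @widen_sketch() {
  %x = alloca i8, align 1
  %y = alloca i8, align 1
  ; Expected Thumb-1 codegen after this patch: plain word loads,
  ; e.g. "ldr r0, [sp]" and "ldr r1, [sp, #4]", with no mov+ldrb pairs.
  %a = load i8* %x, align 1
  %b = load i8* %y, align 1
  %s = add nsw i8 %a, %b
  %z = zext i8 %s to i32
  ret i32 %z
}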
Diffstat (limited to 'llvm/test')
-rw-r--r--  llvm/test/CodeGen/ARM/2015-01-21-thumbv4t-ldstr-opt.ll |  91
-rw-r--r--  llvm/test/CodeGen/ARM/atomic-ops-v8.ll                 |   2
-rw-r--r--  llvm/test/CodeGen/ARM/debug-frame-vararg.ll            |  14
-rw-r--r--  llvm/test/CodeGen/ARM/frame-register.ll                |   6
-rw-r--r--  llvm/test/CodeGen/ARM/thumb1-varalloc.ll               |  32
-rw-r--r--  llvm/test/CodeGen/ARM/thumb1_return_sequence.ll        |  48
-rw-r--r--  llvm/test/CodeGen/Thumb/stack-access.ll                |  74
-rw-r--r--  llvm/test/CodeGen/Thumb/stm-merge.ll                   |   9
-rw-r--r--  llvm/test/CodeGen/Thumb/vargs.ll                       |  16
9 files changed, 166 insertions, 126 deletions
diff --git a/llvm/test/CodeGen/ARM/2015-01-21-thumbv4t-ldstr-opt.ll b/llvm/test/CodeGen/ARM/2015-01-21-thumbv4t-ldstr-opt.ll
index f3cc3d82121..de2deadc4e3 100644
--- a/llvm/test/CodeGen/ARM/2015-01-21-thumbv4t-ldstr-opt.ll
+++ b/llvm/test/CodeGen/ARM/2015-01-21-thumbv4t-ldstr-opt.ll
@@ -1,55 +1,48 @@
; RUN: llc -mtriple=thumbv4t-none--eabi < %s | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-V4T
; RUN: llc -mtriple=thumbv6m-none--eabi < %s | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-V6M
-; CHECK-LABEL: foo
-define i32 @foo(i32 %z, ...) #0 {
-entry:
- %a = alloca i32, align 4
- %b = alloca i32, align 4
- %c = alloca i32, align 4
- %d = alloca i32, align 4
- %e = alloca i32, align 4
- %f = alloca i32, align 4
- %g = alloca i32, align 4
- %h = alloca i32, align 4
-
- store i32 1, i32* %a, align 4
- store i32 2, i32* %b, align 4
- store i32 3, i32* %c, align 4
- store i32 4, i32* %d, align 4
- store i32 5, i32* %e, align 4
- store i32 6, i32* %f, align 4
- store i32 7, i32* %g, align 4
- store i32 8, i32* %h, align 4
-
- %0 = load i32* %a, align 4
- %1 = load i32* %b, align 4
- %2 = load i32* %c, align 4
- %3 = load i32* %d, align 4
- %4 = load i32* %e, align 4
- %5 = load i32* %f, align 4
- %6 = load i32* %g, align 4
- %7 = load i32* %h, align 4
-
- %add = add nsw i32 %0, %1
- %add4 = add nsw i32 %add, %2
- %add5 = add nsw i32 %add4, %3
- %add6 = add nsw i32 %add5, %4
- %add7 = add nsw i32 %add6, %5
- %add8 = add nsw i32 %add7, %6
- %add9 = add nsw i32 %add8, %7
-
- %addz = add nsw i32 %add9, %z
- call void @llvm.va_start(i8* null)
- ret i32 %addz
-
-; CHECK: sub sp, #40
-; CHECK-NEXT: add [[BASE:r[0-9]]], sp, #8
-
-; CHECK-V4T: movs [[NEWBASE:r[0-9]]], [[BASE]]
-; CHECK-V6M: mov [[NEWBASE:r[0-9]]], [[BASE]]
-; CHECK-NEXT: adds [[NEWBASE]], #8
+; CHECK-LABEL: test1
+define i32 @test1(i32* %p) {
+
+; Offsets less than 8 can be generated in a single add
+; CHECK: adds [[NEWBASE:r[0-9]]], r0, #4
+ %1 = getelementptr inbounds i32* %p, i32 1
+ %2 = getelementptr inbounds i32* %p, i32 2
+ %3 = getelementptr inbounds i32* %p, i32 3
+ %4 = getelementptr inbounds i32* %p, i32 4
+
; CHECK-NEXT: ldm [[NEWBASE]],
+ %5 = load i32* %1, align 4
+ %6 = load i32* %2, align 4
+ %7 = load i32* %3, align 4
+ %8 = load i32* %4, align 4
+
+ %9 = add nsw i32 %5, %6
+ %10 = add nsw i32 %9, %7
+ %11 = add nsw i32 %10, %8
+ ret i32 %11
}
-declare void @llvm.va_start(i8*) nounwind
+; CHECK-LABEL: test2
+define i32 @test2(i32* %p) {
+
+; Offsets >=8 require a mov and an add
+; CHECK-V4T: movs [[NEWBASE:r[0-9]]], r0
+; CHECK-V6M: mov [[NEWBASE:r[0-9]]], r0
+; CHECK-NEXT: adds [[NEWBASE]], #8
+ %1 = getelementptr inbounds i32* %p, i32 2
+ %2 = getelementptr inbounds i32* %p, i32 3
+ %3 = getelementptr inbounds i32* %p, i32 4
+ %4 = getelementptr inbounds i32* %p, i32 5
+
+; CHECK-NEXT: ldm [[NEWBASE]],
+ %5 = load i32* %1, align 4
+ %6 = load i32* %2, align 4
+ %7 = load i32* %3, align 4
+ %8 = load i32* %4, align 4
+
+ %9 = add nsw i32 %5, %6
+ %10 = add nsw i32 %9, %7
+ %11 = add nsw i32 %10, %8
+ ret i32 %11
+}
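The two cases checked above fall directly out of the Thumb-1 ADD encodings: the three-operand form (adds Rd, Rn, #imm3) only encodes offsets 0-7, while the two-operand form (adds Rdn, #imm8) reaches 0-255 but overwrites its operand, so the base must be copied first. A sketch of the two resulting sequences (register numbers are illustrative):

  ; offset 4 (< 8): a single three-operand add
  adds r1, r0, #4
  ldm  r1, {r1, r2, r3, r4}

  ; offset 8 (>= 8): copy the base, then a two-operand add
  movs r1, r0        ; plain "mov" on v6-M, as the CHECK-V6M line shows
  adds r1, #8
  ldm  r1, {r1, r2, r3, r4}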
diff --git a/llvm/test/CodeGen/ARM/atomic-ops-v8.ll b/llvm/test/CodeGen/ARM/atomic-ops-v8.ll
index 7072aaaf733..6ba1352fb18 100644
--- a/llvm/test/CodeGen/ARM/atomic-ops-v8.ll
+++ b/llvm/test/CodeGen/ARM/atomic-ops-v8.ll
@@ -1296,7 +1296,7 @@ define void @test_atomic_store_monotonic_regoff_i8(i64 %base, i64 %off, i8 %val)
%addr = inttoptr i64 %addr_int to i8*
store atomic i8 %val, i8* %addr monotonic, align 1
-; CHECK-LE: ldrb{{(\.w)?}} [[VAL:r[0-9]+]], [sp]
+; CHECK-LE: ldr{{b?(\.w)?}} [[VAL:r[0-9]+]], [sp]
; CHECK-LE: strb [[VAL]], [r0, r2]
; CHECK-BE: ldrb{{(\.w)?}} [[VAL:r[0-9]+]], [sp, #3]
; CHECK-BE: strb [[VAL]], [r1, r3]
diff --git a/llvm/test/CodeGen/ARM/debug-frame-vararg.ll b/llvm/test/CodeGen/ARM/debug-frame-vararg.ll
index 05521d80646..65be2db6923 100644
--- a/llvm/test/CodeGen/ARM/debug-frame-vararg.ll
+++ b/llvm/test/CodeGen/ARM/debug-frame-vararg.ll
@@ -88,24 +88,22 @@
; CHECK-THUMB-FP: .cfi_startproc
; CHECK-THUMB-FP: sub sp, #16
; CHECK-THUMB-FP: .cfi_def_cfa_offset 16
-; CHECK-THUMB-FP: push {r4, r5, r7, lr}
-; CHECK-THUMB-FP: .cfi_def_cfa_offset 32
+; CHECK-THUMB-FP: push {r4, lr}
+; CHECK-THUMB-FP: .cfi_def_cfa_offset 24
; CHECK-THUMB-FP: .cfi_offset lr, -20
-; CHECK-THUMB-FP: .cfi_offset r7, -24
-; CHECK-THUMB-FP: .cfi_offset r5, -28
-; CHECK-THUMB-FP: .cfi_offset r4, -32
+; CHECK-THUMB-FP: .cfi_offset r4, -24
; CHECK-THUMB-FP: sub sp, #8
-; CHECK-THUMB-FP: .cfi_def_cfa_offset 40
+; CHECK-THUMB-FP: .cfi_def_cfa_offset 32
; CHECK-THUMB-FP-ELIM-LABEL: sum
; CHECK-THUMB-FP-ELIM: .cfi_startproc
; CHECK-THUMB-FP-ELIM: sub sp, #16
; CHECK-THUMB-FP-ELIM: .cfi_def_cfa_offset 16
-; CHECK-THUMB-FP-ELIM: push {r4, r5, r7, lr}
+; CHECK-THUMB-FP-ELIM: push {r4, r6, r7, lr}
; CHECK-THUMB-FP-ELIM: .cfi_def_cfa_offset 32
; CHECK-THUMB-FP-ELIM: .cfi_offset lr, -20
; CHECK-THUMB-FP-ELIM: .cfi_offset r7, -24
-; CHECK-THUMB-FP-ELIM: .cfi_offset r5, -28
+; CHECK-THUMB-FP-ELIM: .cfi_offset r6, -28
; CHECK-THUMB-FP-ELIM: .cfi_offset r4, -32
; CHECK-THUMB-FP-ELIM: add r7, sp, #8
; CHECK-THUMB-FP-ELIM: .cfi_def_cfa r7, 24
diff --git a/llvm/test/CodeGen/ARM/frame-register.ll b/llvm/test/CodeGen/ARM/frame-register.ll
index e6a55bddaf1..b04e376693d 100644
--- a/llvm/test/CodeGen/ARM/frame-register.ll
+++ b/llvm/test/CodeGen/ARM/frame-register.ll
@@ -30,9 +30,9 @@ entry:
; CHECK-ARM: push {r11, lr}
; CHECK-ARM: mov r11, sp
-; CHECK-THUMB: push {r4, r6, r7, lr}
-; CHECK-THUMB: add r7, sp, #8
+; CHECK-THUMB: push {r7, lr}
+; CHECK-THUMB: add r7, sp, #0
; CHECK-DARWIN-ARM: push {r7, lr}
-; CHECK-DARWIN-THUMB: push {r4, r7, lr}
+; CHECK-DARWIN-THUMB: push {r7, lr}
diff --git a/llvm/test/CodeGen/ARM/thumb1-varalloc.ll b/llvm/test/CodeGen/ARM/thumb1-varalloc.ll
index 8d5888d38f9..82c4ad571b1 100644
--- a/llvm/test/CodeGen/ARM/thumb1-varalloc.ll
+++ b/llvm/test/CodeGen/ARM/thumb1-varalloc.ll
@@ -43,26 +43,6 @@ bb3:
declare noalias i8* @strdup(i8* nocapture) nounwind
declare i32 @_called_func(i8*, i32*) nounwind
-; Variable ending up at unaligned offset from sp (i.e. not a multiple of 4)
-define void @test_local_var_addr() {
-; CHECK-LABEL: test_local_var_addr:
-
- %addr1 = alloca i8
- %addr2 = alloca i8
-
-; CHECK: mov r0, sp
-; CHECK: adds r0, #{{[0-9]+}}
-; CHECK: blx
- call void @take_ptr(i8* %addr1)
-
-; CHECK: mov r0, sp
-; CHECK: adds r0, #{{[0-9]+}}
-; CHECK: blx
- call void @take_ptr(i8* %addr2)
-
- ret void
-}
-
; Simple variable ending up *at* sp.
define void @test_simple_var() {
; CHECK-LABEL: test_simple_var:
@@ -126,14 +106,16 @@ define void @test_local_var_offset_1020() {
ret void
}
-; Max range addressable with tADDrSPi + tADDi8
-define void @test_local_var_offset_1275() {
-; CHECK-LABEL: test_local_var_offset_1275
+; Max range addressable with tADDrSPi + tADDi8 is 1275, however the automatic
+; 4-byte aligning of objects on the stack combined with 8-byte stack alignment
+; means that 1268 is the max offset we can use.
+define void @test_local_var_offset_1268() {
+; CHECK-LABEL: test_local_var_offset_1268
%addr1 = alloca i8, i32 1
- %addr2 = alloca i8, i32 1275
+ %addr2 = alloca i8, i32 1268
; CHECK: add r0, sp, #1020
-; CHECK: adds r0, #255
+; CHECK: adds r0, #248
; CHECK-NEXT: blx
call void @take_ptr(i8* %addr1)
diff --git a/llvm/test/CodeGen/ARM/thumb1_return_sequence.ll b/llvm/test/CodeGen/ARM/thumb1_return_sequence.ll
index 318e6e40237..c83126098f7 100644
--- a/llvm/test/CodeGen/ARM/thumb1_return_sequence.ll
+++ b/llvm/test/CodeGen/ARM/thumb1_return_sequence.ll
@@ -3,7 +3,7 @@
; CHECK-V4T-LABEL: clobberframe
; CHECK-V5T-LABEL: clobberframe
-define <4 x i32> @clobberframe() #0 {
+define <4 x i32> @clobberframe(<6 x i32>* %p) #0 {
entry:
; Prologue
; --------
@@ -11,9 +11,10 @@ entry:
; CHECK-V4T: sub sp,
; CHECK-V5T: push {[[SAVED:(r[4567](, )?)+]], lr}
- %b = alloca <4 x i32>, align 16
+ %b = alloca <6 x i32>, align 16
%a = alloca <4 x i32>, align 16
- store <4 x i32> <i32 42, i32 42, i32 42, i32 42>, <4 x i32>* %b, align 16
+ %stuff = load <6 x i32>* %p, align 16
+ store <6 x i32> %stuff, <6 x i32>* %b, align 16
store <4 x i32> <i32 0, i32 1, i32 2, i32 3>, <4 x i32>* %a, align 16
%0 = load <4 x i32>* %a, align 16
ret <4 x i32> %0
@@ -70,40 +71,25 @@ entry:
; CHECK-V4T-LABEL: simpleframe
; CHECK-V5T-LABEL: simpleframe
-define i32 @simpleframe() #0 {
+define i32 @simpleframe(<6 x i32>* %p) #0 {
entry:
; Prologue
; --------
; CHECK-V4T: push {[[SAVED:(r[4567](, )?)+]], lr}
; CHECK-V5T: push {[[SAVED:(r[4567](, )?)+]], lr}
- %a = alloca i32, align 4
- %b = alloca i32, align 4
- %c = alloca i32, align 4
- %d = alloca i32, align 4
- store i32 1, i32* %a, align 4
- store i32 2, i32* %b, align 4
- store i32 3, i32* %c, align 4
- store i32 4, i32* %d, align 4
- %0 = load i32* %a, align 4
- %inc = add nsw i32 %0, 1
- store i32 %inc, i32* %a, align 4
- %1 = load i32* %b, align 4
- %inc1 = add nsw i32 %1, 1
- store i32 %inc1, i32* %b, align 4
- %2 = load i32* %c, align 4
- %inc2 = add nsw i32 %2, 1
- store i32 %inc2, i32* %c, align 4
- %3 = load i32* %d, align 4
- %inc3 = add nsw i32 %3, 1
- store i32 %inc3, i32* %d, align 4
- %4 = load i32* %a, align 4
- %5 = load i32* %b, align 4
- %add = add nsw i32 %4, %5
- %6 = load i32* %c, align 4
- %add4 = add nsw i32 %add, %6
- %7 = load i32* %d, align 4
- %add5 = add nsw i32 %add4, %7
+ %0 = load <6 x i32>* %p, align 16
+ %1 = extractelement <6 x i32> %0, i32 0
+ %2 = extractelement <6 x i32> %0, i32 1
+ %3 = extractelement <6 x i32> %0, i32 2
+ %4 = extractelement <6 x i32> %0, i32 3
+ %5 = extractelement <6 x i32> %0, i32 4
+ %6 = extractelement <6 x i32> %0, i32 5
+ %add1 = add nsw i32 %1, %2
+ %add2 = add nsw i32 %add1, %3
+ %add3 = add nsw i32 %add2, %4
+ %add4 = add nsw i32 %add3, %5
+ %add5 = add nsw i32 %add4, %6
ret i32 %add5
; Epilogue
diff --git a/llvm/test/CodeGen/Thumb/stack-access.ll b/llvm/test/CodeGen/Thumb/stack-access.ll
new file mode 100644
index 00000000000..bcffda29167
--- /dev/null
+++ b/llvm/test/CodeGen/Thumb/stack-access.ll
@@ -0,0 +1,74 @@
+; RUN: llc -mtriple=thumb-eabi < %s -o - | FileCheck %s
+
+; Check that stack addresses are generated using a single ADD
+define void @test1(i8** %p) {
+ %x = alloca i8, align 1
+ %y = alloca i8, align 1
+ %z = alloca i8, align 1
+; CHECK: add r1, sp, #8
+; CHECK: str r1, [r0]
+ store i8* %x, i8** %p, align 4
+; CHECK: add r1, sp, #4
+; CHECK: str r1, [r0]
+ store i8* %y, i8** %p, align 4
+; CHECK: mov r1, sp
+; CHECK: str r1, [r0]
+ store i8* %z, i8** %p, align 4
+ ret void
+}
+
+; Stack offsets larger than 1020 still need two ADDs
+define void @test2([1024 x i8]** %p) {
+ %arr1 = alloca [1024 x i8], align 1
+ %arr2 = alloca [1024 x i8], align 1
+; CHECK: add r1, sp, #1020
+; CHECK: adds r1, #4
+; CHECK: str r1, [r0]
+ store [1024 x i8]* %arr1, [1024 x i8]** %p, align 4
+; CHECK: mov r1, sp
+; CHECK: str r1, [r0]
+ store [1024 x i8]* %arr2, [1024 x i8]** %p, align 4
+ ret void
+}
+
+; If possible, stack-based ldrb/ldrh are widened to use SP-based addressing
+define i32 @test3() #0 {
+ %x = alloca i8, align 1
+ %y = alloca i8, align 1
+; CHECK: ldr r0, [sp]
+ %1 = load i8* %x, align 1
+; CHECK: ldr r1, [sp, #4]
+ %2 = load i8* %y, align 1
+ %3 = add nsw i8 %1, %2
+ %4 = zext i8 %3 to i32
+ ret i32 %4
+}
+
+define i32 @test4() #0 {
+ %x = alloca i16, align 2
+ %y = alloca i16, align 2
+; CHECK: ldr r0, [sp]
+ %1 = load i16* %x, align 2
+; CHECK: ldr r1, [sp, #4]
+ %2 = load i16* %y, align 2
+ %3 = add nsw i16 %1, %2
+ %4 = zext i16 %3 to i32
+ ret i32 %4
+}
+
+; Don't widen if the value needs to be zero-extended
+define zeroext i8 @test5() {
+ %x = alloca i8, align 1
+; CHECK: mov r0, sp
+; CHECK: ldrb r0, [r0]
+ %1 = load i8* %x, align 1
+ ret i8 %1
+}
+
+define zeroext i16 @test6() {
+ %x = alloca i16, align 2
+; CHECK: mov r0, sp
+; CHECK: ldrh r0, [r0]
+ %1 = load i16* %x, align 2
+ ret i16 %1
+}
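The zeroext cases stay as ldrb/ldrh because those encodings zero-extend for free; a widened word load would leave neighbouring stack bytes in the upper bits and need explicit masking afterwards. A sketch (assuming the usual Thumb-1 shift-pair extension):

  ; ldrb already yields a zero-extended byte:
  mov  r0, sp
  ldrb r0, [r0]

  ; a widened ldr would need two extra instructions to zero-extend:
  ldr  r0, [sp]
  lsls r0, r0, #24
  lsrs r0, r0, #24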
diff --git a/llvm/test/CodeGen/Thumb/stm-merge.ll b/llvm/test/CodeGen/Thumb/stm-merge.ll
index 76e71f4da65..d4b4cd2f0e0 100644
--- a/llvm/test/CodeGen/Thumb/stm-merge.ll
+++ b/llvm/test/CodeGen/Thumb/stm-merge.ll
@@ -7,16 +7,17 @@ target triple = "thumbv6m--linux-gnueabi"
@e = internal unnamed_addr global i32* null, align 4
; Function Attrs: nounwind optsize
-define void @fn1() #0 {
+define void @fn1(i32 %x, i32 %y, i32 %z) #0 {
entry:
; CHECK-LABEL: fn1:
; CHECK: stm r[[BASE:[0-9]]]!, {{.*}}
; CHECK-NOT: {{.*}} r[[BASE]]
-; CHECK: ldr r[[BASE]], {{.*}}
%g = alloca i32, align 4
%h = alloca i32, align 4
- store i32 1, i32* %g, align 4
- store i32 0, i32* %h, align 4
+ %i = alloca i32, align 4
+ store i32 %x, i32* %i, align 4
+ store i32 %y, i32* %h, align 4
+ store i32 %z, i32* %g, align 4
%.pr = load i32* @d, align 4
%cmp11 = icmp slt i32 %.pr, 1
br i1 %cmp11, label %for.inc.lr.ph, label %for.body5
diff --git a/llvm/test/CodeGen/Thumb/vargs.ll b/llvm/test/CodeGen/Thumb/vargs.ll
index 4078b01ba30..71e8afa4c54 100644
--- a/llvm/test/CodeGen/Thumb/vargs.ll
+++ b/llvm/test/CodeGen/Thumb/vargs.ll
@@ -6,6 +6,10 @@
define void @f(i32 %a, ...) {
entry:
+; Check that space is reserved above the pushed lr for variadic argument
+; registers to be stored in.
+; CHECK: sub sp, #[[IMM:[0-9]+]]
+; CHECK: push
%va = alloca i8*, align 4 ; <i8**> [#uses=4]
%va.upgrd.1 = bitcast i8** %va to i8* ; <i8*> [#uses=1]
call void @llvm.va_start( i8* %va.upgrd.1 )
@@ -27,6 +31,13 @@ bb7: ; preds = %bb
%va.upgrd.4 = bitcast i8** %va to i8* ; <i8*> [#uses=1]
call void @llvm.va_end( i8* %va.upgrd.4 )
ret void
+
+; The return sequence should pop the lr to r3, recover the stack space used to
+; store variadic argument registers, then return via r3. Possibly there is a pop
+; before this, but only if the function happened to use callee-saved registers.
+; CHECK: pop {r3}
+; CHECK: add sp, #[[IMM]]
+; CHECK: bx r3
}
declare void @llvm.va_start(i8*)
@@ -34,8 +45,3 @@ declare void @llvm.va_start(i8*)
declare i32 @printf(i8*, ...)
declare void @llvm.va_end(i8*)
-
-; CHECK: pop
-; CHECK: pop
-; CHECK-NOT: pop
-
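Taken together, the new CHECK groups pin down a varargs frame of this shape (a sketch: the immediate and the set of pushed registers depend on the function, and 12 bytes assumes r1-r3 are spilled for a single named argument):

  ; prologue: reserve the register save area above the callee-saved push
  sub  sp, #12
  push {lr}
  ...
  ; epilogue: pop the saved lr into a scratch register, release the
  ; save area, and return through the scratch register
  pop  {r3}
  add  sp, #12
  bx   r3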