summary | refs | log | tree | commit | diff | stats
path: root/llvm/test/CodeGen/ARM
diff options
context:
space:
mode:
author: James Molloy <james.molloy@arm.com> 2016-10-19 12:06:49 +0000
committer: James Molloy <james.molloy@arm.com> 2016-10-19 12:06:49 +0000
commit: fbfd173447b64d9d73c44cf47bafeb68e8139e5a (patch)
tree: 8fd1aa19098e83b1170e6b7d245cde554dcbcc3d /llvm/test/CodeGen/ARM
parent: 9941ca8af6b4c39fd0b9e47dc7e593d884b55710 (diff)
download: bcm5719-llvm-fbfd173447b64d9d73c44cf47bafeb68e8139e5a.tar.gz
download: bcm5719-llvm-fbfd173447b64d9d73c44cf47bafeb68e8139e5a.zip
[Thumb-1] Synthesize TBB/TBH instructions to make use of compressed jump tables
The TBB and TBH instructions in Thumb-2 allow jump tables to be compressed into sequences of bytes or shorts respectively. These instructions do not exist in Thumb-1, however it is possible to synthesize them out of a sequence of other instructions.

It turns out this sequence is so short that it's almost never a loss for performance and is ALWAYS a significant win for code size.

TBB example:

    Before:                    After:
      lsls r0, r0, #2            add  r0, pc
      adr  r1, .LJTI0_0          ldrb r0, [r0, #6]
      ldr  r0, [r0, r1]          lsls r0, r0, #1
      mov  pc, r0                add  pc, r0

  => No change in prologue code size or dynamic instruction count. Jump table shrunk by a factor of 4.

The only case that can increase dynamic instruction count is the TBH case:

    Before:                    After:
      lsls r0, r4, #2            lsls r4, r4, #1
      adr  r1, .LJTI0_0          add  r4, pc
      ldr  r0, [r0, r1]          ldrh r4, [r4, #6]
      mov  pc, r0                lsls r4, r4, #1
                                 add  pc, r4

  => 1 more instruction in prologue. Jump table shrunk by a factor of 2.

So there is an argument that this should be disabled when optimizing for performance (and a TBH needs to be generated). I'm not so sure about that in practice, because on small cores with Thumb-1 performance is often tied to code size. But I'm willing to turn it off when optimizing for performance if people want (also note that TBHs are fairly rare in practice!)

llvm-svn: 284580
Diffstat (limited to 'llvm/test/CodeGen/ARM')
-rw-r--r-- llvm/test/CodeGen/ARM/arm-position-independence-jump-table.ll | 34
-rw-r--r-- llvm/test/CodeGen/ARM/jump-table-tbh.ll | 55
2 files changed, 68 insertions, 21 deletions
diff --git a/llvm/test/CodeGen/ARM/arm-position-independence-jump-table.ll b/llvm/test/CodeGen/ARM/arm-position-independence-jump-table.ll
index 507f399c1f7..d5b8618be19 100644
--- a/llvm/test/CodeGen/ARM/arm-position-independence-jump-table.ll
+++ b/llvm/test/CodeGen/ARM/arm-position-independence-jump-table.ll
@@ -8,9 +8,9 @@
; RUN: llc -relocation-model=ropi -mtriple=thumbv7m--none-eabi -disable-block-placement < %s | FileCheck %s --check-prefix=CHECK --check-prefix=THUMB2
; RUN: llc -relocation-model=ropi-rwpi -mtriple=thumbv7m--none-eabi -disable-block-placement < %s | FileCheck %s --check-prefix=CHECK --check-prefix=THUMB2
-; RUN: llc -relocation-model=static -mtriple=thumbv6m--none-eabi -disable-block-placement < %s | FileCheck %s --check-prefix=CHECK --check-prefix=THUMB1 --check-prefix=THUMB1_ABS
-; RUN: llc -relocation-model=ropi -mtriple=thumbv6m--none-eabi -disable-block-placement < %s | FileCheck %s --check-prefix=CHECK --check-prefix=THUMB1 --check-prefix=THUMB1_PC
-; RUN: llc -relocation-model=ropi-rwpi -mtriple=thumbv6m--none-eabi -disable-block-placement < %s | FileCheck %s --check-prefix=CHECK --check-prefix=THUMB1 --check-prefix=THUMB1_PC
+; RUN: llc -relocation-model=static -mtriple=thumbv6m--none-eabi -disable-block-placement < %s | FileCheck %s --check-prefix=CHECK --check-prefix=THUMB1
+; RUN: llc -relocation-model=ropi -mtriple=thumbv6m--none-eabi -disable-block-placement < %s | FileCheck %s --check-prefix=CHECK --check-prefix=THUMB1
+; RUN: llc -relocation-model=ropi-rwpi -mtriple=thumbv6m--none-eabi -disable-block-placement < %s | FileCheck %s --check-prefix=CHECK --check-prefix=THUMB1
declare void @exit0()
@@ -85,30 +85,22 @@ lab4:
; THUMB2: [[LBB4]]
; THUMB2-NEXT: b exit4
-; THUMB1: lsls r[[R_TAB_INDEX:[0-9]+]], r{{[0-9]+}}, #2
-; THUMB1: adr r[[R_TAB_BASE:[0-9]+]], [[LJTI:\.LJTI[0-9]+_[0-9]+]]
-; THUMB1: ldr r[[R_BB_ADDR:[0-9]+]], [r[[R_TAB_INDEX]], r[[R_TAB_BASE]]]
-; THUMB1_PC: adds r[[R_BB_ADDR]], r[[R_BB_ADDR]], r[[R_TAB_BASE]]
-; THUMB1: mov pc, r[[R_BB_ADDR]]
-; THUMB1: [[LJTI]]
-; THUMB1_ABS: .long [[LBB1:\.LBB[0-9]+_[0-9]+]]+1
-; THUMB1_ABS: .long [[LBB2:\.LBB[0-9]+_[0-9]+]]+1
-; THUMB1_ABS: .long [[LBB3:\.LBB[0-9]+_[0-9]+]]+1
-; THUMB1_ABS: .long [[LBB4:\.LBB[0-9]+_[0-9]+]]+1
-; THUMB1_PC: .long [[LBB1:\.LBB[0-9]+_[0-9]+]]-[[LJTI]]
-; THUMB1_PC: .long [[LBB2:\.LBB[0-9]+_[0-9]+]]-[[LJTI]]
-; THUMB1_PC: .long [[LBB3:\.LBB[0-9]+_[0-9]+]]-[[LJTI]]
-; THUMB1_PC: .long [[LBB4:\.LBB[0-9]+_[0-9]+]]-[[LJTI]]
+
+; THUMB1: add r[[x:[0-9]+]], pc
+; THUMB1: ldrb r[[x]], [r[[x]], #4]
+; THUMB1: lsls r[[x]], r[[x]], #1
+; THUMB1: [[LCPI:\.LCPI[0-9]+_[0-9]+]]:
+; THUMB1: add pc, r[[x]]
+; THUMB1: .byte ([[LBB1:\.LBB[0-9]+_[0-9]+]]-([[LCPI]]+4))/2
+; THUMB1: .byte ([[LBB2:\.LBB[0-9]+_[0-9]+]]-([[LCPI]]+4))/2
+; THUMB1: .byte ([[LBB3:\.LBB[0-9]+_[0-9]+]]-([[LCPI]]+4))/2
+; THUMB1: .byte ([[LBB4:\.LBB[0-9]+_[0-9]+]]-([[LCPI]]+4))/2
; THUMB1: [[LBB1]]
; THUMB1-NEXT: bl exit1
-; THUMB1-NEXT: pop
; THUMB1: [[LBB2]]
; THUMB1-NEXT: bl exit2
-; THUMB1-NEXT: pop
; THUMB1: [[LBB3]]
; THUMB1-NEXT: bl exit3
-; THUMB1-NEXT: pop
; THUMB1: [[LBB4]]
; THUMB1-NEXT: bl exit4
-; THUMB1-NEXT: pop
}
diff --git a/llvm/test/CodeGen/ARM/jump-table-tbh.ll b/llvm/test/CodeGen/ARM/jump-table-tbh.ll
new file mode 100644
index 00000000000..0a38e3b97a8
--- /dev/null
+++ b/llvm/test/CodeGen/ARM/jump-table-tbh.ll
@@ -0,0 +1,55 @@
+; RUN: llc -mtriple=thumbv7m-linux-gnu -o - %s | FileCheck %s --check-prefix=T2
+; RUN: llc -mtriple=thumbv6m-linux-gnu -o - %s | FileCheck %s --check-prefix=T1
+
+declare void @foo(double)
+declare i32 @llvm.arm.space(i32, i32)
+
+define i32 @test_tbh(i1 %tst, i32 %sw, i32 %l) {
+ br label %complex
+
+; T2-LABEL: test_tbh:
+; T2: [[ANCHOR:.LCPI[0-9_]+]]:
+; T2: tbh [pc, r{{[0-9]+}}, lsl #1]
+; T2-NEXT: @ BB#1
+; T2-NEXT: LJTI
+; T2-NEXT: .short (.LBB0_[[x:[0-9]+]]-([[ANCHOR]]+4))/2
+; T2-NEXT: .short (.LBB0_{{[0-9]+}}-([[ANCHOR]]+4))/2
+; T2-NEXT: .short (.LBB0_{{[0-9]+}}-([[ANCHOR]]+4))/2
+; T2-NEXT: .short (.LBB0_[[x]]-([[ANCHOR]]+4))/2
+
+; T1-LABEL: test_tbh:
+; T1: lsls [[x:r[0-9]+]], r4, #1
+; T1: add [[x]], pc
+; T1: ldrh [[x]], {{\[}}[[x]], #4]
+; T1: lsls [[x]], [[x]], #1
+; T1: [[ANCHOR:.LCPI[0-9_]+]]:
+; T1: add pc, [[x]]
+; T1-NEXT: @ BB#2
+; T1-NEXT: LJTI
+; T1-NEXT: .short (.LBB0_[[x:[0-9]+]]-([[ANCHOR]]+4))/2
+; T1-NEXT: .short (.LBB0_{{[0-9]+}}-([[ANCHOR]]+4))/2
+; T1-NEXT: .short (.LBB0_{{[0-9]+}}-([[ANCHOR]]+4))/2
+; T1-NEXT: .short (.LBB0_[[x]]-([[ANCHOR]]+4))/2
+
+complex:
+ call void @foo(double 12345.0)
+ switch i32 %sw, label %second [ i32 0, label %other
+ i32 1, label %third
+ i32 2, label %end
+ i32 3, label %other ]
+
+second:
+ ret i32 43
+third:
+ call i32 @llvm.arm.space(i32 970, i32 undef)
+ ret i32 0
+
+other:
+ call void @bar()
+ unreachable
+
+end:
+ ret i32 42
+}
+
+declare void @bar()
OpenPOWER on IntegriCloud