diff options
| author | Alexander Richardson <arichardson.kde@gmail.com> | 2018-05-16 08:58:26 +0000 |
|---|---|---|
| committer | Alexander Richardson <arichardson.kde@gmail.com> | 2018-05-16 08:58:26 +0000 |
| commit | 8f44579d0bf1da80adebf6a7cd1095eabc1e1e0e (patch) | |
| tree | 7ae3f142009fdd0b124b8204306d8cce218d62b8 | |
| parent | 85e38ee18e5c81fcedddf2612f1d7e298c0dad01 (diff) | |
| download | bcm5719-llvm-8f44579d0bf1da80adebf6a7cd1095eabc1e1e0e.tar.gz bcm5719-llvm-8f44579d0bf1da80adebf6a7cd1095eabc1e1e0e.zip | |
Emit a left-shift instead of a power-of-two multiply for jump-tables
Summary:
SelectionDAGLegalize::ExpandNode() inserts an ISD::MUL when lowering a
BR_JT opcode. While many backends optimize this multiply into a shift, some do
not: e.g. the MIPS backend currently always lowers it into a sequence of
load-immediate+multiply+mflo in MipsSETargetLowering::lowerMulDiv().
I initially changed the multiply to a shift in the MIPS backend but it
turns out that would not have handled the MIPSR6 case and was a lot more
code than doing it in LegalizeDAG.
I believe performing this simple optimization in LegalizeDAG instead of
each individual backend is the better solution since this also fixes other
backends such as MSP430, which calls the multiply runtime function
__mspabi_mpyi without this patch.
Reviewers: sdardis, atanasyan, pftbest, asl
Reviewed By: sdardis
Subscribers: llvm-commits
Differential Revision: https://reviews.llvm.org/D45760
llvm-svn: 332439
| -rw-r--r-- | llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp | 13 | ||||
| -rw-r--r-- | llvm/test/CodeGen/MSP430/jumptable.ll | 9 | ||||
| -rw-r--r-- | llvm/test/CodeGen/Mips/2010-07-20-Switch.ll | 3 | ||||
| -rw-r--r-- | llvm/test/CodeGen/Mips/indirect-jump-hazard/jumptables.ll | 8 | ||||
| -rw-r--r-- | llvm/test/CodeGen/Mips/jump-table-mul.ll | 66 |
5 files changed, 87 insertions, 12 deletions
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp index 71bb3c7d024..7a3cd9e3a7d 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp @@ -3688,8 +3688,17 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) { unsigned EntrySize = DAG.getMachineFunction().getJumpTableInfo()->getEntrySize(TD); - Index = DAG.getNode(ISD::MUL, dl, Index.getValueType(), Index, - DAG.getConstant(EntrySize, dl, Index.getValueType())); + // For power-of-two jumptable entry sizes convert multiplication to a shift. + // This transformation needs to be done here since otherwise the MIPS + // backend will end up emitting a three instruction multiply sequence + // instead of a single shift and MSP430 will call a runtime function. + if (llvm::isPowerOf2_32(EntrySize)) + Index = DAG.getNode( + ISD::SHL, dl, Index.getValueType(), Index, + DAG.getConstant(llvm::Log2_32(EntrySize), dl, Index.getValueType())); + else + Index = DAG.getNode(ISD::MUL, dl, Index.getValueType(), Index, + DAG.getConstant(EntrySize, dl, Index.getValueType())); SDValue Addr = DAG.getNode(ISD::ADD, dl, Index.getValueType(), Index, Table); diff --git a/llvm/test/CodeGen/MSP430/jumptable.ll b/llvm/test/CodeGen/MSP430/jumptable.ll index b4366251698..49f23166a0a 100644 --- a/llvm/test/CodeGen/MSP430/jumptable.ll +++ b/llvm/test/CodeGen/MSP430/jumptable.ll @@ -7,13 +7,16 @@ target triple = "msp430---elf" define i16 @test(i16 %i) #0 { entry: ; CHECK-LABEL: test: +; CHECK: sub.w #4, r1 +; CHECK-NEXT: mov.w r12, 0(r1) +; CHECK-NEXT: cmp.w #4, r12 +; CHECK-NEXT: jhs .LBB0_3 %retval = alloca i16, align 2 %i.addr = alloca i16, align 2 store i16 %i, i16* %i.addr, align 2 %0 = load i16, i16* %i.addr, align 2 -; CHECK: mov.w #2, r13 -; CHECK: call #__mspabi_mpyi -; CHECK: br .LJTI0_0(r12) +; CHECK: rla.w r12 +; CHECK-NEXT: br .LJTI0_0(r12) switch i16 %0, label %sw.default [ i16 0, label %sw.bb i16 1, label %sw.bb1 diff --git 
a/llvm/test/CodeGen/Mips/2010-07-20-Switch.ll b/llvm/test/CodeGen/Mips/2010-07-20-Switch.ll index 087a34f3c1b..c16cea5ea25 100644 --- a/llvm/test/CodeGen/Mips/2010-07-20-Switch.ll +++ b/llvm/test/CodeGen/Mips/2010-07-20-Switch.ll @@ -28,7 +28,8 @@ entry: ; PIC-O32: addu $[[R5:[0-9]+]], $[[R4:[0-9]+]] ; PIC-O32: jr $[[R5]] -; STATIC-N64: mflo $[[R0:[0-9]]] +; STATIC-N64: dsrl $[[I32:[0-9]]], ${{[0-9]+}}, 32 +; STATIC-N64: dsll $[[R0:[0-9]]], $[[I32]], 3 ; STATIC-N64: lui $[[R1:[0-9]]], %highest(.LJTI0_0) ; STATIC-N64: daddiu $[[R2:[0-9]]], $[[R1]], %higher(.LJTI0_0) ; STATIC-N64: dsll $[[R3:[0-9]]], $[[R2]], 16 diff --git a/llvm/test/CodeGen/Mips/indirect-jump-hazard/jumptables.ll b/llvm/test/CodeGen/Mips/indirect-jump-hazard/jumptables.ll index c530dd614ef..4f2339d18c3 100644 --- a/llvm/test/CodeGen/Mips/indirect-jump-hazard/jumptables.ll +++ b/llvm/test/CodeGen/Mips/indirect-jump-hazard/jumptables.ll @@ -161,9 +161,7 @@ define i8* @_Z3fooi(i32 signext %Letter) { ; MIPS64R2-NEXT: beqz $1, .LBB0_3 ; MIPS64R2-NEXT: nop ; MIPS64R2-NEXT: .LBB0_1: # %entry -; MIPS64R2-NEXT: daddiu $1, $zero, 8 -; MIPS64R2-NEXT: dmult $2, $1 -; MIPS64R2-NEXT: mflo $1 +; MIPS64R2-NEXT: dsll $1, $2, 3 ; MIPS64R2-NEXT: lui $2, %highest(.LJTI0_0) ; MIPS64R2-NEXT: daddiu $2, $2, %higher(.LJTI0_0) ; MIPS64R2-NEXT: dsll $2, $2, 16 @@ -481,9 +479,7 @@ define i8* @_Z3fooi(i32 signext %Letter) { ; PIC-MIPS64R2-NEXT: beqz $1, .LBB0_3 ; PIC-MIPS64R2-NEXT: nop ; PIC-MIPS64R2-NEXT: .LBB0_1: # %entry -; PIC-MIPS64R2-NEXT: daddiu $1, $zero, 8 -; PIC-MIPS64R2-NEXT: dmult $3, $1 -; PIC-MIPS64R2-NEXT: mflo $1 +; PIC-MIPS64R2-NEXT: dsll $1, $3, 3 ; PIC-MIPS64R2-NEXT: ld $3, %got_page(.LJTI0_0)($2) ; PIC-MIPS64R2-NEXT: daddu $1, $1, $3 ; PIC-MIPS64R2-NEXT: ld $1, %got_ofst(.LJTI0_0)($1) diff --git a/llvm/test/CodeGen/Mips/jump-table-mul.ll b/llvm/test/CodeGen/Mips/jump-table-mul.ll new file mode 100644 index 00000000000..ef7452cf253 --- /dev/null +++ b/llvm/test/CodeGen/Mips/jump-table-mul.ll @@ -0,0 +1,66 
@@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; We used to generate a mul+mflo sequence instead of shifting by 2/3 to get the jump table address +; RUN: llc %s -O2 -mtriple=mips64-unknown-freebsd -target-abi n64 -relocation-model=pic -o - | FileCheck %s + +define i64 @test(i64 %arg) { +; CHECK-LABEL: test: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: lui $1, %hi(%neg(%gp_rel(test))) +; CHECK-NEXT: daddu $2, $1, $25 +; CHECK-NEXT: sltiu $1, $4, 11 +; CHECK-NEXT: beqz $1, .LBB0_3 +; CHECK-NEXT: nop +; CHECK-NEXT: .LBB0_1: # %entry +; CHECK-NEXT: daddiu $1, $2, %lo(%neg(%gp_rel(test))) +; CHECK-NEXT: dsll $2, $4, 3 +; Previously this dsll was the following sequence: +; daddiu $2, $zero, 8 +; dmult $4, $2 +; mflo $2 +; CHECK-NEXT: ld $3, %got_page(.LJTI0_0)($1) +; CHECK-NEXT: daddu $2, $2, $3 +; CHECK-NEXT: ld $2, %got_ofst(.LJTI0_0)($2) +; CHECK-NEXT: daddu $1, $2, $1 +; CHECK-NEXT: jr $1 +; CHECK-NEXT: nop +; CHECK-NEXT: .LBB0_2: # %sw.bb +; CHECK-NEXT: jr $ra +; CHECK-NEXT: daddiu $2, $zero, 1 +; CHECK-NEXT: .LBB0_3: # %default +; CHECK-NEXT: jr $ra +; CHECK-NEXT: daddiu $2, $zero, 1234 +; CHECK-NEXT: .LBB0_4: # %sw.bb1 +; CHECK-NEXT: jr $ra +; CHECK-NEXT: daddiu $2, $zero, 0 +entry: + switch i64 %arg, label %default [ + i64 0, label %sw.bb + i64 3, label %sw.bb + i64 5, label %sw.bb + i64 10, label %sw.bb1 + ] + +default: + ret i64 1234 + +sw.bb: + ret i64 1 + +sw.bb1: + ret i64 0 +} + +; CHECK-LABEL: .section .rodata,"a",@progbits +; CHECK-NEXT: .p2align 3 +; CHECK-LABEL: .LJTI0_0: +; CHECK-NEXT: .gpdword .LBB0_2 +; CHECK-NEXT: .gpdword .LBB0_3 +; CHECK-NEXT: .gpdword .LBB0_3 +; CHECK-NEXT: .gpdword .LBB0_2 +; CHECK-NEXT: .gpdword .LBB0_3 +; CHECK-NEXT: .gpdword .LBB0_2 +; CHECK-NEXT: .gpdword .LBB0_3 +; CHECK-NEXT: .gpdword .LBB0_3 +; CHECK-NEXT: .gpdword .LBB0_3 +; CHECK-NEXT: .gpdword .LBB0_3 +; CHECK-NEXT: .gpdword .LBB0_4 |

