Emit a left-shift instead of a power-of-two multiply for jump-tables

Summary: SelectionDAGLegalize::ExpandNode() inserts an ISD::MUL when lowering a BR_JT opcode. While many backends optimize this multiply into a shift, some do not: e.g. the MIPS backend currently always lowers this into a sequence of load-immediate+multiply+mflo in MipsSETargetLowering::lowerMulDiv(). I initially changed the multiply to a shift in the MIPS backend, but it turns out that would not have handled the MIPSR6 case and was a lot more code than doing it in LegalizeDAG. I believe performing this simple optimization in LegalizeDAG instead of each individual backend is the better solution, since this also fixes other backends such as MSP430, which calls the multiply runtime function __mspabi_mpyi without this patch. Reviewers: sdardis, atanasyan, pftbest, asl Reviewed By: sdardis Subscribers: llvm-commits Differential Revision: https://reviews.llvm.org/D45760 llvm-svn: 332439
author: Alexander Richardson <arichardson.kde@gmail.com> 2018-05-16 08:58:26 +0000
committer: Alexander Richardson <arichardson.kde@gmail.com> 2018-05-16 08:58:26 +0000
commit: 8f44579d0bf1da80adebf6a7cd1095eabc1e1e0e (patch)
tree: 7ae3f142009fdd0b124b8204306d8cce218d62b8
parent: 85e38ee18e5c81fcedddf2612f1d7e298c0dad01 (diff)
download: bcm5719-llvm-8f44579d0bf1da80adebf6a7cd1095eabc1e1e0e.tar.gz
bcm5719-llvm-8f44579d0bf1da80adebf6a7cd1095eabc1e1e0e.zip
5 files changed, 87 insertions, 12 deletions
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
index 71bb3c7d024..7a3cd9e3a7d 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
@@ -3688,8 +3688,17 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) {
     unsigned EntrySize =
       DAG.getMachineFunction().getJumpTableInfo()->getEntrySize(TD);
 
-    Index = DAG.getNode(ISD::MUL, dl, Index.getValueType(), Index,
-                        DAG.getConstant(EntrySize, dl, Index.getValueType()));
+    // For power-of-two jumptable entry sizes convert multiplication to a shift.
+    // This transformation needs to be done here since otherwise the MIPS
+    // backend will end up emitting a three instruction multiply sequence
+    // instead of a single shift and MSP430 will call a runtime function.
+    if (llvm::isPowerOf2_32(EntrySize))
+      Index = DAG.getNode(
+          ISD::SHL, dl, Index.getValueType(), Index,
+          DAG.getConstant(llvm::Log2_32(EntrySize), dl, Index.getValueType()));
+    else
+      Index = DAG.getNode(ISD::MUL, dl, Index.getValueType(), Index,
+                          DAG.getConstant(EntrySize, dl, Index.getValueType()));
     SDValue Addr = DAG.getNode(ISD::ADD, dl, Index.getValueType(),
                                Index, Table);
 
diff --git a/llvm/test/CodeGen/MSP430/jumptable.ll b/llvm/test/CodeGen/MSP430/jumptable.ll
index b4366251698..49f23166a0a 100644
--- a/llvm/test/CodeGen/MSP430/jumptable.ll
+++ b/llvm/test/CodeGen/MSP430/jumptable.ll
@@ -7,13 +7,16 @@ target triple = "msp430---elf"
 define i16 @test(i16 %i) #0 {
 entry:
 ; CHECK-LABEL: test:
+; CHECK:      sub.w   #4, r1
+; CHECK-NEXT: mov.w   r12, 0(r1)
+; CHECK-NEXT: cmp.w   #4, r12
+; CHECK-NEXT: jhs     .LBB0_3
   %retval = alloca i16, align 2
   %i.addr = alloca i16, align 2
   store i16 %i, i16* %i.addr, align 2
   %0 = load i16, i16* %i.addr, align 2
-; CHECK: mov.w #2, r13
-; CHECK: call #__mspabi_mpyi
-; CHECK: br .LJTI0_0(r12)
+; CHECK:      rla.w r12
+; CHECK-NEXT: br .LJTI0_0(r12)
   switch i16 %0, label %sw.default [
     i16 0, label %sw.bb
     i16 1, label %sw.bb1
diff --git a/llvm/test/CodeGen/Mips/2010-07-20-Switch.ll b/llvm/test/CodeGen/Mips/2010-07-20-Switch.ll
index 087a34f3c1b..c16cea5ea25 100644
--- a/llvm/test/CodeGen/Mips/2010-07-20-Switch.ll
+++ b/llvm/test/CodeGen/Mips/2010-07-20-Switch.ll
@@ -28,7 +28,8 @@ entry:
 ; PIC-O32: addu $[[R5:[0-9]+]], $[[R4:[0-9]+]]
 ; PIC-O32: jr  $[[R5]]
 
-; STATIC-N64: mflo $[[R0:[0-9]]]
+; STATIC-N64: dsrl $[[I32:[0-9]]], ${{[0-9]+}}, 32
+; STATIC-N64: dsll $[[R0:[0-9]]], $[[I32]], 3
 ; STATIC-N64: lui $[[R1:[0-9]]], %highest(.LJTI0_0)
 ; STATIC-N64: daddiu $[[R2:[0-9]]], $[[R1]], %higher(.LJTI0_0)
 ; STATIC-N64: dsll $[[R3:[0-9]]], $[[R2]], 16
diff --git a/llvm/test/CodeGen/Mips/indirect-jump-hazard/jumptables.ll b/llvm/test/CodeGen/Mips/indirect-jump-hazard/jumptables.ll
index c530dd614ef..4f2339d18c3 100644
--- a/llvm/test/CodeGen/Mips/indirect-jump-hazard/jumptables.ll
+++ b/llvm/test/CodeGen/Mips/indirect-jump-hazard/jumptables.ll
@@ -161,9 +161,7 @@ define i8* @_Z3fooi(i32 signext %Letter) {
 ; MIPS64R2-NEXT:    beqz $1, .LBB0_3
 ; MIPS64R2-NEXT:    nop
 ; MIPS64R2-NEXT:  .LBB0_1: # %entry
-; MIPS64R2-NEXT:    daddiu $1, $zero, 8
-; MIPS64R2-NEXT:    dmult $2, $1
-; MIPS64R2-NEXT:    mflo $1
+; MIPS64R2-NEXT:    dsll $1, $2, 3
 ; MIPS64R2-NEXT:    lui $2, %highest(.LJTI0_0)
 ; MIPS64R2-NEXT:    daddiu $2, $2, %higher(.LJTI0_0)
 ; MIPS64R2-NEXT:    dsll $2, $2, 16
@@ -481,9 +479,7 @@ define i8* @_Z3fooi(i32 signext %Letter) {
 ; PIC-MIPS64R2-NEXT:    beqz $1, .LBB0_3
 ; PIC-MIPS64R2-NEXT:    nop
 ; PIC-MIPS64R2-NEXT:  .LBB0_1: # %entry
-; PIC-MIPS64R2-NEXT:    daddiu $1, $zero, 8
-; PIC-MIPS64R2-NEXT:    dmult $3, $1
-; PIC-MIPS64R2-NEXT:    mflo $1
+; PIC-MIPS64R2-NEXT:    dsll $1, $3, 3
 ; PIC-MIPS64R2-NEXT:    ld $3, %got_page(.LJTI0_0)($2)
 ; PIC-MIPS64R2-NEXT:    daddu $1, $1, $3
 ; PIC-MIPS64R2-NEXT:    ld $1, %got_ofst(.LJTI0_0)($1)
diff --git a/llvm/test/CodeGen/Mips/jump-table-mul.ll b/llvm/test/CodeGen/Mips/jump-table-mul.ll
new file mode 100644
index 00000000000..ef7452cf253
--- /dev/null
+++ b/llvm/test/CodeGen/Mips/jump-table-mul.ll
@@ -0,0 +1,66 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; We used to generate a mul+mflo sequence instead of shifting by 2/3 to get the jump table address
+; RUN: llc %s -O2 -mtriple=mips64-unknown-freebsd -target-abi n64 -relocation-model=pic -o - | FileCheck %s
+
+define i64 @test(i64 %arg) {
+; CHECK-LABEL: test:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    lui $1, %hi(%neg(%gp_rel(test)))
+; CHECK-NEXT:    daddu $2, $1, $25
+; CHECK-NEXT:    sltiu $1, $4, 11
+; CHECK-NEXT:    beqz $1, .LBB0_3
+; CHECK-NEXT:    nop
+; CHECK-NEXT:  .LBB0_1: # %entry
+; CHECK-NEXT:    daddiu $1, $2, %lo(%neg(%gp_rel(test)))
+; CHECK-NEXT:    dsll $2, $4, 3
+; Previously this dsll was the following sequence:
+;	daddiu	$2, $zero, 8
+;	dmult	$4, $2
+;	mflo	$2
+; CHECK-NEXT:    ld $3, %got_page(.LJTI0_0)($1)
+; CHECK-NEXT:    daddu $2, $2, $3
+; CHECK-NEXT:    ld $2, %got_ofst(.LJTI0_0)($2)
+; CHECK-NEXT:    daddu $1, $2, $1
+; CHECK-NEXT:    jr $1
+; CHECK-NEXT:    nop
+; CHECK-NEXT:  .LBB0_2: # %sw.bb
+; CHECK-NEXT:    jr $ra
+; CHECK-NEXT:    daddiu $2, $zero, 1
+; CHECK-NEXT:  .LBB0_3: # %default
+; CHECK-NEXT:    jr $ra
+; CHECK-NEXT:    daddiu $2, $zero, 1234
+; CHECK-NEXT:  .LBB0_4: # %sw.bb1
+; CHECK-NEXT:    jr $ra
+; CHECK-NEXT:    daddiu $2, $zero, 0
+entry:
+  switch i64 %arg, label %default [
+    i64 0, label %sw.bb
+    i64 3, label %sw.bb
+    i64 5, label %sw.bb
+    i64 10, label %sw.bb1
+  ]
+
+default:
+  ret i64 1234
+
+sw.bb:
+  ret i64 1
+
+sw.bb1:
+  ret i64 0
+}
+
+; CHECK-LABEL: 	.section	.rodata,"a",@progbits
+; CHECK-NEXT: 	.p2align	3
+; CHECK-LABEL: .LJTI0_0:
+; CHECK-NEXT: 	.gpdword	.LBB0_2
+; CHECK-NEXT: 	.gpdword	.LBB0_3
+; CHECK-NEXT: 	.gpdword	.LBB0_3
+; CHECK-NEXT: 	.gpdword	.LBB0_2
+; CHECK-NEXT: 	.gpdword	.LBB0_3
+; CHECK-NEXT: 	.gpdword	.LBB0_2
+; CHECK-NEXT: 	.gpdword	.LBB0_3
+; CHECK-NEXT: 	.gpdword	.LBB0_3
+; CHECK-NEXT: 	.gpdword	.LBB0_3
+; CHECK-NEXT: 	.gpdword	.LBB0_3
+; CHECK-NEXT: 	.gpdword	.LBB0_4
author	Alexander Richardson <arichardson.kde@gmail.com>	2018-05-16 08:58:26 +0000
committer	Alexander Richardson <arichardson.kde@gmail.com>	2018-05-16 08:58:26 +0000
commit	8f44579d0bf1da80adebf6a7cd1095eabc1e1e0e (patch)
tree	7ae3f142009fdd0b124b8204306d8cce218d62b8
parent	85e38ee18e5c81fcedddf2612f1d7e298c0dad01 (diff)
download	bcm5719-llvm-8f44579d0bf1da80adebf6a7cd1095eabc1e1e0e.tar.gz bcm5719-llvm-8f44579d0bf1da80adebf6a7cd1095eabc1e1e0e.zip