author    Alex Bradbury <asb@lowrisc.org>  2018-10-12 23:18:52 +0000
committer Alex Bradbury <asb@lowrisc.org>  2018-10-12 23:18:52 +0000
commit    748d080e6288ec3b2edd1a1bfdcf21491e816fd3 (patch)
tree      0e74da5b9307e3ef6d1a553cdc446f1b81693c9e
parent    71f484c967ad7b41c9cca6b8c3aef6cf340a8877 (diff)
[RISCV] Eliminate unnecessary masking of promoted shift amounts
SelectionDAGBuilder::visitShift will always zero-extend a shift amount when it
is promoted to the ShiftAmountTy. This results in zero-extension (masking)
which is unnecessary for RISC-V, as the shift operations only read the lower 5
or 6 bits (RV32 or RV64).

I initially proposed adding a getExtendForShiftAmount hook so the shift amount
can be any-extended (D52975). @efriedma explained this was unsafe, so I have
instead eliminated the unnecessary AND operations at instruction selection
time, in a manner similar to X86InstrCompiler.td.

Differential Revision: https://reviews.llvm.org/D53224

llvm-svn: 344432
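To illustrate the fold (a minimal sketch mirroring the sll_redundant_mask case
in the new test file below): on RV32 a mask of 31 (0b11111) has five trailing
ones, so the new immshiftxlen leaf accepts it and the AND is folded away,
whereas a mask of 15 (0b1111) has only four and is kept.

    ; Input IR: the shift amount is masked to the low 5 bits.
    define i32 @sll_redundant_mask(i32 %a, i32 %b) nounwind {
      %1 = and i32 %b, 31
      %2 = shl i32 %a, %1
      ret i32 %2
    }

    ; RV32I output after this patch: SLL already reads only bits [4:0] of
    ; rs2, so the redundant mask is dropped and no andi is emitted.
    sll a0, a0, a1
    ret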
-rw-r--r--  llvm/lib/Target/RISCV/RISCVInstrInfo.td        | 23
-rw-r--r--  llvm/test/CodeGen/RISCV/alu16.ll               |  9
-rw-r--r--  llvm/test/CodeGen/RISCV/alu8.ll                |  5
-rw-r--r--  llvm/test/CodeGen/RISCV/shift-masked-shamt.ll  | 70
4 files changed, 90 insertions(+), 17 deletions(-)
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.td b/llvm/lib/Target/RISCV/RISCVInstrInfo.td
index 5ca1cbd165d..50012569a74 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfo.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.td
@@ -205,6 +205,12 @@ def ixlenimm : Operand<XLenVT> {
// Standalone (codegen-only) immleaf patterns.
def simm32 : ImmLeaf<XLenVT, [{return isInt<32>(Imm);}]>;
def simm32hi20 : ImmLeaf<XLenVT, [{return isShiftedInt<20, 12>(Imm);}]>;
+// A mask value that won't affect significant shift bits.
+def immshiftxlen : ImmLeaf<XLenVT, [{
+ if (Subtarget->is64Bit())
+ return countTrailingOnes<uint64_t>(Imm) >= 6;
+ return countTrailingOnes<uint64_t>(Imm) >= 5;
+}]>;
// Addressing modes.
// Necessary because a frameindex can't be matched directly in a pattern.
@@ -646,13 +652,24 @@ def : PatGprGpr<and, AND>;
def : PatGprSimm12<and, ANDI>;
def : PatGprGpr<xor, XOR>;
def : PatGprSimm12<xor, XORI>;
-def : PatGprGpr<shl, SLL>;
def : PatGprUimmLog2XLen<shl, SLLI>;
-def : PatGprGpr<srl, SRL>;
def : PatGprUimmLog2XLen<srl, SRLI>;
-def : PatGprGpr<sra, SRA>;
def : PatGprUimmLog2XLen<sra, SRAI>;
+// Match both a plain shift and one where the shift amount is masked (this is
+// typically introduced when the legalizer promotes the shift amount and
+// zero-extends it). For RISC-V, the mask is unnecessary as shifts in the base
+// ISA only read the least significant 5 bits (RV32I) or 6 bits (RV64I).
+multiclass VarShiftXLenPat<PatFrag ShiftOp, RVInst Inst> {
+ def : Pat<(ShiftOp GPR:$rs1, GPR:$rs2), (Inst GPR:$rs1, GPR:$rs2)>;
+ def : Pat<(ShiftOp GPR:$rs1, (and GPR:$rs2, immshiftxlen)),
+ (Inst GPR:$rs1, GPR:$rs2)>;
+}
+
+defm : VarShiftXLenPat<shl, SLL>;
+defm : VarShiftXLenPat<srl, SRL>;
+defm : VarShiftXLenPat<sra, SRA>;
+
/// FrameIndex calculations
def : Pat<(add (i32 AddrFI:$Rs), simm12:$imm12),
diff --git a/llvm/test/CodeGen/RISCV/alu16.ll b/llvm/test/CodeGen/RISCV/alu16.ll
index 20b79a987f6..79e74ffc8a5 100644
--- a/llvm/test/CodeGen/RISCV/alu16.ll
+++ b/llvm/test/CodeGen/RISCV/alu16.ll
@@ -6,8 +6,6 @@
; that legalisation of these non-native types doesn't introduce unnecessary
; inefficiencies.
-; TODO: it's unnecessary to mask (zero-extend) the shift amount.
-
define i16 @addi(i16 %a) nounwind {
; RV32I-LABEL: addi:
; RV32I: # %bb.0:
@@ -122,9 +120,6 @@ define i16 @sub(i16 %a, i16 %b) nounwind {
define i16 @sll(i16 %a, i16 %b) nounwind {
; RV32I-LABEL: sll:
; RV32I: # %bb.0:
-; RV32I-NEXT: lui a2, 16
-; RV32I-NEXT: addi a2, a2, -1
-; RV32I-NEXT: and a1, a1, a2
; RV32I-NEXT: sll a0, a0, a1
; RV32I-NEXT: ret
%1 = shl i16 %a, %b
@@ -173,7 +168,6 @@ define i16 @srl(i16 %a, i16 %b) nounwind {
; RV32I: # %bb.0:
; RV32I-NEXT: lui a2, 16
; RV32I-NEXT: addi a2, a2, -1
-; RV32I-NEXT: and a1, a1, a2
; RV32I-NEXT: and a0, a0, a2
; RV32I-NEXT: srl a0, a0, a1
; RV32I-NEXT: ret
@@ -184,9 +178,6 @@ define i16 @srl(i16 %a, i16 %b) nounwind {
define i16 @sra(i16 %a, i16 %b) nounwind {
; RV32I-LABEL: sra:
; RV32I: # %bb.0:
-; RV32I-NEXT: lui a2, 16
-; RV32I-NEXT: addi a2, a2, -1
-; RV32I-NEXT: and a1, a1, a2
; RV32I-NEXT: slli a0, a0, 16
; RV32I-NEXT: srai a0, a0, 16
; RV32I-NEXT: sra a0, a0, a1
diff --git a/llvm/test/CodeGen/RISCV/alu8.ll b/llvm/test/CodeGen/RISCV/alu8.ll
index f7d0e8beef3..ad97e620319 100644
--- a/llvm/test/CodeGen/RISCV/alu8.ll
+++ b/llvm/test/CodeGen/RISCV/alu8.ll
@@ -6,8 +6,6 @@
; that legalisation of these non-native types doesn't introduce unnecessary
; inefficiencies.
-; TODO: it's unnecessary to mask (zero-extend) the shift amount.
-
define i8 @addi(i8 %a) nounwind {
; RV32I-LABEL: addi:
; RV32I: # %bb.0:
@@ -118,7 +116,6 @@ define i8 @sub(i8 %a, i8 %b) nounwind {
define i8 @sll(i8 %a, i8 %b) nounwind {
; RV32I-LABEL: sll:
; RV32I: # %bb.0:
-; RV32I-NEXT: andi a1, a1, 255
; RV32I-NEXT: sll a0, a0, a1
; RV32I-NEXT: ret
%1 = shl i8 %a, %b
@@ -163,7 +160,6 @@ define i8 @xor(i8 %a, i8 %b) nounwind {
define i8 @srl(i8 %a, i8 %b) nounwind {
; RV32I-LABEL: srl:
; RV32I: # %bb.0:
-; RV32I-NEXT: andi a1, a1, 255
; RV32I-NEXT: andi a0, a0, 255
; RV32I-NEXT: srl a0, a0, a1
; RV32I-NEXT: ret
@@ -174,7 +170,6 @@ define i8 @srl(i8 %a, i8 %b) nounwind {
define i8 @sra(i8 %a, i8 %b) nounwind {
; RV32I-LABEL: sra:
; RV32I: # %bb.0:
-; RV32I-NEXT: andi a1, a1, 255
; RV32I-NEXT: slli a0, a0, 24
; RV32I-NEXT: srai a0, a0, 24
; RV32I-NEXT: sra a0, a0, a1
diff --git a/llvm/test/CodeGen/RISCV/shift-masked-shamt.ll b/llvm/test/CodeGen/RISCV/shift-masked-shamt.ll
new file mode 100644
index 00000000000..5c77aa2d77f
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/shift-masked-shamt.ll
@@ -0,0 +1,70 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=riscv32 -verify-machineinstrs < %s \
+; RUN: | FileCheck %s -check-prefix=RV32I
+
+; This test checks that unnecessary masking of shift amount operands is
+; eliminated during instruction selection. The test needs to ensure that the
+; masking is not removed if it may affect the shift amount.
+
+define i32 @sll_redundant_mask(i32 %a, i32 %b) nounwind {
+; RV32I-LABEL: sll_redundant_mask:
+; RV32I: # %bb.0:
+; RV32I-NEXT: sll a0, a0, a1
+; RV32I-NEXT: ret
+ %1 = and i32 %b, 31
+ %2 = shl i32 %a, %1
+ ret i32 %2
+}
+
+define i32 @sll_non_redundant_mask(i32 %a, i32 %b) nounwind {
+; RV32I-LABEL: sll_non_redundant_mask:
+; RV32I: # %bb.0:
+; RV32I-NEXT: andi a1, a1, 15
+; RV32I-NEXT: sll a0, a0, a1
+; RV32I-NEXT: ret
+ %1 = and i32 %b, 15
+ %2 = shl i32 %a, %1
+ ret i32 %2
+}
+
+define i32 @srl_redundant_mask(i32 %a, i32 %b) nounwind {
+; RV32I-LABEL: srl_redundant_mask:
+; RV32I: # %bb.0:
+; RV32I-NEXT: srl a0, a0, a1
+; RV32I-NEXT: ret
+ %1 = and i32 %b, 4095
+ %2 = lshr i32 %a, %1
+ ret i32 %2
+}
+
+define i32 @srl_non_redundant_mask(i32 %a, i32 %b) nounwind {
+; RV32I-LABEL: srl_non_redundant_mask:
+; RV32I: # %bb.0:
+; RV32I-NEXT: andi a1, a1, 7
+; RV32I-NEXT: srl a0, a0, a1
+; RV32I-NEXT: ret
+ %1 = and i32 %b, 7
+ %2 = lshr i32 %a, %1
+ ret i32 %2
+}
+
+define i32 @sra_redundant_mask(i32 %a, i32 %b) nounwind {
+; RV32I-LABEL: sra_redundant_mask:
+; RV32I: # %bb.0:
+; RV32I-NEXT: sra a0, a0, a1
+; RV32I-NEXT: ret
+ %1 = and i32 %b, 65535
+ %2 = ashr i32 %a, %1
+ ret i32 %2
+}
+
+define i32 @sra_non_redundant_mask(i32 %a, i32 %b) nounwind {
+; RV32I-LABEL: sra_non_redundant_mask:
+; RV32I: # %bb.0:
+; RV32I-NEXT: andi a1, a1, 32
+; RV32I-NEXT: sra a0, a0, a1
+; RV32I-NEXT: ret
+ %1 = and i32 %b, 32
+ %2 = ashr i32 %a, %1
+ ret i32 %2
+}