summaryrefslogtreecommitdiffstats
path: root/llvm/lib
diff options
context:
space:
mode:
author Sam Parker <sam.parker@arm.com> 2019-01-08 10:12:36 +0000
committer Sam Parker <sam.parker@arm.com> 2019-01-08 10:12:36 +0000
commit 53000a74a5c9f1be2ec09bfeb3533a80667e4b2f (patch)
tree 2fbf9fc71c7415389fa2e25d11dbcba18f9a50a1 /llvm/lib
parent e72804066c934121c066b2d8f5a2da4be17c9502 (diff)
download bcm5719-llvm-53000a74a5c9f1be2ec09bfeb3533a80667e4b2f.tar.gz
bcm5719-llvm-53000a74a5c9f1be2ec09bfeb3533a80667e4b2f.zip
[ARM] Add missing patterns for DSP muls
Using a PatLeaf for sext_16_node allowed matching smulbb and smlabb instructions once the operands had been sign extended. But we also need to use sext_inreg operands along with sext_16_node to catch a few more cases that enable us to remove the unnecessary sxth.
Differential Revision: https://reviews.llvm.org/D55992
llvm-svn: 350613
Diffstat (limited to 'llvm/lib')
-rw-r--r-- llvm/lib/Target/ARM/ARMInstrInfo.td 95
-rw-r--r-- llvm/lib/Target/ARM/ARMInstrThumb2.td 45
2 files changed, 60 insertions, 80 deletions
diff --git a/llvm/lib/Target/ARM/ARMInstrInfo.td b/llvm/lib/Target/ARM/ARMInstrInfo.td
index 4f42601a894..46bdeba6bbd 100644
--- a/llvm/lib/Target/ARM/ARMInstrInfo.td
+++ b/llvm/lib/Target/ARM/ARMInstrInfo.td
@@ -426,24 +426,22 @@ def imm16_31 : ImmLeaf<i32, [{
// sext_16_node predicate - True if the SDNode is sign-extended 16 or more bits.
def sext_16_node : PatLeaf<(i32 GPR:$a), [{
- if (CurDAG->ComputeNumSignBits(SDValue(N,0)) >= 17)
- return true;
-
- if (N->getOpcode() != ISD::SRA)
- return false;
- if (N->getOperand(0).getOpcode() != ISD::SHL)
- return false;
-
- auto *ShiftVal = dyn_cast<ConstantSDNode>(N->getOperand(1));
- if (!ShiftVal || ShiftVal->getZExtValue() != 16)
- return false;
+ return CurDAG->ComputeNumSignBits(SDValue(N,0)) >= 17;
+}]>;
- ShiftVal = dyn_cast<ConstantSDNode>(N->getOperand(0)->getOperand(1));
- if (!ShiftVal || ShiftVal->getZExtValue() != 16)
- return false;
+def sext_bottom_16 : PatFrag<(ops node:$a),
+ (sext_inreg node:$a, i16)>;
+def sext_top_16 : PatFrag<(ops node:$a),
+ (i32 (sra node:$a, (i32 16)))>;
- return true;
-}]>;
+def bb_mul : PatFrag<(ops node:$a, node:$b),
+ (mul (sext_bottom_16 node:$a), (sext_bottom_16 node:$b))>;
+def bt_mul : PatFrag<(ops node:$a, node:$b),
+ (mul (sext_bottom_16 node:$a), (sra node:$b, (i32 16)))>;
+def tb_mul : PatFrag<(ops node:$a, node:$b),
+ (mul (sra node:$a, (i32 16)), (sext_bottom_16 node:$b))>;
+def tt_mul : PatFrag<(ops node:$a, node:$b),
+ (mul (sra node:$a, (i32 16)), (sra node:$b, (i32 16)))>;
/// Split a 32-bit immediate into two 16 bit parts.
def hi16 : SDNodeXForm<imm, [{
@@ -4241,29 +4239,25 @@ def SMMLSR : AMul2Ia <0b0111010, 0b1111, (outs GPR:$Rd),
multiclass AI_smul<string opc> {
def BB : AMulxyI<0b0001011, 0b00, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm),
IIC_iMUL16, !strconcat(opc, "bb"), "\t$Rd, $Rn, $Rm",
- [(set GPR:$Rd, (mul (sext_inreg GPR:$Rn, i16),
- (sext_inreg GPR:$Rm, i16)))]>,
+ [(set GPR:$Rd, (bb_mul GPR:$Rn, GPR:$Rm))]>,
Requires<[IsARM, HasV5TE]>,
Sched<[WriteMUL16, ReadMUL, ReadMUL]>;
def BT : AMulxyI<0b0001011, 0b10, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm),
IIC_iMUL16, !strconcat(opc, "bt"), "\t$Rd, $Rn, $Rm",
- [(set GPR:$Rd, (mul (sext_inreg GPR:$Rn, i16),
- (sra GPR:$Rm, (i32 16))))]>,
+ [(set GPR:$Rd, (bt_mul GPR:$Rn, GPR:$Rm))]>,
Requires<[IsARM, HasV5TE]>,
Sched<[WriteMUL16, ReadMUL, ReadMUL]>;
def TB : AMulxyI<0b0001011, 0b01, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm),
IIC_iMUL16, !strconcat(opc, "tb"), "\t$Rd, $Rn, $Rm",
- [(set GPR:$Rd, (mul (sra GPR:$Rn, (i32 16)),
- (sext_inreg GPR:$Rm, i16)))]>,
+ [(set GPR:$Rd, (tb_mul GPR:$Rn, GPR:$Rm))]>,
Requires<[IsARM, HasV5TE]>,
Sched<[WriteMUL16, ReadMUL, ReadMUL]>;
def TT : AMulxyI<0b0001011, 0b11, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm),
IIC_iMUL16, !strconcat(opc, "tt"), "\t$Rd, $Rn, $Rm",
- [(set GPR:$Rd, (mul (sra GPR:$Rn, (i32 16)),
- (sra GPR:$Rm, (i32 16))))]>,
+ [(set GPR:$Rd, (tt_mul GPR:$Rn, GPR:$Rm))]>,
Requires<[IsARM, HasV5TE]>,
Sched<[WriteMUL16, ReadMUL, ReadMUL]>;
@@ -4287,35 +4281,31 @@ multiclass AI_smla<string opc> {
(ins GPRnopc:$Rn, GPRnopc:$Rm, GPR:$Ra),
IIC_iMAC16, !strconcat(opc, "bb"), "\t$Rd, $Rn, $Rm, $Ra",
[(set GPRnopc:$Rd, (add GPR:$Ra,
- (mul (sext_inreg GPRnopc:$Rn, i16),
- (sext_inreg GPRnopc:$Rm, i16))))]>,
+ (bb_mul GPRnopc:$Rn, GPRnopc:$Rm)))]>,
Requires<[IsARM, HasV5TE, UseMulOps]>,
Sched<[WriteMAC16, ReadMUL, ReadMUL, ReadMAC]>;
def BT : AMulxyIa<0b0001000, 0b10, (outs GPRnopc:$Rd),
(ins GPRnopc:$Rn, GPRnopc:$Rm, GPR:$Ra),
IIC_iMAC16, !strconcat(opc, "bt"), "\t$Rd, $Rn, $Rm, $Ra",
- [(set GPRnopc:$Rd,
- (add GPR:$Ra, (mul (sext_inreg GPRnopc:$Rn, i16),
- (sra GPRnopc:$Rm, (i32 16)))))]>,
+ [(set GPRnopc:$Rd, (add GPR:$Ra,
+ (bt_mul GPRnopc:$Rn, GPRnopc:$Rm)))]>,
Requires<[IsARM, HasV5TE, UseMulOps]>,
Sched<[WriteMAC16, ReadMUL, ReadMUL, ReadMAC]>;
def TB : AMulxyIa<0b0001000, 0b01, (outs GPRnopc:$Rd),
(ins GPRnopc:$Rn, GPRnopc:$Rm, GPR:$Ra),
IIC_iMAC16, !strconcat(opc, "tb"), "\t$Rd, $Rn, $Rm, $Ra",
- [(set GPRnopc:$Rd,
- (add GPR:$Ra, (mul (sra GPRnopc:$Rn, (i32 16)),
- (sext_inreg GPRnopc:$Rm, i16))))]>,
+ [(set GPRnopc:$Rd, (add GPR:$Ra,
+ (tb_mul GPRnopc:$Rn, GPRnopc:$Rm)))]>,
Requires<[IsARM, HasV5TE, UseMulOps]>,
Sched<[WriteMAC16, ReadMUL, ReadMUL, ReadMAC]>;
def TT : AMulxyIa<0b0001000, 0b11, (outs GPRnopc:$Rd),
(ins GPRnopc:$Rn, GPRnopc:$Rm, GPR:$Ra),
IIC_iMAC16, !strconcat(opc, "tt"), "\t$Rd, $Rn, $Rm, $Ra",
- [(set GPRnopc:$Rd,
- (add GPR:$Ra, (mul (sra GPRnopc:$Rn, (i32 16)),
- (sra GPRnopc:$Rm, (i32 16)))))]>,
+ [(set GPRnopc:$Rd, (add GPR:$Ra,
+ (tt_mul GPRnopc:$Rn, GPRnopc:$Rm)))]>,
Requires<[IsARM, HasV5TE, UseMulOps]>,
Sched<[WriteMAC16, ReadMUL, ReadMUL, ReadMAC]>;
@@ -5816,26 +5806,21 @@ def : ARMPat<(extloadi16 addrmodepc:$addr), (PICLDRH addrmodepc:$addr)>;
// smul* and smla*
def : ARMV5TEPat<(mul sext_16_node:$a, sext_16_node:$b),
- (SMULBB GPR:$a, GPR:$b)>,
- Sched<[WriteMUL32, ReadMUL, ReadMUL]>;
-def : ARMV5TEPat<(mul sext_16_node:$a, (sra GPR:$b, (i32 16))),
- (SMULBT GPR:$a, GPR:$b)>,
- Sched<[WriteMUL32, ReadMUL, ReadMUL]>;
-def : ARMV5TEPat<(mul (sra GPR:$a, (i32 16)), sext_16_node:$b),
- (SMULTB GPR:$a, GPR:$b)>,
- Sched<[WriteMUL32, ReadMUL, ReadMUL]>;
-def : ARMV5MOPat<(add GPR:$acc,
- (mul sext_16_node:$a, sext_16_node:$b)),
- (SMLABB GPR:$a, GPR:$b, GPR:$acc)>,
- Sched<[WriteMUL32, ReadMUL, ReadMUL]>;
-def : ARMV5MOPat<(add GPR:$acc,
- (mul sext_16_node:$a, (sra GPR:$b, (i32 16)))),
- (SMLABT GPR:$a, GPR:$b, GPR:$acc)>,
- Sched<[WriteMUL32, ReadMUL, ReadMUL]>;
-def : ARMV5MOPat<(add GPR:$acc,
- (mul (sra GPR:$a, (i32 16)), sext_16_node:$b)),
- (SMLATB GPR:$a, GPR:$b, GPR:$acc)>,
- Sched<[WriteMUL32, ReadMUL, ReadMUL]>;
+ (SMULBB GPR:$a, GPR:$b)>;
+def : ARMV5TEPat<(mul sext_16_node:$a, (sext_bottom_16 GPR:$b)),
+ (SMULBB GPR:$a, GPR:$b)>;
+def : ARMV5TEPat<(mul sext_16_node:$a, (sext_top_16 GPR:$b)),
+ (SMULBT GPR:$a, GPR:$b)>;
+def : ARMV5TEPat<(mul (sext_top_16 GPR:$a), sext_16_node:$b),
+ (SMULTB GPR:$a, GPR:$b)>;
+def : ARMV5MOPat<(add GPR:$acc, (mul sext_16_node:$a, sext_16_node:$b)),
+ (SMLABB GPR:$a, GPR:$b, GPR:$acc)>;
+def : ARMV5MOPat<(add GPR:$acc, (mul sext_16_node:$a, (sext_bottom_16 GPR:$b))),
+ (SMLABB GPR:$a, GPR:$b, GPR:$acc)>;
+def : ARMV5MOPat<(add GPR:$acc, (mul sext_16_node:$a, (sext_top_16 GPR:$b))),
+ (SMLABT GPR:$a, GPR:$b, GPR:$acc)>;
+def : ARMV5MOPat<(add GPR:$acc, (mul (sext_top_16 GPR:$a), sext_16_node:$b)),
+ (SMLATB GPR:$a, GPR:$b, GPR:$acc)>;
def : ARMV5TEPat<(int_arm_smulbb GPR:$a, GPR:$b),
(SMULBB GPR:$a, GPR:$b)>;
diff --git a/llvm/lib/Target/ARM/ARMInstrThumb2.td b/llvm/lib/Target/ARM/ARMInstrThumb2.td
index 18a7ee4c419..7a6673b49d5 100644
--- a/llvm/lib/Target/ARM/ARMInstrThumb2.td
+++ b/llvm/lib/Target/ARM/ARMInstrThumb2.td
@@ -2732,28 +2732,25 @@ class T2ThreeRegSMUL<bits<3> op22_20, bits<2> op5_4, string opc,
}
def t2SMULBB : T2ThreeRegSMUL<0b001, 0b00, "smulbb",
- [(set rGPR:$Rd, (mul (sext_inreg rGPR:$Rn, i16),
- (sext_inreg rGPR:$Rm, i16)))]>;
+ [(set rGPR:$Rd, (bb_mul rGPR:$Rn, rGPR:$Rm))]>;
def t2SMULBT : T2ThreeRegSMUL<0b001, 0b01, "smulbt",
- [(set rGPR:$Rd, (mul (sext_inreg rGPR:$Rn, i16),
- (sra rGPR:$Rm, (i32 16))))]>;
+ [(set rGPR:$Rd, (bt_mul rGPR:$Rn, rGPR:$Rm))]>;
def t2SMULTB : T2ThreeRegSMUL<0b001, 0b10, "smultb",
- [(set rGPR:$Rd, (mul (sra rGPR:$Rn, (i32 16)),
- (sext_inreg rGPR:$Rm, i16)))]>;
+ [(set rGPR:$Rd, (tb_mul rGPR:$Rn, rGPR:$Rm))]>;
def t2SMULTT : T2ThreeRegSMUL<0b001, 0b11, "smultt",
- [(set rGPR:$Rd, (mul (sra rGPR:$Rn, (i32 16)),
- (sra rGPR:$Rm, (i32 16))))]>;
+ [(set rGPR:$Rd, (tt_mul rGPR:$Rn, rGPR:$Rm))]>;
def t2SMULWB : T2ThreeRegSMUL<0b011, 0b00, "smulwb",
[(set rGPR:$Rd, (ARMsmulwb rGPR:$Rn, rGPR:$Rm))]>;
def t2SMULWT : T2ThreeRegSMUL<0b011, 0b01, "smulwt",
[(set rGPR:$Rd, (ARMsmulwt rGPR:$Rn, rGPR:$Rm))]>;
-def : Thumb2DSPPat<(mul sext_16_node:$Rm, sext_16_node:$Rn),
- (t2SMULBB rGPR:$Rm, rGPR:$Rn)>;
-def : Thumb2DSPPat<(mul sext_16_node:$Rn, (sra rGPR:$Rm, (i32 16))),
+def : Thumb2DSPPat<(mul sext_16_node:$Rn, (sext_bottom_16 rGPR:$Rm)),
+ (t2SMULBB rGPR:$Rn, rGPR:$Rm)>;
+def : Thumb2DSPPat<(mul sext_16_node:$Rn, (sext_top_16 rGPR:$Rm)),
(t2SMULBT rGPR:$Rn, rGPR:$Rm)>;
-def : Thumb2DSPPat<(mul (sra rGPR:$Rn, (i32 16)), sext_16_node:$Rm),
+def : Thumb2DSPPat<(mul (sext_top_16 rGPR:$Rn), sext_16_node:$Rm),
(t2SMULTB rGPR:$Rn, rGPR:$Rm)>;
+
def : Thumb2DSPPat<(int_arm_smulbb rGPR:$Rn, rGPR:$Rm),
(t2SMULBB rGPR:$Rn, rGPR:$Rm)>;
def : Thumb2DSPPat<(int_arm_smulbt rGPR:$Rn, rGPR:$Rm),
@@ -2781,18 +2778,13 @@ class T2FourRegSMLA<bits<3> op22_20, bits<2> op5_4, string opc,
}
def t2SMLABB : T2FourRegSMLA<0b001, 0b00, "smlabb",
- [(set rGPR:$Rd, (add rGPR:$Ra,
- (mul (sext_inreg rGPR:$Rn, i16),
- (sext_inreg rGPR:$Rm, i16))))]>;
+ [(set rGPR:$Rd, (add rGPR:$Ra, (bb_mul rGPR:$Rn, rGPR:$Rm)))]>;
def t2SMLABT : T2FourRegSMLA<0b001, 0b01, "smlabt",
- [(set rGPR:$Rd, (add rGPR:$Ra, (mul (sext_inreg rGPR:$Rn, i16),
- (sra rGPR:$Rm, (i32 16)))))]>;
+ [(set rGPR:$Rd, (add rGPR:$Ra, (bt_mul rGPR:$Rn, rGPR:$Rm)))]>;
def t2SMLATB : T2FourRegSMLA<0b001, 0b10, "smlatb",
- [(set rGPR:$Rd, (add rGPR:$Ra, (mul (sra rGPR:$Rn, (i32 16)),
- (sext_inreg rGPR:$Rm, i16))))]>;
+ [(set rGPR:$Rd, (add rGPR:$Ra, (tb_mul rGPR:$Rn, rGPR:$Rm)))]>;
def t2SMLATT : T2FourRegSMLA<0b001, 0b11, "smlatt",
- [(set rGPR:$Rd, (add rGPR:$Ra, (mul (sra rGPR:$Rn, (i32 16)),
- (sra rGPR:$Rm, (i32 16)))))]>;
+ [(set rGPR:$Rd, (add rGPR:$Ra, (tt_mul rGPR:$Rn, rGPR:$Rm)))]>;
def t2SMLAWB : T2FourRegSMLA<0b011, 0b00, "smlawb",
[(set rGPR:$Rd, (add rGPR:$Ra, (ARMsmulwb rGPR:$Rn, rGPR:$Rm)))]>;
def t2SMLAWT : T2FourRegSMLA<0b011, 0b01, "smlawt",
@@ -2800,11 +2792,14 @@ def t2SMLAWT : T2FourRegSMLA<0b011, 0b01, "smlawt",
def : Thumb2DSPMulPat<(add rGPR:$Ra, (mul sext_16_node:$Rn, sext_16_node:$Rm)),
(t2SMLABB rGPR:$Rn, rGPR:$Rm, rGPR:$Ra)>;
-def : Thumb2DSPMulPat<(add rGPR:$Ra,
- (mul sext_16_node:$Rn, (sra rGPR:$Rm, (i32 16)))),
+def : Thumb2DSPMulPat<(add rGPR:$Ra, (mul sext_16_node:$Rn,
+ (sext_bottom_16 rGPR:$Rm))),
+ (t2SMLABB rGPR:$Rn, rGPR:$Rm, rGPR:$Ra)>;
+def : Thumb2DSPMulPat<(add rGPR:$Ra, (mul sext_16_node:$Rn,
+ (sext_top_16 rGPR:$Rm))),
(t2SMLABT rGPR:$Rn, rGPR:$Rm, rGPR:$Ra)>;
-def : Thumb2DSPMulPat<(add rGPR:$Ra,
- (mul (sra rGPR:$Rn, (i32 16)), sext_16_node:$Rm)),
+def : Thumb2DSPMulPat<(add rGPR:$Ra, (mul (sext_top_16 rGPR:$Rn),
+ sext_16_node:$Rm)),
(t2SMLATB rGPR:$Rn, rGPR:$Rm, rGPR:$Ra)>;
def : Thumb2DSPPat<(int_arm_smlabb GPR:$a, GPR:$b, GPR:$acc),
OpenPOWER on IntegriCloud