diff options
author | Sam Parker <sam.parker@arm.com> | 2019-01-08 10:12:36 +0000 |
---|---|---|
committer | Sam Parker <sam.parker@arm.com> | 2019-01-08 10:12:36 +0000 |
commit | 53000a74a5c9f1be2ec09bfeb3533a80667e4b2f (patch) | |
tree | 2fbf9fc71c7415389fa2e25d11dbcba18f9a50a1 /llvm/lib | |
parent | e72804066c934121c066b2d8f5a2da4be17c9502 (diff) | |
download | bcm5719-llvm-53000a74a5c9f1be2ec09bfeb3533a80667e4b2f.tar.gz bcm5719-llvm-53000a74a5c9f1be2ec09bfeb3533a80667e4b2f.zip |
[ARM] Add missing patterns for DSP muls
Using a PatLeaf for sext_16_node allowed matching smulbb and smlabb
instructions once the operands had been sign extended. But we also
need to use sext_inreg operands along with sext_16_node to catch a
few more cases that enable us to remove the unnecessary sxth.
Differential Revision: https://reviews.llvm.org/D55992
llvm-svn: 350613
Diffstat (limited to 'llvm/lib')
-rw-r--r-- | llvm/lib/Target/ARM/ARMInstrInfo.td | 95 | ||||
-rw-r--r-- | llvm/lib/Target/ARM/ARMInstrThumb2.td | 45 |
2 files changed, 60 insertions, 80 deletions
diff --git a/llvm/lib/Target/ARM/ARMInstrInfo.td b/llvm/lib/Target/ARM/ARMInstrInfo.td index 4f42601a894..46bdeba6bbd 100644 --- a/llvm/lib/Target/ARM/ARMInstrInfo.td +++ b/llvm/lib/Target/ARM/ARMInstrInfo.td @@ -426,24 +426,22 @@ def imm16_31 : ImmLeaf<i32, [{ // sext_16_node predicate - True if the SDNode is sign-extended 16 or more bits. def sext_16_node : PatLeaf<(i32 GPR:$a), [{ - if (CurDAG->ComputeNumSignBits(SDValue(N,0)) >= 17) - return true; - - if (N->getOpcode() != ISD::SRA) - return false; - if (N->getOperand(0).getOpcode() != ISD::SHL) - return false; - - auto *ShiftVal = dyn_cast<ConstantSDNode>(N->getOperand(1)); - if (!ShiftVal || ShiftVal->getZExtValue() != 16) - return false; + return CurDAG->ComputeNumSignBits(SDValue(N,0)) >= 17; +}]>; - ShiftVal = dyn_cast<ConstantSDNode>(N->getOperand(0)->getOperand(1)); - if (!ShiftVal || ShiftVal->getZExtValue() != 16) - return false; +def sext_bottom_16 : PatFrag<(ops node:$a), + (sext_inreg node:$a, i16)>; +def sext_top_16 : PatFrag<(ops node:$a), + (i32 (sra node:$a, (i32 16)))>; - return true; -}]>; +def bb_mul : PatFrag<(ops node:$a, node:$b), + (mul (sext_bottom_16 node:$a), (sext_bottom_16 node:$b))>; +def bt_mul : PatFrag<(ops node:$a, node:$b), + (mul (sext_bottom_16 node:$a), (sra node:$b, (i32 16)))>; +def tb_mul : PatFrag<(ops node:$a, node:$b), + (mul (sra node:$a, (i32 16)), (sext_bottom_16 node:$b))>; +def tt_mul : PatFrag<(ops node:$a, node:$b), + (mul (sra node:$a, (i32 16)), (sra node:$b, (i32 16)))>; /// Split a 32-bit immediate into two 16 bit parts. 
def hi16 : SDNodeXForm<imm, [{ @@ -4241,29 +4239,25 @@ def SMMLSR : AMul2Ia <0b0111010, 0b1111, (outs GPR:$Rd), multiclass AI_smul<string opc> { def BB : AMulxyI<0b0001011, 0b00, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm), IIC_iMUL16, !strconcat(opc, "bb"), "\t$Rd, $Rn, $Rm", - [(set GPR:$Rd, (mul (sext_inreg GPR:$Rn, i16), - (sext_inreg GPR:$Rm, i16)))]>, + [(set GPR:$Rd, (bb_mul GPR:$Rn, GPR:$Rm))]>, Requires<[IsARM, HasV5TE]>, Sched<[WriteMUL16, ReadMUL, ReadMUL]>; def BT : AMulxyI<0b0001011, 0b10, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm), IIC_iMUL16, !strconcat(opc, "bt"), "\t$Rd, $Rn, $Rm", - [(set GPR:$Rd, (mul (sext_inreg GPR:$Rn, i16), - (sra GPR:$Rm, (i32 16))))]>, + [(set GPR:$Rd, (bt_mul GPR:$Rn, GPR:$Rm))]>, Requires<[IsARM, HasV5TE]>, Sched<[WriteMUL16, ReadMUL, ReadMUL]>; def TB : AMulxyI<0b0001011, 0b01, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm), IIC_iMUL16, !strconcat(opc, "tb"), "\t$Rd, $Rn, $Rm", - [(set GPR:$Rd, (mul (sra GPR:$Rn, (i32 16)), - (sext_inreg GPR:$Rm, i16)))]>, + [(set GPR:$Rd, (tb_mul GPR:$Rn, GPR:$Rm))]>, Requires<[IsARM, HasV5TE]>, Sched<[WriteMUL16, ReadMUL, ReadMUL]>; def TT : AMulxyI<0b0001011, 0b11, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm), IIC_iMUL16, !strconcat(opc, "tt"), "\t$Rd, $Rn, $Rm", - [(set GPR:$Rd, (mul (sra GPR:$Rn, (i32 16)), - (sra GPR:$Rm, (i32 16))))]>, + [(set GPR:$Rd, (tt_mul GPR:$Rn, GPR:$Rm))]>, Requires<[IsARM, HasV5TE]>, Sched<[WriteMUL16, ReadMUL, ReadMUL]>; @@ -4287,35 +4281,31 @@ multiclass AI_smla<string opc> { (ins GPRnopc:$Rn, GPRnopc:$Rm, GPR:$Ra), IIC_iMAC16, !strconcat(opc, "bb"), "\t$Rd, $Rn, $Rm, $Ra", [(set GPRnopc:$Rd, (add GPR:$Ra, - (mul (sext_inreg GPRnopc:$Rn, i16), - (sext_inreg GPRnopc:$Rm, i16))))]>, + (bb_mul GPRnopc:$Rn, GPRnopc:$Rm)))]>, Requires<[IsARM, HasV5TE, UseMulOps]>, Sched<[WriteMAC16, ReadMUL, ReadMUL, ReadMAC]>; def BT : AMulxyIa<0b0001000, 0b10, (outs GPRnopc:$Rd), (ins GPRnopc:$Rn, GPRnopc:$Rm, GPR:$Ra), IIC_iMAC16, !strconcat(opc, "bt"), "\t$Rd, $Rn, $Rm, $Ra", - [(set 
GPRnopc:$Rd, - (add GPR:$Ra, (mul (sext_inreg GPRnopc:$Rn, i16), - (sra GPRnopc:$Rm, (i32 16)))))]>, + [(set GPRnopc:$Rd, (add GPR:$Ra, + (bt_mul GPRnopc:$Rn, GPRnopc:$Rm)))]>, Requires<[IsARM, HasV5TE, UseMulOps]>, Sched<[WriteMAC16, ReadMUL, ReadMUL, ReadMAC]>; def TB : AMulxyIa<0b0001000, 0b01, (outs GPRnopc:$Rd), (ins GPRnopc:$Rn, GPRnopc:$Rm, GPR:$Ra), IIC_iMAC16, !strconcat(opc, "tb"), "\t$Rd, $Rn, $Rm, $Ra", - [(set GPRnopc:$Rd, - (add GPR:$Ra, (mul (sra GPRnopc:$Rn, (i32 16)), - (sext_inreg GPRnopc:$Rm, i16))))]>, + [(set GPRnopc:$Rd, (add GPR:$Ra, + (tb_mul GPRnopc:$Rn, GPRnopc:$Rm)))]>, Requires<[IsARM, HasV5TE, UseMulOps]>, Sched<[WriteMAC16, ReadMUL, ReadMUL, ReadMAC]>; def TT : AMulxyIa<0b0001000, 0b11, (outs GPRnopc:$Rd), (ins GPRnopc:$Rn, GPRnopc:$Rm, GPR:$Ra), IIC_iMAC16, !strconcat(opc, "tt"), "\t$Rd, $Rn, $Rm, $Ra", - [(set GPRnopc:$Rd, - (add GPR:$Ra, (mul (sra GPRnopc:$Rn, (i32 16)), - (sra GPRnopc:$Rm, (i32 16)))))]>, + [(set GPRnopc:$Rd, (add GPR:$Ra, + (tt_mul GPRnopc:$Rn, GPRnopc:$Rm)))]>, Requires<[IsARM, HasV5TE, UseMulOps]>, Sched<[WriteMAC16, ReadMUL, ReadMUL, ReadMAC]>; @@ -5816,26 +5806,21 @@ def : ARMPat<(extloadi16 addrmodepc:$addr), (PICLDRH addrmodepc:$addr)>; // smul* and smla* def : ARMV5TEPat<(mul sext_16_node:$a, sext_16_node:$b), - (SMULBB GPR:$a, GPR:$b)>, - Sched<[WriteMUL32, ReadMUL, ReadMUL]>; -def : ARMV5TEPat<(mul sext_16_node:$a, (sra GPR:$b, (i32 16))), - (SMULBT GPR:$a, GPR:$b)>, - Sched<[WriteMUL32, ReadMUL, ReadMUL]>; -def : ARMV5TEPat<(mul (sra GPR:$a, (i32 16)), sext_16_node:$b), - (SMULTB GPR:$a, GPR:$b)>, - Sched<[WriteMUL32, ReadMUL, ReadMUL]>; -def : ARMV5MOPat<(add GPR:$acc, - (mul sext_16_node:$a, sext_16_node:$b)), - (SMLABB GPR:$a, GPR:$b, GPR:$acc)>, - Sched<[WriteMUL32, ReadMUL, ReadMUL]>; -def : ARMV5MOPat<(add GPR:$acc, - (mul sext_16_node:$a, (sra GPR:$b, (i32 16)))), - (SMLABT GPR:$a, GPR:$b, GPR:$acc)>, - Sched<[WriteMUL32, ReadMUL, ReadMUL]>; -def : ARMV5MOPat<(add GPR:$acc, - (mul (sra GPR:$a, 
(i32 16)), sext_16_node:$b)), - (SMLATB GPR:$a, GPR:$b, GPR:$acc)>, - Sched<[WriteMUL32, ReadMUL, ReadMUL]>; + (SMULBB GPR:$a, GPR:$b)>; +def : ARMV5TEPat<(mul sext_16_node:$a, (sext_bottom_16 GPR:$b)), + (SMULBB GPR:$a, GPR:$b)>; +def : ARMV5TEPat<(mul sext_16_node:$a, (sext_top_16 GPR:$b)), + (SMULBT GPR:$a, GPR:$b)>; +def : ARMV5TEPat<(mul (sext_top_16 GPR:$a), sext_16_node:$b), + (SMULTB GPR:$a, GPR:$b)>; +def : ARMV5MOPat<(add GPR:$acc, (mul sext_16_node:$a, sext_16_node:$b)), + (SMLABB GPR:$a, GPR:$b, GPR:$acc)>; +def : ARMV5MOPat<(add GPR:$acc, (mul sext_16_node:$a, (sext_bottom_16 GPR:$b))), + (SMLABB GPR:$a, GPR:$b, GPR:$acc)>; +def : ARMV5MOPat<(add GPR:$acc, (mul sext_16_node:$a, (sext_top_16 GPR:$b))), + (SMLABT GPR:$a, GPR:$b, GPR:$acc)>; +def : ARMV5MOPat<(add GPR:$acc, (mul (sext_top_16 GPR:$a), sext_16_node:$b)), + (SMLATB GPR:$a, GPR:$b, GPR:$acc)>; def : ARMV5TEPat<(int_arm_smulbb GPR:$a, GPR:$b), (SMULBB GPR:$a, GPR:$b)>; diff --git a/llvm/lib/Target/ARM/ARMInstrThumb2.td b/llvm/lib/Target/ARM/ARMInstrThumb2.td index 18a7ee4c419..7a6673b49d5 100644 --- a/llvm/lib/Target/ARM/ARMInstrThumb2.td +++ b/llvm/lib/Target/ARM/ARMInstrThumb2.td @@ -2732,28 +2732,25 @@ class T2ThreeRegSMUL<bits<3> op22_20, bits<2> op5_4, string opc, } def t2SMULBB : T2ThreeRegSMUL<0b001, 0b00, "smulbb", - [(set rGPR:$Rd, (mul (sext_inreg rGPR:$Rn, i16), - (sext_inreg rGPR:$Rm, i16)))]>; + [(set rGPR:$Rd, (bb_mul rGPR:$Rn, rGPR:$Rm))]>; def t2SMULBT : T2ThreeRegSMUL<0b001, 0b01, "smulbt", - [(set rGPR:$Rd, (mul (sext_inreg rGPR:$Rn, i16), - (sra rGPR:$Rm, (i32 16))))]>; + [(set rGPR:$Rd, (bt_mul rGPR:$Rn, rGPR:$Rm))]>; def t2SMULTB : T2ThreeRegSMUL<0b001, 0b10, "smultb", - [(set rGPR:$Rd, (mul (sra rGPR:$Rn, (i32 16)), - (sext_inreg rGPR:$Rm, i16)))]>; + [(set rGPR:$Rd, (tb_mul rGPR:$Rn, rGPR:$Rm))]>; def t2SMULTT : T2ThreeRegSMUL<0b001, 0b11, "smultt", - [(set rGPR:$Rd, (mul (sra rGPR:$Rn, (i32 16)), - (sra rGPR:$Rm, (i32 16))))]>; + [(set rGPR:$Rd, (tt_mul rGPR:$Rn, 
rGPR:$Rm))]>; def t2SMULWB : T2ThreeRegSMUL<0b011, 0b00, "smulwb", [(set rGPR:$Rd, (ARMsmulwb rGPR:$Rn, rGPR:$Rm))]>; def t2SMULWT : T2ThreeRegSMUL<0b011, 0b01, "smulwt", [(set rGPR:$Rd, (ARMsmulwt rGPR:$Rn, rGPR:$Rm))]>; -def : Thumb2DSPPat<(mul sext_16_node:$Rm, sext_16_node:$Rn), - (t2SMULBB rGPR:$Rm, rGPR:$Rn)>; -def : Thumb2DSPPat<(mul sext_16_node:$Rn, (sra rGPR:$Rm, (i32 16))), +def : Thumb2DSPPat<(mul sext_16_node:$Rn, (sext_bottom_16 rGPR:$Rm)), + (t2SMULBB rGPR:$Rn, rGPR:$Rm)>; +def : Thumb2DSPPat<(mul sext_16_node:$Rn, (sext_top_16 rGPR:$Rm)), (t2SMULBT rGPR:$Rn, rGPR:$Rm)>; -def : Thumb2DSPPat<(mul (sra rGPR:$Rn, (i32 16)), sext_16_node:$Rm), +def : Thumb2DSPPat<(mul (sext_top_16 rGPR:$Rn), sext_16_node:$Rm), (t2SMULTB rGPR:$Rn, rGPR:$Rm)>; + def : Thumb2DSPPat<(int_arm_smulbb rGPR:$Rn, rGPR:$Rm), (t2SMULBB rGPR:$Rn, rGPR:$Rm)>; def : Thumb2DSPPat<(int_arm_smulbt rGPR:$Rn, rGPR:$Rm), @@ -2781,18 +2778,13 @@ class T2FourRegSMLA<bits<3> op22_20, bits<2> op5_4, string opc, } def t2SMLABB : T2FourRegSMLA<0b001, 0b00, "smlabb", - [(set rGPR:$Rd, (add rGPR:$Ra, - (mul (sext_inreg rGPR:$Rn, i16), - (sext_inreg rGPR:$Rm, i16))))]>; + [(set rGPR:$Rd, (add rGPR:$Ra, (bb_mul rGPR:$Rn, rGPR:$Rm)))]>; def t2SMLABT : T2FourRegSMLA<0b001, 0b01, "smlabt", - [(set rGPR:$Rd, (add rGPR:$Ra, (mul (sext_inreg rGPR:$Rn, i16), - (sra rGPR:$Rm, (i32 16)))))]>; + [(set rGPR:$Rd, (add rGPR:$Ra, (bt_mul rGPR:$Rn, rGPR:$Rm)))]>; def t2SMLATB : T2FourRegSMLA<0b001, 0b10, "smlatb", - [(set rGPR:$Rd, (add rGPR:$Ra, (mul (sra rGPR:$Rn, (i32 16)), - (sext_inreg rGPR:$Rm, i16))))]>; + [(set rGPR:$Rd, (add rGPR:$Ra, (tb_mul rGPR:$Rn, rGPR:$Rm)))]>; def t2SMLATT : T2FourRegSMLA<0b001, 0b11, "smlatt", - [(set rGPR:$Rd, (add rGPR:$Ra, (mul (sra rGPR:$Rn, (i32 16)), - (sra rGPR:$Rm, (i32 16)))))]>; + [(set rGPR:$Rd, (add rGPR:$Ra, (tt_mul rGPR:$Rn, rGPR:$Rm)))]>; def t2SMLAWB : T2FourRegSMLA<0b011, 0b00, "smlawb", [(set rGPR:$Rd, (add rGPR:$Ra, (ARMsmulwb rGPR:$Rn, rGPR:$Rm)))]>; def 
t2SMLAWT : T2FourRegSMLA<0b011, 0b01, "smlawt", @@ -2800,11 +2792,14 @@ def t2SMLAWT : T2FourRegSMLA<0b011, 0b01, "smlawt", def : Thumb2DSPMulPat<(add rGPR:$Ra, (mul sext_16_node:$Rn, sext_16_node:$Rm)), (t2SMLABB rGPR:$Rn, rGPR:$Rm, rGPR:$Ra)>; -def : Thumb2DSPMulPat<(add rGPR:$Ra, - (mul sext_16_node:$Rn, (sra rGPR:$Rm, (i32 16)))), +def : Thumb2DSPMulPat<(add rGPR:$Ra, (mul sext_16_node:$Rn, + (sext_bottom_16 rGPR:$Rm))), + (t2SMLABB rGPR:$Rn, rGPR:$Rm, rGPR:$Ra)>; +def : Thumb2DSPMulPat<(add rGPR:$Ra, (mul sext_16_node:$Rn, + (sext_top_16 rGPR:$Rm))), (t2SMLABT rGPR:$Rn, rGPR:$Rm, rGPR:$Ra)>; -def : Thumb2DSPMulPat<(add rGPR:$Ra, - (mul (sra rGPR:$Rn, (i32 16)), sext_16_node:$Rm)), +def : Thumb2DSPMulPat<(add rGPR:$Ra, (mul (sext_top_16 rGPR:$Rn), + sext_16_node:$Rm)), (t2SMLATB rGPR:$Rn, rGPR:$Rm, rGPR:$Ra)>; def : Thumb2DSPPat<(int_arm_smlabb GPR:$a, GPR:$b, GPR:$acc), |