author     David Green <david.green@arm.com>    2019-12-02 14:47:22 +0000
committer  David Green <david.green@arm.com>    2019-12-02 19:57:12 +0000
commit     57d96ab593dfff39dc6eb8fa5f25eaf64af26ee1 (patch)
tree       588a831b17cbc17e468054e0fbcbf785971383dd
parent     63aff5cd3c83cc1000cf9c85388e62071086dda2 (diff)
[ARM] Add some VCMP folding and canonicalisation
The VCMP instructions in MVE can accept a register or ZR, but only as the right-hand operand. Most of the time this will already be the case, because the icmp will have been canonicalised that way earlier. Some cases in the lowering of float conditions are not covered by that canonicalisation, though; this patch fixes up those cases.

Differential Revision: https://reviews.llvm.org/D70822
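As an illustration, the effect on the generated code for the vcmp_ord_v4f32 test updated below:

    @ Before: a zero vector has to be materialised in q3
    vmov.i32 q3, #0x0
    vpt.f32 le, q3, q0       @ 0 <= X, zero on the left-hand side
    vcmpt.f32 lt, q0, q3     @ X < 0, zero on the right-hand side

    @ After: both compares fold the zero into the ZR operand
    vpt.f32 ge, q0, zr       @ operands swapped, condition reversed: X >= 0
    vcmpt.f32 lt, q0, zr     @ zero was already on the right, folded directly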
-rw-r--r--  llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp | 19
-rw-r--r--  llvm/lib/Target/ARM/ARMISelLowering.cpp  | 51
-rw-r--r--  llvm/lib/Target/ARM/Utils/ARMBaseInfo.h  | 19
-rw-r--r--  llvm/test/CodeGen/Thumb2/mve-vcmpfz.ll   | 80
4 files changed, 94 insertions(+), 75 deletions(-)
diff --git a/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp b/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp
index ef10c9f738e..cecc16ffccb 100644
--- a/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp
+++ b/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp
@@ -2723,25 +2723,6 @@ static bool isSuitableForMask(MachineInstr *&MI, unsigned SrcReg,
return false;
}
-/// getSwappedCondition - assume the flags are set by MI(a,b), return
-/// the condition code if we modify the instructions such that flags are
-/// set by MI(b,a).
-inline static ARMCC::CondCodes getSwappedCondition(ARMCC::CondCodes CC) {
- switch (CC) {
- default: return ARMCC::AL;
- case ARMCC::EQ: return ARMCC::EQ;
- case ARMCC::NE: return ARMCC::NE;
- case ARMCC::HS: return ARMCC::LS;
- case ARMCC::LO: return ARMCC::HI;
- case ARMCC::HI: return ARMCC::LO;
- case ARMCC::LS: return ARMCC::HS;
- case ARMCC::GE: return ARMCC::LE;
- case ARMCC::LT: return ARMCC::GT;
- case ARMCC::GT: return ARMCC::LT;
- case ARMCC::LE: return ARMCC::GE;
- }
-}
-
/// getCmpToAddCondition - assume the flags are set by CMP(a,b), return
/// the condition code if we modify the instructions such that flags are
/// set by ADD(a,b,X).
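(getSwappedCondition is not deleted here; it moves verbatim to llvm/lib/Target/ARM/Utils/ARMBaseInfo.h further down, so that the new DAG combine in ARMISelLowering.cpp can reuse it.)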
diff --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp
index 8271c6fad0f..3dcddd73f30 100644
--- a/llvm/lib/Target/ARM/ARMISelLowering.cpp
+++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp
@@ -8993,6 +8993,12 @@ static SDValue LowerPredicateStore(SDValue Op, SelectionDAG &DAG) {
ST->getMemOperand());
}
+static bool isZeroVector(SDValue N) {
+ return (ISD::isBuildVectorAllZeros(N.getNode()) ||
+ (N->getOpcode() == ARMISD::VMOVIMM &&
+ isNullConstant(N->getOperand(0))));
+}
+
static SDValue LowerMLOAD(SDValue Op, SelectionDAG &DAG) {
MaskedLoadSDNode *N = cast<MaskedLoadSDNode>(Op.getNode());
MVT VT = Op.getSimpleValueType();
@@ -9000,13 +9006,7 @@ static SDValue LowerMLOAD(SDValue Op, SelectionDAG &DAG) {
SDValue PassThru = N->getPassThru();
SDLoc dl(Op);
- auto IsZero = [](SDValue PassThru) {
- return (ISD::isBuildVectorAllZeros(PassThru.getNode()) ||
- (PassThru->getOpcode() == ARMISD::VMOVIMM &&
- isNullConstant(PassThru->getOperand(0))));
- };
-
- if (IsZero(PassThru))
+ if (isZeroVector(PassThru))
return Op;
// MVE Masked loads use zero as the passthru value. Here we convert undef to
@@ -9020,7 +9020,7 @@ static SDValue LowerMLOAD(SDValue Op, SelectionDAG &DAG) {
SDValue Combo = NewLoad;
if (!PassThru.isUndef() &&
(PassThru.getOpcode() != ISD::BITCAST ||
- !IsZero(PassThru->getOperand(0))))
+ !isZeroVector(PassThru->getOperand(0))))
Combo = DAG.getNode(ISD::VSELECT, dl, VT, Mask, NewLoad, PassThru);
return DAG.getMergeValues({Combo, NewLoad.getValue(1)}, dl);
}
@@ -12743,6 +12743,39 @@ PerformPREDICATE_CASTCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI) {
return SDValue();
}
+static SDValue PerformVCMPCombine(SDNode *N,
+ TargetLowering::DAGCombinerInfo &DCI,
+ const ARMSubtarget *Subtarget) {
+ if (!Subtarget->hasMVEIntegerOps())
+ return SDValue();
+
+ EVT VT = N->getValueType(0);
+ SDValue Op0 = N->getOperand(0);
+ SDValue Op1 = N->getOperand(1);
+ ARMCC::CondCodes Cond =
+ (ARMCC::CondCodes)cast<ConstantSDNode>(N->getOperand(2))->getZExtValue();
+ SDLoc dl(N);
+
+ // vcmp X, 0, cc -> vcmpz X, cc
+ if (isZeroVector(Op1))
+ return DCI.DAG.getNode(ARMISD::VCMPZ, dl, VT, Op0,
+ N->getOperand(2));
+
+ unsigned SwappedCond = getSwappedCondition(Cond);
+ if (isValidMVECond(SwappedCond, VT.isFloatingPoint())) {
+ // vcmp 0, X, cc -> vcmpz X, reversed(cc)
+ if (isZeroVector(Op0))
+ return DCI.DAG.getNode(ARMISD::VCMPZ, dl, VT, Op1,
+ DCI.DAG.getConstant(SwappedCond, dl, MVT::i32));
+ // vcmp vdup(Y), X, cc -> vcmp X, vdup(Y), reversed(cc)
+ if (Op0->getOpcode() == ARMISD::VDUP && Op1->getOpcode() != ARMISD::VDUP)
+ return DCI.DAG.getNode(ARMISD::VCMP, dl, VT, Op1, Op0,
+ DCI.DAG.getConstant(SwappedCond, dl, MVT::i32));
+ }
+
+ return SDValue();
+}
+
/// PerformInsertEltCombine - Target-specific dag combine xforms for
/// ISD::INSERT_VECTOR_ELT.
static SDValue PerformInsertEltCombine(SDNode *N,
@@ -14423,6 +14456,8 @@ SDValue ARMTargetLowering::PerformDAGCombine(SDNode *N,
return PerformARMBUILD_VECTORCombine(N, DCI);
case ARMISD::PREDICATE_CAST:
return PerformPREDICATE_CASTCombine(N, DCI);
+ case ARMISD::VCMP:
+ return PerformVCMPCombine(N, DCI, Subtarget);
case ARMISD::SMULWB: {
unsigned BitWidth = N->getValueType(0).getSizeInBits();
APInt DemandedMask = APInt::getLowBitsSet(BitWidth, 16);
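As a sketch of how the new combine fires on the float cases (node values are illustrative, not taken from a real debug dump):

    // vcmp X, 0, cc: zero already on the right, fold directly to VCMPZ.
    t1: v4i1 = ARMISD::VCMP t0, zerovec, LE  -->  t1: v4i1 = ARMISD::VCMPZ t0, LE
    // vcmp 0, X, cc: swap the operands and reverse the condition;
    // getSwappedCondition(LE) == GE, and isValidMVECond(GE, float) holds.
    t2: v4i1 = ARMISD::VCMP zerovec, t0, LE  -->  t2: v4i1 = ARMISD::VCMPZ t0, GE

This is what turns the old vpt.f32 le, q3, q0 / vcmpt.f32 lt, q0, q3 sequences in the tests below into vpt.f32 ge, q0, zr / vcmpt.f32 lt, q0, zr, with no vmov.i32 q3, #0x0 needed to materialise the zero vector.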
diff --git a/llvm/lib/Target/ARM/Utils/ARMBaseInfo.h b/llvm/lib/Target/ARM/Utils/ARMBaseInfo.h
index aa3aca359cb..11cb1a162e2 100644
--- a/llvm/lib/Target/ARM/Utils/ARMBaseInfo.h
+++ b/llvm/lib/Target/ARM/Utils/ARMBaseInfo.h
@@ -64,6 +64,25 @@ inline static CondCodes getOppositeCondition(CondCodes CC) {
case LE: return GT;
}
}
+
+/// getSwappedCondition - assume the flags are set by MI(a,b), return
+/// the condition code if we modify the instructions such that flags are
+/// set by MI(b,a).
+inline static ARMCC::CondCodes getSwappedCondition(ARMCC::CondCodes CC) {
+ switch (CC) {
+ default: return ARMCC::AL;
+ case ARMCC::EQ: return ARMCC::EQ;
+ case ARMCC::NE: return ARMCC::NE;
+ case ARMCC::HS: return ARMCC::LS;
+ case ARMCC::LO: return ARMCC::HI;
+ case ARMCC::HI: return ARMCC::LO;
+ case ARMCC::LS: return ARMCC::HS;
+ case ARMCC::GE: return ARMCC::LE;
+ case ARMCC::LT: return ARMCC::GT;
+ case ARMCC::GT: return ARMCC::LT;
+ case ARMCC::LE: return ARMCC::GE;
+ }
+}
} // end namespace ARMCC
namespace ARMVCC {
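The header now holds both condition-code helpers, which are easy to confuse: getOppositeCondition negates a predicate, while getSwappedCondition rephrases it for swapped operands. A hypothetical sanity check, not part of the patch, assuming ARMBaseInfo.h is in scope:

    // Swapping operands: (a >= b) holds exactly when (b <= a) holds.
    assert(ARMCC::getSwappedCondition(ARMCC::GE) == ARMCC::LE);
    // Negating the predicate instead: !(a >= b) is (a < b).
    assert(ARMCC::getOppositeCondition(ARMCC::GE) == ARMCC::LT);
    // EQ and NE are symmetric, so they map to themselves under swapping.
    assert(ARMCC::getSwappedCondition(ARMCC::EQ) == ARMCC::EQ);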
diff --git a/llvm/test/CodeGen/Thumb2/mve-vcmpfz.ll b/llvm/test/CodeGen/Thumb2/mve-vcmpfz.ll
index b9e7ad6964e..82ef5df349a 100644
--- a/llvm/test/CodeGen/Thumb2/mve-vcmpfz.ll
+++ b/llvm/test/CodeGen/Thumb2/mve-vcmpfz.ll
@@ -107,9 +107,8 @@ define arm_aapcs_vfpcc <4 x float> @vcmp_one_v4f32(<4 x float> %src, <4 x float>
;
; CHECK-MVEFP-LABEL: vcmp_one_v4f32:
; CHECK-MVEFP: @ %bb.0: @ %entry
-; CHECK-MVEFP-NEXT: vmov.i32 q3, #0x0
-; CHECK-MVEFP-NEXT: vpt.f32 le, q3, q0
-; CHECK-MVEFP-NEXT: vcmpt.f32 le, q0, q3
+; CHECK-MVEFP-NEXT: vpt.f32 ge, q0, zr
+; CHECK-MVEFP-NEXT: vcmpt.f32 le, q0, zr
; CHECK-MVEFP-NEXT: vpnot
; CHECK-MVEFP-NEXT: vpsel q0, q1, q2
; CHECK-MVEFP-NEXT: bx lr
@@ -380,9 +379,8 @@ define arm_aapcs_vfpcc <4 x float> @vcmp_ueq_v4f32(<4 x float> %src, <4 x float>
;
; CHECK-MVEFP-LABEL: vcmp_ueq_v4f32:
; CHECK-MVEFP: @ %bb.0: @ %entry
-; CHECK-MVEFP-NEXT: vmov.i32 q3, #0x0
-; CHECK-MVEFP-NEXT: vpt.f32 le, q3, q0
-; CHECK-MVEFP-NEXT: vcmpt.f32 le, q0, q3
+; CHECK-MVEFP-NEXT: vpt.f32 ge, q0, zr
+; CHECK-MVEFP-NEXT: vcmpt.f32 le, q0, zr
; CHECK-MVEFP-NEXT: vpsel q0, q1, q2
; CHECK-MVEFP-NEXT: bx lr
entry:
@@ -698,9 +696,8 @@ define arm_aapcs_vfpcc <4 x float> @vcmp_ord_v4f32(<4 x float> %src, <4 x float>
;
; CHECK-MVEFP-LABEL: vcmp_ord_v4f32:
; CHECK-MVEFP: @ %bb.0: @ %entry
-; CHECK-MVEFP-NEXT: vmov.i32 q3, #0x0
-; CHECK-MVEFP-NEXT: vpt.f32 le, q3, q0
-; CHECK-MVEFP-NEXT: vcmpt.f32 lt, q0, q3
+; CHECK-MVEFP-NEXT: vpt.f32 ge, q0, zr
+; CHECK-MVEFP-NEXT: vcmpt.f32 lt, q0, zr
; CHECK-MVEFP-NEXT: vpnot
; CHECK-MVEFP-NEXT: vpsel q0, q1, q2
; CHECK-MVEFP-NEXT: bx lr
@@ -753,9 +750,8 @@ define arm_aapcs_vfpcc <4 x float> @vcmp_uno_v4f32(<4 x float> %src, <4 x float>
;
; CHECK-MVEFP-LABEL: vcmp_uno_v4f32:
; CHECK-MVEFP: @ %bb.0: @ %entry
-; CHECK-MVEFP-NEXT: vmov.i32 q3, #0x0
-; CHECK-MVEFP-NEXT: vpt.f32 le, q3, q0
-; CHECK-MVEFP-NEXT: vcmpt.f32 lt, q0, q3
+; CHECK-MVEFP-NEXT: vpt.f32 ge, q0, zr
+; CHECK-MVEFP-NEXT: vcmpt.f32 lt, q0, zr
; CHECK-MVEFP-NEXT: vpsel q0, q1, q2
; CHECK-MVEFP-NEXT: bx lr
entry:
@@ -1013,9 +1009,8 @@ define arm_aapcs_vfpcc <8 x half> @vcmp_one_v8f16(<8 x half> %src, <8 x half> %a
;
; CHECK-MVEFP-LABEL: vcmp_one_v8f16:
; CHECK-MVEFP: @ %bb.0: @ %entry
-; CHECK-MVEFP-NEXT: vmov.i32 q3, #0x0
-; CHECK-MVEFP-NEXT: vpt.f16 le, q3, q0
-; CHECK-MVEFP-NEXT: vcmpt.f16 le, q0, q3
+; CHECK-MVEFP-NEXT: vpt.f16 ge, q0, zr
+; CHECK-MVEFP-NEXT: vcmpt.f16 le, q0, zr
; CHECK-MVEFP-NEXT: vpnot
; CHECK-MVEFP-NEXT: vpsel q0, q1, q2
; CHECK-MVEFP-NEXT: bx lr
@@ -1632,9 +1627,8 @@ define arm_aapcs_vfpcc <8 x half> @vcmp_ueq_v8f16(<8 x half> %src, <8 x half> %a
;
; CHECK-MVEFP-LABEL: vcmp_ueq_v8f16:
; CHECK-MVEFP: @ %bb.0: @ %entry
-; CHECK-MVEFP-NEXT: vmov.i32 q3, #0x0
-; CHECK-MVEFP-NEXT: vpt.f16 le, q3, q0
-; CHECK-MVEFP-NEXT: vcmpt.f16 le, q0, q3
+; CHECK-MVEFP-NEXT: vpt.f16 ge, q0, zr
+; CHECK-MVEFP-NEXT: vcmpt.f16 le, q0, zr
; CHECK-MVEFP-NEXT: vpsel q0, q1, q2
; CHECK-MVEFP-NEXT: bx lr
entry:
@@ -2358,9 +2352,8 @@ define arm_aapcs_vfpcc <8 x half> @vcmp_ord_v8f16(<8 x half> %src, <8 x half> %a
;
; CHECK-MVEFP-LABEL: vcmp_ord_v8f16:
; CHECK-MVEFP: @ %bb.0: @ %entry
-; CHECK-MVEFP-NEXT: vmov.i32 q3, #0x0
-; CHECK-MVEFP-NEXT: vpt.f16 le, q3, q0
-; CHECK-MVEFP-NEXT: vcmpt.f16 lt, q0, q3
+; CHECK-MVEFP-NEXT: vpt.f16 ge, q0, zr
+; CHECK-MVEFP-NEXT: vcmpt.f16 lt, q0, zr
; CHECK-MVEFP-NEXT: vpnot
; CHECK-MVEFP-NEXT: vpsel q0, q1, q2
; CHECK-MVEFP-NEXT: bx lr
@@ -2481,9 +2474,8 @@ define arm_aapcs_vfpcc <8 x half> @vcmp_uno_v8f16(<8 x half> %src, <8 x half> %a
;
; CHECK-MVEFP-LABEL: vcmp_uno_v8f16:
; CHECK-MVEFP: @ %bb.0: @ %entry
-; CHECK-MVEFP-NEXT: vmov.i32 q3, #0x0
-; CHECK-MVEFP-NEXT: vpt.f16 le, q3, q0
-; CHECK-MVEFP-NEXT: vcmpt.f16 lt, q0, q3
+; CHECK-MVEFP-NEXT: vpt.f16 ge, q0, zr
+; CHECK-MVEFP-NEXT: vcmpt.f16 lt, q0, zr
; CHECK-MVEFP-NEXT: vpsel q0, q1, q2
; CHECK-MVEFP-NEXT: bx lr
entry:
@@ -2600,9 +2592,8 @@ define arm_aapcs_vfpcc <4 x float> @vcmp_r_one_v4f32(<4 x float> %src, <4 x floa
;
; CHECK-MVEFP-LABEL: vcmp_r_one_v4f32:
; CHECK-MVEFP: @ %bb.0: @ %entry
-; CHECK-MVEFP-NEXT: vmov.i32 q3, #0x0
-; CHECK-MVEFP-NEXT: vpt.f32 le, q0, q3
-; CHECK-MVEFP-NEXT: vcmpt.f32 le, q3, q0
+; CHECK-MVEFP-NEXT: vpt.f32 le, q0, zr
+; CHECK-MVEFP-NEXT: vcmpt.f32 ge, q0, zr
; CHECK-MVEFP-NEXT: vpnot
; CHECK-MVEFP-NEXT: vpsel q0, q1, q2
; CHECK-MVEFP-NEXT: bx lr
@@ -2873,9 +2864,8 @@ define arm_aapcs_vfpcc <4 x float> @vcmp_r_ueq_v4f32(<4 x float> %src, <4 x floa
;
; CHECK-MVEFP-LABEL: vcmp_r_ueq_v4f32:
; CHECK-MVEFP: @ %bb.0: @ %entry
-; CHECK-MVEFP-NEXT: vmov.i32 q3, #0x0
-; CHECK-MVEFP-NEXT: vpt.f32 le, q0, q3
-; CHECK-MVEFP-NEXT: vcmpt.f32 le, q3, q0
+; CHECK-MVEFP-NEXT: vpt.f32 le, q0, zr
+; CHECK-MVEFP-NEXT: vcmpt.f32 ge, q0, zr
; CHECK-MVEFP-NEXT: vpsel q0, q1, q2
; CHECK-MVEFP-NEXT: bx lr
entry:
@@ -3191,9 +3181,8 @@ define arm_aapcs_vfpcc <4 x float> @vcmp_r_ord_v4f32(<4 x float> %src, <4 x floa
;
; CHECK-MVEFP-LABEL: vcmp_r_ord_v4f32:
; CHECK-MVEFP: @ %bb.0: @ %entry
-; CHECK-MVEFP-NEXT: vmov.i32 q3, #0x0
-; CHECK-MVEFP-NEXT: vpt.f32 le, q0, q3
-; CHECK-MVEFP-NEXT: vcmpt.f32 lt, q3, q0
+; CHECK-MVEFP-NEXT: vpt.f32 le, q0, zr
+; CHECK-MVEFP-NEXT: vcmpt.f32 gt, q0, zr
; CHECK-MVEFP-NEXT: vpnot
; CHECK-MVEFP-NEXT: vpsel q0, q1, q2
; CHECK-MVEFP-NEXT: bx lr
@@ -3246,9 +3235,8 @@ define arm_aapcs_vfpcc <4 x float> @vcmp_r_uno_v4f32(<4 x float> %src, <4 x floa
;
; CHECK-MVEFP-LABEL: vcmp_r_uno_v4f32:
; CHECK-MVEFP: @ %bb.0: @ %entry
-; CHECK-MVEFP-NEXT: vmov.i32 q3, #0x0
-; CHECK-MVEFP-NEXT: vpt.f32 le, q0, q3
-; CHECK-MVEFP-NEXT: vcmpt.f32 lt, q3, q0
+; CHECK-MVEFP-NEXT: vpt.f32 le, q0, zr
+; CHECK-MVEFP-NEXT: vcmpt.f32 gt, q0, zr
; CHECK-MVEFP-NEXT: vpsel q0, q1, q2
; CHECK-MVEFP-NEXT: bx lr
entry:
@@ -3506,9 +3494,8 @@ define arm_aapcs_vfpcc <8 x half> @vcmp_r_one_v8f16(<8 x half> %src, <8 x half>
;
; CHECK-MVEFP-LABEL: vcmp_r_one_v8f16:
; CHECK-MVEFP: @ %bb.0: @ %entry
-; CHECK-MVEFP-NEXT: vmov.i32 q3, #0x0
-; CHECK-MVEFP-NEXT: vpt.f16 le, q0, q3
-; CHECK-MVEFP-NEXT: vcmpt.f16 le, q3, q0
+; CHECK-MVEFP-NEXT: vpt.f16 le, q0, zr
+; CHECK-MVEFP-NEXT: vcmpt.f16 ge, q0, zr
; CHECK-MVEFP-NEXT: vpnot
; CHECK-MVEFP-NEXT: vpsel q0, q1, q2
; CHECK-MVEFP-NEXT: bx lr
@@ -4125,9 +4112,8 @@ define arm_aapcs_vfpcc <8 x half> @vcmp_r_ueq_v8f16(<8 x half> %src, <8 x half>
;
; CHECK-MVEFP-LABEL: vcmp_r_ueq_v8f16:
; CHECK-MVEFP: @ %bb.0: @ %entry
-; CHECK-MVEFP-NEXT: vmov.i32 q3, #0x0
-; CHECK-MVEFP-NEXT: vpt.f16 le, q0, q3
-; CHECK-MVEFP-NEXT: vcmpt.f16 le, q3, q0
+; CHECK-MVEFP-NEXT: vpt.f16 le, q0, zr
+; CHECK-MVEFP-NEXT: vcmpt.f16 ge, q0, zr
; CHECK-MVEFP-NEXT: vpsel q0, q1, q2
; CHECK-MVEFP-NEXT: bx lr
entry:
@@ -4851,9 +4837,8 @@ define arm_aapcs_vfpcc <8 x half> @vcmp_r_ord_v8f16(<8 x half> %src, <8 x half>
;
; CHECK-MVEFP-LABEL: vcmp_r_ord_v8f16:
; CHECK-MVEFP: @ %bb.0: @ %entry
-; CHECK-MVEFP-NEXT: vmov.i32 q3, #0x0
-; CHECK-MVEFP-NEXT: vpt.f16 le, q0, q3
-; CHECK-MVEFP-NEXT: vcmpt.f16 lt, q3, q0
+; CHECK-MVEFP-NEXT: vpt.f16 le, q0, zr
+; CHECK-MVEFP-NEXT: vcmpt.f16 gt, q0, zr
; CHECK-MVEFP-NEXT: vpnot
; CHECK-MVEFP-NEXT: vpsel q0, q1, q2
; CHECK-MVEFP-NEXT: bx lr
@@ -4974,9 +4959,8 @@ define arm_aapcs_vfpcc <8 x half> @vcmp_r_uno_v8f16(<8 x half> %src, <8 x half>
;
; CHECK-MVEFP-LABEL: vcmp_r_uno_v8f16:
; CHECK-MVEFP: @ %bb.0: @ %entry
-; CHECK-MVEFP-NEXT: vmov.i32 q3, #0x0
-; CHECK-MVEFP-NEXT: vpt.f16 le, q0, q3
-; CHECK-MVEFP-NEXT: vcmpt.f16 lt, q3, q0
+; CHECK-MVEFP-NEXT: vpt.f16 le, q0, zr
+; CHECK-MVEFP-NEXT: vcmpt.f16 gt, q0, zr
; CHECK-MVEFP-NEXT: vpsel q0, q1, q2
; CHECK-MVEFP-NEXT: bx lr
entry:
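For reference, the MVE-float test cases above all follow the same shape: a vector fcmp of %src against zero feeding a select. A representative input for the vcmp_one_v4f32 case, paraphrased from the test file (exact value names may differ), is:

    define arm_aapcs_vfpcc <4 x float> @vcmp_one_v4f32(<4 x float> %src, <4 x float> %a, <4 x float> %b) {
    entry:
      %c = fcmp one <4 x float> %src, zeroinitializer
      %s = select <4 x i1> %c, <4 x float> %a, <4 x float> %b
      ret <4 x float> %s
    }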