diff options
-rw-r--r-- | llvm/docs/LangRef.rst | 62 | ||||
-rw-r--r-- | llvm/include/llvm/CodeGen/ISDOpcodes.h | 4 | ||||
-rw-r--r-- | llvm/include/llvm/IR/Intrinsics.td | 6 | ||||
-rw-r--r-- | llvm/include/llvm/Target/TargetSelectionDAG.td | 2 | ||||
-rw-r--r-- | llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp | 4 | ||||
-rw-r--r-- | llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp | 34 | ||||
-rw-r--r-- | llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp | 2 | ||||
-rw-r--r-- | llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp | 12 | ||||
-rw-r--r-- | llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp | 2 | ||||
-rw-r--r-- | llvm/lib/CodeGen/TargetLoweringBase.cpp | 3 | ||||
-rw-r--r-- | llvm/lib/Target/AArch64/AArch64ISelLowering.cpp | 47 | ||||
-rw-r--r-- | llvm/lib/Target/AArch64/AArch64InstrInfo.td | 16 | ||||
-rw-r--r-- | llvm/lib/Target/ARM/ARMISelLowering.cpp | 14 | ||||
-rw-r--r-- | llvm/lib/Target/ARM/ARMInstrNEON.td | 16 | ||||
-rw-r--r-- | llvm/test/CodeGen/X86/absdiff_128.ll | 181 | ||||
-rw-r--r-- | llvm/test/CodeGen/X86/absdiff_256.ll | 29 |
16 files changed, 35 insertions, 399 deletions
diff --git a/llvm/docs/LangRef.rst b/llvm/docs/LangRef.rst index 7f1a97428ee..58198f7af7d 100644 --- a/llvm/docs/LangRef.rst +++ b/llvm/docs/LangRef.rst @@ -11217,68 +11217,6 @@ Examples: %r2 = call float @llvm.fmuladd.f32(float %a, float %b, float %c) ; yields float:r2 = (a * b) + c - -'``llvm.uabsdiff.*``' and '``llvm.sabsdiff.*``' Intrinsics -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -Syntax: -""""""" -This is an overloaded intrinsic. The loaded data is a vector of any integer bit width. - -.. code-block:: llvm - - declare <4 x integer> @llvm.uabsdiff.v4i32(<4 x integer> %a, <4 x integer> %b) - - -Overview: -""""""""" - -The ``llvm.uabsdiff`` intrinsic returns a vector result of the absolute difference -of the two operands, treating them both as unsigned integers. The intermediate -calculations are computed using infinitely precise unsigned arithmetic. The final -result will be truncated to the given type. - -The ``llvm.sabsdiff`` intrinsic returns a vector result of the absolute difference of -the two operands, treating them both as signed integers. If the result overflows, the -behavior is undefined. - -.. note:: - - These intrinsics are primarily used during the code generation stage of compilation. - They are generated by compiler passes such as the Loop and SLP vectorizers. It is not - recommended for users to create them manually. - -Arguments: -"""""""""" - -Both intrinsics take two integer of the same bitwidth. - -Semantics: -"""""""""" - -The expression:: - - call <4 x i32> @llvm.uabsdiff.v4i32(<4 x i32> %a, <4 x i32> %b) - -is equivalent to:: - - %1 = zext <4 x i32> %a to <4 x i64> - %2 = zext <4 x i32> %b to <4 x i64> - %sub = sub <4 x i64> %1, %2 - %trunc = trunc <4 x i64> to <4 x i32> - -and the expression:: - - call <4 x i32> @llvm.sabsdiff.v4i32(<4 x i32> %a, <4 x i32> %b) - -is equivalent to:: - - %sub = sub nsw <4 x i32> %a, %b - %ispos = icmp sge <4 x i32> %sub, zeroinitializer - %neg = sub nsw <4 x i32> zeroinitializer, %sub - %1 = select <4 x i1> %ispos, <4 x i32> %sub, <4 x i32> %neg - - Half Precision Floating Point Intrinsics ---------------------------------------- diff --git a/llvm/include/llvm/CodeGen/ISDOpcodes.h b/llvm/include/llvm/CodeGen/ISDOpcodes.h index 4be993a9fbb..158ff3cd36a 100644 --- a/llvm/include/llvm/CodeGen/ISDOpcodes.h +++ b/llvm/include/llvm/CodeGen/ISDOpcodes.h @@ -338,10 +338,6 @@ namespace ISD { /// Byte Swap and Counting operators. BSWAP, CTTZ, CTLZ, CTPOP, BITREVERSE, - /// [SU]ABSDIFF - Signed/Unsigned absolute difference of two input integer - /// vector. These nodes are generated from llvm.*absdiff* intrinsics. - SABSDIFF, UABSDIFF, - /// Bit counting operators with an undefined result for zero inputs. CTTZ_ZERO_UNDEF, CTLZ_ZERO_UNDEF, diff --git a/llvm/include/llvm/IR/Intrinsics.td b/llvm/include/llvm/IR/Intrinsics.td index e838fb332de..2ede1ee11f2 100644 --- a/llvm/include/llvm/IR/Intrinsics.td +++ b/llvm/include/llvm/IR/Intrinsics.td @@ -631,12 +631,6 @@ def int_convertuu : Intrinsic<[llvm_anyint_ty], def int_clear_cache : Intrinsic<[], [llvm_ptr_ty, llvm_ptr_ty], [], "llvm.clear_cache">; -// Calculate the Absolute Differences of the two input vectors. -def int_sabsdiff : Intrinsic<[llvm_anyvector_ty], - [ LLVMMatchType<0>, LLVMMatchType<0> ], [IntrNoMem]>; -def int_uabsdiff : Intrinsic<[llvm_anyvector_ty], - [ LLVMMatchType<0>, LLVMMatchType<0> ], [IntrNoMem]>; - //===-------------------------- Masked Intrinsics -------------------------===// // def int_masked_store : Intrinsic<[], [llvm_anyvector_ty, LLVMPointerTo<0>, diff --git a/llvm/include/llvm/Target/TargetSelectionDAG.td b/llvm/include/llvm/Target/TargetSelectionDAG.td index 6ca253a1d1e..56547365840 100644 --- a/llvm/include/llvm/Target/TargetSelectionDAG.td +++ b/llvm/include/llvm/Target/TargetSelectionDAG.td @@ -396,8 +396,6 @@ def smax : SDNode<"ISD::SMAX" , SDTIntBinOp>; def umin : SDNode<"ISD::UMIN" , SDTIntBinOp>; def umax : SDNode<"ISD::UMAX" , SDTIntBinOp>; -def sabsdiff : SDNode<"ISD::SABSDIFF" , SDTIntBinOp>; -def uabsdiff : SDNode<"ISD::UABSDIFF" , SDTIntBinOp>; def sext_inreg : SDNode<"ISD::SIGN_EXTEND_INREG", SDTExtInreg>; def bitreverse : SDNode<"ISD::BITREVERSE" , SDTIntUnaryOp>; def bswap : SDNode<"ISD::BSWAP" , SDTIntUnaryOp>; diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp index ea537fff168..2cfcf77b17a 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp @@ -147,10 +147,6 @@ void DAGTypeLegalizer::PromoteIntegerResult(SDNode *N, unsigned ResNo) { case ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS: Res = PromoteIntRes_AtomicCmpSwap(cast<AtomicSDNode>(N), ResNo); break; - case ISD::UABSDIFF: - case ISD::SABSDIFF: - Res = PromoteIntRes_SimpleIntBinOp(N); - break; } // If the result is null then the sub-method took care of registering it. diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp index 8295b2a19dd..eddf666c9c3 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp @@ -105,7 +105,6 @@ class VectorLegalizer { SDValue ExpandLoad(SDValue Op); SDValue ExpandStore(SDValue Op); SDValue ExpandFNEG(SDValue Op); - SDValue ExpandABSDIFF(SDValue Op); /// \brief Implements vector promotion. /// @@ -330,8 +329,6 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) { case ISD::SMAX: case ISD::UMIN: case ISD::UMAX: - case ISD::UABSDIFF: - case ISD::SABSDIFF: QueryType = Node->getValueType(0); break; case ISD::FP_ROUND_INREG: @@ -718,42 +715,11 @@ SDValue VectorLegalizer::Expand(SDValue Op) { return ExpandFNEG(Op); case ISD::SETCC: return UnrollVSETCC(Op); - case ISD::UABSDIFF: - case ISD::SABSDIFF: - return ExpandABSDIFF(Op); default: return DAG.UnrollVectorOp(Op.getNode()); } } -SDValue VectorLegalizer::ExpandABSDIFF(SDValue Op) { - SDLoc dl(Op); - SDValue Op0 = Op.getOperand(0); - SDValue Op1 = Op.getOperand(1); - EVT VT = Op.getValueType(); - - // For unsigned intrinsic, promote the type to handle unsigned overflow. - bool isUabsdiff = (Op->getOpcode() == ISD::UABSDIFF); - if (isUabsdiff) { - VT = VT.widenIntegerVectorElementType(*DAG.getContext()); - Op0 = DAG.getNode(ISD::ZERO_EXTEND, dl, VT, Op0); - Op1 = DAG.getNode(ISD::ZERO_EXTEND, dl, VT, Op1); - } - - SDNodeFlags Flags; - Flags.setNoSignedWrap(!isUabsdiff); - SDValue Sub = DAG.getNode(ISD::SUB, dl, VT, Op0, Op1, &Flags); - if (isUabsdiff) - return DAG.getNode(ISD::TRUNCATE, dl, Op.getValueType(), Sub); - - SDValue Cmp = - DAG.getNode(ISD::SETCC, dl, TLI.getSetCCResultType(DAG.getDataLayout(), - *DAG.getContext(), VT), - Sub, DAG.getConstant(0, dl, VT), DAG.getCondCode(ISD::SETGE)); - SDValue Neg = DAG.getNode(ISD::SUB, dl, VT, DAG.getConstant(0, dl, VT), Sub, &Flags); - return DAG.getNode(ISD::VSELECT, dl, VT, Cmp, Sub, Neg); -} - SDValue VectorLegalizer::ExpandSELECT(SDValue Op) { // Lower a select instruction where the condition is a scalar and the // operands are vectors. Lower this select to VSELECT and implement it diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp index 96b8cc065f5..d9f02f4ae72 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp @@ -684,8 +684,6 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) { case ISD::SMAX: case ISD::UMIN: case ISD::UMAX: - case ISD::UABSDIFF: - case ISD::SABSDIFF: SplitVecRes_BinOp(N, Lo, Hi); break; case ISD::FMA: diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp index 867b9562019..91aa3eee01b 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -4880,18 +4880,6 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { getValue(I.getArgOperand(0)).getValueType(), getValue(I.getArgOperand(0)))); return nullptr; - case Intrinsic::uabsdiff: - setValue(&I, DAG.getNode(ISD::UABSDIFF, sdl, - getValue(I.getArgOperand(0)).getValueType(), - getValue(I.getArgOperand(0)), - getValue(I.getArgOperand(1)))); - return nullptr; - case Intrinsic::sabsdiff: - setValue(&I, DAG.getNode(ISD::SABSDIFF, sdl, - getValue(I.getArgOperand(0)).getValueType(), - getValue(I.getArgOperand(0)), - getValue(I.getArgOperand(1)))); - return nullptr; case Intrinsic::cttz: { SDValue Arg = getValue(I.getArgOperand(0)); ConstantInt *CI = cast<ConstantInt>(I.getArgOperand(1)); diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp index 7c5492b554c..a1c6c4c1dd6 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp @@ -235,8 +235,6 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const { case ISD::SHL_PARTS: return "shl_parts"; case ISD::SRA_PARTS: return "sra_parts"; case ISD::SRL_PARTS: return "srl_parts"; - case ISD::UABSDIFF: return "uabsdiff"; - case ISD::SABSDIFF: return "sabsdiff"; // Conversion operators. case ISD::SIGN_EXTEND: return "sign_extend"; diff --git a/llvm/lib/CodeGen/TargetLoweringBase.cpp b/llvm/lib/CodeGen/TargetLoweringBase.cpp index 68bca2e7036..c5972263046 100644 --- a/llvm/lib/CodeGen/TargetLoweringBase.cpp +++ b/llvm/lib/CodeGen/TargetLoweringBase.cpp @@ -826,8 +826,7 @@ void TargetLoweringBase::initActions() { setOperationAction(ISD::USUBO, VT, Expand); setOperationAction(ISD::SMULO, VT, Expand); setOperationAction(ISD::UMULO, VT, Expand); - setOperationAction(ISD::UABSDIFF, VT, Expand); - setOperationAction(ISD::SABSDIFF, VT, Expand); + setOperationAction(ISD::BITREVERSE, VT, Expand); // These library functions default to expand. diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp index 99b2edb38ef..f9af05e84d2 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -691,12 +691,10 @@ void AArch64TargetLowering::addTypeForNEON(EVT VT, EVT PromotedBitwiseVT) { setOperationAction(ISD::FP_TO_SINT, VT.getSimpleVT(), Custom); setOperationAction(ISD::FP_TO_UINT, VT.getSimpleVT(), Custom); - // [SU][MIN|MAX] and [SU]ABSDIFF are available for all NEON types apart from - // i64. + // [SU][MIN|MAX] are available for all NEON types apart from i64. if (!VT.isFloatingPoint() && VT.getSimpleVT() != MVT::v2i64 && VT.getSimpleVT() != MVT::v1i64) - for (unsigned Opcode : {ISD::SMIN, ISD::SMAX, ISD::UMIN, ISD::UMAX, - ISD::SABSDIFF, ISD::UABSDIFF}) + for (unsigned Opcode : {ISD::SMIN, ISD::SMAX, ISD::UMIN, ISD::UMAX}) setOperationAction(Opcode, VT.getSimpleVT(), Legal); // F[MIN|MAX][NUM|NAN] are available for all FP NEON types (not f16 though!). @@ -8251,15 +8249,14 @@ static SDValue performAddSubLongCombine(SDNode *N, // (aarch64_neon_umull (extract_high (v2i64 vec))) // (extract_high (v2i64 (dup128 scalar))))) // -static SDValue tryCombineLongOpWithDup(SDNode *N, +static SDValue tryCombineLongOpWithDup(unsigned IID, SDNode *N, TargetLowering::DAGCombinerInfo &DCI, SelectionDAG &DAG) { if (DCI.isBeforeLegalizeOps()) return SDValue(); - bool IsIntrinsic = N->getOpcode() == ISD::INTRINSIC_WO_CHAIN; - SDValue LHS = N->getOperand(IsIntrinsic ? 1 : 0); - SDValue RHS = N->getOperand(IsIntrinsic ? 2 : 1); + SDValue LHS = N->getOperand(1); + SDValue RHS = N->getOperand(2); assert(LHS.getValueType().is64BitVector() && RHS.getValueType().is64BitVector() && "unexpected shape for long operation"); @@ -8277,13 +8274,8 @@ static SDValue tryCombineLongOpWithDup(SDNode *N, return SDValue(); } - // N could either be an intrinsic or a sabsdiff/uabsdiff node. - if (IsIntrinsic) - return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, SDLoc(N), N->getValueType(0), - N->getOperand(0), LHS, RHS); - else - return DAG.getNode(N->getOpcode(), SDLoc(N), N->getValueType(0), - LHS, RHS); + return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, SDLoc(N), N->getValueType(0), + N->getOperand(0), LHS, RHS); } static SDValue tryCombineShiftImm(unsigned IID, SDNode *N, SelectionDAG &DAG) { @@ -8401,12 +8393,6 @@ static SDValue performIntrinsicCombine(SDNode *N, case Intrinsic::aarch64_neon_fmin: return DAG.getNode(ISD::FMINNAN, SDLoc(N), N->getValueType(0), N->getOperand(1), N->getOperand(2)); - case Intrinsic::aarch64_neon_sabd: - return DAG.getNode(ISD::SABSDIFF, SDLoc(N), N->getValueType(0), - N->getOperand(1), N->getOperand(2)); - case Intrinsic::aarch64_neon_uabd: - return DAG.getNode(ISD::UABSDIFF, SDLoc(N), N->getValueType(0), - N->getOperand(1), N->getOperand(2)); case Intrinsic::aarch64_neon_fmaxnm: return DAG.getNode(ISD::FMAXNUM, SDLoc(N), N->getValueType(0), N->getOperand(1), N->getOperand(2)); @@ -8417,7 +8403,7 @@ static SDValue performIntrinsicCombine(SDNode *N, case Intrinsic::aarch64_neon_umull: case Intrinsic::aarch64_neon_pmull: case Intrinsic::aarch64_neon_sqdmull: - return tryCombineLongOpWithDup(N, DCI, DAG); + return tryCombineLongOpWithDup(IID, N, DCI, DAG); case Intrinsic::aarch64_neon_sqshl: case Intrinsic::aarch64_neon_uqshl: case Intrinsic::aarch64_neon_sqshlu: @@ -8442,15 +8428,18 @@ static SDValue performExtendCombine(SDNode *N, // helps the backend to decide that an sabdl2 would be useful, saving a real // extract_high operation. if (!DCI.isBeforeLegalizeOps() && N->getOpcode() == ISD::ZERO_EXTEND && - (N->getOperand(0).getOpcode() == ISD::SABSDIFF || - N->getOperand(0).getOpcode() == ISD::UABSDIFF)) { + N->getOperand(0).getOpcode() == ISD::INTRINSIC_WO_CHAIN) { SDNode *ABDNode = N->getOperand(0).getNode(); - SDValue NewABD = tryCombineLongOpWithDup(ABDNode, DCI, DAG); - if (!NewABD.getNode()) - return SDValue(); + unsigned IID = getIntrinsicID(ABDNode); + if (IID == Intrinsic::aarch64_neon_sabd || + IID == Intrinsic::aarch64_neon_uabd) { + SDValue NewABD = tryCombineLongOpWithDup(IID, ABDNode, DCI, DAG); + if (!NewABD.getNode()) + return SDValue(); - return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), N->getValueType(0), - NewABD); + return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), N->getValueType(0), + NewABD); + } } // This is effectively a custom type legalization for AArch64. diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td index 2ee1299b6fd..70a1f849f1a 100644 --- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td +++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td @@ -2633,7 +2633,7 @@ defm FMOV : FPMoveImmediate<"fmov">; //===----------------------------------------------------------------------===// defm UABDL : SIMDLongThreeVectorBHSabdl<1, 0b0111, "uabdl", - uabsdiff>; + int_aarch64_neon_uabd>; // Match UABDL in log2-shuffle patterns. def : Pat<(xor (v8i16 (AArch64vashr v8i16:$src, (i32 15))), (v8i16 (add (sub (zext (v8i8 V64:$opA)), @@ -2905,8 +2905,8 @@ defm MLS : SIMDThreeSameVectorBHSTied<1, 0b10010, "mls", defm MUL : SIMDThreeSameVectorBHS<0, 0b10011, "mul", mul>; defm PMUL : SIMDThreeSameVectorB<1, 0b10011, "pmul", int_aarch64_neon_pmul>; defm SABA : SIMDThreeSameVectorBHSTied<0, 0b01111, "saba", - TriOpFrag<(add node:$LHS, (sabsdiff node:$MHS, node:$RHS))> >; -defm SABD : SIMDThreeSameVectorBHS<0,0b01110,"sabd", sabsdiff>; + TriOpFrag<(add node:$LHS, (int_aarch64_neon_sabd node:$MHS, node:$RHS))> >; +defm SABD : SIMDThreeSameVectorBHS<0,0b01110,"sabd", int_aarch64_neon_sabd>; defm SHADD : SIMDThreeSameVectorBHS<0,0b00000,"shadd", int_aarch64_neon_shadd>; defm SHSUB : SIMDThreeSameVectorBHS<0,0b00100,"shsub", int_aarch64_neon_shsub>; defm SMAXP : SIMDThreeSameVectorBHS<0,0b10100,"smaxp", int_aarch64_neon_smaxp>; @@ -2924,8 +2924,8 @@ defm SRSHL : SIMDThreeSameVector<0,0b01010,"srshl", int_aarch64_neon_srshl>; defm SSHL : SIMDThreeSameVector<0,0b01000,"sshl", int_aarch64_neon_sshl>; defm SUB : SIMDThreeSameVector<1,0b10000,"sub", sub>; defm UABA : SIMDThreeSameVectorBHSTied<1, 0b01111, "uaba", - TriOpFrag<(add node:$LHS, (uabsdiff node:$MHS, node:$RHS))> >; -defm UABD : SIMDThreeSameVectorBHS<1,0b01110,"uabd", uabsdiff>; + TriOpFrag<(add node:$LHS, (int_aarch64_neon_uabd node:$MHS, node:$RHS))> >; +defm UABD : SIMDThreeSameVectorBHS<1,0b01110,"uabd", int_aarch64_neon_uabd>; defm UHADD : SIMDThreeSameVectorBHS<1,0b00000,"uhadd", int_aarch64_neon_uhadd>; defm UHSUB : SIMDThreeSameVectorBHS<1,0b00100,"uhsub", int_aarch64_neon_uhsub>; defm UMAXP : SIMDThreeSameVectorBHS<1,0b10100,"umaxp", int_aarch64_neon_umaxp>; @@ -3427,9 +3427,9 @@ defm RADDHN : SIMDNarrowThreeVectorBHS<1,0b0100,"raddhn",int_aarch64_neon_raddhn defm RSUBHN : SIMDNarrowThreeVectorBHS<1,0b0110,"rsubhn",int_aarch64_neon_rsubhn>; defm PMULL : SIMDDifferentThreeVectorBD<0,0b1110,"pmull",int_aarch64_neon_pmull>; defm SABAL : SIMDLongThreeVectorTiedBHSabal<0,0b0101,"sabal", - sabsdiff>; + int_aarch64_neon_sabd>; defm SABDL : SIMDLongThreeVectorBHSabdl<0, 0b0111, "sabdl", - sabsdiff>; + int_aarch64_neon_sabd>; defm SADDL : SIMDLongThreeVectorBHS< 0, 0b0000, "saddl", BinOpFrag<(add (sext node:$LHS), (sext node:$RHS))>>; defm SADDW : SIMDWideThreeVectorBHS< 0, 0b0001, "saddw", @@ -3450,7 +3450,7 @@ defm SSUBL : SIMDLongThreeVectorBHS<0, 0b0010, "ssubl", defm SSUBW : SIMDWideThreeVectorBHS<0, 0b0011, "ssubw", BinOpFrag<(sub node:$LHS, (sext node:$RHS))>>; defm UABAL : SIMDLongThreeVectorTiedBHSabal<1, 0b0101, "uabal", - uabsdiff>; + int_aarch64_neon_uabd>; defm UADDL : SIMDLongThreeVectorBHS<1, 0b0000, "uaddl", BinOpFrag<(add (zext node:$LHS), (zext node:$RHS))>>; defm UADDW : SIMDWideThreeVectorBHS<1, 0b0001, "uaddw", diff --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp index cc9656aa0b4..fc32cf2ce4e 100644 --- a/llvm/lib/Target/ARM/ARMISelLowering.cpp +++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp @@ -143,15 +143,10 @@ void ARMTargetLowering::addTypeForNEON(MVT VT, MVT PromotedLdStVT, setOperationAction(ISD::UREM, VT, Expand); setOperationAction(ISD::FREM, VT, Expand); - if (VT.isInteger()) { - setOperationAction(ISD::SABSDIFF, VT, Legal); - setOperationAction(ISD::UABSDIFF, VT, Legal); - } if (!VT.isFloatingPoint() && VT != MVT::v2i64 && VT != MVT::v1i64) for (unsigned Opcode : {ISD::SMIN, ISD::SMAX, ISD::UMIN, ISD::UMAX}) setOperationAction(Opcode, VT, Legal); - } void ARMTargetLowering::addDRTypeForNEON(MVT VT) { @@ -10148,15 +10143,6 @@ static SDValue PerformIntrinsicCombine(SDNode *N, SelectionDAG &DAG) { // Don't do anything for most intrinsics. break; - case Intrinsic::arm_neon_vabds: - if (!N->getValueType(0).isInteger()) - return SDValue(); - return DAG.getNode(ISD::SABSDIFF, SDLoc(N), N->getValueType(0), - N->getOperand(1), N->getOperand(2)); - case Intrinsic::arm_neon_vabdu: - return DAG.getNode(ISD::UABSDIFF, SDLoc(N), N->getValueType(0), - N->getOperand(1), N->getOperand(2)); - // Vector shifts: check for immediate versions and lower them. // Note: This is done during DAG combining instead of DAG legalizing because // the build_vectors for 64-bit vector element shift counts are generally diff --git a/llvm/lib/Target/ARM/ARMInstrNEON.td b/llvm/lib/Target/ARM/ARMInstrNEON.td index af0552a0664..d43535b4e67 100644 --- a/llvm/lib/Target/ARM/ARMInstrNEON.td +++ b/llvm/lib/Target/ARM/ARMInstrNEON.td @@ -4994,10 +4994,10 @@ def VBITq : N3VX<1, 0, 0b10, 0b0001, 1, 1, // VABD : Vector Absolute Difference defm VABDs : N3VInt_QHS<0, 0, 0b0111, 0, N3RegFrm, IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q, - "vabd", "s", sabsdiff, 1>; + "vabd", "s", int_arm_neon_vabds, 1>; defm VABDu : N3VInt_QHS<1, 0, 0b0111, 0, N3RegFrm, IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q, - "vabd", "u", uabsdiff, 1>; + "vabd", "u", int_arm_neon_vabdu, 1>; def VABDfd : N3VDInt<1, 0, 0b10, 0b1101, 0, N3RegFrm, IIC_VBIND, "vabd", "f32", v2f32, v2f32, int_arm_neon_vabds, 1>; def VABDfq : N3VQInt<1, 0, 0b10, 0b1101, 0, N3RegFrm, IIC_VBINQ, @@ -5005,9 +5005,9 @@ def VABDfq : N3VQInt<1, 0, 0b10, 0b1101, 0, N3RegFrm, IIC_VBINQ, // VABDL : Vector Absolute Difference Long (Q = | D - D |) defm VABDLs : N3VLIntExt_QHS<0,1,0b0111,0, IIC_VSUBi4Q, - "vabdl", "s", sabsdiff, zext, 1>; + "vabdl", "s", int_arm_neon_vabds, zext, 1>; defm VABDLu : N3VLIntExt_QHS<1,1,0b0111,0, IIC_VSUBi4Q, - "vabdl", "u", uabsdiff, zext, 1>; + "vabdl", "u", int_arm_neon_vabdu, zext, 1>; def abd_shr : PatFrag<(ops node:$in1, node:$in2, node:$shift), @@ -5034,15 +5034,15 @@ def : Pat<(xor (v4i32 (bitconvert (v2i64 (abd_shr (v2i32 DPR:$opA), (v2i32 DPR:$ // VABA : Vector Absolute Difference and Accumulate defm VABAs : N3VIntOp_QHS<0,0,0b0111,1, IIC_VABAD, IIC_VABAQ, - "vaba", "s", sabsdiff, add>; + "vaba", "s", int_arm_neon_vabds, add>; defm VABAu : N3VIntOp_QHS<1,0,0b0111,1, IIC_VABAD, IIC_VABAQ, - "vaba", "u", uabsdiff, add>; + "vaba", "u", int_arm_neon_vabdu, add>; // VABAL : Vector Absolute Difference and Accumulate Long (Q += | D - D |) defm VABALs : N3VLIntExtOp_QHS<0,1,0b0101,0, IIC_VABAD, - "vabal", "s", sabsdiff, zext, add>; + "vabal", "s", int_arm_neon_vabds, zext, add>; defm VABALu : N3VLIntExtOp_QHS<1,1,0b0101,0, IIC_VABAD, - "vabal", "u", uabsdiff, zext, add>; + "vabal", "u", int_arm_neon_vabdu, zext, add>; // Vector Maximum and Minimum. diff --git a/llvm/test/CodeGen/X86/absdiff_128.ll b/llvm/test/CodeGen/X86/absdiff_128.ll deleted file mode 100644 index 24055ccc79e..00000000000 --- a/llvm/test/CodeGen/X86/absdiff_128.ll +++ /dev/null @@ -1,181 +0,0 @@ -; RUN: llc < %s -mtriple=x86_64-unknown-unknown | FileCheck %s - -declare <4 x i8> @llvm.uabsdiff.v4i8(<4 x i8>, <4 x i8>) - -define <4 x i8> @test_uabsdiff_v4i8_expand(<4 x i8> %a1, <4 x i8> %a2) { -; CHECK-LABEL: test_uabsdiff_v4i8_expand -; CHECK: pshufd -; CHECK: movd -; CHECK: subl -; CHECK: punpckldq -; CHECK-DAG: movd %xmm1, [[SRC:%.*]] -; CHECK-DAG: movd %xmm0, [[DST:%.*]] -; CHECK: subl [[SRC]], [[DST]] -; CHECK: movd -; CHECK: pshufd -; CHECK: movd -; CHECK: punpckldq -; CHECK: movdqa -; CHECK: retq - - %1 = call <4 x i8> @llvm.uabsdiff.v4i8(<4 x i8> %a1, <4 x i8> %a2) - ret <4 x i8> %1 -} - -declare <4 x i8> @llvm.sabsdiff.v4i8(<4 x i8>, <4 x i8>) - -define <4 x i8> @test_sabsdiff_v4i8_expand(<4 x i8> %a1, <4 x i8> %a2) { -; CHECK-LABEL: test_sabsdiff_v4i8_expand -; CHECK: psubd -; CHECK: pcmpgtd -; CHECK: pcmpeqd -; CHECK: pxor -; CHECK-DAG: psubd {{%xmm[0-9]+}}, [[SRC1:%xmm[0-9]+]] -; CHECK-DAG: pandn {{%xmm[0-9]+}}, [[SRC2:%xmm[0-9]+]] -; CHECK-DAG: pandn [[SRC1]], [[DST:%xmm[0-9]+]] -; CHECK: por [[SRC2]], [[DST]] -; CHECK: retq - - %1 = call <4 x i8> @llvm.sabsdiff.v4i8(<4 x i8> %a1, <4 x i8> %a2) - ret <4 x i8> %1 -} - -declare <8 x i8> @llvm.sabsdiff.v8i8(<8 x i8>, <8 x i8>) - -define <8 x i8> @test_sabsdiff_v8i8_expand(<8 x i8> %a1, <8 x i8> %a2) { -; CHECK-LABEL: test_sabsdiff_v8i8_expand -; CHECK: psubw -; CHECK: pcmpgtw -; CHECK: pcmpeqd -; CHECK: pxor -; CHECK-DAG: psubw {{%xmm[0-9]+}}, [[SRC1:%xmm[0-9]+]] -; CHECK-DAG: pandn {{%xmm[0-9]+}}, [[SRC2:%xmm[0-9]+]] -; CHECK-DAG: pandn [[SRC1]], [[DST:%xmm[0-9]+]] -; CHECK: por [[SRC2]], [[DST]] -; CHECK: retq - - %1 = call <8 x i8> @llvm.sabsdiff.v8i8(<8 x i8> %a1, <8 x i8> %a2) - ret <8 x i8> %1 -} - -declare <16 x i8> @llvm.uabsdiff.v16i8(<16 x i8>, <16 x i8>) - -define <16 x i8> @test_uabsdiff_v16i8_expand(<16 x i8> %a1, <16 x i8> %a2) { -; CHECK-LABEL: test_uabsdiff_v16i8_expand -; CHECK: movd -; CHECK: movzbl -; CHECK: movzbl -; CHECK: subl -; CHECK: punpcklbw -; CHECK: retq - - %1 = call <16 x i8> @llvm.uabsdiff.v16i8(<16 x i8> %a1, <16 x i8> %a2) - ret <16 x i8> %1 -} - -declare <8 x i16> @llvm.uabsdiff.v8i16(<8 x i16>, <8 x i16>) - -define <8 x i16> @test_uabsdiff_v8i16_expand(<8 x i16> %a1, <8 x i16> %a2) { -; CHECK-LABEL: test_uabsdiff_v8i16_expand -; CHECK: pextrw -; CHECK: pextrw -; CHECK: subl -; CHECK: punpcklwd -; CHECK: retq - - %1 = call <8 x i16> @llvm.uabsdiff.v8i16(<8 x i16> %a1, <8 x i16> %a2) - ret <8 x i16> %1 -} - -declare <8 x i16> @llvm.sabsdiff.v8i16(<8 x i16>, <8 x i16>) - -define <8 x i16> @test_sabsdiff_v8i16_expand(<8 x i16> %a1, <8 x i16> %a2) { -; CHECK-LABEL: test_sabsdiff_v8i16_expand -; CHECK: psubw -; CHECK: pcmpgtw -; CHECK: pcmpeqd -; CHECK: pxor -; CHECK-DAG: psubw {{%xmm[0-9]+}}, [[SRC1:%xmm[0-9]+]] -; CHECK-DAG: pandn {{%xmm[0-9]+}}, [[SRC2:%xmm[0-9]+]] -; CHECK-DAG: pandn [[SRC1]], [[DST:%xmm[0-9]+]] -; CHECK: por [[SRC2]], [[DST]] -; CHECK: retq - - %1 = call <8 x i16> @llvm.sabsdiff.v8i16(<8 x i16> %a1, <8 x i16> %a2) - ret <8 x i16> %1 -} - -declare <4 x i32> @llvm.sabsdiff.v4i32(<4 x i32>, <4 x i32>) - -define <4 x i32> @test_sabsdiff_v4i32_expand(<4 x i32> %a1, <4 x i32> %a2) { -; CHECK-LABEL: test_sabsdiff_v4i32_expand -; CHECK: psubd -; CHECK: pcmpgtd -; CHECK: pcmpeqd -; CHECK: pxor -; CHECK-DAG: psubd {{%xmm[0-9]+}}, [[SRC1:%xmm[0-9]+]] -; CHECK-DAG: pandn {{%xmm[0-9]+}}, [[SRC2:%xmm[0-9]+]] -; CHECK-DAG: pandn [[SRC1]], [[DST:%xmm[0-9]+]] -; CHECK: por [[SRC2]], [[DST]] -; CHECK: retq - %1 = call <4 x i32> @llvm.sabsdiff.v4i32(<4 x i32> %a1, <4 x i32> %a2) - ret <4 x i32> %1 -} - -declare <4 x i32> @llvm.uabsdiff.v4i32(<4 x i32>, <4 x i32>) - -define <4 x i32> @test_uabsdiff_v4i32_expand(<4 x i32> %a1, <4 x i32> %a2) { -; CHECK-LABEL: test_uabsdiff_v4i32_expand -; CHECK: pshufd -; CHECK: movd -; CHECK: subl -; CHECK: punpckldq -; CHECK-DAG: movd %xmm1, [[SRC:%.*]] -; CHECK-DAG: movd %xmm0, [[DST:%.*]] -; CHECK: subl [[SRC]], [[DST]] -; CHECK: movd -; CHECK: pshufd -; CHECK: movd -; CHECK: punpckldq -; CHECK: movdqa -; CHECK: retq - - %1 = call <4 x i32> @llvm.uabsdiff.v4i32(<4 x i32> %a1, <4 x i32> %a2) - ret <4 x i32> %1 -} - -declare <2 x i32> @llvm.sabsdiff.v2i32(<2 x i32>, <2 x i32>) - -define <2 x i32> @test_sabsdiff_v2i32_expand(<2 x i32> %a1, <2 x i32> %a2) { -; CHECK-LABEL: test_sabsdiff_v2i32_expand -; CHECK: psubq -; CHECK: pcmpgtd -; CHECK: pcmpeqd -; CHECK: pxor -; CHECK-DAG: psubq {{%xmm[0-9]+}}, [[SRC1:%xmm[0-9]+]] -; CHECK-DAG: pandn {{%xmm[0-9]+}}, [[SRC2:%xmm[0-9]+]] -; CHECK-DAG: pandn [[SRC1]], [[DST:%xmm[0-9]+]] -; CHECK: por [[SRC2]], [[DST]] -; CHECK: retq - - %1 = call <2 x i32> @llvm.sabsdiff.v2i32(<2 x i32> %a1, <2 x i32> %a2) - ret <2 x i32> %1 -} - -declare <2 x i64> @llvm.sabsdiff.v2i64(<2 x i64>, <2 x i64>) - -define <2 x i64> @test_sabsdiff_v2i64_expand(<2 x i64> %a1, <2 x i64> %a2) { -; CHECK-LABEL: test_sabsdiff_v2i64_expand -; CHECK: psubq -; CHECK: pcmpgtd -; CHECK: pcmpeqd -; CHECK: pxor -; CHECK-DAG: psubq {{%xmm[0-9]+}}, [[SRC1:%xmm[0-9]+]] -; CHECK-DAG: pandn {{%xmm[0-9]+}}, [[SRC2:%xmm[0-9]+]] -; CHECK-DAG: pandn [[SRC1]], [[DST:%xmm[0-9]+]] -; CHECK: por [[SRC2]], [[DST]] -; CHECK: retq - - %1 = call <2 x i64> @llvm.sabsdiff.v2i64(<2 x i64> %a1, <2 x i64> %a2) - ret <2 x i64> %1 -} diff --git a/llvm/test/CodeGen/X86/absdiff_256.ll b/llvm/test/CodeGen/X86/absdiff_256.ll deleted file mode 100644 index acc8a1fa51d..00000000000 --- a/llvm/test/CodeGen/X86/absdiff_256.ll +++ /dev/null @@ -1,29 +0,0 @@ -; RUN: llc < %s -mtriple=x86_64-unknown-unknown | FileCheck %s - -declare <16 x i16> @llvm.sabsdiff.v16i16(<16 x i16>, <16 x i16>) - -define <16 x i16> @test_sabsdiff_v16i16_expand(<16 x i16> %a1, <16 x i16> %a2) { -; CHECK-LABEL: test_sabsdiff_v16i16_expand: -; CHECK: # BB#0: -; CHECK: psubw -; CHECK: pxor -; CHECK: pcmpgtw -; CHECK: movdqa -; CHECK: pandn -; CHECK: pxor -; CHECK: psubw -; CHECK: pcmpeqd -; CHECK: pxor -; CHECK: pandn -; CHECK: por -; CHECK: pcmpgtw -; CHECK-DAG: psubw {{%xmm[0-9]+}}, [[SRC:%xmm[0-9]+]] -; CHECK-DAG: pxor {{%xmm[0-9]+}}, [[DST:%xmm[0-9]+]] -; CHECK: pandn [[SRC]], [[DST]] -; CHECK: por -; CHECK: movdqa -; CHECK: retq - %1 = call <16 x i16> @llvm.sabsdiff.v16i16(<16 x i16> %a1, <16 x i16> %a2) - ret <16 x i16> %1 -} - |