summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
-rw-r--r--  llvm/docs/LangRef.rst  62
-rw-r--r--  llvm/include/llvm/CodeGen/ISDOpcodes.h  4
-rw-r--r--  llvm/include/llvm/IR/Intrinsics.td  6
-rw-r--r--  llvm/include/llvm/Target/TargetSelectionDAG.td  2
-rw-r--r--  llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp  4
-rw-r--r--  llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp  34
-rw-r--r--  llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp  2
-rw-r--r--  llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp  12
-rw-r--r--  llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp  2
-rw-r--r--  llvm/lib/CodeGen/TargetLoweringBase.cpp  3
-rw-r--r--  llvm/lib/Target/AArch64/AArch64ISelLowering.cpp  47
-rw-r--r--  llvm/lib/Target/AArch64/AArch64InstrInfo.td  16
-rw-r--r--  llvm/lib/Target/ARM/ARMISelLowering.cpp  14
-rw-r--r--  llvm/lib/Target/ARM/ARMInstrNEON.td  16
-rw-r--r--  llvm/test/CodeGen/X86/absdiff_128.ll  181
-rw-r--r--  llvm/test/CodeGen/X86/absdiff_256.ll  29
16 files changed, 35 insertions, 399 deletions
diff --git a/llvm/docs/LangRef.rst b/llvm/docs/LangRef.rst
index 7f1a97428ee..58198f7af7d 100644
--- a/llvm/docs/LangRef.rst
+++ b/llvm/docs/LangRef.rst
@@ -11217,68 +11217,6 @@ Examples:
%r2 = call float @llvm.fmuladd.f32(float %a, float %b, float %c) ; yields float:r2 = (a * b) + c
-
-'``llvm.uabsdiff.*``' and '``llvm.sabsdiff.*``' Intrinsics
-^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
-
-Syntax:
-"""""""
-This is an overloaded intrinsic. The loaded data is a vector of any integer bit width.
-
-.. code-block:: llvm
-
- declare <4 x integer> @llvm.uabsdiff.v4i32(<4 x integer> %a, <4 x integer> %b)
-
-
-Overview:
-"""""""""
-
-The ``llvm.uabsdiff`` intrinsic returns a vector result of the absolute difference
-of the two operands, treating them both as unsigned integers. The intermediate
-calculations are computed using infinitely precise unsigned arithmetic. The final
-result will be truncated to the given type.
-
-The ``llvm.sabsdiff`` intrinsic returns a vector result of the absolute difference of
-the two operands, treating them both as signed integers. If the result overflows, the
-behavior is undefined.
-
-.. note::
-
- These intrinsics are primarily used during the code generation stage of compilation.
- They are generated by compiler passes such as the Loop and SLP vectorizers. It is not
- recommended for users to create them manually.
-
-Arguments:
-""""""""""
-
-Both intrinsics take two integer of the same bitwidth.
-
-Semantics:
-""""""""""
-
-The expression::
-
- call <4 x i32> @llvm.uabsdiff.v4i32(<4 x i32> %a, <4 x i32> %b)
-
-is equivalent to::
-
- %1 = zext <4 x i32> %a to <4 x i64>
- %2 = zext <4 x i32> %b to <4 x i64>
- %sub = sub <4 x i64> %1, %2
- %trunc = trunc <4 x i64> to <4 x i32>
-
-and the expression::
-
- call <4 x i32> @llvm.sabsdiff.v4i32(<4 x i32> %a, <4 x i32> %b)
-
-is equivalent to::
-
- %sub = sub nsw <4 x i32> %a, %b
- %ispos = icmp sge <4 x i32> %sub, zeroinitializer
- %neg = sub nsw <4 x i32> zeroinitializer, %sub
- %1 = select <4 x i1> %ispos, <4 x i32> %sub, <4 x i32> %neg
-
-
Half Precision Floating Point Intrinsics
----------------------------------------
diff --git a/llvm/include/llvm/CodeGen/ISDOpcodes.h b/llvm/include/llvm/CodeGen/ISDOpcodes.h
index 4be993a9fbb..158ff3cd36a 100644
--- a/llvm/include/llvm/CodeGen/ISDOpcodes.h
+++ b/llvm/include/llvm/CodeGen/ISDOpcodes.h
@@ -338,10 +338,6 @@ namespace ISD {
/// Byte Swap and Counting operators.
BSWAP, CTTZ, CTLZ, CTPOP, BITREVERSE,
- /// [SU]ABSDIFF - Signed/Unsigned absolute difference of two input integer
- /// vector. These nodes are generated from llvm.*absdiff* intrinsics.
- SABSDIFF, UABSDIFF,
-
/// Bit counting operators with an undefined result for zero inputs.
CTTZ_ZERO_UNDEF, CTLZ_ZERO_UNDEF,
diff --git a/llvm/include/llvm/IR/Intrinsics.td b/llvm/include/llvm/IR/Intrinsics.td
index e838fb332de..2ede1ee11f2 100644
--- a/llvm/include/llvm/IR/Intrinsics.td
+++ b/llvm/include/llvm/IR/Intrinsics.td
@@ -631,12 +631,6 @@ def int_convertuu : Intrinsic<[llvm_anyint_ty],
def int_clear_cache : Intrinsic<[], [llvm_ptr_ty, llvm_ptr_ty],
[], "llvm.clear_cache">;
-// Calculate the Absolute Differences of the two input vectors.
-def int_sabsdiff : Intrinsic<[llvm_anyvector_ty],
- [ LLVMMatchType<0>, LLVMMatchType<0> ], [IntrNoMem]>;
-def int_uabsdiff : Intrinsic<[llvm_anyvector_ty],
- [ LLVMMatchType<0>, LLVMMatchType<0> ], [IntrNoMem]>;
-
//===-------------------------- Masked Intrinsics -------------------------===//
//
def int_masked_store : Intrinsic<[], [llvm_anyvector_ty, LLVMPointerTo<0>,
diff --git a/llvm/include/llvm/Target/TargetSelectionDAG.td b/llvm/include/llvm/Target/TargetSelectionDAG.td
index 6ca253a1d1e..56547365840 100644
--- a/llvm/include/llvm/Target/TargetSelectionDAG.td
+++ b/llvm/include/llvm/Target/TargetSelectionDAG.td
@@ -396,8 +396,6 @@ def smax : SDNode<"ISD::SMAX" , SDTIntBinOp>;
def umin : SDNode<"ISD::UMIN" , SDTIntBinOp>;
def umax : SDNode<"ISD::UMAX" , SDTIntBinOp>;
-def sabsdiff : SDNode<"ISD::SABSDIFF" , SDTIntBinOp>;
-def uabsdiff : SDNode<"ISD::UABSDIFF" , SDTIntBinOp>;
def sext_inreg : SDNode<"ISD::SIGN_EXTEND_INREG", SDTExtInreg>;
def bitreverse : SDNode<"ISD::BITREVERSE" , SDTIntUnaryOp>;
def bswap : SDNode<"ISD::BSWAP" , SDTIntUnaryOp>;
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
index ea537fff168..2cfcf77b17a 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
@@ -147,10 +147,6 @@ void DAGTypeLegalizer::PromoteIntegerResult(SDNode *N, unsigned ResNo) {
case ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS:
Res = PromoteIntRes_AtomicCmpSwap(cast<AtomicSDNode>(N), ResNo);
break;
- case ISD::UABSDIFF:
- case ISD::SABSDIFF:
- Res = PromoteIntRes_SimpleIntBinOp(N);
- break;
}
// If the result is null then the sub-method took care of registering it.
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
index 8295b2a19dd..eddf666c9c3 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
@@ -105,7 +105,6 @@ class VectorLegalizer {
SDValue ExpandLoad(SDValue Op);
SDValue ExpandStore(SDValue Op);
SDValue ExpandFNEG(SDValue Op);
- SDValue ExpandABSDIFF(SDValue Op);
/// \brief Implements vector promotion.
///
@@ -330,8 +329,6 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) {
case ISD::SMAX:
case ISD::UMIN:
case ISD::UMAX:
- case ISD::UABSDIFF:
- case ISD::SABSDIFF:
QueryType = Node->getValueType(0);
break;
case ISD::FP_ROUND_INREG:
@@ -718,42 +715,11 @@ SDValue VectorLegalizer::Expand(SDValue Op) {
return ExpandFNEG(Op);
case ISD::SETCC:
return UnrollVSETCC(Op);
- case ISD::UABSDIFF:
- case ISD::SABSDIFF:
- return ExpandABSDIFF(Op);
default:
return DAG.UnrollVectorOp(Op.getNode());
}
}
-SDValue VectorLegalizer::ExpandABSDIFF(SDValue Op) {
- SDLoc dl(Op);
- SDValue Op0 = Op.getOperand(0);
- SDValue Op1 = Op.getOperand(1);
- EVT VT = Op.getValueType();
-
- // For unsigned intrinsic, promote the type to handle unsigned overflow.
- bool isUabsdiff = (Op->getOpcode() == ISD::UABSDIFF);
- if (isUabsdiff) {
- VT = VT.widenIntegerVectorElementType(*DAG.getContext());
- Op0 = DAG.getNode(ISD::ZERO_EXTEND, dl, VT, Op0);
- Op1 = DAG.getNode(ISD::ZERO_EXTEND, dl, VT, Op1);
- }
-
- SDNodeFlags Flags;
- Flags.setNoSignedWrap(!isUabsdiff);
- SDValue Sub = DAG.getNode(ISD::SUB, dl, VT, Op0, Op1, &Flags);
- if (isUabsdiff)
- return DAG.getNode(ISD::TRUNCATE, dl, Op.getValueType(), Sub);
-
- SDValue Cmp =
- DAG.getNode(ISD::SETCC, dl, TLI.getSetCCResultType(DAG.getDataLayout(),
- *DAG.getContext(), VT),
- Sub, DAG.getConstant(0, dl, VT), DAG.getCondCode(ISD::SETGE));
- SDValue Neg = DAG.getNode(ISD::SUB, dl, VT, DAG.getConstant(0, dl, VT), Sub, &Flags);
- return DAG.getNode(ISD::VSELECT, dl, VT, Cmp, Sub, Neg);
-}
-
SDValue VectorLegalizer::ExpandSELECT(SDValue Op) {
// Lower a select instruction where the condition is a scalar and the
// operands are vectors. Lower this select to VSELECT and implement it
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
index 96b8cc065f5..d9f02f4ae72 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
@@ -684,8 +684,6 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) {
case ISD::SMAX:
case ISD::UMIN:
case ISD::UMAX:
- case ISD::UABSDIFF:
- case ISD::SABSDIFF:
SplitVecRes_BinOp(N, Lo, Hi);
break;
case ISD::FMA:
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
index 867b9562019..91aa3eee01b 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -4880,18 +4880,6 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
getValue(I.getArgOperand(0)).getValueType(),
getValue(I.getArgOperand(0))));
return nullptr;
- case Intrinsic::uabsdiff:
- setValue(&I, DAG.getNode(ISD::UABSDIFF, sdl,
- getValue(I.getArgOperand(0)).getValueType(),
- getValue(I.getArgOperand(0)),
- getValue(I.getArgOperand(1))));
- return nullptr;
- case Intrinsic::sabsdiff:
- setValue(&I, DAG.getNode(ISD::SABSDIFF, sdl,
- getValue(I.getArgOperand(0)).getValueType(),
- getValue(I.getArgOperand(0)),
- getValue(I.getArgOperand(1))));
- return nullptr;
case Intrinsic::cttz: {
SDValue Arg = getValue(I.getArgOperand(0));
ConstantInt *CI = cast<ConstantInt>(I.getArgOperand(1));
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
index 7c5492b554c..a1c6c4c1dd6 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
@@ -235,8 +235,6 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const {
case ISD::SHL_PARTS: return "shl_parts";
case ISD::SRA_PARTS: return "sra_parts";
case ISD::SRL_PARTS: return "srl_parts";
- case ISD::UABSDIFF: return "uabsdiff";
- case ISD::SABSDIFF: return "sabsdiff";
// Conversion operators.
case ISD::SIGN_EXTEND: return "sign_extend";
diff --git a/llvm/lib/CodeGen/TargetLoweringBase.cpp b/llvm/lib/CodeGen/TargetLoweringBase.cpp
index 68bca2e7036..c5972263046 100644
--- a/llvm/lib/CodeGen/TargetLoweringBase.cpp
+++ b/llvm/lib/CodeGen/TargetLoweringBase.cpp
@@ -826,8 +826,7 @@ void TargetLoweringBase::initActions() {
setOperationAction(ISD::USUBO, VT, Expand);
setOperationAction(ISD::SMULO, VT, Expand);
setOperationAction(ISD::UMULO, VT, Expand);
- setOperationAction(ISD::UABSDIFF, VT, Expand);
- setOperationAction(ISD::SABSDIFF, VT, Expand);
+
setOperationAction(ISD::BITREVERSE, VT, Expand);
// These library functions default to expand.
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 99b2edb38ef..f9af05e84d2 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -691,12 +691,10 @@ void AArch64TargetLowering::addTypeForNEON(EVT VT, EVT PromotedBitwiseVT) {
setOperationAction(ISD::FP_TO_SINT, VT.getSimpleVT(), Custom);
setOperationAction(ISD::FP_TO_UINT, VT.getSimpleVT(), Custom);
- // [SU][MIN|MAX] and [SU]ABSDIFF are available for all NEON types apart from
- // i64.
+ // [SU][MIN|MAX] are available for all NEON types apart from i64.
if (!VT.isFloatingPoint() &&
VT.getSimpleVT() != MVT::v2i64 && VT.getSimpleVT() != MVT::v1i64)
- for (unsigned Opcode : {ISD::SMIN, ISD::SMAX, ISD::UMIN, ISD::UMAX,
- ISD::SABSDIFF, ISD::UABSDIFF})
+ for (unsigned Opcode : {ISD::SMIN, ISD::SMAX, ISD::UMIN, ISD::UMAX})
setOperationAction(Opcode, VT.getSimpleVT(), Legal);
// F[MIN|MAX][NUM|NAN] are available for all FP NEON types (not f16 though!).
@@ -8251,15 +8249,14 @@ static SDValue performAddSubLongCombine(SDNode *N,
// (aarch64_neon_umull (extract_high (v2i64 vec)))
// (extract_high (v2i64 (dup128 scalar)))))
//
-static SDValue tryCombineLongOpWithDup(SDNode *N,
+static SDValue tryCombineLongOpWithDup(unsigned IID, SDNode *N,
TargetLowering::DAGCombinerInfo &DCI,
SelectionDAG &DAG) {
if (DCI.isBeforeLegalizeOps())
return SDValue();
- bool IsIntrinsic = N->getOpcode() == ISD::INTRINSIC_WO_CHAIN;
- SDValue LHS = N->getOperand(IsIntrinsic ? 1 : 0);
- SDValue RHS = N->getOperand(IsIntrinsic ? 2 : 1);
+ SDValue LHS = N->getOperand(1);
+ SDValue RHS = N->getOperand(2);
assert(LHS.getValueType().is64BitVector() &&
RHS.getValueType().is64BitVector() &&
"unexpected shape for long operation");
@@ -8277,13 +8274,8 @@ static SDValue tryCombineLongOpWithDup(SDNode *N,
return SDValue();
}
- // N could either be an intrinsic or a sabsdiff/uabsdiff node.
- if (IsIntrinsic)
- return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, SDLoc(N), N->getValueType(0),
- N->getOperand(0), LHS, RHS);
- else
- return DAG.getNode(N->getOpcode(), SDLoc(N), N->getValueType(0),
- LHS, RHS);
+ return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, SDLoc(N), N->getValueType(0),
+ N->getOperand(0), LHS, RHS);
}
static SDValue tryCombineShiftImm(unsigned IID, SDNode *N, SelectionDAG &DAG) {
@@ -8401,12 +8393,6 @@ static SDValue performIntrinsicCombine(SDNode *N,
case Intrinsic::aarch64_neon_fmin:
return DAG.getNode(ISD::FMINNAN, SDLoc(N), N->getValueType(0),
N->getOperand(1), N->getOperand(2));
- case Intrinsic::aarch64_neon_sabd:
- return DAG.getNode(ISD::SABSDIFF, SDLoc(N), N->getValueType(0),
- N->getOperand(1), N->getOperand(2));
- case Intrinsic::aarch64_neon_uabd:
- return DAG.getNode(ISD::UABSDIFF, SDLoc(N), N->getValueType(0),
- N->getOperand(1), N->getOperand(2));
case Intrinsic::aarch64_neon_fmaxnm:
return DAG.getNode(ISD::FMAXNUM, SDLoc(N), N->getValueType(0),
N->getOperand(1), N->getOperand(2));
@@ -8417,7 +8403,7 @@ static SDValue performIntrinsicCombine(SDNode *N,
case Intrinsic::aarch64_neon_umull:
case Intrinsic::aarch64_neon_pmull:
case Intrinsic::aarch64_neon_sqdmull:
- return tryCombineLongOpWithDup(N, DCI, DAG);
+ return tryCombineLongOpWithDup(IID, N, DCI, DAG);
case Intrinsic::aarch64_neon_sqshl:
case Intrinsic::aarch64_neon_uqshl:
case Intrinsic::aarch64_neon_sqshlu:
@@ -8442,15 +8428,18 @@ static SDValue performExtendCombine(SDNode *N,
// helps the backend to decide that an sabdl2 would be useful, saving a real
// extract_high operation.
if (!DCI.isBeforeLegalizeOps() && N->getOpcode() == ISD::ZERO_EXTEND &&
- (N->getOperand(0).getOpcode() == ISD::SABSDIFF ||
- N->getOperand(0).getOpcode() == ISD::UABSDIFF)) {
+ N->getOperand(0).getOpcode() == ISD::INTRINSIC_WO_CHAIN) {
SDNode *ABDNode = N->getOperand(0).getNode();
- SDValue NewABD = tryCombineLongOpWithDup(ABDNode, DCI, DAG);
- if (!NewABD.getNode())
- return SDValue();
+ unsigned IID = getIntrinsicID(ABDNode);
+ if (IID == Intrinsic::aarch64_neon_sabd ||
+ IID == Intrinsic::aarch64_neon_uabd) {
+ SDValue NewABD = tryCombineLongOpWithDup(IID, ABDNode, DCI, DAG);
+ if (!NewABD.getNode())
+ return SDValue();
- return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), N->getValueType(0),
- NewABD);
+ return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), N->getValueType(0),
+ NewABD);
+ }
}
// This is effectively a custom type legalization for AArch64.
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
index 2ee1299b6fd..70a1f849f1a 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
@@ -2633,7 +2633,7 @@ defm FMOV : FPMoveImmediate<"fmov">;
//===----------------------------------------------------------------------===//
defm UABDL : SIMDLongThreeVectorBHSabdl<1, 0b0111, "uabdl",
- uabsdiff>;
+ int_aarch64_neon_uabd>;
// Match UABDL in log2-shuffle patterns.
def : Pat<(xor (v8i16 (AArch64vashr v8i16:$src, (i32 15))),
(v8i16 (add (sub (zext (v8i8 V64:$opA)),
@@ -2905,8 +2905,8 @@ defm MLS : SIMDThreeSameVectorBHSTied<1, 0b10010, "mls",
defm MUL : SIMDThreeSameVectorBHS<0, 0b10011, "mul", mul>;
defm PMUL : SIMDThreeSameVectorB<1, 0b10011, "pmul", int_aarch64_neon_pmul>;
defm SABA : SIMDThreeSameVectorBHSTied<0, 0b01111, "saba",
- TriOpFrag<(add node:$LHS, (sabsdiff node:$MHS, node:$RHS))> >;
-defm SABD : SIMDThreeSameVectorBHS<0,0b01110,"sabd", sabsdiff>;
+ TriOpFrag<(add node:$LHS, (int_aarch64_neon_sabd node:$MHS, node:$RHS))> >;
+defm SABD : SIMDThreeSameVectorBHS<0,0b01110,"sabd", int_aarch64_neon_sabd>;
defm SHADD : SIMDThreeSameVectorBHS<0,0b00000,"shadd", int_aarch64_neon_shadd>;
defm SHSUB : SIMDThreeSameVectorBHS<0,0b00100,"shsub", int_aarch64_neon_shsub>;
defm SMAXP : SIMDThreeSameVectorBHS<0,0b10100,"smaxp", int_aarch64_neon_smaxp>;
@@ -2924,8 +2924,8 @@ defm SRSHL : SIMDThreeSameVector<0,0b01010,"srshl", int_aarch64_neon_srshl>;
defm SSHL : SIMDThreeSameVector<0,0b01000,"sshl", int_aarch64_neon_sshl>;
defm SUB : SIMDThreeSameVector<1,0b10000,"sub", sub>;
defm UABA : SIMDThreeSameVectorBHSTied<1, 0b01111, "uaba",
- TriOpFrag<(add node:$LHS, (uabsdiff node:$MHS, node:$RHS))> >;
-defm UABD : SIMDThreeSameVectorBHS<1,0b01110,"uabd", uabsdiff>;
+ TriOpFrag<(add node:$LHS, (int_aarch64_neon_uabd node:$MHS, node:$RHS))> >;
+defm UABD : SIMDThreeSameVectorBHS<1,0b01110,"uabd", int_aarch64_neon_uabd>;
defm UHADD : SIMDThreeSameVectorBHS<1,0b00000,"uhadd", int_aarch64_neon_uhadd>;
defm UHSUB : SIMDThreeSameVectorBHS<1,0b00100,"uhsub", int_aarch64_neon_uhsub>;
defm UMAXP : SIMDThreeSameVectorBHS<1,0b10100,"umaxp", int_aarch64_neon_umaxp>;
@@ -3427,9 +3427,9 @@ defm RADDHN : SIMDNarrowThreeVectorBHS<1,0b0100,"raddhn",int_aarch64_neon_raddhn
defm RSUBHN : SIMDNarrowThreeVectorBHS<1,0b0110,"rsubhn",int_aarch64_neon_rsubhn>;
defm PMULL : SIMDDifferentThreeVectorBD<0,0b1110,"pmull",int_aarch64_neon_pmull>;
defm SABAL : SIMDLongThreeVectorTiedBHSabal<0,0b0101,"sabal",
- sabsdiff>;
+ int_aarch64_neon_sabd>;
defm SABDL : SIMDLongThreeVectorBHSabdl<0, 0b0111, "sabdl",
- sabsdiff>;
+ int_aarch64_neon_sabd>;
defm SADDL : SIMDLongThreeVectorBHS< 0, 0b0000, "saddl",
BinOpFrag<(add (sext node:$LHS), (sext node:$RHS))>>;
defm SADDW : SIMDWideThreeVectorBHS< 0, 0b0001, "saddw",
@@ -3450,7 +3450,7 @@ defm SSUBL : SIMDLongThreeVectorBHS<0, 0b0010, "ssubl",
defm SSUBW : SIMDWideThreeVectorBHS<0, 0b0011, "ssubw",
BinOpFrag<(sub node:$LHS, (sext node:$RHS))>>;
defm UABAL : SIMDLongThreeVectorTiedBHSabal<1, 0b0101, "uabal",
- uabsdiff>;
+ int_aarch64_neon_uabd>;
defm UADDL : SIMDLongThreeVectorBHS<1, 0b0000, "uaddl",
BinOpFrag<(add (zext node:$LHS), (zext node:$RHS))>>;
defm UADDW : SIMDWideThreeVectorBHS<1, 0b0001, "uaddw",
diff --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp
index cc9656aa0b4..fc32cf2ce4e 100644
--- a/llvm/lib/Target/ARM/ARMISelLowering.cpp
+++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp
@@ -143,15 +143,10 @@ void ARMTargetLowering::addTypeForNEON(MVT VT, MVT PromotedLdStVT,
setOperationAction(ISD::UREM, VT, Expand);
setOperationAction(ISD::FREM, VT, Expand);
- if (VT.isInteger()) {
- setOperationAction(ISD::SABSDIFF, VT, Legal);
- setOperationAction(ISD::UABSDIFF, VT, Legal);
- }
if (!VT.isFloatingPoint() &&
VT != MVT::v2i64 && VT != MVT::v1i64)
for (unsigned Opcode : {ISD::SMIN, ISD::SMAX, ISD::UMIN, ISD::UMAX})
setOperationAction(Opcode, VT, Legal);
-
}
void ARMTargetLowering::addDRTypeForNEON(MVT VT) {
@@ -10148,15 +10143,6 @@ static SDValue PerformIntrinsicCombine(SDNode *N, SelectionDAG &DAG) {
// Don't do anything for most intrinsics.
break;
- case Intrinsic::arm_neon_vabds:
- if (!N->getValueType(0).isInteger())
- return SDValue();
- return DAG.getNode(ISD::SABSDIFF, SDLoc(N), N->getValueType(0),
- N->getOperand(1), N->getOperand(2));
- case Intrinsic::arm_neon_vabdu:
- return DAG.getNode(ISD::UABSDIFF, SDLoc(N), N->getValueType(0),
- N->getOperand(1), N->getOperand(2));
-
// Vector shifts: check for immediate versions and lower them.
// Note: This is done during DAG combining instead of DAG legalizing because
// the build_vectors for 64-bit vector element shift counts are generally
diff --git a/llvm/lib/Target/ARM/ARMInstrNEON.td b/llvm/lib/Target/ARM/ARMInstrNEON.td
index af0552a0664..d43535b4e67 100644
--- a/llvm/lib/Target/ARM/ARMInstrNEON.td
+++ b/llvm/lib/Target/ARM/ARMInstrNEON.td
@@ -4994,10 +4994,10 @@ def VBITq : N3VX<1, 0, 0b10, 0b0001, 1, 1,
// VABD : Vector Absolute Difference
defm VABDs : N3VInt_QHS<0, 0, 0b0111, 0, N3RegFrm,
IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
- "vabd", "s", sabsdiff, 1>;
+ "vabd", "s", int_arm_neon_vabds, 1>;
defm VABDu : N3VInt_QHS<1, 0, 0b0111, 0, N3RegFrm,
IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
- "vabd", "u", uabsdiff, 1>;
+ "vabd", "u", int_arm_neon_vabdu, 1>;
def VABDfd : N3VDInt<1, 0, 0b10, 0b1101, 0, N3RegFrm, IIC_VBIND,
"vabd", "f32", v2f32, v2f32, int_arm_neon_vabds, 1>;
def VABDfq : N3VQInt<1, 0, 0b10, 0b1101, 0, N3RegFrm, IIC_VBINQ,
@@ -5005,9 +5005,9 @@ def VABDfq : N3VQInt<1, 0, 0b10, 0b1101, 0, N3RegFrm, IIC_VBINQ,
// VABDL : Vector Absolute Difference Long (Q = | D - D |)
defm VABDLs : N3VLIntExt_QHS<0,1,0b0111,0, IIC_VSUBi4Q,
- "vabdl", "s", sabsdiff, zext, 1>;
+ "vabdl", "s", int_arm_neon_vabds, zext, 1>;
defm VABDLu : N3VLIntExt_QHS<1,1,0b0111,0, IIC_VSUBi4Q,
- "vabdl", "u", uabsdiff, zext, 1>;
+ "vabdl", "u", int_arm_neon_vabdu, zext, 1>;
def abd_shr :
PatFrag<(ops node:$in1, node:$in2, node:$shift),
@@ -5034,15 +5034,15 @@ def : Pat<(xor (v4i32 (bitconvert (v2i64 (abd_shr (v2i32 DPR:$opA), (v2i32 DPR:$
// VABA : Vector Absolute Difference and Accumulate
defm VABAs : N3VIntOp_QHS<0,0,0b0111,1, IIC_VABAD, IIC_VABAQ,
- "vaba", "s", sabsdiff, add>;
+ "vaba", "s", int_arm_neon_vabds, add>;
defm VABAu : N3VIntOp_QHS<1,0,0b0111,1, IIC_VABAD, IIC_VABAQ,
- "vaba", "u", uabsdiff, add>;
+ "vaba", "u", int_arm_neon_vabdu, add>;
// VABAL : Vector Absolute Difference and Accumulate Long (Q += | D - D |)
defm VABALs : N3VLIntExtOp_QHS<0,1,0b0101,0, IIC_VABAD,
- "vabal", "s", sabsdiff, zext, add>;
+ "vabal", "s", int_arm_neon_vabds, zext, add>;
defm VABALu : N3VLIntExtOp_QHS<1,1,0b0101,0, IIC_VABAD,
- "vabal", "u", uabsdiff, zext, add>;
+ "vabal", "u", int_arm_neon_vabdu, zext, add>;
// Vector Maximum and Minimum.
diff --git a/llvm/test/CodeGen/X86/absdiff_128.ll b/llvm/test/CodeGen/X86/absdiff_128.ll
deleted file mode 100644
index 24055ccc79e..00000000000
--- a/llvm/test/CodeGen/X86/absdiff_128.ll
+++ /dev/null
@@ -1,181 +0,0 @@
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown | FileCheck %s
-
-declare <4 x i8> @llvm.uabsdiff.v4i8(<4 x i8>, <4 x i8>)
-
-define <4 x i8> @test_uabsdiff_v4i8_expand(<4 x i8> %a1, <4 x i8> %a2) {
-; CHECK-LABEL: test_uabsdiff_v4i8_expand
-; CHECK: pshufd
-; CHECK: movd
-; CHECK: subl
-; CHECK: punpckldq
-; CHECK-DAG: movd %xmm1, [[SRC:%.*]]
-; CHECK-DAG: movd %xmm0, [[DST:%.*]]
-; CHECK: subl [[SRC]], [[DST]]
-; CHECK: movd
-; CHECK: pshufd
-; CHECK: movd
-; CHECK: punpckldq
-; CHECK: movdqa
-; CHECK: retq
-
- %1 = call <4 x i8> @llvm.uabsdiff.v4i8(<4 x i8> %a1, <4 x i8> %a2)
- ret <4 x i8> %1
-}
-
-declare <4 x i8> @llvm.sabsdiff.v4i8(<4 x i8>, <4 x i8>)
-
-define <4 x i8> @test_sabsdiff_v4i8_expand(<4 x i8> %a1, <4 x i8> %a2) {
-; CHECK-LABEL: test_sabsdiff_v4i8_expand
-; CHECK: psubd
-; CHECK: pcmpgtd
-; CHECK: pcmpeqd
-; CHECK: pxor
-; CHECK-DAG: psubd {{%xmm[0-9]+}}, [[SRC1:%xmm[0-9]+]]
-; CHECK-DAG: pandn {{%xmm[0-9]+}}, [[SRC2:%xmm[0-9]+]]
-; CHECK-DAG: pandn [[SRC1]], [[DST:%xmm[0-9]+]]
-; CHECK: por [[SRC2]], [[DST]]
-; CHECK: retq
-
- %1 = call <4 x i8> @llvm.sabsdiff.v4i8(<4 x i8> %a1, <4 x i8> %a2)
- ret <4 x i8> %1
-}
-
-declare <8 x i8> @llvm.sabsdiff.v8i8(<8 x i8>, <8 x i8>)
-
-define <8 x i8> @test_sabsdiff_v8i8_expand(<8 x i8> %a1, <8 x i8> %a2) {
-; CHECK-LABEL: test_sabsdiff_v8i8_expand
-; CHECK: psubw
-; CHECK: pcmpgtw
-; CHECK: pcmpeqd
-; CHECK: pxor
-; CHECK-DAG: psubw {{%xmm[0-9]+}}, [[SRC1:%xmm[0-9]+]]
-; CHECK-DAG: pandn {{%xmm[0-9]+}}, [[SRC2:%xmm[0-9]+]]
-; CHECK-DAG: pandn [[SRC1]], [[DST:%xmm[0-9]+]]
-; CHECK: por [[SRC2]], [[DST]]
-; CHECK: retq
-
- %1 = call <8 x i8> @llvm.sabsdiff.v8i8(<8 x i8> %a1, <8 x i8> %a2)
- ret <8 x i8> %1
-}
-
-declare <16 x i8> @llvm.uabsdiff.v16i8(<16 x i8>, <16 x i8>)
-
-define <16 x i8> @test_uabsdiff_v16i8_expand(<16 x i8> %a1, <16 x i8> %a2) {
-; CHECK-LABEL: test_uabsdiff_v16i8_expand
-; CHECK: movd
-; CHECK: movzbl
-; CHECK: movzbl
-; CHECK: subl
-; CHECK: punpcklbw
-; CHECK: retq
-
- %1 = call <16 x i8> @llvm.uabsdiff.v16i8(<16 x i8> %a1, <16 x i8> %a2)
- ret <16 x i8> %1
-}
-
-declare <8 x i16> @llvm.uabsdiff.v8i16(<8 x i16>, <8 x i16>)
-
-define <8 x i16> @test_uabsdiff_v8i16_expand(<8 x i16> %a1, <8 x i16> %a2) {
-; CHECK-LABEL: test_uabsdiff_v8i16_expand
-; CHECK: pextrw
-; CHECK: pextrw
-; CHECK: subl
-; CHECK: punpcklwd
-; CHECK: retq
-
- %1 = call <8 x i16> @llvm.uabsdiff.v8i16(<8 x i16> %a1, <8 x i16> %a2)
- ret <8 x i16> %1
-}
-
-declare <8 x i16> @llvm.sabsdiff.v8i16(<8 x i16>, <8 x i16>)
-
-define <8 x i16> @test_sabsdiff_v8i16_expand(<8 x i16> %a1, <8 x i16> %a2) {
-; CHECK-LABEL: test_sabsdiff_v8i16_expand
-; CHECK: psubw
-; CHECK: pcmpgtw
-; CHECK: pcmpeqd
-; CHECK: pxor
-; CHECK-DAG: psubw {{%xmm[0-9]+}}, [[SRC1:%xmm[0-9]+]]
-; CHECK-DAG: pandn {{%xmm[0-9]+}}, [[SRC2:%xmm[0-9]+]]
-; CHECK-DAG: pandn [[SRC1]], [[DST:%xmm[0-9]+]]
-; CHECK: por [[SRC2]], [[DST]]
-; CHECK: retq
-
- %1 = call <8 x i16> @llvm.sabsdiff.v8i16(<8 x i16> %a1, <8 x i16> %a2)
- ret <8 x i16> %1
-}
-
-declare <4 x i32> @llvm.sabsdiff.v4i32(<4 x i32>, <4 x i32>)
-
-define <4 x i32> @test_sabsdiff_v4i32_expand(<4 x i32> %a1, <4 x i32> %a2) {
-; CHECK-LABEL: test_sabsdiff_v4i32_expand
-; CHECK: psubd
-; CHECK: pcmpgtd
-; CHECK: pcmpeqd
-; CHECK: pxor
-; CHECK-DAG: psubd {{%xmm[0-9]+}}, [[SRC1:%xmm[0-9]+]]
-; CHECK-DAG: pandn {{%xmm[0-9]+}}, [[SRC2:%xmm[0-9]+]]
-; CHECK-DAG: pandn [[SRC1]], [[DST:%xmm[0-9]+]]
-; CHECK: por [[SRC2]], [[DST]]
-; CHECK: retq
- %1 = call <4 x i32> @llvm.sabsdiff.v4i32(<4 x i32> %a1, <4 x i32> %a2)
- ret <4 x i32> %1
-}
-
-declare <4 x i32> @llvm.uabsdiff.v4i32(<4 x i32>, <4 x i32>)
-
-define <4 x i32> @test_uabsdiff_v4i32_expand(<4 x i32> %a1, <4 x i32> %a2) {
-; CHECK-LABEL: test_uabsdiff_v4i32_expand
-; CHECK: pshufd
-; CHECK: movd
-; CHECK: subl
-; CHECK: punpckldq
-; CHECK-DAG: movd %xmm1, [[SRC:%.*]]
-; CHECK-DAG: movd %xmm0, [[DST:%.*]]
-; CHECK: subl [[SRC]], [[DST]]
-; CHECK: movd
-; CHECK: pshufd
-; CHECK: movd
-; CHECK: punpckldq
-; CHECK: movdqa
-; CHECK: retq
-
- %1 = call <4 x i32> @llvm.uabsdiff.v4i32(<4 x i32> %a1, <4 x i32> %a2)
- ret <4 x i32> %1
-}
-
-declare <2 x i32> @llvm.sabsdiff.v2i32(<2 x i32>, <2 x i32>)
-
-define <2 x i32> @test_sabsdiff_v2i32_expand(<2 x i32> %a1, <2 x i32> %a2) {
-; CHECK-LABEL: test_sabsdiff_v2i32_expand
-; CHECK: psubq
-; CHECK: pcmpgtd
-; CHECK: pcmpeqd
-; CHECK: pxor
-; CHECK-DAG: psubq {{%xmm[0-9]+}}, [[SRC1:%xmm[0-9]+]]
-; CHECK-DAG: pandn {{%xmm[0-9]+}}, [[SRC2:%xmm[0-9]+]]
-; CHECK-DAG: pandn [[SRC1]], [[DST:%xmm[0-9]+]]
-; CHECK: por [[SRC2]], [[DST]]
-; CHECK: retq
-
- %1 = call <2 x i32> @llvm.sabsdiff.v2i32(<2 x i32> %a1, <2 x i32> %a2)
- ret <2 x i32> %1
-}
-
-declare <2 x i64> @llvm.sabsdiff.v2i64(<2 x i64>, <2 x i64>)
-
-define <2 x i64> @test_sabsdiff_v2i64_expand(<2 x i64> %a1, <2 x i64> %a2) {
-; CHECK-LABEL: test_sabsdiff_v2i64_expand
-; CHECK: psubq
-; CHECK: pcmpgtd
-; CHECK: pcmpeqd
-; CHECK: pxor
-; CHECK-DAG: psubq {{%xmm[0-9]+}}, [[SRC1:%xmm[0-9]+]]
-; CHECK-DAG: pandn {{%xmm[0-9]+}}, [[SRC2:%xmm[0-9]+]]
-; CHECK-DAG: pandn [[SRC1]], [[DST:%xmm[0-9]+]]
-; CHECK: por [[SRC2]], [[DST]]
-; CHECK: retq
-
- %1 = call <2 x i64> @llvm.sabsdiff.v2i64(<2 x i64> %a1, <2 x i64> %a2)
- ret <2 x i64> %1
-}
diff --git a/llvm/test/CodeGen/X86/absdiff_256.ll b/llvm/test/CodeGen/X86/absdiff_256.ll
deleted file mode 100644
index acc8a1fa51d..00000000000
--- a/llvm/test/CodeGen/X86/absdiff_256.ll
+++ /dev/null
@@ -1,29 +0,0 @@
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown | FileCheck %s
-
-declare <16 x i16> @llvm.sabsdiff.v16i16(<16 x i16>, <16 x i16>)
-
-define <16 x i16> @test_sabsdiff_v16i16_expand(<16 x i16> %a1, <16 x i16> %a2) {
-; CHECK-LABEL: test_sabsdiff_v16i16_expand:
-; CHECK: # BB#0:
-; CHECK: psubw
-; CHECK: pxor
-; CHECK: pcmpgtw
-; CHECK: movdqa
-; CHECK: pandn
-; CHECK: pxor
-; CHECK: psubw
-; CHECK: pcmpeqd
-; CHECK: pxor
-; CHECK: pandn
-; CHECK: por
-; CHECK: pcmpgtw
-; CHECK-DAG: psubw {{%xmm[0-9]+}}, [[SRC:%xmm[0-9]+]]
-; CHECK-DAG: pxor {{%xmm[0-9]+}}, [[DST:%xmm[0-9]+]]
-; CHECK: pandn [[SRC]], [[DST]]
-; CHECK: por
-; CHECK: movdqa
-; CHECK: retq
- %1 = call <16 x i16> @llvm.sabsdiff.v16i16(<16 x i16> %a1, <16 x i16> %a2)
- ret <16 x i16> %1
-}
-
OpenPOWER on IntegriCloud