summaryrefslogtreecommitdiffstats
path: root/llvm
diff options
context:
space:
mode:
Diffstat (limited to 'llvm')
-rw-r--r--llvm/docs/LangRef.rst59
-rw-r--r--llvm/include/llvm/CodeGen/ISDOpcodes.h4
-rw-r--r--llvm/include/llvm/IR/Intrinsics.td6
-rw-r--r--llvm/include/llvm/Target/TargetSelectionDAG.td2
-rw-r--r--llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp4
-rw-r--r--llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp28
-rw-r--r--llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp2
-rw-r--r--llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp12
-rw-r--r--llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp2
-rw-r--r--llvm/lib/CodeGen/TargetLoweringBase.cpp2
-rw-r--r--llvm/test/CodeGen/X86/absdiff_expand.ll242
11 files changed, 363 insertions, 0 deletions
diff --git a/llvm/docs/LangRef.rst b/llvm/docs/LangRef.rst
index e7d6f67c939..17ee4b32bc3 100644
--- a/llvm/docs/LangRef.rst
+++ b/llvm/docs/LangRef.rst
@@ -10328,6 +10328,65 @@ Examples:
%r2 = call float @llvm.fmuladd.f32(float %a, float %b, float %c) ; yields float:r2 = (a * b) + c
+
+'``llvm.uabsdiff.*``' and '``llvm.sabsdiff.*``' Intrinsics
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Syntax:
+"""""""
+This is an overloaded intrinsic. The result and both operands are vectors of any integer bit width.
+
+.. code-block:: llvm
+
+ declare <4 x integer> @llvm.uabsdiff.v4i32(<4 x integer> %a, <4 x integer> %b)
+
+
+Overview:
+"""""""""
+
+The ``llvm.uabsdiff`` intrinsic returns a vector result of the absolute difference of the two operands,
+treating them both as unsigned integers.
+
+The ``llvm.sabsdiff`` intrinsic returns a vector result of the absolute difference of the two operands,
+treating them both as signed integers.
+
+.. note::
+
+  These intrinsics are primarily used during the code generation stage of compilation.
+  They are generated by compiler passes such as the Loop and SLP vectorizers. It is not
+  recommended for users to create them manually.
+
+Arguments:
+""""""""""
+
+Both intrinsics take two integer vector arguments of the same bit width.
+
+Semantics:
+""""""""""
+
+The expression::
+
+ call <4 x i32> @llvm.uabsdiff.v4i32(<4 x i32> %a, <4 x i32> %b)
+
+is equivalent to::
+
+ %sub = sub <4 x i32> %a, %b
+  %ispos = icmp uge <4 x i32> %a, %b
+ %neg = sub <4 x i32> zeroinitializer, %sub
+ %1 = select <4 x i1> %ispos, <4 x i32> %sub, <4 x i32> %neg
+
+Similarly the expression::
+
+ call <4 x i32> @llvm.sabsdiff.v4i32(<4 x i32> %a, <4 x i32> %b)
+
+is equivalent to::
+
+ %sub = sub nsw <4 x i32> %a, %b
+ %ispos = icmp sgt <4 x i32> %sub, <i32 -1, i32 -1, i32 -1, i32 -1>
+ %neg = sub nsw <4 x i32> zeroinitializer, %sub
+ %1 = select <4 x i1> %ispos, <4 x i32> %sub, <4 x i32> %neg
+
+
Half Precision Floating Point Intrinsics
----------------------------------------
diff --git a/llvm/include/llvm/CodeGen/ISDOpcodes.h b/llvm/include/llvm/CodeGen/ISDOpcodes.h
index fa44301a2d4..8a4b779f03a 100644
--- a/llvm/include/llvm/CodeGen/ISDOpcodes.h
+++ b/llvm/include/llvm/CodeGen/ISDOpcodes.h
@@ -334,6 +334,10 @@ namespace ISD {
/// Byte Swap and Counting operators.
BSWAP, CTTZ, CTLZ, CTPOP,
+  /// [SU]ABSDIFF - Signed/Unsigned absolute difference of two input integer
+  /// vectors. These nodes are generated from llvm.*absdiff* intrinsics.
+ SABSDIFF, UABSDIFF,
+
/// Bit counting operators with an undefined result for zero inputs.
CTTZ_ZERO_UNDEF, CTLZ_ZERO_UNDEF,
diff --git a/llvm/include/llvm/IR/Intrinsics.td b/llvm/include/llvm/IR/Intrinsics.td
index bbae720b4e1..af312be186c 100644
--- a/llvm/include/llvm/IR/Intrinsics.td
+++ b/llvm/include/llvm/IR/Intrinsics.td
@@ -605,6 +605,12 @@ def int_convertuu : Intrinsic<[llvm_anyint_ty],
def int_clear_cache : Intrinsic<[], [llvm_ptr_ty, llvm_ptr_ty],
[], "llvm.clear_cache">;
+// Calculate the Absolute Differences of the two input vectors.
+def int_sabsdiff : Intrinsic<[llvm_anyvector_ty],
+ [ LLVMMatchType<0>, LLVMMatchType<0> ], [IntrNoMem]>;
+def int_uabsdiff : Intrinsic<[llvm_anyvector_ty],
+ [ LLVMMatchType<0>, LLVMMatchType<0> ], [IntrNoMem]>;
+
//===-------------------------- Masked Intrinsics -------------------------===//
//
def int_masked_store : Intrinsic<[], [llvm_anyvector_ty, LLVMPointerTo<0>,
diff --git a/llvm/include/llvm/Target/TargetSelectionDAG.td b/llvm/include/llvm/Target/TargetSelectionDAG.td
index 4abbe379399..6c7eef14715 100644
--- a/llvm/include/llvm/Target/TargetSelectionDAG.td
+++ b/llvm/include/llvm/Target/TargetSelectionDAG.td
@@ -386,6 +386,8 @@ def smax : SDNode<"ISD::SMAX" , SDTIntBinOp>;
def umin : SDNode<"ISD::UMIN" , SDTIntBinOp>;
def umax : SDNode<"ISD::UMAX" , SDTIntBinOp>;
+def sabsdiff : SDNode<"ISD::SABSDIFF" , SDTIntBinOp>;
+def uabsdiff : SDNode<"ISD::UABSDIFF" , SDTIntBinOp>;
def sext_inreg : SDNode<"ISD::SIGN_EXTEND_INREG", SDTExtInreg>;
def bswap : SDNode<"ISD::BSWAP" , SDTIntUnaryOp>;
def ctlz : SDNode<"ISD::CTLZ" , SDTIntUnaryOp>;
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
index 9f060a09a0f..511239ce477 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
@@ -146,6 +146,10 @@ void DAGTypeLegalizer::PromoteIntegerResult(SDNode *N, unsigned ResNo) {
case ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS:
Res = PromoteIntRes_AtomicCmpSwap(cast<AtomicSDNode>(N), ResNo);
break;
+ case ISD::UABSDIFF:
+ case ISD::SABSDIFF:
+ Res = PromoteIntRes_SimpleIntBinOp(N);
+ break;
}
// If the result is null then the sub-method took care of registering it.
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
index 83d4ad5ea1f..0f25a610724 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
@@ -105,6 +105,7 @@ class VectorLegalizer {
SDValue ExpandLoad(SDValue Op);
SDValue ExpandStore(SDValue Op);
SDValue ExpandFNEG(SDValue Op);
+ SDValue ExpandABSDIFF(SDValue Op);
/// \brief Implements vector promotion.
///
@@ -326,6 +327,8 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) {
case ISD::SMAX:
case ISD::UMIN:
case ISD::UMAX:
+ case ISD::UABSDIFF:
+ case ISD::SABSDIFF:
QueryType = Node->getValueType(0);
break;
case ISD::FP_ROUND_INREG:
@@ -708,11 +711,36 @@ SDValue VectorLegalizer::Expand(SDValue Op) {
return ExpandFNEG(Op);
case ISD::SETCC:
return UnrollVSETCC(Op);
+ case ISD::UABSDIFF:
+ case ISD::SABSDIFF:
+ return ExpandABSDIFF(Op);
default:
return DAG.UnrollVectorOp(Op.getNode());
}
}
+SDValue VectorLegalizer::ExpandABSDIFF(SDValue Op) {
+ SDLoc dl(Op);
+ SDValue Tmp1, Tmp2, Tmp3, Tmp4;
+ EVT VT = Op.getValueType();
+ SDNodeFlags Flags;
+ Flags.setNoSignedWrap(Op->getOpcode() == ISD::SABSDIFF);
+
+ Tmp2 = Op.getOperand(0);
+ Tmp3 = Op.getOperand(1);
+ Tmp1 = DAG.getNode(ISD::SUB, dl, VT, Tmp2, Tmp3, &Flags);
+ Tmp2 =
+ DAG.getNode(ISD::SUB, dl, VT, DAG.getConstant(0, dl, VT), Tmp1, &Flags);
+ Tmp4 = DAG.getNode(
+ ISD::SETCC, dl,
+ TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT), Tmp2,
+ DAG.getConstant(0, dl, VT),
+ DAG.getCondCode(Op->getOpcode() == ISD::SABSDIFF ? ISD::SETLT
+ : ISD::SETULT));
+ Tmp1 = DAG.getNode(ISD::VSELECT, dl, VT, Tmp4, Tmp1, Tmp2);
+ return Tmp1;
+}
+
SDValue VectorLegalizer::ExpandSELECT(SDValue Op) {
// Lower a select instruction where the condition is a scalar and the
// operands are vectors. Lower this select to VSELECT and implement it
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
index 4348ab79f7d..5f9afc9cfc5 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
@@ -678,6 +678,8 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) {
case ISD::SMAX:
case ISD::UMIN:
case ISD::UMAX:
+ case ISD::UABSDIFF:
+ case ISD::SABSDIFF:
SplitVecRes_BinOp(N, Lo, Hi);
break;
case ISD::FMA:
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
index 5c8db914845..73de6e3cfbd 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -4646,6 +4646,18 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
getValue(I.getArgOperand(0)).getValueType(),
getValue(I.getArgOperand(0))));
return nullptr;
+ case Intrinsic::uabsdiff:
+ setValue(&I, DAG.getNode(ISD::UABSDIFF, sdl,
+ getValue(I.getArgOperand(0)).getValueType(),
+ getValue(I.getArgOperand(0)),
+ getValue(I.getArgOperand(1))));
+ return nullptr;
+ case Intrinsic::sabsdiff:
+ setValue(&I, DAG.getNode(ISD::SABSDIFF, sdl,
+ getValue(I.getArgOperand(0)).getValueType(),
+ getValue(I.getArgOperand(0)),
+ getValue(I.getArgOperand(1))));
+ return nullptr;
case Intrinsic::cttz: {
SDValue Arg = getValue(I.getArgOperand(0));
ConstantInt *CI = cast<ConstantInt>(I.getArgOperand(1));
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
index 5b9b18286fa..8dabddc642b 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
@@ -225,6 +225,8 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const {
case ISD::SHL_PARTS: return "shl_parts";
case ISD::SRA_PARTS: return "sra_parts";
case ISD::SRL_PARTS: return "srl_parts";
+ case ISD::UABSDIFF: return "uabsdiff";
+ case ISD::SABSDIFF: return "sabsdiff";
// Conversion operators.
case ISD::SIGN_EXTEND: return "sign_extend";
diff --git a/llvm/lib/CodeGen/TargetLoweringBase.cpp b/llvm/lib/CodeGen/TargetLoweringBase.cpp
index 50240bf7046..e6d07f5134b 100644
--- a/llvm/lib/CodeGen/TargetLoweringBase.cpp
+++ b/llvm/lib/CodeGen/TargetLoweringBase.cpp
@@ -827,6 +827,8 @@ void TargetLoweringBase::initActions() {
setOperationAction(ISD::USUBO, VT, Expand);
setOperationAction(ISD::SMULO, VT, Expand);
setOperationAction(ISD::UMULO, VT, Expand);
+ setOperationAction(ISD::UABSDIFF, VT, Expand);
+ setOperationAction(ISD::SABSDIFF, VT, Expand);
// These library functions default to expand.
setOperationAction(ISD::FROUND, VT, Expand);
diff --git a/llvm/test/CodeGen/X86/absdiff_expand.ll b/llvm/test/CodeGen/X86/absdiff_expand.ll
new file mode 100644
index 00000000000..8ba87274d9b
--- /dev/null
+++ b/llvm/test/CodeGen/X86/absdiff_expand.ll
@@ -0,0 +1,242 @@
+; RUN: llc -mtriple=x86_64-unknown-linux-gnu < %s | FileCheck %s -check-prefix=CHECK
+
+declare <4 x i8> @llvm.uabsdiff.v4i8(<4 x i8>, <4 x i8>)
+
+define <4 x i8> @test_uabsdiff_v4i8_expand(<4 x i8> %a1, <4 x i8> %a2) {
+; CHECK-LABEL: test_uabsdiff_v4i8_expand
+; CHECK: psubd %xmm1, %xmm0
+; CHECK-NEXT: pxor %xmm1, %xmm1
+; CHECK-NEXT: psubd %xmm0, %xmm1
+; CHECK-NEXT: movdqa .LCPI{{[0-9_]*}}
+; CHECK-NEXT: movdqa %xmm1, %xmm3
+; CHECK-NEXT: pxor %xmm2, %xmm3
+; CHECK-NEXT: pcmpgtd %xmm3, %xmm2
+; CHECK-NEXT: pand %xmm2, %xmm0
+; CHECK-NEXT: pandn %xmm1, %xmm2
+; CHECK-NEXT: por %xmm2, %xmm0
+; CHECK-NEXT: retq
+
+ %1 = call <4 x i8> @llvm.uabsdiff.v4i8(<4 x i8> %a1, <4 x i8> %a2)
+ ret <4 x i8> %1
+}
+
+declare <4 x i8> @llvm.sabsdiff.v4i8(<4 x i8>, <4 x i8>)
+
+define <4 x i8> @test_sabsdiff_v4i8_expand(<4 x i8> %a1, <4 x i8> %a2) {
+; CHECK-LABEL: test_sabsdiff_v4i8_expand
+; CHECK: psubd %xmm1, %xmm0
+; CHECK-NEXT: pxor %xmm1, %xmm1
+; CHECK-NEXT: pxor %xmm2, %xmm2
+; CHECK-NEXT: psubd %xmm0, %xmm2
+; CHECK-NEXT: pcmpgtd %xmm2, %xmm1
+; CHECK-NEXT: pand %xmm1, %xmm0
+; CHECK-NEXT: pandn %xmm2, %xmm1
+; CHECK-NEXT: por %xmm1, %xmm0
+; CHECK-NEXT: retq
+
+ %1 = call <4 x i8> @llvm.sabsdiff.v4i8(<4 x i8> %a1, <4 x i8> %a2)
+ ret <4 x i8> %1
+}
+
+
+declare <8 x i8> @llvm.sabsdiff.v8i8(<8 x i8>, <8 x i8>)
+
+define <8 x i8> @test_sabsdiff_v8i8_expand(<8 x i8> %a1, <8 x i8> %a2) {
+; CHECK-LABEL: test_sabsdiff_v8i8_expand
+; CHECK: psubw %xmm1, %xmm0
+; CHECK-NEXT: pxor %xmm1, %xmm1
+; CHECK-NEXT: pxor %xmm2, %xmm2
+; CHECK-NEXT: psubw %xmm0, %xmm2
+; CHECK-NEXT: pcmpgtw %xmm2, %xmm1
+; CHECK-NEXT: pand %xmm1, %xmm0
+; CHECK-NEXT: pandn %xmm2, %xmm1
+; CHECK-NEXT: por %xmm1, %xmm0
+; CHECK-NEXT: retq
+ %1 = call <8 x i8> @llvm.sabsdiff.v8i8(<8 x i8> %a1, <8 x i8> %a2)
+ ret <8 x i8> %1
+}
+
+declare <16 x i8> @llvm.uabsdiff.v16i8(<16 x i8>, <16 x i8>)
+
+define <16 x i8> @test_uabsdiff_v16i8_expand(<16 x i8> %a1, <16 x i8> %a2) {
+; CHECK-LABEL: test_uabsdiff_v16i8_expand
+; CHECK: psubb %xmm1, %xmm0
+; CHECK-NEXT: pxor %xmm1, %xmm1
+; CHECK-NEXT: psubb %xmm0, %xmm1
+; CHECK-NEXT: movdqa .LCPI{{[0-9_]*}}
+; CHECK-NEXT: movdqa %xmm1, %xmm3
+; CHECK-NEXT: pxor %xmm2, %xmm3
+; CHECK-NEXT: pcmpgtb %xmm3, %xmm2
+; CHECK-NEXT: pand %xmm2, %xmm0
+; CHECK-NEXT: pandn %xmm1, %xmm2
+; CHECK-NEXT: por %xmm2, %xmm0
+; CHECK-NEXT: retq
+ %1 = call <16 x i8> @llvm.uabsdiff.v16i8(<16 x i8> %a1, <16 x i8> %a2)
+ ret <16 x i8> %1
+}
+
+declare <8 x i16> @llvm.uabsdiff.v8i16(<8 x i16>, <8 x i16>)
+
+define <8 x i16> @test_uabsdiff_v8i16_expand(<8 x i16> %a1, <8 x i16> %a2) {
+; CHECK-LABEL: test_uabsdiff_v8i16_expand
+; CHECK: psubw %xmm1, %xmm0
+; CHECK-NEXT: pxor %xmm1, %xmm1
+; CHECK-NEXT: psubw %xmm0, %xmm1
+; CHECK-NEXT: movdqa .LCPI{{[0-9_]*}}
+; CHECK-NEXT: movdqa %xmm1, %xmm3
+; CHECK-NEXT: pxor %xmm2, %xmm3
+; CHECK-NEXT: pcmpgtw %xmm3, %xmm2
+; CHECK-NEXT: pand %xmm2, %xmm0
+; CHECK-NEXT: pandn %xmm1, %xmm2
+; CHECK-NEXT: por %xmm2, %xmm0
+; CHECK-NEXT: retq
+ %1 = call <8 x i16> @llvm.uabsdiff.v8i16(<8 x i16> %a1, <8 x i16> %a2)
+ ret <8 x i16> %1
+}
+
+declare <8 x i16> @llvm.sabsdiff.v8i16(<8 x i16>, <8 x i16>)
+
+define <8 x i16> @test_sabsdiff_v8i16_expand(<8 x i16> %a1, <8 x i16> %a2) {
+; CHECK-LABEL: test_sabsdiff_v8i16_expand
+; CHECK: psubw %xmm1, %xmm0
+; CHECK-NEXT: pxor %xmm1, %xmm1
+; CHECK-NEXT: pxor %xmm2, %xmm2
+; CHECK-NEXT: psubw %xmm0, %xmm2
+; CHECK-NEXT: pcmpgtw %xmm2, %xmm1
+; CHECK-NEXT: pand %xmm1, %xmm0
+; CHECK-NEXT: pandn %xmm2, %xmm1
+; CHECK-NEXT: por %xmm1, %xmm0
+; CHECK-NEXT: retq
+ %1 = call <8 x i16> @llvm.sabsdiff.v8i16(<8 x i16> %a1, <8 x i16> %a2)
+ ret <8 x i16> %1
+}
+
+declare <4 x i32> @llvm.sabsdiff.v4i32(<4 x i32>, <4 x i32>)
+
+define <4 x i32> @test_sabsdiff_v4i32_expand(<4 x i32> %a1, <4 x i32> %a2) {
+; CHECK-LABEL: test_sabsdiff_v4i32_expand
+; CHECK: psubd %xmm1, %xmm0
+; CHECK-NEXT: pxor %xmm1, %xmm1
+; CHECK-NEXT: pxor %xmm2, %xmm2
+; CHECK-NEXT: psubd %xmm0, %xmm2
+; CHECK-NEXT: pcmpgtd %xmm2, %xmm1
+; CHECK-NEXT: pand %xmm1, %xmm0
+; CHECK-NEXT: pandn %xmm2, %xmm1
+; CHECK-NEXT: por %xmm1, %xmm0
+; CHECK-NEXT: retq
+ %1 = call <4 x i32> @llvm.sabsdiff.v4i32(<4 x i32> %a1, <4 x i32> %a2)
+ ret <4 x i32> %1
+}
+
+declare <4 x i32> @llvm.uabsdiff.v4i32(<4 x i32>, <4 x i32>)
+
+define <4 x i32> @test_uabsdiff_v4i32_expand(<4 x i32> %a1, <4 x i32> %a2) {
+; CHECK-LABEL: test_uabsdiff_v4i32_expand
+; CHECK: psubd %xmm1, %xmm0
+; CHECK-NEXT: pxor %xmm1, %xmm1
+; CHECK-NEXT: psubd %xmm0, %xmm1
+; CHECK-NEXT: movdqa .LCPI{{[0-9_]*}}
+; CHECK-NEXT: movdqa %xmm1, %xmm3
+; CHECK-NEXT: pxor %xmm2, %xmm3
+; CHECK-NEXT: pcmpgtd %xmm3, %xmm2
+; CHECK-NEXT: pand %xmm2, %xmm0
+; CHECK-NEXT: pandn %xmm1, %xmm2
+; CHECK-NEXT: por %xmm2, %xmm0
+; CHECK-NEXT: retq
+ %1 = call <4 x i32> @llvm.uabsdiff.v4i32(<4 x i32> %a1, <4 x i32> %a2)
+ ret <4 x i32> %1
+}
+
+declare <2 x i32> @llvm.sabsdiff.v2i32(<2 x i32>, <2 x i32>)
+
+define <2 x i32> @test_sabsdiff_v2i32_expand(<2 x i32> %a1, <2 x i32> %a2) {
+; CHECK-LABEL: test_sabsdiff_v2i32_expand
+; CHECK: psubq %xmm1, %xmm0
+; CHECK-NEXT: pxor %xmm1, %xmm1
+; CHECK-NEXT: psubq %xmm0, %xmm1
+; CHECK-NEXT: movdqa .LCPI{{[0-9_]*}}
+; CHECK-NEXT: movdqa %xmm1, %xmm3
+; CHECK-NEXT: pxor %xmm2, %xmm3
+; CHECK-NEXT: movdqa %xmm2, %xmm4
+; CHECK-NEXT: pcmpgtd %xmm3, %xmm4
+; CHECK-NEXT: pshufd $160, %xmm4, %xmm5 # xmm5 = xmm4[0,0,2,2]
+; CHECK-NEXT: pcmpeqd %xmm2, %xmm3
+; CHECK-NEXT: pshufd $245, %xmm3, %xmm2 # xmm2 = xmm3[1,1,3,3]
+; CHECK-NEXT: pand %xmm5, %xmm2
+; CHECK-NEXT: pshufd $245, %xmm4, %xmm3 # xmm3 = xmm4[1,1,3,3]
+; CHECK-NEXT: por %xmm2, %xmm3
+; CHECK-NEXT: pand %xmm3, %xmm0
+; CHECK-NEXT: pandn %xmm1, %xmm3
+; CHECK-NEXT: por %xmm3, %xmm0
+; CHECK-NEXT: retq
+ %1 = call <2 x i32> @llvm.sabsdiff.v2i32(<2 x i32> %a1, <2 x i32> %a2)
+ ret <2 x i32> %1
+}
+
+declare <2 x i64> @llvm.sabsdiff.v2i64(<2 x i64>, <2 x i64>)
+
+define <2 x i64> @test_sabsdiff_v2i64_expand(<2 x i64> %a1, <2 x i64> %a2) {
+; CHECK-LABEL: test_sabsdiff_v2i64_expand
+; CHECK: psubq %xmm1, %xmm0
+; CHECK-NEXT: pxor %xmm1, %xmm1
+; CHECK-NEXT: psubq %xmm0, %xmm1
+; CHECK-NEXT: movdqa .LCPI{{[0-9_]*}}
+; CHECK-NEXT: movdqa %xmm1, %xmm3
+; CHECK-NEXT: pxor %xmm2, %xmm3
+; CHECK-NEXT: movdqa %xmm2, %xmm4
+; CHECK-NEXT: pcmpgtd %xmm3, %xmm4
+; CHECK-NEXT: pshufd $160, %xmm4, %xmm5 # xmm5 = xmm4[0,0,2,2]
+; CHECK-NEXT: pcmpeqd %xmm2, %xmm3
+; CHECK-NEXT: pshufd $245, %xmm3, %xmm2 # xmm2 = xmm3[1,1,3,3]
+; CHECK-NEXT: pand %xmm5, %xmm2
+; CHECK-NEXT: pshufd $245, %xmm4, %xmm3 # xmm3 = xmm4[1,1,3,3]
+; CHECK-NEXT: por %xmm2, %xmm3
+; CHECK-NEXT: pand %xmm3, %xmm0
+; CHECK-NEXT: pandn %xmm1, %xmm3
+; CHECK-NEXT: por %xmm3, %xmm0
+; CHECK-NEXT: retq
+ %1 = call <2 x i64> @llvm.sabsdiff.v2i64(<2 x i64> %a1, <2 x i64> %a2)
+ ret <2 x i64> %1
+}
+
+declare <16 x i32> @llvm.sabsdiff.v16i32(<16 x i32>, <16 x i32>)
+
+define <16 x i32> @test_sabsdiff_v16i32_expand(<16 x i32> %a1, <16 x i32> %a2) {
+; CHECK-LABEL: test_sabsdiff_v16i32_expand
+; CHECK: psubd %xmm4, %xmm0
+; CHECK-NEXT: pxor %xmm8, %xmm8
+; CHECK-NEXT: pxor %xmm9, %xmm9
+; CHECK-NEXT: psubd %xmm0, %xmm9
+; CHECK-NEXT: pxor %xmm4, %xmm4
+; CHECK-NEXT: pcmpgtd %xmm9, %xmm4
+; CHECK-NEXT: pand %xmm4, %xmm0
+; CHECK-NEXT: pandn %xmm9, %xmm4
+; CHECK-NEXT: por %xmm4, %xmm0
+; CHECK-NEXT: psubd %xmm5, %xmm1
+; CHECK-NEXT: pxor %xmm4, %xmm4
+; CHECK-NEXT: psubd %xmm1, %xmm4
+; CHECK-NEXT: pxor %xmm5, %xmm5
+; CHECK-NEXT: pcmpgtd %xmm4, %xmm5
+; CHECK-NEXT: pand %xmm5, %xmm1
+; CHECK-NEXT: pandn %xmm4, %xmm5
+; CHECK-NEXT: por %xmm5, %xmm1
+; CHECK-NEXT: psubd %xmm6, %xmm2
+; CHECK-NEXT: pxor %xmm4, %xmm4
+; CHECK-NEXT: psubd %xmm2, %xmm4
+; CHECK-NEXT: pxor %xmm5, %xmm5
+; CHECK-NEXT: pcmpgtd %xmm4, %xmm5
+; CHECK-NEXT: pand %xmm5, %xmm2
+; CHECK-NEXT: pandn %xmm4, %xmm5
+; CHECK-NEXT: por %xmm5, %xmm2
+; CHECK-NEXT: psubd %xmm7, %xmm3
+; CHECK-NEXT: pxor %xmm4, %xmm4
+; CHECK-NEXT: psubd %xmm3, %xmm4
+; CHECK-NEXT: pcmpgtd %xmm4, %xmm8
+; CHECK-NEXT: pand %xmm8, %xmm3
+; CHECK-NEXT: pandn %xmm4, %xmm8
+; CHECK-NEXT: por %xmm8, %xmm3
+; CHECK-NEXT: retq
+ %1 = call <16 x i32> @llvm.sabsdiff.v16i32(<16 x i32> %a1, <16 x i32> %a2)
+ ret <16 x i32> %1
+}
+
OpenPOWER on IntegriCloud