summaryrefslogtreecommitdiffstats
path: root/llvm
diff options
context:
space:
mode:
authorSimon Pilgrim <llvm-dev@redking.me.uk>2017-03-14 21:26:58 +0000
committerSimon Pilgrim <llvm-dev@redking.me.uk>2017-03-14 21:26:58 +0000
commitcf2da96c82e6488d382ba320b4749e1c5d6c62f8 (patch)
tree796f1b05477b7113e50c5e862d0af1deadd7786a /llvm
parent5957a1963056e7fbbf978c0519f1fa4ac94767ba (diff)
downloadbcm5719-llvm-cf2da96c82e6488d382ba320b4749e1c5d6c62f8.tar.gz
bcm5719-llvm-cf2da96c82e6488d382ba320b4749e1c5d6c62f8.zip
[SelectionDAG] Add a signed integer absolute ISD node
Reduced version of D26357 - based on the discussion on llvm-dev about canonicalization of UMIN/UMAX/SMIN/SMAX as well as ABS I've reduced that patch to just the ABS ISD node (with x86/sse support) to improve basic combines and lowering. ARM/AArch64, Hexagon, PowerPC and NVPTX all have similar instructions allowing us to make this a generic opcode and move away from the hard coded tablegen patterns which makes it tricky to match more complex patterns. At the moment this patch doesn't attempt legalization as we only create an ABS node if its legal/custom. Differential Revision: https://reviews.llvm.org/D29639 llvm-svn: 297780
Diffstat (limited to 'llvm')
-rw-r--r--llvm/include/llvm/CodeGen/ISDOpcodes.h6
-rw-r--r--llvm/include/llvm/Target/TargetSelectionDAG.td1
-rw-r--r--llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp29
-rw-r--r--llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp10
-rw-r--r--llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp1
-rw-r--r--llvm/lib/CodeGen/TargetLoweringBase.cpp1
-rw-r--r--llvm/lib/Target/X86/X86ISelLowering.cpp29
-rw-r--r--llvm/lib/Target/X86/X86ISelLowering.h3
-rw-r--r--llvm/lib/Target/X86/X86InstrAVX512.td61
-rw-r--r--llvm/lib/Target/X86/X86InstrFragmentsSIMD.td1
-rw-r--r--llvm/lib/Target/X86/X86InstrSSE.td78
-rw-r--r--llvm/lib/Target/X86/X86IntrinsicsInfo.h36
-rw-r--r--llvm/test/CodeGen/X86/combine-abs.ll8
-rw-r--r--llvm/test/CodeGen/X86/viabs.ll142
14 files changed, 152 insertions, 254 deletions
diff --git a/llvm/include/llvm/CodeGen/ISDOpcodes.h b/llvm/include/llvm/CodeGen/ISDOpcodes.h
index f943e48a329..d595d43af50 100644
--- a/llvm/include/llvm/CodeGen/ISDOpcodes.h
+++ b/llvm/include/llvm/CodeGen/ISDOpcodes.h
@@ -339,6 +339,12 @@ namespace ISD {
/// Bitwise operators - logical and, logical or, logical xor.
AND, OR, XOR,
+ /// ABS - Determine the unsigned absolute value of a signed integer value of
+ /// the same bitwidth.
+ /// Note: A value of INT_MIN will return INT_MIN, no saturation or overflow
+ /// is performed.
+ ABS,
+
/// Shift and rotation operations. After legalization, the type of the
/// shift amount is known to be TLI.getShiftAmountTy(). Before legalization
/// the shift amount can be any type, but care must be taken to ensure it is
diff --git a/llvm/include/llvm/Target/TargetSelectionDAG.td b/llvm/include/llvm/Target/TargetSelectionDAG.td
index 688857081fd..45a842f77a2 100644
--- a/llvm/include/llvm/Target/TargetSelectionDAG.td
+++ b/llvm/include/llvm/Target/TargetSelectionDAG.td
@@ -413,6 +413,7 @@ def sext_inreg : SDNode<"ISD::SIGN_EXTEND_INREG", SDTExtInreg>;
def sext_invec : SDNode<"ISD::SIGN_EXTEND_VECTOR_INREG", SDTExtInvec>;
def zext_invec : SDNode<"ISD::ZERO_EXTEND_VECTOR_INREG", SDTExtInvec>;
+def abs : SDNode<"ISD::ABS" , SDTIntUnaryOp>;
def bitreverse : SDNode<"ISD::BITREVERSE" , SDTIntUnaryOp>;
def bswap : SDNode<"ISD::BSWAP" , SDTIntUnaryOp>;
def ctlz : SDNode<"ISD::CTLZ" , SDTIntUnaryOp>;
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 788b52dabe9..0fe1edad7fc 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -262,6 +262,7 @@ namespace {
SDValue visitSRA(SDNode *N);
SDValue visitSRL(SDNode *N);
SDValue visitRotate(SDNode *N);
+ SDValue visitABS(SDNode *N);
SDValue visitBSWAP(SDNode *N);
SDValue visitBITREVERSE(SDNode *N);
SDValue visitCTLZ(SDNode *N);
@@ -1423,6 +1424,7 @@ SDValue DAGCombiner::visit(SDNode *N) {
case ISD::SRL: return visitSRL(N);
case ISD::ROTR:
case ISD::ROTL: return visitRotate(N);
+ case ISD::ABS: return visitABS(N);
case ISD::BSWAP: return visitBSWAP(N);
case ISD::BITREVERSE: return visitBITREVERSE(N);
case ISD::CTLZ: return visitCTLZ(N);
@@ -5004,6 +5006,17 @@ SDValue DAGCombiner::visitXOR(SDNode *N) {
N01C->getAPIntValue(), DL, VT));
}
}
+
+ // fold Y = sra (X, size(X)-1); xor (add (X, Y), Y) -> (abs X)
+ unsigned OpSizeInBits = VT.getScalarSizeInBits();
+ if (N0.getOpcode() == ISD::ADD && N0.getOperand(1) == N1 &&
+ N1.getOpcode() == ISD::SRA && N1.getOperand(0) == N0.getOperand(0) &&
+ TLI.isOperationLegalOrCustom(ISD::ABS, VT)) {
+ if (ConstantSDNode *C = isConstOrConstSplat(N1.getOperand(1)))
+ if (C->getAPIntValue() == (OpSizeInBits - 1))
+ return DAG.getNode(ISD::ABS, SDLoc(N), VT, N0.getOperand(0));
+ }
+
// fold (xor x, x) -> 0
if (N0 == N1)
return tryFoldToZero(SDLoc(N), TLI, VT, DAG, LegalOperations, LegalTypes);
@@ -5746,6 +5759,22 @@ SDValue DAGCombiner::visitSRL(SDNode *N) {
return SDValue();
}
+SDValue DAGCombiner::visitABS(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ EVT VT = N->getValueType(0);
+
+ // fold (abs c1) -> c2
+ if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
+ return DAG.getNode(ISD::ABS, SDLoc(N), VT, N0);
+ // fold (abs (abs x)) -> (abs x)
+ if (N0.getOpcode() == ISD::ABS)
+ return N0;
+ // fold (abs x) -> x iff not-negative
+ if (DAG.SignBitIsZero(N0))
+ return N0;
+ return SDValue();
+}
+
SDValue DAGCombiner::visitBSWAP(SDNode *N) {
SDValue N0 = N->getOperand(0);
EVT VT = N->getValueType(0);
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index 4f33eef7513..aed422ac86b 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -3336,6 +3336,9 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
if (VT == MVT::f128 && C->getValueType(0) == MVT::i128)
return getConstantFP(APFloat(APFloat::IEEEquad(), Val), DL, VT);
break;
+ case ISD::ABS:
+ return getConstant(Val.abs(), DL, VT, C->isTargetOpcode(),
+ C->isOpaque());
case ISD::BITREVERSE:
return getConstant(Val.reverseBits(), DL, VT, C->isTargetOpcode(),
C->isOpaque());
@@ -3455,6 +3458,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
case ISD::TRUNCATE:
case ISD::UINT_TO_FP:
case ISD::SINT_TO_FP:
+ case ISD::ABS:
case ISD::BITREVERSE:
case ISD::BSWAP:
case ISD::CTLZ:
@@ -3572,6 +3576,12 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
}
if (OpOpcode == ISD::UNDEF)
return getUNDEF(VT);
+ break;
+ case ISD::ABS:
+ assert(VT.isInteger() && VT == Operand.getValueType() &&
+ "Invalid ABS!");
+ if (OpOpcode == ISD::UNDEF)
+ return getUNDEF(VT);
break;
case ISD::BSWAP:
assert(VT.isInteger() && VT == Operand.getValueType() &&
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
index ba9ec68f1fe..488c60a28ff 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
@@ -300,6 +300,7 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const {
case ISD::GET_DYNAMIC_AREA_OFFSET: return "get.dynamic.area.offset";
// Bit manipulation
+ case ISD::ABS: return "abs";
case ISD::BITREVERSE: return "bitreverse";
case ISD::BSWAP: return "bswap";
case ISD::CTPOP: return "ctpop";
diff --git a/llvm/lib/CodeGen/TargetLoweringBase.cpp b/llvm/lib/CodeGen/TargetLoweringBase.cpp
index 518417dd11e..7a2d50e63c5 100644
--- a/llvm/lib/CodeGen/TargetLoweringBase.cpp
+++ b/llvm/lib/CodeGen/TargetLoweringBase.cpp
@@ -900,6 +900,7 @@ void TargetLoweringBase::initActions() {
setOperationAction(ISD::SMAX, VT, Expand);
setOperationAction(ISD::UMIN, VT, Expand);
setOperationAction(ISD::UMAX, VT, Expand);
+ setOperationAction(ISD::ABS, VT, Expand);
// Overflow operations default to expand
setOperationAction(ISD::SADDO, VT, Expand);
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 81c78981b6f..6ab8c4b3275 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -896,6 +896,9 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
}
if (!Subtarget.useSoftFloat() && Subtarget.hasSSSE3()) {
+ setOperationAction(ISD::ABS, MVT::v16i8, Legal);
+ setOperationAction(ISD::ABS, MVT::v8i16, Legal);
+ setOperationAction(ISD::ABS, MVT::v4i32, Legal);
setOperationAction(ISD::BITREVERSE, MVT::v16i8, Custom);
setOperationAction(ISD::CTLZ, MVT::v16i8, Custom);
setOperationAction(ISD::CTLZ, MVT::v8i16, Custom);
@@ -1081,6 +1084,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::MULHS, MVT::v32i8, Custom);
for (auto VT : { MVT::v32i8, MVT::v16i16, MVT::v8i32 }) {
+ setOperationAction(ISD::ABS, VT, HasInt256 ? Legal : Custom);
setOperationAction(ISD::SMAX, VT, HasInt256 ? Legal : Custom);
setOperationAction(ISD::UMAX, VT, HasInt256 ? Legal : Custom);
setOperationAction(ISD::SMIN, VT, HasInt256 ? Legal : Custom);
@@ -1287,6 +1291,8 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
}
}
if (Subtarget.hasVLX()) {
+ setOperationAction(ISD::ABS, MVT::v4i64, Legal);
+ setOperationAction(ISD::ABS, MVT::v2i64, Legal);
setOperationAction(ISD::SINT_TO_FP, MVT::v8i32, Legal);
setOperationAction(ISD::UINT_TO_FP, MVT::v8i32, Legal);
setOperationAction(ISD::FP_TO_SINT, MVT::v8i32, Legal);
@@ -1383,6 +1389,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::MUL, MVT::v16i32, Legal);
for (auto VT : { MVT::v16i32, MVT::v8i64 }) {
+ setOperationAction(ISD::ABS, VT, Legal);
setOperationAction(ISD::SRL, VT, Custom);
setOperationAction(ISD::SHL, VT, Custom);
setOperationAction(ISD::SRA, VT, Custom);
@@ -1562,6 +1569,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
for (auto VT : { MVT::v64i8, MVT::v32i16 }) {
setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
setOperationAction(ISD::VSELECT, VT, Legal);
+ setOperationAction(ISD::ABS, VT, Legal);
setOperationAction(ISD::SRL, VT, Custom);
setOperationAction(ISD::SHL, VT, Custom);
setOperationAction(ISD::SRA, VT, Custom);
@@ -21155,6 +21163,25 @@ static SDValue LowerADD_SUB(SDValue Op, SelectionDAG &DAG) {
return Lower256IntArith(Op, DAG);
}
+static SDValue LowerABS(SDValue Op, SelectionDAG &DAG) {
+ assert(Op.getSimpleValueType().is256BitVector() &&
+ Op.getSimpleValueType().isInteger() &&
+ "Only handle AVX 256-bit vector integer operation");
+ MVT VT = Op.getSimpleValueType();
+ unsigned NumElems = VT.getVectorNumElements();
+
+ SDLoc dl(Op);
+ SDValue Src = Op.getOperand(0);
+ SDValue Lo = extract128BitVector(Src, 0, DAG, dl);
+ SDValue Hi = extract128BitVector(Src, NumElems / 2, DAG, dl);
+
+ MVT EltVT = VT.getVectorElementType();
+ MVT NewVT = MVT::getVectorVT(EltVT, NumElems / 2);
+ return DAG.getNode(ISD::CONCAT_VECTORS, dl, VT,
+ DAG.getNode(ISD::ABS, dl, NewVT, Lo),
+ DAG.getNode(ISD::ABS, dl, NewVT, Hi));
+}
+
static SDValue LowerMINMAX(SDValue Op, SelectionDAG &DAG) {
assert(Op.getSimpleValueType().is256BitVector() &&
Op.getSimpleValueType().isInteger() &&
@@ -23699,6 +23726,7 @@ SDValue X86TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
case ISD::SMIN:
case ISD::UMAX:
case ISD::UMIN: return LowerMINMAX(Op, DAG);
+ case ISD::ABS: return LowerABS(Op, DAG);
case ISD::FSINCOS: return LowerFSINCOS(Op, Subtarget, DAG);
case ISD::MLOAD: return LowerMLOAD(Op, Subtarget, DAG);
case ISD::MSTORE: return LowerMSTORE(Op, Subtarget, DAG);
@@ -24111,7 +24139,6 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
case X86ISD::HSUB: return "X86ISD::HSUB";
case X86ISD::FHADD: return "X86ISD::FHADD";
case X86ISD::FHSUB: return "X86ISD::FHSUB";
- case X86ISD::ABS: return "X86ISD::ABS";
case X86ISD::CONFLICT: return "X86ISD::CONFLICT";
case X86ISD::FMAX: return "X86ISD::FMAX";
case X86ISD::FMAXS: return "X86ISD::FMAXS";
diff --git a/llvm/lib/Target/X86/X86ISelLowering.h b/llvm/lib/Target/X86/X86ISelLowering.h
index 73daf640a18..6f89996c558 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.h
+++ b/llvm/lib/Target/X86/X86ISelLowering.h
@@ -239,9 +239,6 @@ namespace llvm {
FHADD,
FHSUB,
- // Integer absolute value
- ABS,
-
// Detect Conflicts Within a Vector
CONFLICT,
diff --git a/llvm/lib/Target/X86/X86InstrAVX512.td b/llvm/lib/Target/X86/X86InstrAVX512.td
index a5e3b7a8e34..c378069de42 100644
--- a/llvm/lib/Target/X86/X86InstrAVX512.td
+++ b/llvm/lib/Target/X86/X86InstrAVX512.td
@@ -8606,66 +8606,7 @@ multiclass avx512_unary_rm_vl_all<bits<8> opc_b, bits<8> opc_w,
HasBWI>;
}
-defm VPABS : avx512_unary_rm_vl_all<0x1C, 0x1D, 0x1E, 0x1F, "vpabs", X86Abs>;
-
-def avx512_v16i1sextv16i8 : PatLeaf<(v16i8 (X86pcmpgt (bc_v16i8 (v4i32 immAllZerosV)),
- VR128X:$src))>;
-def avx512_v8i1sextv8i16 : PatLeaf<(v8i16 (X86vsrai VR128X:$src, (i8 15)))>;
-def avx512_v4i1sextv4i32 : PatLeaf<(v4i32 (X86vsrai VR128X:$src, (i8 31)))>;
-def avx512_v32i1sextv32i8 : PatLeaf<(v32i8 (X86pcmpgt (bc_v32i8 (v8i32 immAllZerosV)),
- VR256X:$src))>;
-def avx512_v16i1sextv16i16: PatLeaf<(v16i16 (X86vsrai VR256X:$src, (i8 15)))>;
-def avx512_v8i1sextv8i32 : PatLeaf<(v8i32 (X86vsrai VR256X:$src, (i8 31)))>;
-
-let Predicates = [HasBWI, HasVLX] in {
- def : Pat<(xor
- (bc_v2i64 (avx512_v16i1sextv16i8)),
- (bc_v2i64 (add (v16i8 VR128X:$src), (avx512_v16i1sextv16i8)))),
- (VPABSBZ128rr VR128X:$src)>;
- def : Pat<(xor
- (bc_v2i64 (avx512_v8i1sextv8i16)),
- (bc_v2i64 (add (v8i16 VR128X:$src), (avx512_v8i1sextv8i16)))),
- (VPABSWZ128rr VR128X:$src)>;
- def : Pat<(xor
- (bc_v4i64 (avx512_v32i1sextv32i8)),
- (bc_v4i64 (add (v32i8 VR256X:$src), (avx512_v32i1sextv32i8)))),
- (VPABSBZ256rr VR256X:$src)>;
- def : Pat<(xor
- (bc_v4i64 (avx512_v16i1sextv16i16)),
- (bc_v4i64 (add (v16i16 VR256X:$src), (avx512_v16i1sextv16i16)))),
- (VPABSWZ256rr VR256X:$src)>;
-}
-let Predicates = [HasAVX512, HasVLX] in {
- def : Pat<(xor
- (bc_v2i64 (avx512_v4i1sextv4i32)),
- (bc_v2i64 (add (v4i32 VR128X:$src), (avx512_v4i1sextv4i32)))),
- (VPABSDZ128rr VR128X:$src)>;
- def : Pat<(xor
- (bc_v4i64 (avx512_v8i1sextv8i32)),
- (bc_v4i64 (add (v8i32 VR256X:$src), (avx512_v8i1sextv8i32)))),
- (VPABSDZ256rr VR256X:$src)>;
-}
-
-let Predicates = [HasAVX512] in {
-def : Pat<(xor
- (bc_v8i64 (v16i1sextv16i32)),
- (bc_v8i64 (add (v16i32 VR512:$src), (v16i1sextv16i32)))),
- (VPABSDZrr VR512:$src)>;
-def : Pat<(xor
- (bc_v8i64 (v8i1sextv8i64)),
- (bc_v8i64 (add (v8i64 VR512:$src), (v8i1sextv8i64)))),
- (VPABSQZrr VR512:$src)>;
-}
-let Predicates = [HasBWI] in {
-def : Pat<(xor
- (bc_v8i64 (v64i1sextv64i8)),
- (bc_v8i64 (add (v64i8 VR512:$src), (v64i1sextv64i8)))),
- (VPABSBZrr VR512:$src)>;
-def : Pat<(xor
- (bc_v8i64 (v32i1sextv32i16)),
- (bc_v8i64 (add (v32i16 VR512:$src), (v32i1sextv32i16)))),
- (VPABSWZrr VR512:$src)>;
-}
+defm VPABS : avx512_unary_rm_vl_all<0x1C, 0x1D, 0x1E, 0x1F, "vpabs", abs>;
multiclass avx512_ctlz<bits<8> opc, string OpcodeStr, Predicate prd>{
diff --git a/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td b/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td
index f4146e450dd..9867ba84bb9 100644
--- a/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td
+++ b/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td
@@ -355,7 +355,6 @@ def SDTFmaRound : SDTypeProfile<1, 4, [SDTCisSameAs<0,1>,
def X86PAlignr : SDNode<"X86ISD::PALIGNR", SDTShuff3OpI>;
def X86VAlign : SDNode<"X86ISD::VALIGN", SDTShuff3OpI>;
-def X86Abs : SDNode<"X86ISD::ABS", SDTIntUnaryOp>;
def X86Conflict : SDNode<"X86ISD::CONFLICT", SDTIntUnaryOp>;
def X86PShufd : SDNode<"X86ISD::PSHUFD", SDTShuff2OpI>;
diff --git a/llvm/lib/Target/X86/X86InstrSSE.td b/llvm/lib/Target/X86/X86InstrSSE.td
index b3a51003250..eeab3393d84 100644
--- a/llvm/lib/Target/X86/X86InstrSSE.td
+++ b/llvm/lib/Target/X86/X86InstrSSE.td
@@ -5266,84 +5266,24 @@ multiclass SS3I_unop_rm_y<bits<8> opc, string OpcodeStr, ValueType vt,
Sched<[WriteVecALULd]>;
}
-// Helper fragments to match sext vXi1 to vXiY.
-def v16i1sextv16i8 : PatLeaf<(v16i8 (X86pcmpgt (bc_v16i8 (v4i32 immAllZerosV)),
- VR128:$src))>;
-def v8i1sextv8i16 : PatLeaf<(v8i16 (X86vsrai VR128:$src, (i8 15)))>;
-def v4i1sextv4i32 : PatLeaf<(v4i32 (X86vsrai VR128:$src, (i8 31)))>;
-def v32i1sextv32i8 : PatLeaf<(v32i8 (X86pcmpgt (bc_v32i8 (v8i32 immAllZerosV)),
- VR256:$src))>;
-def v16i1sextv16i16: PatLeaf<(v16i16 (X86vsrai VR256:$src, (i8 15)))>;
-def v8i1sextv8i32 : PatLeaf<(v8i32 (X86vsrai VR256:$src, (i8 31)))>;
-
-let Predicates = [HasAVX, NoVLX_Or_NoBWI] in {
- defm VPABSB : SS3I_unop_rm<0x1C, "vpabsb", v16i8, X86Abs, loadv2i64>, VEX, VEX_WIG;
- defm VPABSW : SS3I_unop_rm<0x1D, "vpabsw", v8i16, X86Abs, loadv2i64>, VEX, VEX_WIG;
-}
-let Predicates = [HasAVX, NoVLX] in {
- defm VPABSD : SS3I_unop_rm<0x1E, "vpabsd", v4i32, X86Abs, loadv2i64>, VEX, VEX_WIG;
-}
-
let Predicates = [HasAVX, NoVLX_Or_NoBWI] in {
- def : Pat<(xor
- (bc_v2i64 (v16i1sextv16i8)),
- (bc_v2i64 (add (v16i8 VR128:$src), (v16i1sextv16i8)))),
- (VPABSBrr VR128:$src)>;
- def : Pat<(xor
- (bc_v2i64 (v8i1sextv8i16)),
- (bc_v2i64 (add (v8i16 VR128:$src), (v8i1sextv8i16)))),
- (VPABSWrr VR128:$src)>;
+ defm VPABSB : SS3I_unop_rm<0x1C, "vpabsb", v16i8, abs, loadv2i64>, VEX, VEX_WIG;
+ defm VPABSW : SS3I_unop_rm<0x1D, "vpabsw", v8i16, abs, loadv2i64>, VEX, VEX_WIG;
}
let Predicates = [HasAVX, NoVLX] in {
- def : Pat<(xor
- (bc_v2i64 (v4i1sextv4i32)),
- (bc_v2i64 (add (v4i32 VR128:$src), (v4i1sextv4i32)))),
- (VPABSDrr VR128:$src)>;
+ defm VPABSD : SS3I_unop_rm<0x1E, "vpabsd", v4i32, abs, loadv2i64>, VEX, VEX_WIG;
}
-
let Predicates = [HasAVX2, NoVLX_Or_NoBWI] in {
- defm VPABSB : SS3I_unop_rm_y<0x1C, "vpabsb", v32i8, X86Abs>, VEX, VEX_L, VEX_WIG;
- defm VPABSW : SS3I_unop_rm_y<0x1D, "vpabsw", v16i16, X86Abs>, VEX, VEX_L, VEX_WIG;
+ defm VPABSB : SS3I_unop_rm_y<0x1C, "vpabsb", v32i8, abs>, VEX, VEX_L, VEX_WIG;
+ defm VPABSW : SS3I_unop_rm_y<0x1D, "vpabsw", v16i16, abs>, VEX, VEX_L, VEX_WIG;
}
let Predicates = [HasAVX2, NoVLX] in {
- defm VPABSD : SS3I_unop_rm_y<0x1E, "vpabsd", v8i32, X86Abs>, VEX, VEX_L, VEX_WIG;
+ defm VPABSD : SS3I_unop_rm_y<0x1E, "vpabsd", v8i32, abs>, VEX, VEX_L, VEX_WIG;
}
-let Predicates = [HasAVX2, NoVLX_Or_NoBWI] in {
- def : Pat<(xor
- (bc_v4i64 (v32i1sextv32i8)),
- (bc_v4i64 (add (v32i8 VR256:$src), (v32i1sextv32i8)))),
- (VPABSBYrr VR256:$src)>;
- def : Pat<(xor
- (bc_v4i64 (v16i1sextv16i16)),
- (bc_v4i64 (add (v16i16 VR256:$src), (v16i1sextv16i16)))),
- (VPABSWYrr VR256:$src)>;
-}
-let Predicates = [HasAVX2, NoVLX] in {
- def : Pat<(xor
- (bc_v4i64 (v8i1sextv8i32)),
- (bc_v4i64 (add (v8i32 VR256:$src), (v8i1sextv8i32)))),
- (VPABSDYrr VR256:$src)>;
-}
-
-defm PABSB : SS3I_unop_rm<0x1C, "pabsb", v16i8, X86Abs, memopv2i64>;
-defm PABSW : SS3I_unop_rm<0x1D, "pabsw", v8i16, X86Abs, memopv2i64>;
-defm PABSD : SS3I_unop_rm<0x1E, "pabsd", v4i32, X86Abs, memopv2i64>;
-
-let Predicates = [UseSSSE3] in {
- def : Pat<(xor
- (bc_v2i64 (v16i1sextv16i8)),
- (bc_v2i64 (add (v16i8 VR128:$src), (v16i1sextv16i8)))),
- (PABSBrr VR128:$src)>;
- def : Pat<(xor
- (bc_v2i64 (v8i1sextv8i16)),
- (bc_v2i64 (add (v8i16 VR128:$src), (v8i1sextv8i16)))),
- (PABSWrr VR128:$src)>;
- def : Pat<(xor
- (bc_v2i64 (v4i1sextv4i32)),
- (bc_v2i64 (add (v4i32 VR128:$src), (v4i1sextv4i32)))),
- (PABSDrr VR128:$src)>;
-}
+defm PABSB : SS3I_unop_rm<0x1C, "pabsb", v16i8, abs, memopv2i64>;
+defm PABSW : SS3I_unop_rm<0x1D, "pabsw", v8i16, abs, memopv2i64>;
+defm PABSD : SS3I_unop_rm<0x1E, "pabsd", v4i32, abs, memopv2i64>;
//===---------------------------------------------------------------------===//
// SSSE3 - Packed Binary Operator Instructions
diff --git a/llvm/lib/Target/X86/X86IntrinsicsInfo.h b/llvm/lib/Target/X86/X86IntrinsicsInfo.h
index 43d0cf72721..fa289921472 100644
--- a/llvm/lib/Target/X86/X86IntrinsicsInfo.h
+++ b/llvm/lib/Target/X86/X86IntrinsicsInfo.h
@@ -370,9 +370,9 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
X86_INTRINSIC_DATA(avx_vpermilvar_pd_256, INTR_TYPE_2OP, X86ISD::VPERMILPV, 0),
X86_INTRINSIC_DATA(avx_vpermilvar_ps, INTR_TYPE_2OP, X86ISD::VPERMILPV, 0),
X86_INTRINSIC_DATA(avx_vpermilvar_ps_256, INTR_TYPE_2OP, X86ISD::VPERMILPV, 0),
- X86_INTRINSIC_DATA(avx2_pabs_b, INTR_TYPE_1OP, X86ISD::ABS, 0),
- X86_INTRINSIC_DATA(avx2_pabs_d, INTR_TYPE_1OP, X86ISD::ABS, 0),
- X86_INTRINSIC_DATA(avx2_pabs_w, INTR_TYPE_1OP, X86ISD::ABS, 0),
+ X86_INTRINSIC_DATA(avx2_pabs_b, INTR_TYPE_1OP, ISD::ABS, 0),
+ X86_INTRINSIC_DATA(avx2_pabs_d, INTR_TYPE_1OP, ISD::ABS, 0),
+ X86_INTRINSIC_DATA(avx2_pabs_w, INTR_TYPE_1OP, ISD::ABS, 0),
X86_INTRINSIC_DATA(avx2_packssdw, INTR_TYPE_2OP, X86ISD::PACKSS, 0),
X86_INTRINSIC_DATA(avx2_packsswb, INTR_TYPE_2OP, X86ISD::PACKSS, 0),
X86_INTRINSIC_DATA(avx2_packusdw, INTR_TYPE_2OP, X86ISD::PACKUS, 0),
@@ -838,18 +838,18 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
X86ISD::FMULS_RND, 0),
X86_INTRINSIC_DATA(avx512_mask_mul_ss_round, INTR_TYPE_SCALAR_MASK_RM,
X86ISD::FMULS_RND, 0),
- X86_INTRINSIC_DATA(avx512_mask_pabs_b_128, INTR_TYPE_1OP_MASK, X86ISD::ABS, 0),
- X86_INTRINSIC_DATA(avx512_mask_pabs_b_256, INTR_TYPE_1OP_MASK, X86ISD::ABS, 0),
- X86_INTRINSIC_DATA(avx512_mask_pabs_b_512, INTR_TYPE_1OP_MASK, X86ISD::ABS, 0),
- X86_INTRINSIC_DATA(avx512_mask_pabs_d_128, INTR_TYPE_1OP_MASK, X86ISD::ABS, 0),
- X86_INTRINSIC_DATA(avx512_mask_pabs_d_256, INTR_TYPE_1OP_MASK, X86ISD::ABS, 0),
- X86_INTRINSIC_DATA(avx512_mask_pabs_d_512, INTR_TYPE_1OP_MASK, X86ISD::ABS, 0),
- X86_INTRINSIC_DATA(avx512_mask_pabs_q_128, INTR_TYPE_1OP_MASK, X86ISD::ABS, 0),
- X86_INTRINSIC_DATA(avx512_mask_pabs_q_256, INTR_TYPE_1OP_MASK, X86ISD::ABS, 0),
- X86_INTRINSIC_DATA(avx512_mask_pabs_q_512, INTR_TYPE_1OP_MASK, X86ISD::ABS, 0),
- X86_INTRINSIC_DATA(avx512_mask_pabs_w_128, INTR_TYPE_1OP_MASK, X86ISD::ABS, 0),
- X86_INTRINSIC_DATA(avx512_mask_pabs_w_256, INTR_TYPE_1OP_MASK, X86ISD::ABS, 0),
- X86_INTRINSIC_DATA(avx512_mask_pabs_w_512, INTR_TYPE_1OP_MASK, X86ISD::ABS, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pabs_b_128, INTR_TYPE_1OP_MASK, ISD::ABS, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pabs_b_256, INTR_TYPE_1OP_MASK, ISD::ABS, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pabs_b_512, INTR_TYPE_1OP_MASK, ISD::ABS, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pabs_d_128, INTR_TYPE_1OP_MASK, ISD::ABS, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pabs_d_256, INTR_TYPE_1OP_MASK, ISD::ABS, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pabs_d_512, INTR_TYPE_1OP_MASK, ISD::ABS, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pabs_q_128, INTR_TYPE_1OP_MASK, ISD::ABS, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pabs_q_256, INTR_TYPE_1OP_MASK, ISD::ABS, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pabs_q_512, INTR_TYPE_1OP_MASK, ISD::ABS, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pabs_w_128, INTR_TYPE_1OP_MASK, ISD::ABS, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pabs_w_256, INTR_TYPE_1OP_MASK, ISD::ABS, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pabs_w_512, INTR_TYPE_1OP_MASK, ISD::ABS, 0),
X86_INTRINSIC_DATA(avx512_mask_padds_b_128, INTR_TYPE_2OP_MASK, X86ISD::ADDS, 0),
X86_INTRINSIC_DATA(avx512_mask_padds_b_256, INTR_TYPE_2OP_MASK, X86ISD::ADDS, 0),
X86_INTRINSIC_DATA(avx512_mask_padds_b_512, INTR_TYPE_2OP_MASK, X86ISD::ADDS, 0),
@@ -1693,9 +1693,9 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
X86_INTRINSIC_DATA(sse41_pmuldq, INTR_TYPE_2OP, X86ISD::PMULDQ, 0),
X86_INTRINSIC_DATA(sse4a_extrqi, INTR_TYPE_3OP, X86ISD::EXTRQI, 0),
X86_INTRINSIC_DATA(sse4a_insertqi, INTR_TYPE_4OP, X86ISD::INSERTQI, 0),
- X86_INTRINSIC_DATA(ssse3_pabs_b_128, INTR_TYPE_1OP, X86ISD::ABS, 0),
- X86_INTRINSIC_DATA(ssse3_pabs_d_128, INTR_TYPE_1OP, X86ISD::ABS, 0),
- X86_INTRINSIC_DATA(ssse3_pabs_w_128, INTR_TYPE_1OP, X86ISD::ABS, 0),
+ X86_INTRINSIC_DATA(ssse3_pabs_b_128, INTR_TYPE_1OP, ISD::ABS, 0),
+ X86_INTRINSIC_DATA(ssse3_pabs_d_128, INTR_TYPE_1OP, ISD::ABS, 0),
+ X86_INTRINSIC_DATA(ssse3_pabs_w_128, INTR_TYPE_1OP, ISD::ABS, 0),
X86_INTRINSIC_DATA(ssse3_phadd_d_128, INTR_TYPE_2OP, X86ISD::HADD, 0),
X86_INTRINSIC_DATA(ssse3_phadd_w_128, INTR_TYPE_2OP, X86ISD::HADD, 0),
X86_INTRINSIC_DATA(ssse3_phsub_d_128, INTR_TYPE_2OP, X86ISD::HSUB, 0),
diff --git a/llvm/test/CodeGen/X86/combine-abs.ll b/llvm/test/CodeGen/X86/combine-abs.ll
index 109738dcd9b..2d71e4f2f27 100644
--- a/llvm/test/CodeGen/X86/combine-abs.ll
+++ b/llvm/test/CodeGen/X86/combine-abs.ll
@@ -1,13 +1,11 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s
-; FIXME: Various missed opportunities to simplify integer absolute instructions.
-
; fold (abs c1) -> c2
define <4 x i32> @combine_v4i32_abs_constant() {
; CHECK-LABEL: combine_v4i32_abs_constant:
; CHECK: # BB#0:
-; CHECK-NEXT: vpabsd {{.*}}(%rip), %xmm0
+; CHECK-NEXT: vmovaps {{.*#+}} xmm0 = [0,1,3,2147483648]
; CHECK-NEXT: retq
%1 = call <4 x i32> @llvm.x86.ssse3.pabs.d.128(<4 x i32> <i32 0, i32 -1, i32 3, i32 -2147483648>)
ret <4 x i32> %1
@@ -16,7 +14,7 @@ define <4 x i32> @combine_v4i32_abs_constant() {
define <16 x i16> @combine_v16i16_abs_constant() {
; CHECK-LABEL: combine_v16i16_abs_constant:
; CHECK: # BB#0:
-; CHECK-NEXT: vpabsw {{.*}}(%rip), %ymm0
+; CHECK-NEXT: vmovaps {{.*#+}} ymm0 = [0,1,1,3,3,7,7,255,255,4096,4096,32767,32767,32768,32768,0]
; CHECK-NEXT: retq
%1 = call <16 x i16> @llvm.x86.avx2.pabs.w(<16 x i16> <i16 0, i16 1, i16 -1, i16 3, i16 -3, i16 7, i16 -7, i16 255, i16 -255, i16 4096, i16 -4096, i16 32767, i16 -32767, i16 -32768, i16 32768, i16 65536>)
ret <16 x i16> %1
@@ -27,7 +25,6 @@ define <8 x i16> @combine_v8i16_abs_abs(<8 x i16> %a) {
; CHECK-LABEL: combine_v8i16_abs_abs:
; CHECK: # BB#0:
; CHECK-NEXT: vpabsw %xmm0, %xmm0
-; CHECK-NEXT: vpabsw %xmm0, %xmm0
; CHECK-NEXT: retq
%a1 = call <8 x i16> @llvm.x86.ssse3.pabs.w.128(<8 x i16> %a)
%n2 = sub <8 x i16> zeroinitializer, %a1
@@ -40,7 +37,6 @@ define <32 x i8> @combine_v32i8_abs_abs(<32 x i8> %a) {
; CHECK-LABEL: combine_v32i8_abs_abs:
; CHECK: # BB#0:
; CHECK-NEXT: vpabsb %ymm0, %ymm0
-; CHECK-NEXT: vpabsb %ymm0, %ymm0
; CHECK-NEXT: retq
%n1 = sub <32 x i8> zeroinitializer, %a
%b1 = icmp slt <32 x i8> %a, zeroinitializer
diff --git a/llvm/test/CodeGen/X86/viabs.ll b/llvm/test/CodeGen/X86/viabs.ll
index da743bc6c4f..34a9df1782a 100644
--- a/llvm/test/CodeGen/X86/viabs.ll
+++ b/llvm/test/CodeGen/X86/viabs.ll
@@ -147,14 +147,10 @@ define <8 x i32> @test_abs_gt_v8i32(<8 x i32> %a) nounwind {
;
; AVX1-LABEL: test_abs_gt_v8i32:
; AVX1: # BB#0:
-; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
-; AVX1-NEXT: vpsrad $31, %xmm1, %xmm2
-; AVX1-NEXT: vpaddd %xmm2, %xmm1, %xmm1
-; AVX1-NEXT: vpsrad $31, %xmm0, %xmm3
-; AVX1-NEXT: vpaddd %xmm3, %xmm0, %xmm0
-; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
-; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm3, %ymm1
-; AVX1-NEXT: vxorps %ymm1, %ymm0, %ymm0
+; AVX1-NEXT: vpabsd %xmm0, %xmm1
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
+; AVX1-NEXT: vpabsd %xmm0, %xmm0
+; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: test_abs_gt_v8i32:
@@ -193,14 +189,10 @@ define <8 x i32> @test_abs_ge_v8i32(<8 x i32> %a) nounwind {
;
; AVX1-LABEL: test_abs_ge_v8i32:
; AVX1: # BB#0:
-; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
-; AVX1-NEXT: vpsrad $31, %xmm1, %xmm2
-; AVX1-NEXT: vpaddd %xmm2, %xmm1, %xmm1
-; AVX1-NEXT: vpsrad $31, %xmm0, %xmm3
-; AVX1-NEXT: vpaddd %xmm3, %xmm0, %xmm0
-; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
-; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm3, %ymm1
-; AVX1-NEXT: vxorps %ymm1, %ymm0, %ymm0
+; AVX1-NEXT: vpabsd %xmm0, %xmm1
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
+; AVX1-NEXT: vpabsd %xmm0, %xmm0
+; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: test_abs_ge_v8i32:
@@ -239,14 +231,10 @@ define <16 x i16> @test_abs_gt_v16i16(<16 x i16> %a) nounwind {
;
; AVX1-LABEL: test_abs_gt_v16i16:
; AVX1: # BB#0:
-; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
-; AVX1-NEXT: vpsraw $15, %xmm1, %xmm2
-; AVX1-NEXT: vpaddw %xmm2, %xmm1, %xmm1
-; AVX1-NEXT: vpsraw $15, %xmm0, %xmm3
-; AVX1-NEXT: vpaddw %xmm3, %xmm0, %xmm0
-; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
-; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm3, %ymm1
-; AVX1-NEXT: vxorps %ymm1, %ymm0, %ymm0
+; AVX1-NEXT: vpabsw %xmm0, %xmm1
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
+; AVX1-NEXT: vpabsw %xmm0, %xmm0
+; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: test_abs_gt_v16i16:
@@ -285,15 +273,10 @@ define <32 x i8> @test_abs_lt_v32i8(<32 x i8> %a) nounwind {
;
; AVX1-LABEL: test_abs_lt_v32i8:
; AVX1: # BB#0:
-; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
-; AVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2
-; AVX1-NEXT: vpcmpgtb %xmm1, %xmm2, %xmm3
-; AVX1-NEXT: vpcmpgtb %xmm0, %xmm2, %xmm2
-; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm2, %ymm4
-; AVX1-NEXT: vpaddb %xmm3, %xmm1, %xmm1
-; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0
-; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
-; AVX1-NEXT: vxorps %ymm4, %ymm0, %ymm0
+; AVX1-NEXT: vpabsb %xmm0, %xmm1
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
+; AVX1-NEXT: vpabsb %xmm0, %xmm0
+; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: test_abs_lt_v32i8:
@@ -332,14 +315,10 @@ define <8 x i32> @test_abs_le_v8i32(<8 x i32> %a) nounwind {
;
; AVX1-LABEL: test_abs_le_v8i32:
; AVX1: # BB#0:
-; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
-; AVX1-NEXT: vpsrad $31, %xmm1, %xmm2
-; AVX1-NEXT: vpaddd %xmm2, %xmm1, %xmm1
-; AVX1-NEXT: vpsrad $31, %xmm0, %xmm3
-; AVX1-NEXT: vpaddd %xmm3, %xmm0, %xmm0
-; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
-; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm3, %ymm1
-; AVX1-NEXT: vxorps %ymm1, %ymm0, %ymm0
+; AVX1-NEXT: vpabsd %xmm0, %xmm1
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
+; AVX1-NEXT: vpabsd %xmm0, %xmm0
+; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: test_abs_le_v8i32:
@@ -388,22 +367,14 @@ define <16 x i32> @test_abs_le_16i32(<16 x i32> %a) nounwind {
;
; AVX1-LABEL: test_abs_le_16i32:
; AVX1: # BB#0:
-; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
-; AVX1-NEXT: vpsrad $31, %xmm2, %xmm3
-; AVX1-NEXT: vpaddd %xmm3, %xmm2, %xmm2
-; AVX1-NEXT: vpsrad $31, %xmm0, %xmm4
-; AVX1-NEXT: vpaddd %xmm4, %xmm0, %xmm0
-; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
-; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm4, %ymm2
-; AVX1-NEXT: vxorps %ymm2, %ymm0, %ymm0
-; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
-; AVX1-NEXT: vpsrad $31, %xmm2, %xmm3
-; AVX1-NEXT: vpaddd %xmm3, %xmm2, %xmm2
-; AVX1-NEXT: vpsrad $31, %xmm1, %xmm4
-; AVX1-NEXT: vpaddd %xmm4, %xmm1, %xmm1
-; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1
-; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm4, %ymm2
-; AVX1-NEXT: vxorps %ymm2, %ymm1, %ymm1
+; AVX1-NEXT: vpabsd %xmm0, %xmm2
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
+; AVX1-NEXT: vpabsd %xmm0, %xmm0
+; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm2, %ymm0
+; AVX1-NEXT: vpabsd %xmm1, %xmm2
+; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm1
+; AVX1-NEXT: vpabsd %xmm1, %xmm1
+; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm2, %ymm1
; AVX1-NEXT: retq
;
; AVX2-LABEL: test_abs_le_16i32:
@@ -450,9 +421,7 @@ define <2 x i64> @test_abs_ge_v2i64(<2 x i64> %a) nounwind {
;
; AVX512-LABEL: test_abs_ge_v2i64:
; AVX512: # BB#0:
-; AVX512-NEXT: vpsraq $63, %xmm0, %xmm1
-; AVX512-NEXT: vpaddq %xmm1, %xmm0, %xmm0
-; AVX512-NEXT: vpxor %xmm1, %xmm0, %xmm0
+; AVX512-NEXT: vpabsq %xmm0, %xmm0
; AVX512-NEXT: retq
%tmp1neg = sub <2 x i64> zeroinitializer, %a
%b = icmp sge <2 x i64> %a, zeroinitializer
@@ -499,9 +468,7 @@ define <4 x i64> @test_abs_gt_v4i64(<4 x i64> %a) nounwind {
;
; AVX512-LABEL: test_abs_gt_v4i64:
; AVX512: # BB#0:
-; AVX512-NEXT: vpsraq $63, %ymm0, %ymm1
-; AVX512-NEXT: vpaddq %ymm1, %ymm0, %ymm0
-; AVX512-NEXT: vpxor %ymm1, %ymm0, %ymm0
+; AVX512-NEXT: vpabsq %ymm0, %ymm0
; AVX512-NEXT: retq
%tmp1neg = sub <4 x i64> zeroinitializer, %a
%b = icmp sgt <4 x i64> %a, <i64 -1, i64 -1, i64 -1, i64 -1>
@@ -691,23 +658,14 @@ define <64 x i8> @test_abs_lt_v64i8(<64 x i8> %a) nounwind {
;
; AVX1-LABEL: test_abs_lt_v64i8:
; AVX1: # BB#0:
-; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
-; AVX1-NEXT: vpxor %xmm3, %xmm3, %xmm3
-; AVX1-NEXT: vpcmpgtb %xmm2, %xmm3, %xmm4
-; AVX1-NEXT: vpcmpgtb %xmm0, %xmm3, %xmm5
-; AVX1-NEXT: vinsertf128 $1, %xmm4, %ymm5, %ymm6
-; AVX1-NEXT: vpaddb %xmm4, %xmm2, %xmm2
-; AVX1-NEXT: vpaddb %xmm5, %xmm0, %xmm0
-; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
-; AVX1-NEXT: vxorps %ymm6, %ymm0, %ymm0
-; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
-; AVX1-NEXT: vpcmpgtb %xmm2, %xmm3, %xmm4
-; AVX1-NEXT: vpcmpgtb %xmm1, %xmm3, %xmm3
-; AVX1-NEXT: vinsertf128 $1, %xmm4, %ymm3, %ymm5
-; AVX1-NEXT: vpaddb %xmm4, %xmm2, %xmm2
-; AVX1-NEXT: vpaddb %xmm3, %xmm1, %xmm1
-; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1
-; AVX1-NEXT: vxorps %ymm5, %ymm1, %ymm1
+; AVX1-NEXT: vpabsb %xmm0, %xmm2
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
+; AVX1-NEXT: vpabsb %xmm0, %xmm0
+; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm2, %ymm0
+; AVX1-NEXT: vpabsb %xmm1, %xmm2
+; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm1
+; AVX1-NEXT: vpabsb %xmm1, %xmm1
+; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm2, %ymm1
; AVX1-NEXT: retq
;
; AVX2-LABEL: test_abs_lt_v64i8:
@@ -763,22 +721,14 @@ define <32 x i16> @test_abs_gt_v32i16(<32 x i16> %a) nounwind {
;
; AVX1-LABEL: test_abs_gt_v32i16:
; AVX1: # BB#0:
-; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
-; AVX1-NEXT: vpsraw $15, %xmm2, %xmm3
-; AVX1-NEXT: vpaddw %xmm3, %xmm2, %xmm2
-; AVX1-NEXT: vpsraw $15, %xmm0, %xmm4
-; AVX1-NEXT: vpaddw %xmm4, %xmm0, %xmm0
-; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
-; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm4, %ymm2
-; AVX1-NEXT: vxorps %ymm2, %ymm0, %ymm0
-; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
-; AVX1-NEXT: vpsraw $15, %xmm2, %xmm3
-; AVX1-NEXT: vpaddw %xmm3, %xmm2, %xmm2
-; AVX1-NEXT: vpsraw $15, %xmm1, %xmm4
-; AVX1-NEXT: vpaddw %xmm4, %xmm1, %xmm1
-; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1
-; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm4, %ymm2
-; AVX1-NEXT: vxorps %ymm2, %ymm1, %ymm1
+; AVX1-NEXT: vpabsw %xmm0, %xmm2
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
+; AVX1-NEXT: vpabsw %xmm0, %xmm0
+; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm2, %ymm0
+; AVX1-NEXT: vpabsw %xmm1, %xmm2
+; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm1
+; AVX1-NEXT: vpabsw %xmm1, %xmm1
+; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm2, %ymm1
; AVX1-NEXT: retq
;
; AVX2-LABEL: test_abs_gt_v32i16:
OpenPOWER on IntegriCloud