summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--llvm/include/llvm/CodeGen/ISDOpcodes.h6
-rw-r--r--llvm/include/llvm/Target/TargetSelectionDAG.td1
-rw-r--r--llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp29
-rw-r--r--llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp10
-rw-r--r--llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp1
-rw-r--r--llvm/lib/CodeGen/TargetLoweringBase.cpp1
-rw-r--r--llvm/lib/Target/X86/X86ISelLowering.cpp29
-rw-r--r--llvm/lib/Target/X86/X86ISelLowering.h3
-rw-r--r--llvm/lib/Target/X86/X86InstrAVX512.td61
-rw-r--r--llvm/lib/Target/X86/X86InstrFragmentsSIMD.td1
-rw-r--r--llvm/lib/Target/X86/X86InstrSSE.td78
-rw-r--r--llvm/lib/Target/X86/X86IntrinsicsInfo.h36
-rw-r--r--llvm/test/CodeGen/X86/combine-abs.ll8
-rw-r--r--llvm/test/CodeGen/X86/viabs.ll142
14 files changed, 152 insertions, 254 deletions
diff --git a/llvm/include/llvm/CodeGen/ISDOpcodes.h b/llvm/include/llvm/CodeGen/ISDOpcodes.h
index f943e48a329..d595d43af50 100644
--- a/llvm/include/llvm/CodeGen/ISDOpcodes.h
+++ b/llvm/include/llvm/CodeGen/ISDOpcodes.h
@@ -339,6 +339,12 @@ namespace ISD {
/// Bitwise operators - logical and, logical or, logical xor.
AND, OR, XOR,
+ /// ABS - Determine the unsigned absolute value of a signed integer value of
+ /// the same bitwidth.
+ /// Note: A value of INT_MIN will return INT_MIN, no saturation or overflow
+ /// is performed.
+ ABS,
+
/// Shift and rotation operations. After legalization, the type of the
/// shift amount is known to be TLI.getShiftAmountTy(). Before legalization
/// the shift amount can be any type, but care must be taken to ensure it is
diff --git a/llvm/include/llvm/Target/TargetSelectionDAG.td b/llvm/include/llvm/Target/TargetSelectionDAG.td
index 688857081fd..45a842f77a2 100644
--- a/llvm/include/llvm/Target/TargetSelectionDAG.td
+++ b/llvm/include/llvm/Target/TargetSelectionDAG.td
@@ -413,6 +413,7 @@ def sext_inreg : SDNode<"ISD::SIGN_EXTEND_INREG", SDTExtInreg>;
def sext_invec : SDNode<"ISD::SIGN_EXTEND_VECTOR_INREG", SDTExtInvec>;
def zext_invec : SDNode<"ISD::ZERO_EXTEND_VECTOR_INREG", SDTExtInvec>;
+def abs : SDNode<"ISD::ABS" , SDTIntUnaryOp>;
def bitreverse : SDNode<"ISD::BITREVERSE" , SDTIntUnaryOp>;
def bswap : SDNode<"ISD::BSWAP" , SDTIntUnaryOp>;
def ctlz : SDNode<"ISD::CTLZ" , SDTIntUnaryOp>;
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 788b52dabe9..0fe1edad7fc 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -262,6 +262,7 @@ namespace {
SDValue visitSRA(SDNode *N);
SDValue visitSRL(SDNode *N);
SDValue visitRotate(SDNode *N);
+ SDValue visitABS(SDNode *N);
SDValue visitBSWAP(SDNode *N);
SDValue visitBITREVERSE(SDNode *N);
SDValue visitCTLZ(SDNode *N);
@@ -1423,6 +1424,7 @@ SDValue DAGCombiner::visit(SDNode *N) {
case ISD::SRL: return visitSRL(N);
case ISD::ROTR:
case ISD::ROTL: return visitRotate(N);
+ case ISD::ABS: return visitABS(N);
case ISD::BSWAP: return visitBSWAP(N);
case ISD::BITREVERSE: return visitBITREVERSE(N);
case ISD::CTLZ: return visitCTLZ(N);
@@ -5004,6 +5006,17 @@ SDValue DAGCombiner::visitXOR(SDNode *N) {
N01C->getAPIntValue(), DL, VT));
}
}
+
+ // fold Y = sra (X, size(X)-1); xor (add (X, Y), Y) -> (abs X)
+ unsigned OpSizeInBits = VT.getScalarSizeInBits();
+ if (N0.getOpcode() == ISD::ADD && N0.getOperand(1) == N1 &&
+ N1.getOpcode() == ISD::SRA && N1.getOperand(0) == N0.getOperand(0) &&
+ TLI.isOperationLegalOrCustom(ISD::ABS, VT)) {
+ if (ConstantSDNode *C = isConstOrConstSplat(N1.getOperand(1)))
+ if (C->getAPIntValue() == (OpSizeInBits - 1))
+ return DAG.getNode(ISD::ABS, SDLoc(N), VT, N0.getOperand(0));
+ }
+
// fold (xor x, x) -> 0
if (N0 == N1)
return tryFoldToZero(SDLoc(N), TLI, VT, DAG, LegalOperations, LegalTypes);
@@ -5746,6 +5759,22 @@ SDValue DAGCombiner::visitSRL(SDNode *N) {
return SDValue();
}
+SDValue DAGCombiner::visitABS(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ EVT VT = N->getValueType(0);
+
+ // fold (abs c1) -> c2
+ if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
+ return DAG.getNode(ISD::ABS, SDLoc(N), VT, N0);
+ // fold (abs (abs x)) -> (abs x)
+ if (N0.getOpcode() == ISD::ABS)
+ return N0;
+ // fold (abs x) -> x iff not-negative
+ if (DAG.SignBitIsZero(N0))
+ return N0;
+ return SDValue();
+}
+
SDValue DAGCombiner::visitBSWAP(SDNode *N) {
SDValue N0 = N->getOperand(0);
EVT VT = N->getValueType(0);
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index 4f33eef7513..aed422ac86b 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -3336,6 +3336,9 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
if (VT == MVT::f128 && C->getValueType(0) == MVT::i128)
return getConstantFP(APFloat(APFloat::IEEEquad(), Val), DL, VT);
break;
+ case ISD::ABS:
+ return getConstant(Val.abs(), DL, VT, C->isTargetOpcode(),
+ C->isOpaque());
case ISD::BITREVERSE:
return getConstant(Val.reverseBits(), DL, VT, C->isTargetOpcode(),
C->isOpaque());
@@ -3455,6 +3458,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
case ISD::TRUNCATE:
case ISD::UINT_TO_FP:
case ISD::SINT_TO_FP:
+ case ISD::ABS:
case ISD::BITREVERSE:
case ISD::BSWAP:
case ISD::CTLZ:
@@ -3572,6 +3576,12 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
}
if (OpOpcode == ISD::UNDEF)
return getUNDEF(VT);
+ break;
+ case ISD::ABS:
+ assert(VT.isInteger() && VT == Operand.getValueType() &&
+ "Invalid ABS!");
+ if (OpOpcode == ISD::UNDEF)
+ return getUNDEF(VT);
break;
case ISD::BSWAP:
assert(VT.isInteger() && VT == Operand.getValueType() &&
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
index ba9ec68f1fe..488c60a28ff 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
@@ -300,6 +300,7 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const {
case ISD::GET_DYNAMIC_AREA_OFFSET: return "get.dynamic.area.offset";
// Bit manipulation
+ case ISD::ABS: return "abs";
case ISD::BITREVERSE: return "bitreverse";
case ISD::BSWAP: return "bswap";
case ISD::CTPOP: return "ctpop";
diff --git a/llvm/lib/CodeGen/TargetLoweringBase.cpp b/llvm/lib/CodeGen/TargetLoweringBase.cpp
index 518417dd11e..7a2d50e63c5 100644
--- a/llvm/lib/CodeGen/TargetLoweringBase.cpp
+++ b/llvm/lib/CodeGen/TargetLoweringBase.cpp
@@ -900,6 +900,7 @@ void TargetLoweringBase::initActions() {
setOperationAction(ISD::SMAX, VT, Expand);
setOperationAction(ISD::UMIN, VT, Expand);
setOperationAction(ISD::UMAX, VT, Expand);
+ setOperationAction(ISD::ABS, VT, Expand);
// Overflow operations default to expand
setOperationAction(ISD::SADDO, VT, Expand);
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 81c78981b6f..6ab8c4b3275 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -896,6 +896,9 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
}
if (!Subtarget.useSoftFloat() && Subtarget.hasSSSE3()) {
+ setOperationAction(ISD::ABS, MVT::v16i8, Legal);
+ setOperationAction(ISD::ABS, MVT::v8i16, Legal);
+ setOperationAction(ISD::ABS, MVT::v4i32, Legal);
setOperationAction(ISD::BITREVERSE, MVT::v16i8, Custom);
setOperationAction(ISD::CTLZ, MVT::v16i8, Custom);
setOperationAction(ISD::CTLZ, MVT::v8i16, Custom);
@@ -1081,6 +1084,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::MULHS, MVT::v32i8, Custom);
for (auto VT : { MVT::v32i8, MVT::v16i16, MVT::v8i32 }) {
+ setOperationAction(ISD::ABS, VT, HasInt256 ? Legal : Custom);
setOperationAction(ISD::SMAX, VT, HasInt256 ? Legal : Custom);
setOperationAction(ISD::UMAX, VT, HasInt256 ? Legal : Custom);
setOperationAction(ISD::SMIN, VT, HasInt256 ? Legal : Custom);
@@ -1287,6 +1291,8 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
}
}
if (Subtarget.hasVLX()) {
+ setOperationAction(ISD::ABS, MVT::v4i64, Legal);
+ setOperationAction(ISD::ABS, MVT::v2i64, Legal);
setOperationAction(ISD::SINT_TO_FP, MVT::v8i32, Legal);
setOperationAction(ISD::UINT_TO_FP, MVT::v8i32, Legal);
setOperationAction(ISD::FP_TO_SINT, MVT::v8i32, Legal);
@@ -1383,6 +1389,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::MUL, MVT::v16i32, Legal);
for (auto VT : { MVT::v16i32, MVT::v8i64 }) {
+ setOperationAction(ISD::ABS, VT, Legal);
setOperationAction(ISD::SRL, VT, Custom);
setOperationAction(ISD::SHL, VT, Custom);
setOperationAction(ISD::SRA, VT, Custom);
@@ -1562,6 +1569,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
for (auto VT : { MVT::v64i8, MVT::v32i16 }) {
setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
setOperationAction(ISD::VSELECT, VT, Legal);
+ setOperationAction(ISD::ABS, VT, Legal);
setOperationAction(ISD::SRL, VT, Custom);
setOperationAction(ISD::SHL, VT, Custom);
setOperationAction(ISD::SRA, VT, Custom);
@@ -21155,6 +21163,25 @@ static SDValue LowerADD_SUB(SDValue Op, SelectionDAG &DAG) {
return Lower256IntArith(Op, DAG);
}
+static SDValue LowerABS(SDValue Op, SelectionDAG &DAG) {
+ assert(Op.getSimpleValueType().is256BitVector() &&
+ Op.getSimpleValueType().isInteger() &&
+ "Only handle AVX 256-bit vector integer operation");
+ MVT VT = Op.getSimpleValueType();
+ unsigned NumElems = VT.getVectorNumElements();
+
+ SDLoc dl(Op);
+ SDValue Src = Op.getOperand(0);
+ SDValue Lo = extract128BitVector(Src, 0, DAG, dl);
+ SDValue Hi = extract128BitVector(Src, NumElems / 2, DAG, dl);
+
+ MVT EltVT = VT.getVectorElementType();
+ MVT NewVT = MVT::getVectorVT(EltVT, NumElems / 2);
+ return DAG.getNode(ISD::CONCAT_VECTORS, dl, VT,
+ DAG.getNode(ISD::ABS, dl, NewVT, Lo),
+ DAG.getNode(ISD::ABS, dl, NewVT, Hi));
+}
+
static SDValue LowerMINMAX(SDValue Op, SelectionDAG &DAG) {
assert(Op.getSimpleValueType().is256BitVector() &&
Op.getSimpleValueType().isInteger() &&
@@ -23699,6 +23726,7 @@ SDValue X86TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
case ISD::SMIN:
case ISD::UMAX:
case ISD::UMIN: return LowerMINMAX(Op, DAG);
+ case ISD::ABS: return LowerABS(Op, DAG);
case ISD::FSINCOS: return LowerFSINCOS(Op, Subtarget, DAG);
case ISD::MLOAD: return LowerMLOAD(Op, Subtarget, DAG);
case ISD::MSTORE: return LowerMSTORE(Op, Subtarget, DAG);
@@ -24111,7 +24139,6 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
case X86ISD::HSUB: return "X86ISD::HSUB";
case X86ISD::FHADD: return "X86ISD::FHADD";
case X86ISD::FHSUB: return "X86ISD::FHSUB";
- case X86ISD::ABS: return "X86ISD::ABS";
case X86ISD::CONFLICT: return "X86ISD::CONFLICT";
case X86ISD::FMAX: return "X86ISD::FMAX";
case X86ISD::FMAXS: return "X86ISD::FMAXS";
diff --git a/llvm/lib/Target/X86/X86ISelLowering.h b/llvm/lib/Target/X86/X86ISelLowering.h
index 73daf640a18..6f89996c558 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.h
+++ b/llvm/lib/Target/X86/X86ISelLowering.h
@@ -239,9 +239,6 @@ namespace llvm {
FHADD,
FHSUB,
- // Integer absolute value
- ABS,
-
// Detect Conflicts Within a Vector
CONFLICT,
diff --git a/llvm/lib/Target/X86/X86InstrAVX512.td b/llvm/lib/Target/X86/X86InstrAVX512.td
index a5e3b7a8e34..c378069de42 100644
--- a/llvm/lib/Target/X86/X86InstrAVX512.td
+++ b/llvm/lib/Target/X86/X86InstrAVX512.td
@@ -8606,66 +8606,7 @@ multiclass avx512_unary_rm_vl_all<bits<8> opc_b, bits<8> opc_w,
HasBWI>;
}
-defm VPABS : avx512_unary_rm_vl_all<0x1C, 0x1D, 0x1E, 0x1F, "vpabs", X86Abs>;
-
-def avx512_v16i1sextv16i8 : PatLeaf<(v16i8 (X86pcmpgt (bc_v16i8 (v4i32 immAllZerosV)),
- VR128X:$src))>;
-def avx512_v8i1sextv8i16 : PatLeaf<(v8i16 (X86vsrai VR128X:$src, (i8 15)))>;
-def avx512_v4i1sextv4i32 : PatLeaf<(v4i32 (X86vsrai VR128X:$src, (i8 31)))>;
-def avx512_v32i1sextv32i8 : PatLeaf<(v32i8 (X86pcmpgt (bc_v32i8 (v8i32 immAllZerosV)),
- VR256X:$src))>;
-def avx512_v16i1sextv16i16: PatLeaf<(v16i16 (X86vsrai VR256X:$src, (i8 15)))>;
-def avx512_v8i1sextv8i32 : PatLeaf<(v8i32 (X86vsrai VR256X:$src, (i8 31)))>;
-
-let Predicates = [HasBWI, HasVLX] in {
- def : Pat<(xor
- (bc_v2i64 (avx512_v16i1sextv16i8)),
- (bc_v2i64 (add (v16i8 VR128X:$src), (avx512_v16i1sextv16i8)))),
- (VPABSBZ128rr VR128X:$src)>;
- def : Pat<(xor
- (bc_v2i64 (avx512_v8i1sextv8i16)),
- (bc_v2i64 (add (v8i16 VR128X:$src), (avx512_v8i1sextv8i16)))),
- (VPABSWZ128rr VR128X:$src)>;
- def : Pat<(xor
- (bc_v4i64 (avx512_v32i1sextv32i8)),
- (bc_v4i64 (add (v32i8 VR256X:$src), (avx512_v32i1sextv32i8)))),
- (VPABSBZ256rr VR256X:$src)>;
- def : Pat<(xor
- (bc_v4i64 (avx512_v16i1sextv16i16)),
- (bc_v4i64 (add (v16i16 VR256X:$src), (avx512_v16i1sextv16i16)))),
- (VPABSWZ256rr VR256X:$src)>;
-}
-let Predicates = [HasAVX512, HasVLX] in {
- def : Pat<(xor
- (bc_v2i64 (avx512_v4i1sextv4i32)),
- (bc_v2i64 (add (v4i32 VR128X:$src), (avx512_v4i1sextv4i32)))),
- (VPABSDZ128rr VR128X:$src)>;
- def : Pat<(xor
- (bc_v4i64 (avx512_v8i1sextv8i32)),
- (bc_v4i64 (add (v8i32 VR256X:$src), (avx512_v8i1sextv8i32)))),
- (VPABSDZ256rr VR256X:$src)>;
-}
-
-let Predicates = [HasAVX512] in {
-def : Pat<(xor
- (bc_v8i64 (v16i1sextv16i32)),
- (bc_v8i64 (add (v16i32 VR512:$src), (v16i1sextv16i32)))),
- (VPABSDZrr VR512:$src)>;
-def : Pat<(xor
- (bc_v8i64 (v8i1sextv8i64)),
- (bc_v8i64 (add (v8i64 VR512:$src), (v8i1sextv8i64)))),
- (VPABSQZrr VR512:$src)>;
-}
-let Predicates = [HasBWI] in {
-def : Pat<(xor
- (bc_v8i64 (v64i1sextv64i8)),
- (bc_v8i64 (add (v64i8 VR512:$src), (v64i1sextv64i8)))),
- (VPABSBZrr VR512:$src)>;
-def : Pat<(xor
- (bc_v8i64 (v32i1sextv32i16)),
- (bc_v8i64 (add (v32i16 VR512:$src), (v32i1sextv32i16)))),
- (VPABSWZrr VR512:$src)>;
-}
+defm VPABS : avx512_unary_rm_vl_all<0x1C, 0x1D, 0x1E, 0x1F, "vpabs", abs>;
multiclass avx512_ctlz<bits<8> opc, string OpcodeStr, Predicate prd>{
diff --git a/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td b/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td
index f4146e450dd..9867ba84bb9 100644
--- a/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td
+++ b/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td
@@ -355,7 +355,6 @@ def SDTFmaRound : SDTypeProfile<1, 4, [SDTCisSameAs<0,1>,
def X86PAlignr : SDNode<"X86ISD::PALIGNR", SDTShuff3OpI>;
def X86VAlign : SDNode<"X86ISD::VALIGN", SDTShuff3OpI>;
-def X86Abs : SDNode<"X86ISD::ABS", SDTIntUnaryOp>;
def X86Conflict : SDNode<"X86ISD::CONFLICT", SDTIntUnaryOp>;
def X86PShufd : SDNode<"X86ISD::PSHUFD", SDTShuff2OpI>;
diff --git a/llvm/lib/Target/X86/X86InstrSSE.td b/llvm/lib/Target/X86/X86InstrSSE.td
index b3a51003250..eeab3393d84 100644
--- a/llvm/lib/Target/X86/X86InstrSSE.td
+++ b/llvm/lib/Target/X86/X86InstrSSE.td
@@ -5266,84 +5266,24 @@ multiclass SS3I_unop_rm_y<bits<8> opc, string OpcodeStr, ValueType vt,
Sched<[WriteVecALULd]>;
}
-// Helper fragments to match sext vXi1 to vXiY.
-def v16i1sextv16i8 : PatLeaf<(v16i8 (X86pcmpgt (bc_v16i8 (v4i32 immAllZerosV)),
- VR128:$src))>;
-def v8i1sextv8i16 : PatLeaf<(v8i16 (X86vsrai VR128:$src, (i8 15)))>;
-def v4i1sextv4i32 : PatLeaf<(v4i32 (X86vsrai VR128:$src, (i8 31)))>;
-def v32i1sextv32i8 : PatLeaf<(v32i8 (X86pcmpgt (bc_v32i8 (v8i32 immAllZerosV)),
- VR256:$src))>;
-def v16i1sextv16i16: PatLeaf<(v16i16 (X86vsrai VR256:$src, (i8 15)))>;
-def v8i1sextv8i32 : PatLeaf<(v8i32 (X86vsrai VR256:$src, (i8 31)))>;
-
-let Predicates = [HasAVX, NoVLX_Or_NoBWI] in {
- defm VPABSB : SS3I_unop_rm<0x1C, "vpabsb", v16i8, X86Abs, loadv2i64>, VEX, VEX_WIG;
- defm VPABSW : SS3I_unop_rm<0x1D, "vpabsw", v8i16, X86Abs, loadv2i64>, VEX, VEX_WIG;
-}
-let Predicates = [HasAVX, NoVLX] in {
- defm VPABSD : SS3I_unop_rm<0x1E, "vpabsd", v4i32, X86Abs, loadv2i64>, VEX, VEX_WIG;
-}
-
let Predicates = [HasAVX, NoVLX_Or_NoBWI] in {
- def : Pat<(xor
- (bc_v2i64 (v16i1sextv16i8)),
- (bc_v2i64 (add (v16i8 VR128:$src), (v16i1sextv16i8)))),
- (VPABSBrr VR128:$src)>;
- def : Pat<(xor
- (bc_v2i64 (v8i1sextv8i16)),
- (bc_v2i64 (add (v8i16 VR128:$src), (v8i1sextv8i16)))),
- (VPABSWrr VR128:$src)>;
+ defm VPABSB : SS3I_unop_rm<0x1C, "vpabsb", v16i8, abs, loadv2i64>, VEX, VEX_WIG;
+ defm VPABSW : SS3I_unop_rm<0x1D, "vpabsw", v8i16, abs, loadv2i64>, VEX, VEX_WIG;
}
let Predicates = [HasAVX, NoVLX] in {
- def : Pat<(xor
- (bc_v2i64 (v4i1sextv4i32)),
- (bc_v2i64 (add (v4i32 VR128:$src), (v4i1sextv4i32)))),
- (VPABSDrr VR128:$src)>;
+ defm VPABSD : SS3I_unop_rm<0x1E, "vpabsd", v4i32, abs, loadv2i64>, VEX, VEX_WIG;
}
-
let Predicates = [HasAVX2, NoVLX_Or_NoBWI] in {
- defm VPABSB : SS3I_unop_rm_y<0x1C, "vpabsb", v32i8, X86Abs>, VEX, VEX_L, VEX_WIG;
- defm VPABSW : SS3I_unop_rm_y<0x1D, "vpabsw", v16i16, X86Abs>, VEX, VEX_L, VEX_WIG;
+ defm VPABSB : SS3I_unop_rm_y<0x1C, "vpabsb", v32i8, abs>, VEX, VEX_L, VEX_WIG;
+ defm VPABSW : SS3I_unop_rm_y<0x1D, "vpabsw", v16i16, abs>, VEX, VEX_L, VEX_WIG;
}
let Predicates = [HasAVX2, NoVLX] in {
- defm VPABSD : SS3I_unop_rm_y<0x1E, "vpabsd", v8i32, X86Abs>, VEX, VEX_L, VEX_WIG;
+ defm VPABSD : SS3I_unop_rm_y<0x1E, "vpabsd", v8i32, abs>, VEX, VEX_L, VEX_WIG;
}
-let Predicates = [HasAVX2, NoVLX_Or_NoBWI] in {
- def : Pat<(xor
- (bc_v4i64 (v32i1sextv32i8)),
- (bc_v4i64 (add (v32i8 VR256:$src), (v32i1sextv32i8)))),
- (VPABSBYrr VR256:$src)>;
- def : Pat<(xor
- (bc_v4i64 (v16i1sextv16i16)),
- (bc_v4i64 (add (v16i16 VR256:$src), (v16i1sextv16i16)))),
- (VPABSWYrr VR256:$src)>;
-}
-let Predicates = [HasAVX2, NoVLX] in {
- def : Pat<(xor
- (bc_v4i64 (v8i1sextv8i32)),
- (bc_v4i64 (add (v8i32 VR256:$src), (v8i1sextv8i32)))),
- (VPABSDYrr VR256:$src)>;
-}
-
-defm PABSB : SS3I_unop_rm<0x1C, "pabsb", v16i8, X86Abs, memopv2i64>;
-defm PABSW : SS3I_unop_rm<0x1D, "pabsw", v8i16, X86Abs, memopv2i64>;
-defm PABSD : SS3I_unop_rm<0x1E, "pabsd", v4i32, X86Abs, memopv2i64>;
-
-let Predicates = [UseSSSE3] in {
- def : Pat<(xor
- (bc_v2i64 (v16i1sextv16i8)),
- (bc_v2i64 (add (v16i8 VR128:$src), (v16i1sextv16i8)))),
- (PABSBrr VR128:$src)>;
- def : Pat<(xor
- (bc_v2i64 (v8i1sextv8i16)),
- (bc_v2i64 (add (v8i16 VR128:$src), (v8i1sextv8i16)))),
- (PABSWrr VR128:$src)>;
- def : Pat<(xor
- (bc_v2i64 (v4i1sextv4i32)),
- (bc_v2i64 (add (v4i32 VR128:$src), (v4i1sextv4i32)))),
- (PABSDrr VR128:$src)>;
-}
+defm PABSB : SS3I_unop_rm<0x1C, "pabsb", v16i8, abs, memopv2i64>;
+defm PABSW : SS3I_unop_rm<0x1D, "pabsw", v8i16, abs, memopv2i64>;
+defm PABSD : SS3I_unop_rm<0x1E, "pabsd", v4i32, abs, memopv2i64>;
//===---------------------------------------------------------------------===//
// SSSE3 - Packed Binary Operator Instructions
diff --git a/llvm/lib/Target/X86/X86IntrinsicsInfo.h b/llvm/lib/Target/X86/X86IntrinsicsInfo.h
index 43d0cf72721..fa289921472 100644
--- a/llvm/lib/Target/X86/X86IntrinsicsInfo.h
+++ b/llvm/lib/Target/X86/X86IntrinsicsInfo.h
@@ -370,9 +370,9 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
X86_INTRINSIC_DATA(avx_vpermilvar_pd_256, INTR_TYPE_2OP, X86ISD::VPERMILPV, 0),
X86_INTRINSIC_DATA(avx_vpermilvar_ps, INTR_TYPE_2OP, X86ISD::VPERMILPV, 0),
X86_INTRINSIC_DATA(avx_vpermilvar_ps_256, INTR_TYPE_2OP, X86ISD::VPERMILPV, 0),
- X86_INTRINSIC_DATA(avx2_pabs_b, INTR_TYPE_1OP, X86ISD::ABS, 0),
- X86_INTRINSIC_DATA(avx2_pabs_d, INTR_TYPE_1OP, X86ISD::ABS, 0),
- X86_INTRINSIC_DATA(avx2_pabs_w, INTR_TYPE_1OP, X86ISD::ABS, 0),
+ X86_INTRINSIC_DATA(avx2_pabs_b, INTR_TYPE_1OP, ISD::ABS, 0),
+ X86_INTRINSIC_DATA(avx2_pabs_d, INTR_TYPE_1OP, ISD::ABS, 0),
+ X86_INTRINSIC_DATA(avx2_pabs_w, INTR_TYPE_1OP, ISD::ABS, 0),
X86_INTRINSIC_DATA(avx2_packssdw, INTR_TYPE_2OP, X86ISD::PACKSS, 0),
X86_INTRINSIC_DATA(avx2_packsswb, INTR_TYPE_2OP, X86ISD::PACKSS, 0),
X86_INTRINSIC_DATA(avx2_packusdw, INTR_TYPE_2OP, X86ISD::PACKUS, 0),
@@ -838,18 +838,18 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
X86ISD::FMULS_RND, 0),
X86_INTRINSIC_DATA(avx512_mask_mul_ss_round, INTR_TYPE_SCALAR_MASK_RM,
X86ISD::FMULS_RND, 0),
- X86_INTRINSIC_DATA(avx512_mask_pabs_b_128, INTR_TYPE_1OP_MASK, X86ISD::ABS, 0),
- X86_INTRINSIC_DATA(avx512_mask_pabs_b_256, INTR_TYPE_1OP_MASK, X86ISD::ABS, 0),
- X86_INTRINSIC_DATA(avx512_mask_pabs_b_512, INTR_TYPE_1OP_MASK, X86ISD::ABS, 0),
- X86_INTRINSIC_DATA(avx512_mask_pabs_d_128, INTR_TYPE_1OP_MASK, X86ISD::ABS, 0),
- X86_INTRINSIC_DATA(avx512_mask_pabs_d_256, INTR_TYPE_1OP_MASK, X86ISD::ABS, 0),
- X86_INTRINSIC_DATA(avx512_mask_pabs_d_512, INTR_TYPE_1OP_MASK, X86ISD::ABS, 0),
- X86_INTRINSIC_DATA(avx512_mask_pabs_q_128, INTR_TYPE_1OP_MASK, X86ISD::ABS, 0),
- X86_INTRINSIC_DATA(avx512_mask_pabs_q_256, INTR_TYPE_1OP_MASK, X86ISD::ABS, 0),
- X86_INTRINSIC_DATA(avx512_mask_pabs_q_512, INTR_TYPE_1OP_MASK, X86ISD::ABS, 0),
- X86_INTRINSIC_DATA(avx512_mask_pabs_w_128, INTR_TYPE_1OP_MASK, X86ISD::ABS, 0),
- X86_INTRINSIC_DATA(avx512_mask_pabs_w_256, INTR_TYPE_1OP_MASK, X86ISD::ABS, 0),
- X86_INTRINSIC_DATA(avx512_mask_pabs_w_512, INTR_TYPE_1OP_MASK, X86ISD::ABS, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pabs_b_128, INTR_TYPE_1OP_MASK, ISD::ABS, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pabs_b_256, INTR_TYPE_1OP_MASK, ISD::ABS, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pabs_b_512, INTR_TYPE_1OP_MASK, ISD::ABS, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pabs_d_128, INTR_TYPE_1OP_MASK, ISD::ABS, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pabs_d_256, INTR_TYPE_1OP_MASK, ISD::ABS, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pabs_d_512, INTR_TYPE_1OP_MASK, ISD::ABS, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pabs_q_128, INTR_TYPE_1OP_MASK, ISD::ABS, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pabs_q_256, INTR_TYPE_1OP_MASK, ISD::ABS, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pabs_q_512, INTR_TYPE_1OP_MASK, ISD::ABS, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pabs_w_128, INTR_TYPE_1OP_MASK, ISD::ABS, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pabs_w_256, INTR_TYPE_1OP_MASK, ISD::ABS, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pabs_w_512, INTR_TYPE_1OP_MASK, ISD::ABS, 0),
X86_INTRINSIC_DATA(avx512_mask_padds_b_128, INTR_TYPE_2OP_MASK, X86ISD::ADDS, 0),
X86_INTRINSIC_DATA(avx512_mask_padds_b_256, INTR_TYPE_2OP_MASK, X86ISD::ADDS, 0),
X86_INTRINSIC_DATA(avx512_mask_padds_b_512, INTR_TYPE_2OP_MASK, X86ISD::ADDS, 0),
@@ -1693,9 +1693,9 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
X86_INTRINSIC_DATA(sse41_pmuldq, INTR_TYPE_2OP, X86ISD::PMULDQ, 0),
X86_INTRINSIC_DATA(sse4a_extrqi, INTR_TYPE_3OP, X86ISD::EXTRQI, 0),
X86_INTRINSIC_DATA(sse4a_insertqi, INTR_TYPE_4OP, X86ISD::INSERTQI, 0),
- X86_INTRINSIC_DATA(ssse3_pabs_b_128, INTR_TYPE_1OP, X86ISD::ABS, 0),
- X86_INTRINSIC_DATA(ssse3_pabs_d_128, INTR_TYPE_1OP, X86ISD::ABS, 0),
- X86_INTRINSIC_DATA(ssse3_pabs_w_128, INTR_TYPE_1OP, X86ISD::ABS, 0),
+ X86_INTRINSIC_DATA(ssse3_pabs_b_128, INTR_TYPE_1OP, ISD::ABS, 0),
+ X86_INTRINSIC_DATA(ssse3_pabs_d_128, INTR_TYPE_1OP, ISD::ABS, 0),
+ X86_INTRINSIC_DATA(ssse3_pabs_w_128, INTR_TYPE_1OP, ISD::ABS, 0),
X86_INTRINSIC_DATA(ssse3_phadd_d_128, INTR_TYPE_2OP, X86ISD::HADD, 0),
X86_INTRINSIC_DATA(ssse3_phadd_w_128, INTR_TYPE_2OP, X86ISD::HADD, 0),
X86_INTRINSIC_DATA(ssse3_phsub_d_128, INTR_TYPE_2OP, X86ISD::HSUB, 0),
diff --git a/llvm/test/CodeGen/X86/combine-abs.ll b/llvm/test/CodeGen/X86/combine-abs.ll
index 109738dcd9b..2d71e4f2f27 100644
--- a/llvm/test/CodeGen/X86/combine-abs.ll
+++ b/llvm/test/CodeGen/X86/combine-abs.ll
@@ -1,13 +1,11 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s
-; FIXME: Various missed opportunities to simplify integer absolute instructions.
-
; fold (abs c1) -> c2
define <4 x i32> @combine_v4i32_abs_constant() {
; CHECK-LABEL: combine_v4i32_abs_constant:
; CHECK: # BB#0:
-; CHECK-NEXT: vpabsd {{.*}}(%rip), %xmm0
+; CHECK-NEXT: vmovaps {{.*#+}} xmm0 = [0,1,3,2147483648]
; CHECK-NEXT: retq
%1 = call <4 x i32> @llvm.x86.ssse3.pabs.d.128(<4 x i32> <i32 0, i32 -1, i32 3, i32 -2147483648>)
ret <4 x i32> %1
@@ -16,7 +14,7 @@ define <4 x i32> @combine_v4i32_abs_constant() {
define <16 x i16> @combine_v16i16_abs_constant() {
; CHECK-LABEL: combine_v16i16_abs_constant:
; CHECK: # BB#0:
-; CHECK-NEXT: vpabsw {{.*}}(%rip), %ymm0
+; CHECK-NEXT: vmovaps {{.*#+}} ymm0 = [0,1,1,3,3,7,7,255,255,4096,4096,32767,32767,32768,32768,0]
; CHECK-NEXT: retq
%1 = call <16 x i16> @llvm.x86.avx2.pabs.w(<16 x i16> <i16 0, i16 1, i16 -1, i16 3, i16 -3, i16 7, i16 -7, i16 255, i16 -255, i16 4096, i16 -4096, i16 32767, i16 -32767, i16 -32768, i16 32768, i16 65536>)
ret <16 x i16> %1
@@ -27,7 +25,6 @@ define <8 x i16> @combine_v8i16_abs_abs(<8 x i16> %a) {
; CHECK-LABEL: combine_v8i16_abs_abs:
; CHECK: # BB#0:
; CHECK-NEXT: vpabsw %xmm0, %xmm0
-; CHECK-NEXT: vpabsw %xmm0, %xmm0
; CHECK-NEXT: retq
%a1 = call <8 x i16> @llvm.x86.ssse3.pabs.w.128(<8 x i16> %a)
%n2 = sub <8 x i16> zeroinitializer, %a1
@@ -40,7 +37,6 @@ define <32 x i8> @combine_v32i8_abs_abs(<32 x i8> %a) {
; CHECK-LABEL: combine_v32i8_abs_abs:
; CHECK: # BB#0:
; CHECK-NEXT: vpabsb %ymm0, %ymm0
-; CHECK-NEXT: vpabsb %ymm0, %ymm0
; CHECK-NEXT: retq
%n1 = sub <32 x i8> zeroinitializer, %a
%b1 = icmp slt <32 x i8> %a, zeroinitializer
diff --git a/llvm/test/CodeGen/X86/viabs.ll b/llvm/test/CodeGen/X86/viabs.ll
index da743bc6c4f..34a9df1782a 100644
--- a/llvm/test/CodeGen/X86/viabs.ll
+++ b/llvm/test/CodeGen/X86/viabs.ll
@@ -147,14 +147,10 @@ define <8 x i32> @test_abs_gt_v8i32(<8 x i32> %a) nounwind {
;
; AVX1-LABEL: test_abs_gt_v8i32:
; AVX1: # BB#0:
-; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
-; AVX1-NEXT: vpsrad $31, %xmm1, %xmm2
-; AVX1-NEXT: vpaddd %xmm2, %xmm1, %xmm1
-; AVX1-NEXT: vpsrad $31, %xmm0, %xmm3
-; AVX1-NEXT: vpaddd %xmm3, %xmm0, %xmm0
-; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
-; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm3, %ymm1
-; AVX1-NEXT: vxorps %ymm1, %ymm0, %ymm0
+; AVX1-NEXT: vpabsd %xmm0, %xmm1
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
+; AVX1-NEXT: vpabsd %xmm0, %xmm0
+; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: test_abs_gt_v8i32:
@@ -193,14 +189,10 @@ define <8 x i32> @test_abs_ge_v8i32(<8 x i32> %a) nounwind {
;
; AVX1-LABEL: test_abs_ge_v8i32:
; AVX1: # BB#0:
-; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
-; AVX1-NEXT: vpsrad $31, %xmm1, %xmm2
-; AVX1-NEXT: vpaddd %xmm2, %xmm1, %xmm1
-; AVX1-NEXT: vpsrad $31, %xmm0, %xmm3
-; AVX1-NEXT: vpaddd %xmm3, %xmm0, %xmm0
-; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
-; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm3, %ymm1
-; AVX1-NEXT: vxorps %ymm1, %ymm0, %ymm0
+; AVX1-NEXT: vpabsd %xmm0, %xmm1
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
+; AVX1-NEXT: vpabsd %xmm0, %xmm0
+; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: test_abs_ge_v8i32:
@@ -239,14 +231,10 @@ define <16 x i16> @test_abs_gt_v16i16(<16 x i16> %a) nounwind {
;
; AVX1-LABEL: test_abs_gt_v16i16:
; AVX1: # BB#0:
-; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
-; AVX1-NEXT: vpsraw $15, %xmm1, %xmm2
-; AVX1-NEXT: vpaddw %xmm2, %xmm1, %xmm1
-; AVX1-NEXT: vpsraw $15, %xmm0, %xmm3
-; AVX1-NEXT: vpaddw %xmm3, %xmm0, %xmm0
-; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
-; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm3, %ymm1
-; AVX1-NEXT: vxorps %ymm1, %ymm0, %ymm0
+; AVX1-NEXT: vpabsw %xmm0, %xmm1
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
+; AVX1-NEXT: vpabsw %xmm0, %xmm0
+; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: test_abs_gt_v16i16:
@@ -285,15 +273,10 @@ define <32 x i8> @test_abs_lt_v32i8(<32 x i8> %a) nounwind {
;
; AVX1-LABEL: test_abs_lt_v32i8:
; AVX1: # BB#0:
-; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
-; AVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2
-; AVX1-NEXT: vpcmpgtb %xmm1, %xmm2, %xmm3
-; AVX1-NEXT: vpcmpgtb %xmm0, %xmm2, %xmm2
-; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm2, %ymm4
-; AVX1-NEXT: vpaddb %xmm3, %xmm1, %xmm1
-; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0
-; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
-; AVX1-NEXT: vxorps %ymm4, %ymm0, %ymm0
+; AVX1-NEXT: vpabsb %xmm0, %xmm1
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
+; AVX1-NEXT: vpabsb %xmm0, %xmm0
+; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: test_abs_lt_v32i8:
@@ -332,14 +315,10 @@ define <8 x i32> @test_abs_le_v8i32(<8 x i32> %a) nounwind {
;
; AVX1-LABEL: test_abs_le_v8i32:
; AVX1: # BB#0:
-; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
-; AVX1-NEXT: vpsrad $31, %xmm1, %xmm2
-; AVX1-NEXT: vpaddd %xmm2, %xmm1, %xmm1
-; AVX1-NEXT: vpsrad $31, %xmm0, %xmm3
-; AVX1-NEXT: vpaddd %xmm3, %xmm0, %xmm0
-; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
-; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm3, %ymm1
-; AVX1-NEXT: vxorps %ymm1, %ymm0, %ymm0
+; AVX1-NEXT: vpabsd %xmm0, %xmm1
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
+; AVX1-NEXT: vpabsd %xmm0, %xmm0
+; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: test_abs_le_v8i32:
@@ -388,22 +367,14 @@ define <16 x i32> @test_abs_le_16i32(<16 x i32> %a) nounwind {
;
; AVX1-LABEL: test_abs_le_16i32:
; AVX1: # BB#0:
-; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
-; AVX1-NEXT: vpsrad $31, %xmm2, %xmm3
-; AVX1-NEXT: vpaddd %xmm3, %xmm2, %xmm2
-; AVX1-NEXT: vpsrad $31, %xmm0, %xmm4
-; AVX1-NEXT: vpaddd %xmm4, %xmm0, %xmm0
-; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
-; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm4, %ymm2
-; AVX1-NEXT: vxorps %ymm2, %ymm0, %ymm0
-; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
-; AVX1-NEXT: vpsrad $31, %xmm2, %xmm3
-; AVX1-NEXT: vpaddd %xmm3, %xmm2, %xmm2
-; AVX1-NEXT: vpsrad $31, %xmm1, %xmm4
-; AVX1-NEXT: vpaddd %xmm4, %xmm1, %xmm1
-; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1
-; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm4, %ymm2
-; AVX1-NEXT: vxorps %ymm2, %ymm1, %ymm1
+; AVX1-NEXT: vpabsd %xmm0, %xmm2
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
+; AVX1-NEXT: vpabsd %xmm0, %xmm0
+; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm2, %ymm0
+; AVX1-NEXT: vpabsd %xmm1, %xmm2
+; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm1
+; AVX1-NEXT: vpabsd %xmm1, %xmm1
+; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm2, %ymm1
; AVX1-NEXT: retq
;
; AVX2-LABEL: test_abs_le_16i32:
@@ -450,9 +421,7 @@ define <2 x i64> @test_abs_ge_v2i64(<2 x i64> %a) nounwind {
;
; AVX512-LABEL: test_abs_ge_v2i64:
; AVX512: # BB#0:
-; AVX512-NEXT: vpsraq $63, %xmm0, %xmm1
-; AVX512-NEXT: vpaddq %xmm1, %xmm0, %xmm0
-; AVX512-NEXT: vpxor %xmm1, %xmm0, %xmm0
+; AVX512-NEXT: vpabsq %xmm0, %xmm0
; AVX512-NEXT: retq
%tmp1neg = sub <2 x i64> zeroinitializer, %a
%b = icmp sge <2 x i64> %a, zeroinitializer
@@ -499,9 +468,7 @@ define <4 x i64> @test_abs_gt_v4i64(<4 x i64> %a) nounwind {
;
; AVX512-LABEL: test_abs_gt_v4i64:
; AVX512: # BB#0:
-; AVX512-NEXT: vpsraq $63, %ymm0, %ymm1
-; AVX512-NEXT: vpaddq %ymm1, %ymm0, %ymm0
-; AVX512-NEXT: vpxor %ymm1, %ymm0, %ymm0
+; AVX512-NEXT: vpabsq %ymm0, %ymm0
; AVX512-NEXT: retq
%tmp1neg = sub <4 x i64> zeroinitializer, %a
%b = icmp sgt <4 x i64> %a, <i64 -1, i64 -1, i64 -1, i64 -1>
@@ -691,23 +658,14 @@ define <64 x i8> @test_abs_lt_v64i8(<64 x i8> %a) nounwind {
;
; AVX1-LABEL: test_abs_lt_v64i8:
; AVX1: # BB#0:
-; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
-; AVX1-NEXT: vpxor %xmm3, %xmm3, %xmm3
-; AVX1-NEXT: vpcmpgtb %xmm2, %xmm3, %xmm4
-; AVX1-NEXT: vpcmpgtb %xmm0, %xmm3, %xmm5
-; AVX1-NEXT: vinsertf128 $1, %xmm4, %ymm5, %ymm6
-; AVX1-NEXT: vpaddb %xmm4, %xmm2, %xmm2
-; AVX1-NEXT: vpaddb %xmm5, %xmm0, %xmm0
-; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
-; AVX1-NEXT: vxorps %ymm6, %ymm0, %ymm0
-; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
-; AVX1-NEXT: vpcmpgtb %xmm2, %xmm3, %xmm4
-; AVX1-NEXT: vpcmpgtb %xmm1, %xmm3, %xmm3
-; AVX1-NEXT: vinsertf128 $1, %xmm4, %ymm3, %ymm5
-; AVX1-NEXT: vpaddb %xmm4, %xmm2, %xmm2
-; AVX1-NEXT: vpaddb %xmm3, %xmm1, %xmm1
-; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1
-; AVX1-NEXT: vxorps %ymm5, %ymm1, %ymm1
+; AVX1-NEXT: vpabsb %xmm0, %xmm2
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
+; AVX1-NEXT: vpabsb %xmm0, %xmm0
+; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm2, %ymm0
+; AVX1-NEXT: vpabsb %xmm1, %xmm2
+; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm1
+; AVX1-NEXT: vpabsb %xmm1, %xmm1
+; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm2, %ymm1
; AVX1-NEXT: retq
;
; AVX2-LABEL: test_abs_lt_v64i8:
@@ -763,22 +721,14 @@ define <32 x i16> @test_abs_gt_v32i16(<32 x i16> %a) nounwind {
;
; AVX1-LABEL: test_abs_gt_v32i16:
; AVX1: # BB#0:
-; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
-; AVX1-NEXT: vpsraw $15, %xmm2, %xmm3
-; AVX1-NEXT: vpaddw %xmm3, %xmm2, %xmm2
-; AVX1-NEXT: vpsraw $15, %xmm0, %xmm4
-; AVX1-NEXT: vpaddw %xmm4, %xmm0, %xmm0
-; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
-; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm4, %ymm2
-; AVX1-NEXT: vxorps %ymm2, %ymm0, %ymm0
-; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
-; AVX1-NEXT: vpsraw $15, %xmm2, %xmm3
-; AVX1-NEXT: vpaddw %xmm3, %xmm2, %xmm2
-; AVX1-NEXT: vpsraw $15, %xmm1, %xmm4
-; AVX1-NEXT: vpaddw %xmm4, %xmm1, %xmm1
-; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1
-; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm4, %ymm2
-; AVX1-NEXT: vxorps %ymm2, %ymm1, %ymm1
+; AVX1-NEXT: vpabsw %xmm0, %xmm2
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
+; AVX1-NEXT: vpabsw %xmm0, %xmm0
+; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm2, %ymm0
+; AVX1-NEXT: vpabsw %xmm1, %xmm2
+; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm1
+; AVX1-NEXT: vpabsw %xmm1, %xmm1
+; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm2, %ymm1
; AVX1-NEXT: retq
;
; AVX2-LABEL: test_abs_gt_v32i16:
OpenPOWER on IntegriCloud