summaryrefslogtreecommitdiffstats
path: root/llvm/lib/Target
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/lib/Target')
-rw-r--r--llvm/lib/Target/X86/InstPrinter/X86InstComments.cpp23
-rw-r--r--llvm/lib/Target/X86/Utils/X86ShuffleDecode.cpp74
-rw-r--r--llvm/lib/Target/X86/Utils/X86ShuffleDecode.h8
-rw-r--r--llvm/lib/Target/X86/X86ISelLowering.cpp180
-rw-r--r--llvm/lib/Target/X86/X86ISelLowering.h3
-rw-r--r--llvm/lib/Target/X86/X86InstrFragmentsSIMD.td8
-rw-r--r--llvm/lib/Target/X86/X86InstrSSE.td6
-rw-r--r--llvm/lib/Target/X86/X86IntrinsicsInfo.h4
8 files changed, 300 insertions, 6 deletions
diff --git a/llvm/lib/Target/X86/InstPrinter/X86InstComments.cpp b/llvm/lib/Target/X86/InstPrinter/X86InstComments.cpp
index 3cad9fa1e2a..91b144a4482 100644
--- a/llvm/lib/Target/X86/InstPrinter/X86InstComments.cpp
+++ b/llvm/lib/Target/X86/InstPrinter/X86InstComments.cpp
@@ -878,6 +878,29 @@ bool llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS,
DestName = getRegName(MI->getOperand(0).getReg());
break;
+ case X86::EXTRQI:
+ if (MI->getOperand(2).isImm() &&
+ MI->getOperand(3).isImm())
+ DecodeEXTRQIMask(MI->getOperand(2).getImm(),
+ MI->getOperand(3).getImm(),
+ ShuffleMask);
+
+ DestName = getRegName(MI->getOperand(0).getReg());
+ Src1Name = getRegName(MI->getOperand(1).getReg());
+ break;
+
+ case X86::INSERTQI:
+ if (MI->getOperand(3).isImm() &&
+ MI->getOperand(4).isImm())
+ DecodeINSERTQIMask(MI->getOperand(3).getImm(),
+ MI->getOperand(4).getImm(),
+ ShuffleMask);
+
+ DestName = getRegName(MI->getOperand(0).getReg());
+ Src1Name = getRegName(MI->getOperand(1).getReg());
+ Src2Name = getRegName(MI->getOperand(2).getReg());
+ break;
+
case X86::PMOVZXBWrr:
case X86::PMOVZXBDrr:
case X86::PMOVZXBQrr:
diff --git a/llvm/lib/Target/X86/Utils/X86ShuffleDecode.cpp b/llvm/lib/Target/X86/Utils/X86ShuffleDecode.cpp
index ef3318ba758..9777c0d85e9 100644
--- a/llvm/lib/Target/X86/Utils/X86ShuffleDecode.cpp
+++ b/llvm/lib/Target/X86/Utils/X86ShuffleDecode.cpp
@@ -431,4 +431,78 @@ void DecodeScalarMoveMask(MVT VT, bool IsLoad, SmallVectorImpl<int> &Mask) {
for (unsigned i = 1; i < NumElts; i++)
Mask.push_back(IsLoad ? static_cast<int>(SM_SentinelZero) : i);
}
+
+void DecodeEXTRQIMask(int Len, int Idx,
+ SmallVectorImpl<int> &ShuffleMask) {
+ // Only the bottom 6 bits are valid for each immediate.
+ Len &= 0x3F;
+ Idx &= 0x3F;
+
+ // We can only decode this bit extraction instruction as a shuffle if both the
+ // length and index work with whole bytes; otherwise the mask is left as-is.
+ if (0 != (Len % 8) || 0 != (Idx % 8))
+ return;
+
+ // A length of zero is equivalent to a bit length of 64.
+ if (Len == 0)
+ Len = 64;
+
+ // If the length + index exceeds the bottom 64 bits the result is undefined.
+ if ((Len + Idx) > 64) {
+ ShuffleMask.append(16, SM_SentinelUndef);
+ return;
+ }
+
+ // Convert length and index to work with bytes.
+ Len /= 8;
+ Idx /= 8;
+
+ // EXTRQ: Extract Len bytes starting from Idx. Zero pad the remaining bytes
+ // of the lower 64-bits. The upper 64-bits are undefined.
+ for (int i = 0; i != Len; ++i)
+ ShuffleMask.push_back(i + Idx);
+ for (int i = Len; i != 8; ++i)
+ ShuffleMask.push_back(SM_SentinelZero);
+ for (int i = 8; i != 16; ++i)
+ ShuffleMask.push_back(SM_SentinelUndef);
+}
+
+void DecodeINSERTQIMask(int Len, int Idx,
+ SmallVectorImpl<int> &ShuffleMask) {
+ // Only the bottom 6 bits are valid for each immediate.
+ Len &= 0x3F;
+ Idx &= 0x3F;
+
+ // We can only decode this bit insertion instruction as a shuffle if both the
+ // length and index work with whole bytes; otherwise the mask is left as-is.
+ if (0 != (Len % 8) || 0 != (Idx % 8))
+ return;
+
+ // A length of zero is equivalent to a bit length of 64.
+ if (Len == 0)
+ Len = 64;
+
+ // If the length + index exceeds the bottom 64 bits the result is undefined.
+ if ((Len + Idx) > 64) {
+ ShuffleMask.append(16, SM_SentinelUndef);
+ return;
+ }
+
+ // Convert length and index to work with bytes.
+ Len /= 8;
+ Idx /= 8;
+
+ // INSERTQ: Extract lowest Len bytes from lower half of second source and
+ // insert over first source starting at Idx byte. The upper 64-bits are
+ // undefined. Indices 0-7 select first-source bytes, 16+ the second source.
+ for (int i = 0; i != Idx; ++i)
+ ShuffleMask.push_back(i);
+ for (int i = 0; i != Len; ++i)
+ ShuffleMask.push_back(i + 16);
+ for (int i = Idx + Len; i != 8; ++i)
+ ShuffleMask.push_back(i);
+ for (int i = 8; i != 16; ++i)
+ ShuffleMask.push_back(SM_SentinelUndef);
+}
+
} // llvm namespace
diff --git a/llvm/lib/Target/X86/Utils/X86ShuffleDecode.h b/llvm/lib/Target/X86/Utils/X86ShuffleDecode.h
index 14b69434806..3d10d18e860 100644
--- a/llvm/lib/Target/X86/Utils/X86ShuffleDecode.h
+++ b/llvm/lib/Target/X86/Utils/X86ShuffleDecode.h
@@ -100,6 +100,14 @@ void DecodeZeroMoveLowMask(MVT VT, SmallVectorImpl<int> &ShuffleMask);
/// \brief Decode a scalar float move instruction as a shuffle mask.
void DecodeScalarMoveMask(MVT VT, bool IsLoad,
SmallVectorImpl<int> &ShuffleMask);
+
+/// \brief Decode a SSE4A EXTRQ instruction as a v16i8 shuffle mask.
+void DecodeEXTRQIMask(int Len, int Idx,
+ SmallVectorImpl<int> &ShuffleMask);
+
+/// \brief Decode a SSE4A INSERTQ instruction as a v16i8 shuffle mask.
+void DecodeINSERTQIMask(int Len, int Idx,
+ SmallVectorImpl<int> &ShuffleMask);
} // llvm namespace
#endif
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 458fa47363a..05b3604f851 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -3938,6 +3938,15 @@ bool X86TargetLowering::isCheapToSpeculateCtlz() const {
return Subtarget->hasLZCNT();
}
+/// isUndefInRange - Return true if every element of Mask in the half-open
+/// range [Pos, Pos+Size) is undef; any negative mask value counts as undef.
+static bool isUndefInRange(ArrayRef<int> Mask, unsigned Pos, unsigned Size) {
+ for (unsigned i = Pos, e = Pos + Size; i != e; ++i)
+ if (0 <= Mask[i])
+ return false;
+ return true;
+}
+
/// isUndefOrInRange - Return true if Val is undef or if its value falls within
/// the specified range (L, H].
static bool isUndefOrInRange(int Val, int Low, int Hi) {
@@ -6914,6 +6923,136 @@ static SDValue lowerVectorShuffleAsShift(SDLoc DL, MVT VT, SDValue V1,
return SDValue();
}
+/// \brief Try to lower a vector shuffle using SSE4a EXTRQ/INSERTQ.
+static SDValue lowerVectorShuffleWithSSE4A(SDLoc DL, MVT VT, SDValue V1,
+ SDValue V2, ArrayRef<int> Mask,
+ SelectionDAG &DAG) {
+ SmallBitVector Zeroable = computeZeroableShuffleElements(Mask, V1, V2);
+ assert(!Zeroable.all() && "Fully zeroable shuffle mask");
+
+ int Size = Mask.size();
+ int HalfSize = Size / 2;
+ assert(Size == (int)VT.getVectorNumElements() && "Unexpected mask size");
+
+ // Upper half must be undefined.
+ if (!isUndefInRange(Mask, HalfSize, HalfSize))
+ return SDValue();
+
+ // EXTRQ: Extract Len elements from lower half of source, starting at Idx.
+ // Remainder of lower half result is zero and upper half is all undef.
+ auto LowerAsEXTRQ = [&]() {
+ // Shrink Len past the trailing zeroable elements of the lower half; the
+ // loop stops at Len == 0 so that Zeroable[Len - 1] is never out of bounds.
+ int Len = HalfSize;
+ for (; Len > 0; --Len)
+ if (!Zeroable[Len - 1])
+ break;
+ assert(Len > 0 && "Zeroable shuffle mask");
+
+ // Attempt to match first Len sequential elements from the lower half.
+ SDValue Src;
+ int Idx = -1;
+ for (int i = 0; i != Len; ++i) {
+ int M = Mask[i];
+ if (M < 0)
+ continue;
+ SDValue &V = (M < Size ? V1 : V2);
+ M = M % Size;
+
+ // Reject upper-half elements and any element that would need Idx < 0.
+ if (i > M || M >= HalfSize)
+ return SDValue();
+
+ if (Idx < 0 || (Src == V && Idx == (M - i))) {
+ Src = V;
+ Idx = M - i;
+ continue;
+ }
+ return SDValue();
+ }
+
+ if (Idx < 0)
+ return SDValue();
+
+ assert((Idx + Len) <= HalfSize && "Illegal extraction mask");
+ int BitLen = (Len * VT.getScalarSizeInBits()) & 0x3f;
+ int BitIdx = (Idx * VT.getScalarSizeInBits()) & 0x3f;
+ return DAG.getNode(X86ISD::EXTRQI, DL, VT, Src,
+ DAG.getConstant(BitLen, DL, MVT::i8),
+ DAG.getConstant(BitIdx, DL, MVT::i8));
+ };
+
+ if (SDValue ExtrQ = LowerAsEXTRQ())
+ return ExtrQ;
+
+ // INSERTQ: Extract lowest Len elements from lower half of second source and
+ // insert over first source, starting at Idx.
+ // { A[0], .., A[Idx-1], B[0], .., B[Len-1], A[Idx+Len], .., UNDEF, ... }
+ auto LowerAsInsertQ = [&]() {
+ for (int Idx = 0; Idx != HalfSize; ++Idx) {
+ SDValue Base;
+
+ // Attempt to match first source from mask before insertion point.
+ if (isUndefInRange(Mask, 0, Idx)) {
+ /* EMPTY */
+ } else if (isSequentialOrUndefInRange(Mask, 0, Idx, 0)) {
+ Base = V1;
+ } else if (isSequentialOrUndefInRange(Mask, 0, Idx, Size)) {
+ Base = V2;
+ } else {
+ continue;
+ }
+
+ // Extend the extraction length looking to match both the insertion of
+ // the second source and the remaining elements of the first.
+ for (int Hi = Idx + 1; Hi <= HalfSize; ++Hi) {
+ SDValue Insert;
+ int Len = Hi - Idx;
+
+ // Match insertion.
+ if (isSequentialOrUndefInRange(Mask, Idx, Len, 0)) {
+ Insert = V1;
+ } else if (isSequentialOrUndefInRange(Mask, Idx, Len, Size)) {
+ Insert = V2;
+ } else {
+ continue;
+ }
+
+ // Match the remaining elements of the lower half.
+ if (isUndefInRange(Mask, Hi, HalfSize - Hi)) {
+ /* EMPTY */
+ } else if ((!Base || (Base == V1)) &&
+ isSequentialOrUndefInRange(Mask, Hi, HalfSize - Hi, Hi)) {
+ Base = V1;
+ } else if ((!Base || (Base == V2)) &&
+ isSequentialOrUndefInRange(Mask, Hi, HalfSize - Hi,
+ Size + Hi)) {
+ Base = V2;
+ } else {
+ continue;
+ }
+
+ // We may not have a base (first source) - this can safely be undefined.
+ if (!Base)
+ Base = DAG.getUNDEF(VT);
+
+ int BitLen = (Len * VT.getScalarSizeInBits()) & 0x3f;
+ int BitIdx = (Idx * VT.getScalarSizeInBits()) & 0x3f;
+ return DAG.getNode(X86ISD::INSERTQI, DL, VT, Base, Insert,
+ DAG.getConstant(BitLen, DL, MVT::i8),
+ DAG.getConstant(BitIdx, DL, MVT::i8));
+ }
+ }
+
+ return SDValue();
+ };
+
+ if (SDValue InsertQ = LowerAsInsertQ())
+ return InsertQ;
+
+ return SDValue();
+}
+
/// \brief Lower a vector shuffle as a zero or any extension.
///
/// Given a specific number of elements, element bit width, and extension
@@ -6921,7 +7060,7 @@ static SDValue lowerVectorShuffleAsShift(SDLoc DL, MVT VT, SDValue V1,
/// features of the subtarget.
static SDValue lowerVectorShuffleAsSpecificZeroOrAnyExtend(
SDLoc DL, MVT VT, int Scale, bool AnyExt, SDValue InputV,
- const X86Subtarget *Subtarget, SelectionDAG &DAG) {
+ ArrayRef<int> Mask, const X86Subtarget *Subtarget, SelectionDAG &DAG) {
assert(Scale > 1 && "Need a scale to extend.");
int NumElements = VT.getVectorNumElements();
int EltBits = VT.getScalarSizeInBits();
@@ -6958,6 +7097,28 @@ static SDValue lowerVectorShuffleAsSpecificZeroOrAnyExtend(
getV4X86ShuffleImm8ForMask(PSHUFHWMask, DL, DAG)));
}
+ // The SSE4A EXTRQ instruction can efficiently extend the first 2 lanes
+ // to 64-bits.
+ if ((Scale * EltBits) == 64 && EltBits < 32 && Subtarget->hasSSE4A()) {
+ assert(NumElements == (int)Mask.size() && "Unexpected shuffle mask size!");
+ assert(VT.getSizeInBits() == 128 && "Unexpected vector width!");
+
+ SDValue Lo = DAG.getNode(ISD::BITCAST, DL, MVT::v2i64,
+ DAG.getNode(X86ISD::EXTRQI, DL, VT, InputV,
+ DAG.getConstant(EltBits, DL, MVT::i8),
+ DAG.getConstant(0, DL, MVT::i8)));
+ if (isUndefInRange(Mask, NumElements/2, NumElements/2))
+ return DAG.getNode(ISD::BITCAST, DL, VT, Lo);
+
+ SDValue Hi =
+ DAG.getNode(ISD::BITCAST, DL, MVT::v2i64,
+ DAG.getNode(X86ISD::EXTRQI, DL, VT, InputV,
+ DAG.getConstant(EltBits, DL, MVT::i8),
+ DAG.getConstant(EltBits, DL, MVT::i8)));
+ return DAG.getNode(ISD::BITCAST, DL, VT,
+ DAG.getNode(X86ISD::UNPCKL, DL, MVT::v2i64, Lo, Hi));
+ }
+
// If this would require more than 2 unpack instructions to expand, use
// pshufb when available. We can only use more than 2 unpack instructions
// when zero extending i8 elements which also makes it easier to use pshufb.
@@ -7048,7 +7209,7 @@ static SDValue lowerVectorShuffleAsZeroOrAnyExtend(
return SDValue();
return lowerVectorShuffleAsSpecificZeroOrAnyExtend(
- DL, VT, Scale, AnyExt, InputV, Subtarget, DAG);
+ DL, VT, Scale, AnyExt, InputV, Mask, Subtarget, DAG);
};
// The widest scale possible for extending is to a 64-bit integer.
@@ -8575,6 +8736,11 @@ static SDValue lowerV8I16VectorShuffle(SDValue Op, SDValue V1, SDValue V2,
lowerVectorShuffleAsShift(DL, MVT::v8i16, V1, V2, Mask, DAG))
return Shift;
+ // See if we can use SSE4A Extraction / Insertion.
+ if (Subtarget->hasSSE4A())
+ if (SDValue V = lowerVectorShuffleWithSSE4A(DL, MVT::v8i16, V1, V2, Mask, DAG))
+ return V;
+
// There are special ways we can lower some single-element blends.
if (NumV2Inputs == 1)
if (SDValue V = lowerVectorShuffleAsElementInsertion(DL, MVT::v8i16, V1, V2,
@@ -8727,6 +8893,11 @@ static SDValue lowerV16I8VectorShuffle(SDValue Op, SDValue V1, SDValue V2,
DL, MVT::v16i8, V1, V2, Mask, Subtarget, DAG))
return ZExt;
+ // See if we can use SSE4A Extraction / Insertion.
+ if (Subtarget->hasSSE4A())
+ if (SDValue V = lowerVectorShuffleWithSSE4A(DL, MVT::v16i8, V1, V2, Mask, DAG))
+ return V;
+
int NumV2Elements =
std::count_if(Mask.begin(), Mask.end(), [](int M) { return M >= 16; });
@@ -15116,6 +15287,9 @@ static SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, const X86Subtarget *Subtarget
case INTR_TYPE_3OP:
return DAG.getNode(IntrData->Opc0, dl, Op.getValueType(), Op.getOperand(1),
Op.getOperand(2), Op.getOperand(3));
+ case INTR_TYPE_4OP:
+ return DAG.getNode(IntrData->Opc0, dl, Op.getValueType(), Op.getOperand(1),
+ Op.getOperand(2), Op.getOperand(3), Op.getOperand(4));
case INTR_TYPE_1OP_MASK_RM: {
SDValue Src = Op.getOperand(1);
SDValue PassThru = Op.getOperand(2);
@@ -18509,6 +18683,8 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
case X86ISD::FMINC: return "X86ISD::FMINC";
case X86ISD::FRSQRT: return "X86ISD::FRSQRT";
case X86ISD::FRCP: return "X86ISD::FRCP";
+ case X86ISD::EXTRQI: return "X86ISD::EXTRQI";
+ case X86ISD::INSERTQI: return "X86ISD::INSERTQI";
case X86ISD::TLSADDR: return "X86ISD::TLSADDR";
case X86ISD::TLSBASEADDR: return "X86ISD::TLSBASEADDR";
case X86ISD::TLSCALL: return "X86ISD::TLSCALL";
diff --git a/llvm/lib/Target/X86/X86ISelLowering.h b/llvm/lib/Target/X86/X86ISelLowering.h
index e0e6133685e..f82ea408017 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.h
+++ b/llvm/lib/Target/X86/X86ISelLowering.h
@@ -394,6 +394,9 @@ namespace llvm {
VINSERT,
VEXTRACT,
+ /// SSE4A Extraction and Insertion.
+ EXTRQI, INSERTQI,
+
// Vector multiply packed unsigned doubleword integers
PMULUDQ,
// Vector multiply packed signed doubleword integers
diff --git a/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td b/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td
index 153bd476958..89447b6d308 100644
--- a/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td
+++ b/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td
@@ -204,6 +204,14 @@ def X86pmuldq : SDNode<"X86ISD::PMULDQ",
SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisVec<1>,
SDTCisSameAs<1,2>]>>;
+def X86extrqi : SDNode<"X86ISD::EXTRQI",
+ SDTypeProfile<1, 3, [SDTCisVT<0, v2i64>, SDTCisSameAs<0,1>,
+ SDTCisVT<2, i8>, SDTCisVT<3, i8>]>>;
+def X86insertqi : SDNode<"X86ISD::INSERTQI",
+ SDTypeProfile<1, 4, [SDTCisVT<0, v2i64>, SDTCisSameAs<0,1>,
+ SDTCisSameAs<1,2>, SDTCisVT<3, i8>,
+ SDTCisVT<4, i8>]>>;
+
// Specific shuffle nodes - At some point ISD::VECTOR_SHUFFLE will always get
// translated into one of the target nodes below during lowering.
// Note: this is a work in progress...
diff --git a/llvm/lib/Target/X86/X86InstrSSE.td b/llvm/lib/Target/X86/X86InstrSSE.td
index 523c0e52f9b..ade3d25b046 100644
--- a/llvm/lib/Target/X86/X86InstrSSE.td
+++ b/llvm/lib/Target/X86/X86InstrSSE.td
@@ -7773,7 +7773,7 @@ let Constraints = "$src = $dst" in {
def EXTRQI : Ii8<0x78, MRMXr, (outs VR128:$dst),
(ins VR128:$src, u8imm:$len, u8imm:$idx),
"extrq\t{$idx, $len, $src|$src, $len, $idx}",
- [(set VR128:$dst, (int_x86_sse4a_extrqi VR128:$src, imm:$len,
+ [(set VR128:$dst, (X86extrqi VR128:$src, imm:$len,
imm:$idx))]>, PD;
def EXTRQ : I<0x79, MRMSrcReg, (outs VR128:$dst),
(ins VR128:$src, VR128:$mask),
@@ -7784,8 +7784,8 @@ def EXTRQ : I<0x79, MRMSrcReg, (outs VR128:$dst),
def INSERTQI : Ii8<0x78, MRMSrcReg, (outs VR128:$dst),
(ins VR128:$src, VR128:$src2, u8imm:$len, u8imm:$idx),
"insertq\t{$idx, $len, $src2, $src|$src, $src2, $len, $idx}",
- [(set VR128:$dst, (int_x86_sse4a_insertqi VR128:$src,
- VR128:$src2, imm:$len, imm:$idx))]>, XD;
+ [(set VR128:$dst, (X86insertqi VR128:$src, VR128:$src2,
+ imm:$len, imm:$idx))]>, XD;
def INSERTQ : I<0x79, MRMSrcReg, (outs VR128:$dst),
(ins VR128:$src, VR128:$mask),
"insertq\t{$mask, $src|$src, $mask}",
diff --git a/llvm/lib/Target/X86/X86IntrinsicsInfo.h b/llvm/lib/Target/X86/X86IntrinsicsInfo.h
index d44337e8dec..2c8b95bcba2 100644
--- a/llvm/lib/Target/X86/X86IntrinsicsInfo.h
+++ b/llvm/lib/Target/X86/X86IntrinsicsInfo.h
@@ -19,7 +19,7 @@ namespace llvm {
enum IntrinsicType {
INTR_NO_TYPE,
GATHER, SCATTER, PREFETCH, RDSEED, RDRAND, RDPMC, RDTSC, XTEST, ADX,
- INTR_TYPE_1OP, INTR_TYPE_2OP, INTR_TYPE_3OP,
+ INTR_TYPE_1OP, INTR_TYPE_2OP, INTR_TYPE_3OP, INTR_TYPE_4OP,
CMP_MASK, CMP_MASK_CC, VSHIFT, VSHIFT_MASK, COMI,
INTR_TYPE_1OP_MASK, INTR_TYPE_1OP_MASK_RM, INTR_TYPE_2OP_MASK, INTR_TYPE_2OP_MASK_RM,
INTR_TYPE_3OP_MASK, FMA_OP_MASK, FMA_OP_MASKZ, FMA_OP_MASK3, VPERM_3OP_MASK,
@@ -1079,6 +1079,8 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
X86_INTRINSIC_DATA(sse41_pmovzxwd, INTR_TYPE_1OP, X86ISD::VZEXT, 0),
X86_INTRINSIC_DATA(sse41_pmovzxwq, INTR_TYPE_1OP, X86ISD::VZEXT, 0),
X86_INTRINSIC_DATA(sse41_pmuldq, INTR_TYPE_2OP, X86ISD::PMULDQ, 0),
+ X86_INTRINSIC_DATA(sse4a_extrqi, INTR_TYPE_3OP, X86ISD::EXTRQI, 0),
+ X86_INTRINSIC_DATA(sse4a_insertqi, INTR_TYPE_4OP, X86ISD::INSERTQI, 0),
X86_INTRINSIC_DATA(sse_comieq_ss, COMI, X86ISD::COMI, ISD::SETEQ),
X86_INTRINSIC_DATA(sse_comige_ss, COMI, X86ISD::COMI, ISD::SETGE),
X86_INTRINSIC_DATA(sse_comigt_ss, COMI, X86ISD::COMI, ISD::SETGT),
OpenPOWER on IntegriCloud