diff options
Diffstat (limited to 'llvm/lib')
-rw-r--r-- | llvm/lib/Target/X86/InstPrinter/X86InstComments.cpp | 16 | ||||
-rw-r--r-- | llvm/lib/Target/X86/Utils/X86ShuffleDecode.cpp | 43 | ||||
-rw-r--r-- | llvm/lib/Target/X86/Utils/X86ShuffleDecode.h | 17 | ||||
-rw-r--r-- | llvm/lib/Target/X86/X86ISelLowering.cpp | 39 | ||||
-rw-r--r-- | llvm/lib/Target/X86/X86ISelLowering.h | 3 | ||||
-rw-r--r-- | llvm/lib/Target/X86/X86InstrFragmentsSIMD.td | 3 | ||||
-rw-r--r-- | llvm/lib/Target/X86/X86InstrSSE.td | 156 |
7 files changed, 135 insertions, 142 deletions
diff --git a/llvm/lib/Target/X86/InstPrinter/X86InstComments.cpp b/llvm/lib/Target/X86/InstPrinter/X86InstComments.cpp index 55a05f46259..59920ad9acf 100644 --- a/llvm/lib/Target/X86/InstPrinter/X86InstComments.cpp +++ b/llvm/lib/Target/X86/InstPrinter/X86InstComments.cpp @@ -309,32 +309,32 @@ void llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS, Src1Name = getRegName(MI->getOperand(1).getReg()); // FALL THROUGH. case X86::VPERMILPSmi: - DecodeVPERMILPSMask(4, MI->getOperand(MI->getNumOperands()-1).getImm(), - ShuffleMask); + DecodeVPERMILPMask(MVT::v4f32, MI->getOperand(MI->getNumOperands()-1).getImm(), + ShuffleMask); DestName = getRegName(MI->getOperand(0).getReg()); break; case X86::VPERMILPSYri: Src1Name = getRegName(MI->getOperand(1).getReg()); // FALL THROUGH. case X86::VPERMILPSYmi: - DecodeVPERMILPSMask(8, MI->getOperand(MI->getNumOperands()-1).getImm(), - ShuffleMask); + DecodeVPERMILPMask(MVT::v8f32, MI->getOperand(MI->getNumOperands()-1).getImm(), + ShuffleMask); DestName = getRegName(MI->getOperand(0).getReg()); break; case X86::VPERMILPDri: Src1Name = getRegName(MI->getOperand(1).getReg()); // FALL THROUGH. case X86::VPERMILPDmi: - DecodeVPERMILPDMask(2, MI->getOperand(MI->getNumOperands()-1).getImm(), - ShuffleMask); + DecodeVPERMILPMask(MVT::v2f64, MI->getOperand(MI->getNumOperands()-1).getImm(), + ShuffleMask); DestName = getRegName(MI->getOperand(0).getReg()); break; case X86::VPERMILPDYri: Src1Name = getRegName(MI->getOperand(1).getReg()); // FALL THROUGH. case X86::VPERMILPDYmi: - DecodeVPERMILPDMask(4, MI->getOperand(MI->getNumOperands()-1).getImm(), - ShuffleMask); + DecodeVPERMILPMask(MVT::v4f64, MI->getOperand(MI->getNumOperands()-1).getImm(), + ShuffleMask); DestName = getRegName(MI->getOperand(0).getReg()); break; case X86::VPERM2F128rr: diff --git a/llvm/lib/Target/X86/Utils/X86ShuffleDecode.cpp b/llvm/lib/Target/X86/Utils/X86ShuffleDecode.cpp index cf1c55e0f7e..daca2401678 100644 --- a/llvm/lib/Target/X86/Utils/X86ShuffleDecode.cpp +++ b/llvm/lib/Target/X86/Utils/X86ShuffleDecode.cpp @@ -193,36 +193,23 @@ void DecodeUNPCKLPMask(EVT VT, SmallVectorImpl<unsigned> &ShuffleMask) { } } -// DecodeVPERMILPSMask - Decodes VPERMILPS permutes for any 128-bit 32-bit -// elements. For 256-bit vectors, it's considered as two 128 lanes, the -// referenced elements can't cross lanes and the mask of the first lane must -// be the same of the second. -void DecodeVPERMILPSMask(unsigned NumElts, unsigned Imm, - SmallVectorImpl<unsigned> &ShuffleMask) { - unsigned NumLanes = (NumElts*32)/128; - unsigned LaneSize = NumElts/NumLanes; - - for (unsigned l = 0; l != NumLanes; ++l) { - for (unsigned i = 0; i != LaneSize; ++i) { - unsigned Idx = (Imm >> (i*2)) & 0x3 ; - ShuffleMask.push_back(Idx+(l*LaneSize)); - } - } -} +// DecodeVPERMILPMask - Decodes VPERMILPS/ VPERMILPD permutes for any 128-bit +// 32-bit or 64-bit elements. For 256-bit vectors, it's considered as two 128 +// lanes. For VPERMILPS, referenced elements can't cross lanes and the mask of +// the first lane must be the same of the second. +void DecodeVPERMILPMask(EVT VT, unsigned Imm, + SmallVectorImpl<unsigned> &ShuffleMask) { + unsigned NumElts = VT.getVectorNumElements(); -// DecodeVPERMILPDMask - Decodes VPERMILPD permutes for any 128-bit 64-bit -// elements. For 256-bit vectors, it's considered as two 128 lanes, the -// referenced elements can't cross lanes but the mask of the first lane can -// be the different of the second (not like VPERMILPS). -void DecodeVPERMILPDMask(unsigned NumElts, unsigned Imm, - SmallVectorImpl<unsigned> &ShuffleMask) { - unsigned NumLanes = (NumElts*64)/128; - unsigned LaneSize = NumElts/NumLanes; + unsigned NumLanes = VT.getSizeInBits() / 128; + unsigned NumLaneElts = NumElts / NumLanes; - for (unsigned l = 0; l < NumLanes; ++l) { - for (unsigned i = l*LaneSize; i < LaneSize*(l+1); ++i) { - unsigned Idx = (Imm >> i) & 0x1; - ShuffleMask.push_back(Idx+(l*LaneSize)); + for (unsigned l = 0; l != NumLanes; ++l) { + unsigned LaneStart = l*NumLaneElts; + for (unsigned i = 0; i != NumLaneElts; ++i) { + unsigned Idx = NumLaneElts == 4 ? (Imm >> (i*2)) & 0x3 + : (Imm >> (i+LaneStart)) & 0x1; + ShuffleMask.push_back(Idx+LaneStart); } } } diff --git a/llvm/lib/Target/X86/Utils/X86ShuffleDecode.h b/llvm/lib/Target/X86/Utils/X86ShuffleDecode.h index 48b9ef2f8b8..74c34b412bc 100644 --- a/llvm/lib/Target/X86/Utils/X86ShuffleDecode.h +++ b/llvm/lib/Target/X86/Utils/X86ShuffleDecode.h @@ -78,18 +78,11 @@ void DecodeUNPCKHPMask(EVT VT, SmallVectorImpl<unsigned> &ShuffleMask); void DecodeUNPCKLPMask(EVT VT, SmallVectorImpl<unsigned> &ShuffleMask); -// DecodeVPERMILPSMask - Decodes VPERMILPS permutes for any 128-bit 32-bit -// elements. For 256-bit vectors, it's considered as two 128 lanes, the -// referenced elements can't cross lanes and the mask of the first lane must -// be the same of the second. -void DecodeVPERMILPSMask(unsigned NElts, unsigned Imm, - SmallVectorImpl<unsigned> &ShuffleMask); - -// DecodeVPERMILPDMask - Decodes VPERMILPD permutes for any 128-bit 64-bit -// elements. For 256-bit vectors, it's considered as two 128 lanes, the -// referenced elements can't cross lanes but the mask of the first lane can -// be the different of the second (not like VPERMILPS). -void DecodeVPERMILPDMask(unsigned NElts, unsigned Imm, +// DecodeVPERMILPMask - Decodes VPERMILPS/ VPERMILPD permutes for any 128-bit +// 32-bit or 64-bit elements. For 256-bit vectors, it's considered as two 128 +// lanes. For VPERMILPS, referenced elements can't cross lanes and the mask of +// the first lane must be the same of the second. +void DecodeVPERMILPMask(EVT VT, unsigned Imm, SmallVectorImpl<unsigned> &ShuffleMask); void DecodeVPERM2F128Mask(unsigned Imm, diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 7626db0441c..0b38d556e37 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -2847,8 +2847,7 @@ static bool isTargetShuffle(unsigned Opcode) { case X86ISD::PUNPCKL: case X86ISD::UNPCKHP: case X86ISD::PUNPCKH: - case X86ISD::VPERMILPS: - case X86ISD::VPERMILPD: + case X86ISD::VPERMILP: case X86ISD::VPERM2F128: case X86ISD::VPERM2I128: return true; @@ -2876,8 +2875,7 @@ static SDValue getTargetShuffleNode(unsigned Opc, DebugLoc dl, EVT VT, case X86ISD::PSHUFD: case X86ISD::PSHUFHW: case X86ISD::PSHUFLW: - case X86ISD::VPERMILPS: - case X86ISD::VPERMILPD: + case X86ISD::VPERMILP: return DAG.getNode(Opc, dl, VT, V1, DAG.getConstant(TargetMask, MVT::i8)); } @@ -4613,14 +4611,9 @@ static SDValue getShuffleScalarElt(SDNode *N, int Index, SelectionDAG &DAG, return getShuffleScalarElt(V.getOperand(OpNum).getNode(), Index, DAG, Depth+1); } - case X86ISD::VPERMILPS: + case X86ISD::VPERMILP: ImmN = N->getOperand(N->getNumOperands()-1); - DecodeVPERMILPSMask(NumElems, cast<ConstantSDNode>(ImmN)->getZExtValue(), - ShuffleMask); - break; - case X86ISD::VPERMILPD: - ImmN = N->getOperand(N->getNumOperands()-1); - DecodeVPERMILPDMask(NumElems, cast<ConstantSDNode>(ImmN)->getZExtValue(), + DecodeVPERMILPMask(VT, cast<ConstantSDNode>(ImmN)->getZExtValue(), ShuffleMask); break; case X86ISD::VPERM2F128: @@ -6528,22 +6521,6 @@ static inline unsigned getUNPCKHOpcode(EVT VT, bool HasAVX2) { return 0; } -static inline unsigned getVPERMILOpcode(EVT VT) { - switch(VT.getSimpleVT().SimpleTy) { - case MVT::v4i32: - case MVT::v4f32: - case MVT::v8i32: - case MVT::v8f32: return X86ISD::VPERMILPS; - case MVT::v2i64: - case MVT::v2f64: - case MVT::v4i64: - case MVT::v4f64: return X86ISD::VPERMILPD; - default: - llvm_unreachable("Unknown type for vpermil"); - } - return 0; -} - static inline unsigned getVPERM2X128Opcode(EVT VT, bool HasAVX2) { switch(VT.getSimpleVT().SimpleTy) { case MVT::v32i8: @@ -6876,7 +6853,7 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const { // Handle VPERMILPS/D* permutations if (isVPERMILPMask(M, VT, Subtarget->hasAVX())) - return getTargetShuffleNode(getVPERMILOpcode(VT), dl, VT, V1, + return getTargetShuffleNode(X86ISD::VPERMILP, dl, VT, V1, getShuffleVPERMILPImmediate(SVOp), DAG); // Handle VPERM2F128/VPERM2I128 permutations @@ -11179,8 +11156,7 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const { case X86ISD::PUNPCKL: return "X86ISD::PUNPCKL"; case X86ISD::PUNPCKH: return "X86ISD::PUNPCKH"; case X86ISD::VBROADCAST: return "X86ISD::VBROADCAST"; - case X86ISD::VPERMILPS: return "X86ISD::VPERMILPS"; - case X86ISD::VPERMILPD: return "X86ISD::VPERMILPD"; + case X86ISD::VPERMILP: return "X86ISD::VPERMILP"; case X86ISD::VPERM2F128: return "X86ISD::VPERM2F128"; case X86ISD::VPERM2I128: return "X86ISD::VPERM2I128"; case X86ISD::VASTART_SAVE_XMM_REGS: return "X86ISD::VASTART_SAVE_XMM_REGS"; @@ -14767,8 +14743,7 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N, case X86ISD::PSHUFLW: case X86ISD::MOVSS: case X86ISD::MOVSD: - case X86ISD::VPERMILPS: - case X86ISD::VPERMILPD: + case X86ISD::VPERMILP: case X86ISD::VPERM2F128: case X86ISD::VPERM2I128: case ISD::VECTOR_SHUFFLE: return PerformShuffleCombine(N, DAG, DCI,Subtarget); diff --git a/llvm/lib/Target/X86/X86ISelLowering.h b/llvm/lib/Target/X86/X86ISelLowering.h index e0a0e295783..be801dadc18 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.h +++ b/llvm/lib/Target/X86/X86ISelLowering.h @@ -277,8 +277,7 @@ namespace llvm { UNPCKHP, PUNPCKL, PUNPCKH, - VPERMILPS, - VPERMILPD, + VPERMILP, VPERM2F128, VPERM2I128, VBROADCAST, diff --git a/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td b/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td index ff9c1433fad..b83ca09548c 100644 --- a/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td +++ b/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td @@ -136,8 +136,7 @@ def X86Unpckhp : SDNode<"X86ISD::UNPCKHP", SDTShuff2Op>; def X86Punpckl : SDNode<"X86ISD::PUNPCKL", SDTShuff2Op>; def X86Punpckh : SDNode<"X86ISD::PUNPCKH", SDTShuff2Op>; -def X86VPermilps : SDNode<"X86ISD::VPERMILPS", SDTShuff2OpI>; -def X86VPermilpd : SDNode<"X86ISD::VPERMILPD", SDTShuff2OpI>; +def X86VPermilp : SDNode<"X86ISD::VPERMILP", SDTShuff2OpI>; def X86VPerm2f128 : SDNode<"X86ISD::VPERM2F128", SDTShuff3OpI>; def X86VPerm2i128 : SDNode<"X86ISD::VPERM2I128", SDTShuff3OpI>; diff --git a/llvm/lib/Target/X86/X86InstrSSE.td b/llvm/lib/Target/X86/X86InstrSSE.td index 0dca0007d24..b63b17b36d1 100644 --- a/llvm/lib/Target/X86/X86InstrSSE.td +++ b/llvm/lib/Target/X86/X86InstrSSE.td @@ -7164,31 +7164,6 @@ def : Pat<(int_x86_avx_vinsertf128_ps_256 VR256:$src1, VR128:$src2, imm:$src3), def : Pat<(int_x86_avx_vinsertf128_si_256 VR256:$src1, VR128:$src2, imm:$src3), (VINSERTF128rr VR256:$src1, VR128:$src2, imm:$src3)>; -def : Pat<(vinsertf128_insert:$ins (v8f32 VR256:$src1), (v4f32 VR128:$src2), - (i32 imm)), - (VINSERTF128rr VR256:$src1, VR128:$src2, - (INSERT_get_vinsertf128_imm VR256:$ins))>; -def : Pat<(vinsertf128_insert:$ins (v4f64 VR256:$src1), (v2f64 VR128:$src2), - (i32 imm)), - (VINSERTF128rr VR256:$src1, VR128:$src2, - (INSERT_get_vinsertf128_imm VR256:$ins))>; -def : Pat<(vinsertf128_insert:$ins (v8i32 VR256:$src1), (v4i32 VR128:$src2), - (i32 imm)), - (VINSERTF128rr VR256:$src1, VR128:$src2, - (INSERT_get_vinsertf128_imm VR256:$ins))>; -def : Pat<(vinsertf128_insert:$ins (v4i64 VR256:$src1), (v2i64 VR128:$src2), - (i32 imm)), - (VINSERTF128rr VR256:$src1, VR128:$src2, - (INSERT_get_vinsertf128_imm VR256:$ins))>; -def : Pat<(vinsertf128_insert:$ins (v32i8 VR256:$src1), (v16i8 VR128:$src2), - (i32 imm)), - (VINSERTF128rr VR256:$src1, VR128:$src2, - (INSERT_get_vinsertf128_imm VR256:$ins))>; -def : Pat<(vinsertf128_insert:$ins (v16i16 VR256:$src1), (v8i16 VR128:$src2), - (i32 imm)), - (VINSERTF128rr VR256:$src1, VR128:$src2, - (INSERT_get_vinsertf128_imm VR256:$ins))>; - //===----------------------------------------------------------------------===// // VEXTRACTF128 - Extract packed floating-point values // @@ -7211,31 +7186,6 @@ def : Pat<(int_x86_avx_vextractf128_ps_256 VR256:$src1, imm:$src2), def : Pat<(int_x86_avx_vextractf128_si_256 VR256:$src1, imm:$src2), (VEXTRACTF128rr VR256:$src1, imm:$src2)>; -def : Pat<(vextractf128_extract:$ext VR256:$src1, (i32 imm)), - (v4f32 (VEXTRACTF128rr - (v8f32 VR256:$src1), - (EXTRACT_get_vextractf128_imm VR128:$ext)))>; -def : Pat<(vextractf128_extract:$ext VR256:$src1, (i32 imm)), - (v2f64 (VEXTRACTF128rr - (v4f64 VR256:$src1), - (EXTRACT_get_vextractf128_imm VR128:$ext)))>; -def : Pat<(vextractf128_extract:$ext VR256:$src1, (i32 imm)), - (v4i32 (VEXTRACTF128rr - (v8i32 VR256:$src1), - (EXTRACT_get_vextractf128_imm VR128:$ext)))>; -def : Pat<(vextractf128_extract:$ext VR256:$src1, (i32 imm)), - (v2i64 (VEXTRACTF128rr - (v4i64 VR256:$src1), - (EXTRACT_get_vextractf128_imm VR128:$ext)))>; -def : Pat<(vextractf128_extract:$ext VR256:$src1, (i32 imm)), - (v8i16 (VEXTRACTF128rr - (v16i16 VR256:$src1), - (EXTRACT_get_vextractf128_imm VR128:$ext)))>; -def : Pat<(vextractf128_extract:$ext VR256:$src1, (i32 imm)), - (v16i8 (VEXTRACTF128rr - (v32i8 VR256:$src1), - (EXTRACT_get_vextractf128_imm VR128:$ext)))>; - //===----------------------------------------------------------------------===// // VMASKMOV - Conditional SIMD Packed Loads and Stores // @@ -7322,22 +7272,22 @@ let ExeDomain = SSEPackedDouble in { int_x86_avx_vpermil_pd_256>; } -def : Pat<(v8f32 (X86VPermilps VR256:$src1, (i8 imm:$imm))), +def : Pat<(v8f32 (X86VPermilp VR256:$src1, (i8 imm:$imm))), (VPERMILPSYri VR256:$src1, imm:$imm)>; -def : Pat<(v4f64 (X86VPermilpd VR256:$src1, (i8 imm:$imm))), +def : Pat<(v4f64 (X86VPermilp VR256:$src1, (i8 imm:$imm))), (VPERMILPDYri VR256:$src1, imm:$imm)>; -def : Pat<(v8i32 (X86VPermilps VR256:$src1, (i8 imm:$imm))), +def : Pat<(v8i32 (X86VPermilp VR256:$src1, (i8 imm:$imm))), (VPERMILPSYri VR256:$src1, imm:$imm)>; -def : Pat<(v4i64 (X86VPermilpd VR256:$src1, (i8 imm:$imm))), +def : Pat<(v4i64 (X86VPermilp VR256:$src1, (i8 imm:$imm))), (VPERMILPDYri VR256:$src1, imm:$imm)>; -def : Pat<(v8f32 (X86VPermilps (memopv8f32 addr:$src1), (i8 imm:$imm))), +def : Pat<(v8f32 (X86VPermilp (memopv8f32 addr:$src1), (i8 imm:$imm))), (VPERMILPSYmi addr:$src1, imm:$imm)>; -def : Pat<(v4f64 (X86VPermilpd (memopv4f64 addr:$src1), (i8 imm:$imm))), +def : Pat<(v4f64 (X86VPermilp (memopv4f64 addr:$src1), (i8 imm:$imm))), (VPERMILPDYmi addr:$src1, imm:$imm)>; -def : Pat<(v8i32 (X86VPermilps (bc_v8i32 (memopv4i64 addr:$src1)), +def : Pat<(v8i32 (X86VPermilp (bc_v8i32 (memopv4i64 addr:$src1)), (i8 imm:$imm))), (VPERMILPSYmi addr:$src1, imm:$imm)>; -def : Pat<(v4i64 (X86VPermilpd (memopv4i64 addr:$src1), (i8 imm:$imm))), +def : Pat<(v4i64 (X86VPermilp (memopv4i64 addr:$src1), (i8 imm:$imm))), (VPERMILPDYmi addr:$src1, imm:$imm)>; //===----------------------------------------------------------------------===// @@ -7656,6 +7606,51 @@ def VINSERTI128rm : AVX2AIi8<0x38, MRMSrcMem, (outs VR256:$dst), (int_x86_avx2_vinserti128 VR256:$src1, (memopv2i64 addr:$src2), imm:$src3))]>, VEX_4V; +let Predicates = [HasAVX2] in { +def : Pat<(vinsertf128_insert:$ins (v4i64 VR256:$src1), (v2i64 VR128:$src2), + (i32 imm)), + (VINSERTI128rr VR256:$src1, VR128:$src2, + (INSERT_get_vinsertf128_imm VR256:$ins))>; +def : Pat<(vinsertf128_insert:$ins (v8i32 VR256:$src1), (v4i32 VR128:$src2), + (i32 imm)), + (VINSERTI128rr VR256:$src1, VR128:$src2, + (INSERT_get_vinsertf128_imm VR256:$ins))>; +def : Pat<(vinsertf128_insert:$ins (v32i8 VR256:$src1), (v16i8 VR128:$src2), + (i32 imm)), + (VINSERTI128rr VR256:$src1, VR128:$src2, + (INSERT_get_vinsertf128_imm VR256:$ins))>; +def : Pat<(vinsertf128_insert:$ins (v16i16 VR256:$src1), (v8i16 VR128:$src2), + (i32 imm)), + (VINSERTI128rr VR256:$src1, VR128:$src2, + (INSERT_get_vinsertf128_imm VR256:$ins))>; +} + +// AVX1 patterns +def : Pat<(vinsertf128_insert:$ins (v8f32 VR256:$src1), (v4f32 VR128:$src2), + (i32 imm)), + (VINSERTF128rr VR256:$src1, VR128:$src2, + (INSERT_get_vinsertf128_imm VR256:$ins))>; +def : Pat<(vinsertf128_insert:$ins (v4f64 VR256:$src1), (v2f64 VR128:$src2), + (i32 imm)), + (VINSERTF128rr VR256:$src1, VR128:$src2, + (INSERT_get_vinsertf128_imm VR256:$ins))>; +def : Pat<(vinsertf128_insert:$ins (v4i64 VR256:$src1), (v2i64 VR128:$src2), + (i32 imm)), + (VINSERTF128rr VR256:$src1, VR128:$src2, + (INSERT_get_vinsertf128_imm VR256:$ins))>; +def : Pat<(vinsertf128_insert:$ins (v8i32 VR256:$src1), (v4i32 VR128:$src2), + (i32 imm)), + (VINSERTF128rr VR256:$src1, VR128:$src2, + (INSERT_get_vinsertf128_imm VR256:$ins))>; +def : Pat<(vinsertf128_insert:$ins (v32i8 VR256:$src1), (v16i8 VR128:$src2), + (i32 imm)), + (VINSERTF128rr VR256:$src1, VR128:$src2, + (INSERT_get_vinsertf128_imm VR256:$ins))>; +def : Pat<(vinsertf128_insert:$ins (v16i16 VR256:$src1), (v8i16 VR128:$src2), + (i32 imm)), + (VINSERTF128rr VR256:$src1, VR128:$src2, + (INSERT_get_vinsertf128_imm VR256:$ins))>; + //===----------------------------------------------------------------------===// // VEXTRACTI128 - Extract packed integer values // @@ -7670,6 +7665,51 @@ def VEXTRACTI128mr : AVX2AIi8<0x39, MRMDestMem, (outs), (ins i128mem:$dst, VR256:$src1, i8imm:$src2), "vextracti128\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>, VEX; +let Predicates = [HasAVX2] in { +def : Pat<(vextractf128_extract:$ext VR256:$src1, (i32 imm)), + (v2i64 (VEXTRACTI128rr + (v4i64 VR256:$src1), + (EXTRACT_get_vextractf128_imm VR128:$ext)))>; +def : Pat<(vextractf128_extract:$ext VR256:$src1, (i32 imm)), + (v4i32 (VEXTRACTI128rr + (v8i32 VR256:$src1), + (EXTRACT_get_vextractf128_imm VR128:$ext)))>; +def : Pat<(vextractf128_extract:$ext VR256:$src1, (i32 imm)), + (v8i16 (VEXTRACTI128rr + (v16i16 VR256:$src1), + (EXTRACT_get_vextractf128_imm VR128:$ext)))>; +def : Pat<(vextractf128_extract:$ext VR256:$src1, (i32 imm)), + (v16i8 (VEXTRACTI128rr + (v32i8 VR256:$src1), + (EXTRACT_get_vextractf128_imm VR128:$ext)))>; +} + +// AVX1 patterns +def : Pat<(vextractf128_extract:$ext VR256:$src1, (i32 imm)), + (v4f32 (VEXTRACTF128rr + (v8f32 VR256:$src1), + (EXTRACT_get_vextractf128_imm VR128:$ext)))>; +def : Pat<(vextractf128_extract:$ext VR256:$src1, (i32 imm)), + (v2f64 (VEXTRACTF128rr + (v4f64 VR256:$src1), + (EXTRACT_get_vextractf128_imm VR128:$ext)))>; +def : Pat<(vextractf128_extract:$ext VR256:$src1, (i32 imm)), + (v2i64 (VEXTRACTF128rr + (v4i64 VR256:$src1), + (EXTRACT_get_vextractf128_imm VR128:$ext)))>; +def : Pat<(vextractf128_extract:$ext VR256:$src1, (i32 imm)), + (v4i32 (VEXTRACTF128rr + (v8i32 VR256:$src1), + (EXTRACT_get_vextractf128_imm VR128:$ext)))>; +def : Pat<(vextractf128_extract:$ext VR256:$src1, (i32 imm)), + (v8i16 (VEXTRACTF128rr + (v16i16 VR256:$src1), + (EXTRACT_get_vextractf128_imm VR128:$ext)))>; +def : Pat<(vextractf128_extract:$ext VR256:$src1, (i32 imm)), + (v16i8 (VEXTRACTF128rr + (v32i8 VR256:$src1), + (EXTRACT_get_vextractf128_imm VR128:$ext)))>; + //===----------------------------------------------------------------------===// // VPMASKMOV - Conditional SIMD Integer Packed Loads and Stores // |