diff options
-rw-r--r-- | llvm/lib/Target/X86/X86ISelLowering.cpp | 48 | ||||
-rw-r--r-- | llvm/test/CodeGen/X86/kshift.ll | 9 |
2 files changed, 51 insertions, 6 deletions
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 188a808f569..03181a625b8 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -16506,6 +16506,49 @@ static SDValue lower512BitShuffle(const SDLoc &DL, ArrayRef<int> Mask,
   }
 }
 
+static SDValue lower1BitShuffleAsKSHIFTR(const SDLoc &DL, ArrayRef<int> Mask,
+                                         MVT VT, SDValue V1, SDValue V2,
+                                         const X86Subtarget &Subtarget,
+                                         SelectionDAG &DAG) {
+  // Only a unary shuffle (V2 undef) is matched here; otherwise bail out.
+  if (!V2.isUndef())
+    return SDValue();
+
+  int ShiftAmt = -1;
+  int NumElts = Mask.size();
+  for (int i = 0; i != NumElts; ++i) {
+    int M = Mask[i];
+    assert((M == SM_SentinelUndef || (0 <= M && M < NumElts)) &&
+           "Unexpected mask index.");
+    if (M < 0)
+      continue;
+
+    // The first non-undef element determines our shift amount.
+    if (ShiftAmt < 0) {
+      ShiftAmt = M - i;
+      // A right shift needs a strictly positive offset (M - i > 0).
+      if (ShiftAmt <= 0)
+        return SDValue();
+    }
+    // All non-undef elements must shift by the same amount.
+    if (ShiftAmt != M - i)
+      return SDValue();
+  }
+  assert(ShiftAmt >= 0 && "All undef?");
+
+  // Found a right shift. Widen VT if NumElts < 8, or NumElts == 8 without DQI.
+  MVT WideVT = VT;
+  if ((!Subtarget.hasDQI() && NumElts == 8) || NumElts < 8)
+    WideVT = Subtarget.hasDQI() ? MVT::v8i1 : MVT::v16i1;
+  SDValue Res = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, WideVT,
+                            DAG.getUNDEF(WideVT), V1,
+                            DAG.getIntPtrConstant(0, DL));
+  Res = DAG.getNode(X86ISD::KSHIFTR, DL, WideVT, Res,
+                    DAG.getConstant(ShiftAmt, DL, MVT::i8));
+  return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, Res,
+                     DAG.getIntPtrConstant(0, DL));
+}
+
 // Determine if this shuffle can be implemented with a KSHIFT instruction.
 // Returns the shift amount if possible or -1 if not. This is a simplified
 // version of matchShuffleAsShift.
@@ -16586,6 +16629,11 @@ static SDValue lower1BitShuffle(const SDLoc &DL, ArrayRef<int> Mask,
                        Extract, DAG.getIntPtrConstant(0, DL));
   }
 
+  // Try a simple shift right with undef elements. Later we'll try with zeros.
+  if (SDValue Shift = lower1BitShuffleAsKSHIFTR(DL, Mask, VT, V1, V2, Subtarget,
+                                                DAG))
+    return Shift;
+
   // Try to match KSHIFTs.
   unsigned Offset = 0;
   for (SDValue V : { V1, V2 }) {
diff --git a/llvm/test/CodeGen/X86/kshift.ll b/llvm/test/CodeGen/X86/kshift.ll
index 919299f1347..01db68382ca 100644
--- a/llvm/test/CodeGen/X86/kshift.ll
+++ b/llvm/test/CodeGen/X86/kshift.ll
@@ -676,12 +676,9 @@ define i8 @kshiftr_v8i1_1u3u567z(<8 x i64> %x, <8 x i64> %y) {
 define i8 @kshiftr_v8i1_234567uu(<8 x i64> %x, <8 x i64> %y) {
 ; KNL-LABEL: kshiftr_v8i1_234567uu:
 ; KNL:       # %bb.0:
-; KNL-NEXT:    vptestnmq %zmm0, %zmm0, %k1
-; KNL-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
-; KNL-NEXT:    vshufi64x2 {{.*#+}} zmm0 = zmm0[2,3,4,5,6,7,0,1]
-; KNL-NEXT:    vpsllq $63, %zmm0, %zmm0
-; KNL-NEXT:    vptestnmq %zmm1, %zmm1, %k1
-; KNL-NEXT:    vptestmq %zmm0, %zmm0, %k0 {%k1}
+; KNL-NEXT:    vptestnmq %zmm0, %zmm0, %k0
+; KNL-NEXT:    kshiftrw $2, %k0, %k1
+; KNL-NEXT:    vptestnmq %zmm1, %zmm1, %k0 {%k1}
 ; KNL-NEXT:    kmovw %k0, %eax
 ; KNL-NEXT:    # kill: def $al killed $al killed $eax
 ; KNL-NEXT:    vzeroupper