Diffstat (limited to 'llvm')
 llvm/lib/Target/X86/X86ISelLowering.cpp | 67
 llvm/test/CodeGen/X86/avx-shuffle.ll    | 40
 2 files changed, 107 insertions, 0 deletions
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 615cb67f2b6..2bef7581d4b 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -5377,6 +5377,69 @@ X86TargetLowering::LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) const {
   return LowerAVXCONCAT_VECTORS(Op, DAG);
 }
 
+// Try to lower a shuffle node into a simple blend instruction.
+static SDValue LowerVECTOR_SHUFFLEtoBlend(SDValue Op,
+                                          const X86Subtarget *Subtarget,
+                                          SelectionDAG &DAG, EVT PtrTy) {
+  ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op);
+  SDValue V1 = SVOp->getOperand(0);
+  SDValue V2 = SVOp->getOperand(1);
+  DebugLoc dl = SVOp->getDebugLoc();
+  LLVMContext *Context = DAG.getContext();
+  EVT VT = Op.getValueType();
+  EVT InVT = V1.getValueType();
+  EVT EltVT = VT.getVectorElementType();
+  unsigned EltSize = EltVT.getSizeInBits();
+  int MaskSize = VT.getVectorNumElements();
+  int InSize = InVT.getVectorNumElements();
+
+  // TODO: At the moment we only use AVX blends. We could also use SSE4 blends.
+  if (!Subtarget->hasAVX())
+    return SDValue();
+
+  if (MaskSize != InSize)
+    return SDValue();
+
+  SmallVector<Constant*,2> MaskVals;
+  ConstantInt *Zero = ConstantInt::get(*Context, APInt(EltSize, 0));
+  ConstantInt *NegOne = ConstantInt::get(*Context, APInt(EltSize, -1));
+
+  for (int i = 0; i < MaskSize; ++i) {
+    int EltIdx = SVOp->getMaskElt(i);
+    if (EltIdx == i || EltIdx == -1)
+      MaskVals.push_back(NegOne);
+    else if (EltIdx == (i + MaskSize))
+      MaskVals.push_back(Zero);
+    else return SDValue();
+  }
+
+  Constant *MaskC = ConstantVector::get(MaskVals);
+  EVT MaskTy = EVT::getEVT(MaskC->getType());
+  assert(MaskTy.getSizeInBits() == VT.getSizeInBits() && "Invalid mask size");
+  SDValue MaskIdx = DAG.getConstantPool(MaskC, PtrTy);
+  unsigned Alignment = cast<ConstantPoolSDNode>(MaskIdx)->getAlignment();
+  SDValue Mask = DAG.getLoad(MaskTy, dl, DAG.getEntryNode(), MaskIdx,
+                             MachinePointerInfo::getConstantPool(),
+                             false, false, false, Alignment);
+
+  if (Subtarget->hasAVX2() && MaskTy == MVT::v32i8)
+    return DAG.getNode(ISD::VSELECT, dl, VT, Mask, V1, V2);
+
+  if (Subtarget->hasAVX()) {
+    switch (MaskTy.getSimpleVT().SimpleTy) {
+    default: return SDValue();
+    case MVT::v16i8:
+    case MVT::v4i32:
+    case MVT::v2i64:
+    case MVT::v8i32:
+    case MVT::v4i64:
+      return DAG.getNode(ISD::VSELECT, dl, VT, Mask, V1, V2);
+    }
+  }
+
+  return SDValue();
+}
+
 // v8i16 shuffles - Prefer shuffles in the following order:
 // 1. [all]   pshuflw, pshufhw, optional move
 // 2. [ssse3] 1 x pshufb
@@ -6539,6 +6602,10 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const {
     return getTargetShuffleNode(X86ISD::VPERM2X128, dl, VT, V1,
                                 V2, getShuffleVPERM2X128Immediate(SVOp), DAG);
 
+  SDValue BlendOp = LowerVECTOR_SHUFFLEtoBlend(Op, Subtarget, DAG, getPointerTy());
+  if (BlendOp.getNode())
+    return BlendOp;
+
   //===--------------------------------------------------------------------===//
   // Since no target specific shuffle was selected for this generic one,
   // lower it into other known shuffles. FIXME: this isn't true yet, but
diff --git a/llvm/test/CodeGen/X86/avx-shuffle.ll b/llvm/test/CodeGen/X86/avx-shuffle.ll
index 4885842ff7b..f323f3fd20c 100644
--- a/llvm/test/CodeGen/X86/avx-shuffle.ll
+++ b/llvm/test/CodeGen/X86/avx-shuffle.ll
@@ -162,3 +162,43 @@ i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 16, i32 18, i32 20, i32
 62>
  ret <32 x i8> %0
 }
+
+; CHECK: blend1
+; CHECK: vblendvps
+; CHECK: ret
+define <4 x i32> @blend1(<4 x i32> %a, <4 x i32> %b) nounwind alwaysinline {
+  %t = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 1, i32 2, i32 7>
+  ret <4 x i32> %t
+}
+
+; CHECK: blend2
+; CHECK: vblendvps
+; CHECK: ret
+define <4 x i32> @blend2(<4 x i32> %a, <4 x i32> %b) nounwind alwaysinline {
+  %t = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
+  ret <4 x i32> %t
+}
+
+; CHECK: blend2a
+; CHECK: vblendvps
+; CHECK: ret
+define <4 x float> @blend2a(<4 x float> %a, <4 x float> %b) nounwind alwaysinline {
+  %t = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
+  ret <4 x float> %t
+}
+
+; CHECK: blend3
+; CHECK-NOT: vblendvps
+; CHECK: ret
+define <4 x i32> @blend3(<4 x i32> %a, <4 x i32> %b) nounwind alwaysinline {
+  %t = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 1, i32 5, i32 2, i32 7>
+  ret <4 x i32> %t
+}
+
+; CHECK: blend4
+; CHECK: vblendvpd
+; CHECK: ret
+define <4 x i64> @blend4(<4 x i64> %a, <4 x i64> %b) nounwind alwaysinline {
+  %t = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 0, i32 1, i32 2, i32 7>
+  ret <4 x i64> %t
+}
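The new lowering only fires when every result element i either stays in lane i of the first source, comes from lane i of the second source (mask index i + N), or is undef; anything else is not a single per-lane blend. A minimal standalone sketch of that mask test is below; the isBlendMask helper and its driver are hypothetical illustrations, not part of the patch or of the LLVM API.

// Standalone sketch of the blend-mask test in LowerVECTOR_SHUFFLEtoBlend
// above. For an N-element shuffle of V1 and V2, indices 0..N-1 name lanes
// of V1, indices N..2N-1 name lanes of V2, and -1 is undef.
#include <cstdio>
#include <vector>

// Returns true and fills BlendMask (true = take V1's lane, false = V2's)
// if every element either stays in its own lane or is undef.
static bool isBlendMask(const std::vector<int> &ShuffleMask,
                        std::vector<bool> &BlendMask) {
  int N = static_cast<int>(ShuffleMask.size());
  BlendMask.assign(N, true);
  for (int i = 0; i < N; ++i) {
    int Elt = ShuffleMask[i];
    if (Elt == i || Elt == -1)
      BlendMask[i] = true;        // lane i of V1; undef lanes default to V1
    else if (Elt == i + N)
      BlendMask[i] = false;       // lane i of V2
    else
      return false;               // element crosses lanes: not a blend
  }
  return true;
}

int main() {
  std::vector<bool> B;
  // blend1's mask from the test above: lanes 0-2 from V1, lane 3 from V2.
  printf("<0,1,2,7>: %s\n", isBlendMask({0, 1, 2, 7}, B) ? "blend" : "not a blend");
  // blend3's mask: element 0 comes from lane 1, so no single blend exists.
  printf("<1,5,2,7>: %s\n", isBlendMask({1, 5, 2, 7}, B) ? "blend" : "not a blend");
  return 0;
}

Under this test, blend1's mask <0, 1, 2, 7> keeps lanes 0-2 from %a and takes lane 3 from %b, so it maps to a single vblendvps, while blend3's mask <1, 5, 2, 7> moves element 1 into lane 0 and is rejected, matching the CHECK-NOT above.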

