author    Nadav Rotem <nadav.rotem@intel.com>    2012-04-09 08:33:21 +0000
committer Nadav Rotem <nadav.rotem@intel.com>    2012-04-09 08:33:21 +0000
commit    fb7e2ae53ced339cc83d4c4fd40345946d20050e (patch)
tree      e7764d9aab05ef24e99cb85a1359eb5cd1e26d39
parent    deffc42d63ac31f31620782672bc40891ee2be1d (diff)
Lower some x86 shuffle sequences to the vblend family of instructions.
llvm-svn: 154313
-rw-r--r--  llvm/lib/Target/X86/X86ISelLowering.cpp  67
-rw-r--r--  llvm/test/CodeGen/X86/avx-shuffle.ll     40
2 files changed, 107 insertions(+), 0 deletions(-)
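
A shuffle qualifies for this lowering only when every result lane stays in
place, i.e. lane i is taken from lane i of one of the two inputs; exactly
those shuffles map onto the variable-blend instructions (vblendvps,
vblendvpd, vpblendvb). A minimal standalone sketch of that lane test (plain
C++ for illustration, not LLVM types; -1 marks an undef lane, as in
ShuffleVectorSDNode masks):

#include <vector>

// True when a two-input shuffle mask of width N is blend-eligible: output
// lane i must read input-0 lane i (or be undef, encoded as -1), or read
// input-1 lane i, which shuffle masks encode as index i + N.
static bool isBlendMask(const std::vector<int> &Mask) {
  const int N = static_cast<int>(Mask.size());
  for (int i = 0; i < N; ++i) {
    if (Mask[i] == i || Mask[i] == -1) // lane i of V1, or undef
      continue;
    if (Mask[i] == i + N)              // lane i of V2
      continue;
    return false;                      // the lane would move across positions
  }
  return true;
}

For example, <0, 5, 2, 7> is blend-eligible (lanes 1 and 3 read the second
input), while <1, 5, 2, 7> is not, because lane 0 would read element 1 of
the first input; the blend2 and blend3 tests below cover both cases.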
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 615cb67f2b6..2bef7581d4b 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -5377,6 +5377,69 @@ X86TargetLowering::LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) const {
   return LowerAVXCONCAT_VECTORS(Op, DAG);
 }
 
+// Try to lower a shuffle node into a simple blend instruction.
+static SDValue LowerVECTOR_SHUFFLEtoBlend(SDValue Op,
+                                          const X86Subtarget *Subtarget,
+                                          SelectionDAG &DAG, EVT PtrTy) {
+  ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op);
+  SDValue V1 = SVOp->getOperand(0);
+  SDValue V2 = SVOp->getOperand(1);
+  DebugLoc dl = SVOp->getDebugLoc();
+  LLVMContext *Context = DAG.getContext();
+  EVT VT = Op.getValueType();
+  EVT InVT = V1.getValueType();
+  EVT EltVT = VT.getVectorElementType();
+  unsigned EltSize = EltVT.getSizeInBits();
+  int MaskSize = VT.getVectorNumElements();
+  int InSize = InVT.getVectorNumElements();
+
+  // TODO: At the moment we only use AVX blends. We could also use SSE4 blends.
+  if (!Subtarget->hasAVX())
+    return SDValue();
+
+  if (MaskSize != InSize)
+    return SDValue();
+
+  SmallVector<Constant*,2> MaskVals;
+  ConstantInt *Zero = ConstantInt::get(*Context, APInt(EltSize, 0));
+  ConstantInt *NegOne = ConstantInt::get(*Context, APInt(EltSize, -1));
+
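+  // A shuffle folds to a blend only if every output lane stays in place:
+  // lane i must read V1[i] (or be undef), giving an all-ones mask lane, or
+  // read V2[i], encoded in the shuffle mask as i + MaskSize, giving zero.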
+  for (int i = 0; i < MaskSize; ++i) {
+    int EltIdx = SVOp->getMaskElt(i);
+    if (EltIdx == i || EltIdx == -1)
+      MaskVals.push_back(NegOne);
+    else if (EltIdx == (i + MaskSize))
+      MaskVals.push_back(Zero);
+    else return SDValue();
+  }
+
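+  // Materialize the lane mask (all-ones selects V1, zero selects V2) as a
+  // constant-pool load that the VSELECT below can consume.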
+  Constant *MaskC = ConstantVector::get(MaskVals);
+  EVT MaskTy = EVT::getEVT(MaskC->getType());
+  assert(MaskTy.getSizeInBits() == VT.getSizeInBits() && "Invalid mask size");
+  SDValue MaskIdx = DAG.getConstantPool(MaskC, PtrTy);
+  unsigned Alignment = cast<ConstantPoolSDNode>(MaskIdx)->getAlignment();
+  SDValue Mask = DAG.getLoad(MaskTy, dl, DAG.getEntryNode(), MaskIdx,
+                             MachinePointerInfo::getConstantPool(),
+                             false, false, false, Alignment);
+
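+  // A 256-bit byte blend (vpblendvb on ymm) needs AVX2; the remaining types
+  // map onto AVX's vblendvps/vblendvpd/vpblendvb forms.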
+  if (Subtarget->hasAVX2() && MaskTy == MVT::v32i8)
+    return DAG.getNode(ISD::VSELECT, dl, VT, Mask, V1, V2);
+
+  if (Subtarget->hasAVX()) {
+    switch (MaskTy.getSimpleVT().SimpleTy) {
+    default: return SDValue();
+    case MVT::v16i8:
+    case MVT::v4i32:
+    case MVT::v2i64:
+    case MVT::v8i32:
+    case MVT::v4i64:
+      return DAG.getNode(ISD::VSELECT, dl, VT, Mask, V1, V2);
+    }
+  }
+
+  return SDValue();
+}
+
 // v8i16 shuffles - Prefer shuffles in the following order:
 // 1. [all] pshuflw, pshufhw, optional move
 // 2. [ssse3] 1 x pshufb
@@ -6539,6 +6602,10 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const {
     return getTargetShuffleNode(X86ISD::VPERM2X128, dl, VT, V1,
                                 V2, getShuffleVPERM2X128Immediate(SVOp), DAG);
 
+  SDValue BlendOp = LowerVECTOR_SHUFFLEtoBlend(Op, Subtarget, DAG, getPointerTy());
+  if (BlendOp.getNode())
+    return BlendOp;
+
   //===--------------------------------------------------------------------===//
   // Since no target specific shuffle was selected for this generic one,
   // lower it into other known shuffles. FIXME: this isn't true yet, but
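
For reference, ISD::VSELECT(Mask, V1, V2) takes lane i from V1 when the mask
lane is all-ones and from V2 when it is zero, which is what the all-ones/zero
constant-pool vector built above encodes. A scalar model of that select,
assuming 32-bit lanes (this mirrors the sign-bit test vblendvps performs; it
is an illustration, not LLVM API):

#include <array>
#include <cstddef>
#include <cstdint>

// Scalar model of the emitted VSELECT: an all-ones mask lane picks the V1
// lane, a zero mask lane picks the V2 lane. x86's vblendvps examines only
// the sign bit, which the all-ones/zero encoding satisfies.
template <std::size_t N>
std::array<std::uint32_t, N> vselectModel(const std::array<std::uint32_t, N> &Mask,
                                          const std::array<std::uint32_t, N> &V1,
                                          const std::array<std::uint32_t, N> &V2) {
  std::array<std::uint32_t, N> R{};
  for (std::size_t i = 0; i != N; ++i)
    R[i] = (Mask[i] & 0x80000000u) ? V1[i] : V2[i]; // sign bit set -> V1 lane
  return R;
}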
diff --git a/llvm/test/CodeGen/X86/avx-shuffle.ll b/llvm/test/CodeGen/X86/avx-shuffle.ll
index 4885842ff7b..f323f3fd20c 100644
--- a/llvm/test/CodeGen/X86/avx-shuffle.ll
+++ b/llvm/test/CodeGen/X86/avx-shuffle.ll
@@ -162,3 +162,43 @@ i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 16, i32 18, i32 20, i32
 62>
   ret <32 x i8> %0
 }
+
+; CHECK: blend1
+; CHECK: vblendvps
+; CHECK: ret
+define <4 x i32> @blend1(<4 x i32> %a, <4 x i32> %b) nounwind alwaysinline {
+  %t = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 1, i32 2, i32 7>
+  ret <4 x i32> %t
+}
+
+; CHECK: blend2
+; CHECK: vblendvps
+; CHECK: ret
+define <4 x i32> @blend2(<4 x i32> %a, <4 x i32> %b) nounwind alwaysinline {
+  %t = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
+  ret <4 x i32> %t
+}
+
+; CHECK: blend2a
+; CHECK: vblendvps
+; CHECK: ret
+define <4 x float> @blend2a(<4 x float> %a, <4 x float> %b) nounwind alwaysinline {
+  %t = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
+  ret <4 x float> %t
+}
+
+; CHECK: blend3
+; CHECK-NOT: vblendvps
+; CHECK: ret
+define <4 x i32> @blend3(<4 x i32> %a, <4 x i32> %b) nounwind alwaysinline {
+  %t = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 1, i32 5, i32 2, i32 7>
+  ret <4 x i32> %t
+}
+
+; CHECK: blend4
+; CHECK: vblendvpd
+; CHECK: ret
+define <4 x i64> @blend4(<4 x i64> %a, <4 x i64> %b) nounwind alwaysinline {
+  %t = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 0, i32 1, i32 2, i32 7>
+  ret <4 x i64> %t
+}
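
Note the coverage above: blend3 is the negative case; its mask <1, 5, 2, 7>
asks lane 0 to read element 1 of %a, so the lanes do not stay in place and
the CHECK-NOT line verifies that no vblendvps is emitted. blend4 exercises
the 256-bit v4i64 path, which lowers to vblendvpd on a ymm register and
requires only AVX, not AVX2.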