author    Andrea Di Biagio <Andrea_DiBiagio@sn.scee.net>    2014-05-15 15:18:15 +0000
committer Andrea Di Biagio <Andrea_DiBiagio@sn.scee.net>    2014-05-15 15:18:15 +0000
commit    d621120533c0f7d7f6224a2ba2df47518d1821ad (patch)
tree      774e96810b34f7c6d3df067671a870925cb657fb /llvm/lib
parent    d6879febdc440edcaef5a47831c2b22312599a6d (diff)
[X86] Teach the backend how to fold SSE4.1/AVX/AVX2 blend intrinsics.
Added target specific combine rules to fold blend intrinsics according to
the following rules:
  1) fold(blend A, A, Mask) -> A;
  2) fold(blend A, B, <allZeros>) -> A;
  3) fold(blend A, B, <allOnes>) -> B.

Added two new tests to verify that the new folding rules work for all the
optimized blend intrinsics.

llvm-svn: 208895
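As a minimal source-level illustration of what these rules buy (a sketch, assuming clang lowers the _mm_blend_ps / _mm_blendv_ps builtins to the @llvm.x86.sse41.blendps / blendvps intrinsics handled by this combine; the function names below are invented for the example):

    #include <smmintrin.h>  // SSE4.1 blend intrinsics

    // Rule 1: both inputs are the same value, so the blend is a no-op -> a.
    __m128 blend_same(__m128 a) {
      return _mm_blendv_ps(a, a, a);
    }

    // Rule 2: an all-zero immediate keeps every lane of the first operand -> a.
    __m128 blend_zero_mask(__m128 a, __m128 b) {
      return _mm_blend_ps(a, b, 0x0);
    }

    // Rule 3: an all-ones immediate takes every lane of the second operand -> b.
    __m128 blend_ones_mask(__m128 a, __m128 b) {
      return _mm_blend_ps(a, b, 0xf);
    }

With this combine in place, each of these blends should fold away during DAG combining, leaving just the surviving operand.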
Diffstat (limited to 'llvm/lib')
-rw-r--r--   llvm/lib/Target/X86/X86ISelLowering.cpp   56
1 file changed, 54 insertions(+), 2 deletions(-)
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 4e9eecc15ed..8c9cc60f0f0 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -18473,10 +18473,61 @@ static SDValue PerformCMOVCombine(SDNode *N, SelectionDAG &DAG,
   return SDValue();
 }
 
-static SDValue PerformINTRINSIC_WO_CHAINCombine(SDNode *N, SelectionDAG &DAG) {
+static SDValue PerformINTRINSIC_WO_CHAINCombine(SDNode *N, SelectionDAG &DAG,
+                                                const X86Subtarget *Subtarget) {
   unsigned IntNo = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue();
   switch (IntNo) {
   default: return SDValue();
+  // SSE/AVX/AVX2 blend intrinsics.
+  case Intrinsic::x86_avx2_pblendvb:
+  case Intrinsic::x86_avx2_pblendw:
+  case Intrinsic::x86_avx2_pblendd_128:
+  case Intrinsic::x86_avx2_pblendd_256:
+    // Don't try to simplify this intrinsic if we don't have AVX2.
+    if (!Subtarget->hasAVX2())
+      return SDValue();
+    // FALL-THROUGH
+  case Intrinsic::x86_avx_blend_pd_256:
+  case Intrinsic::x86_avx_blend_ps_256:
+  case Intrinsic::x86_avx_blendv_pd_256:
+  case Intrinsic::x86_avx_blendv_ps_256:
+    // Don't try to simplify this intrinsic if we don't have AVX.
+    if (!Subtarget->hasAVX())
+      return SDValue();
+    // FALL-THROUGH
+  case Intrinsic::x86_sse41_pblendw:
+  case Intrinsic::x86_sse41_blendpd:
+  case Intrinsic::x86_sse41_blendps:
+  case Intrinsic::x86_sse41_blendvps:
+  case Intrinsic::x86_sse41_blendvpd:
+  case Intrinsic::x86_sse41_pblendvb: {
+    SDValue Op0 = N->getOperand(1);
+    SDValue Op1 = N->getOperand(2);
+    SDValue Mask = N->getOperand(3);
+
+    // Don't try to simplify this intrinsic if we don't have SSE4.1.
+    if (!Subtarget->hasSSE41())
+      return SDValue();
+
+    // fold (blend A, A, Mask) -> A
+    if (Op0 == Op1)
+      return Op0;
+    // fold (blend A, B, allZeros) -> A
+    if (ISD::isBuildVectorAllZeros(Mask.getNode()))
+      return Op0;
+    // fold (blend A, B, allOnes) -> B
+    if (ISD::isBuildVectorAllOnes(Mask.getNode()))
+      return Op1;
+
+    // Simplify the case where the mask is a constant i32 value.
+    if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Mask)) {
+      if (C->isNullValue())
+        return Op0;
+      if (C->isAllOnesValue())
+        return Op1;
+    }
+  }
+
   // Packed SSE2/AVX2 arithmetic shift immediate intrinsics.
   case Intrinsic::x86_sse2_psrai_w:
   case Intrinsic::x86_sse2_psrai_d:
@@ -20343,7 +20394,8 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N,
   case X86ISD::VPERM2X128:
   case ISD::VECTOR_SHUFFLE: return PerformShuffleCombine(N, DAG, DCI,Subtarget);
   case ISD::FMA:            return PerformFMACombine(N, DAG, Subtarget);
-  case ISD::INTRINSIC_WO_CHAIN: return PerformINTRINSIC_WO_CHAINCombine(N, DAG);
+  case ISD::INTRINSIC_WO_CHAIN:
+    return PerformINTRINSIC_WO_CHAINCombine(N, DAG, Subtarget);
   }
 
   return SDValue();