[InstCombine] canonicalize a scalar-select-of-vectors to vector select

This pattern may arise more frequently with an enhancement to SLP vectorization suggested in PR42755:
https://bugs.llvm.org/show_bug.cgi?id=42755
...but we should handle this pattern to make things easier for the backend either way.

For all in-tree targets that I looked at, codegen for typical vector sizes looks better when we change to a vector select, so this is safe to do without a cost model (in other words, as a target-independent canonicalization).

For example, if the condition of the select is a scalar, we end up with something like this on x86:

    vpcmpgtd %xmm0, %xmm1, %xmm0
    vpextrb $12, %xmm0, %eax
    testb $1, %al
    jne LBB0_2
  ## %bb.1:
    vmovaps %xmm3, %xmm2
  LBB0_2:
    vmovaps %xmm2, %xmm0

Rather than the splat-condition variant:

    vpcmpgtd %xmm0, %xmm1, %xmm0
    vpshufd $255, %xmm0, %xmm0 ## xmm0 = xmm0[3,3,3,3]
    vblendvps %xmm0, %xmm2, %xmm3, %xmm0

Differential Revision: https://reviews.llvm.org/D66095

llvm-svn: 369140
author: Sanjay Patel <spatel@rotateright.com> 2019-08-16 18:51:30 +0000
committer: Sanjay Patel <spatel@rotateright.com> 2019-08-16 18:51:30 +0000
commit: 39eb2324f7ec48bd455c370dcb26ffcf9f8dfc48 (patch)
tree: 510034cc5300b1435c485863f011157e20791035 /llvm/lib
parent: 3550da79ecdbc2b2a41aa305c659a5f90eb0b3c5 (diff)
download: bcm5719-llvm-39eb2324f7ec48bd455c370dcb26ffcf9f8dfc48.tar.gz
bcm5719-llvm-39eb2324f7ec48bd455c370dcb26ffcf9f8dfc48.zip
1 files changed, 27 insertions, 0 deletions
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp b/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
index 45c0f4ef03e..c257cf9e1d0 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
@@ -1696,6 +1696,30 @@ static Instruction *canonicalizeSelectToShuffle(SelectInst &SI) {
                                ConstantVector::get(Mask));
 }
 
+/// If a select of vectors has a scalar condition that is a one-use extract
+/// with constant index, splat that condition to form a vector select. A splat
+/// may fold with other IR ops, and all-vector select operands are likely better
+/// for vector codegen. Returns the modified \p Sel, or nullptr if no match.
+static Instruction *canonicalizeScalarSelectOfVecs(
+    SelectInst &Sel, InstCombiner::BuilderTy &Builder) {
+  Type *Ty = Sel.getType();
+  if (!Ty->isVectorTy())
+    return nullptr;
+
+  // Only handle a condition that is a single-use extract with constant index.
+  Value *Cond = Sel.getCondition();
+  if (!match(Cond, m_OneUse(m_ExtractElement(m_Value(), m_ConstantInt()))))
+    return nullptr;
+
+  // select (extelt V, Index), T, F --> select (splat V, Index), T, F
+  // Splat the extracted scalar itself, one lane per element of the select type
+  // (a splat shuffle of the source vector would skip the intermediate extract).
+  unsigned NumElts = Ty->getVectorNumElements();
+  Value *SplatCond = Builder.CreateVectorSplat(NumElts, Cond);
+  Sel.setCondition(SplatCond);
+  return &Sel;
+}
+
 /// Reuse bitcasted operands between a compare and select:
 /// select (cmp (bitcast C), (bitcast D)), (bitcast' C), (bitcast' D) -->
 /// bitcast (select (cmp (bitcast C), (bitcast D)), (bitcast C), (bitcast D))
@@ -1992,6 +2016,9 @@ Instruction *InstCombiner::visitSelectInst(SelectInst &SI) {
   if (Instruction *I = canonicalizeSelectToShuffle(SI))
     return I;
 
+  if (Instruction *I = canonicalizeScalarSelectOfVecs(SI, Builder))
+    return I;
+
   // Canonicalize a one-use integer compare with a non-canonical predicate by
   // inverting the predicate and swapping the select operands. This matches a
   // compare canonicalization for conditional branches.
author	Sanjay Patel <spatel@rotateright.com>	2019-08-16 18:51:30 +0000
committer	Sanjay Patel <spatel@rotateright.com>	2019-08-16 18:51:30 +0000
commit	39eb2324f7ec48bd455c370dcb26ffcf9f8dfc48 (patch)
tree	510034cc5300b1435c485863f011157e20791035 /llvm/lib
parent	3550da79ecdbc2b2a41aa305c659a5f90eb0b3c5 (diff)
download	bcm5719-llvm-39eb2324f7ec48bd455c370dcb26ffcf9f8dfc48.tar.gz bcm5719-llvm-39eb2324f7ec48bd455c370dcb26ffcf9f8dfc48.zip