summaryrefslogtreecommitdiffstats
path: root/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
diff options
context:
space:
mode:
authorSanjay Patel <spatel@rotateright.com>2016-06-17 16:46:50 +0000
committerSanjay Patel <spatel@rotateright.com>2016-06-17 16:46:50 +0000
commit216d8cf7201857ba08e50c3a9e8ac452c3f565b9 (patch)
tree68e109115862594da621ee96954e930526706bc5 /llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
parentda9548f949307557ad32c9808b528acb614ead8f (diff)
downloadbcm5719-llvm-216d8cf7201857ba08e50c3a9e8ac452c3f565b9.tar.gz
bcm5719-llvm-216d8cf7201857ba08e50c3a9e8ac452c3f565b9.zip
[InstCombine] allow more than one use for vector bitcast folding with selects
The motivating example for this transform is similar to D20774 where bitcasts interfere with a single cmp/select sequence, but in this case we have 2 uses of each bitcast to produce min and max ops: define void @minmax_bc_store(<4 x float> %a, <4 x float> %b, <4 x float>* %ptr1, <4 x float>* %ptr2) { %cmp = fcmp olt <4 x float> %a, %b %bc1 = bitcast <4 x float> %a to <4 x i32> %bc2 = bitcast <4 x float> %b to <4 x i32> %sel1 = select <4 x i1> %cmp, <4 x i32> %bc1, <4 x i32> %bc2 %sel2 = select <4 x i1> %cmp, <4 x i32> %bc2, <4 x i32> %bc1 %bc3 = bitcast <4 x float>* %ptr1 to <4 x i32>* store <4 x i32> %sel1, <4 x i32>* %bc3 %bc4 = bitcast <4 x float>* %ptr2 to <4 x i32>* store <4 x i32> %sel2, <4 x i32>* %bc4 ret void } With this patch, we move the selects up to use the input args which allows getting rid of all of the bitcasts: define void @minmax_bc_store(<4 x float> %a, <4 x float> %b, <4 x float>* %ptr1, <4 x float>* %ptr2) { %cmp = fcmp olt <4 x float> %a, %b %sel1.v = select <4 x i1> %cmp, <4 x float> %a, <4 x float> %b %sel2.v = select <4 x i1> %cmp, <4 x float> %b, <4 x float> %a store <4 x float> %sel1.v, <4 x float>* %ptr1, align 16 store <4 x float> %sel2.v, <4 x float>* %ptr2, align 16 ret void } The asm for x86 SSE then improves from: movaps %xmm0, %xmm2 cmpltps %xmm1, %xmm2 movaps %xmm2, %xmm3 andnps %xmm1, %xmm3 movaps %xmm2, %xmm4 andnps %xmm0, %xmm4 andps %xmm2, %xmm0 orps %xmm3, %xmm0 andps %xmm1, %xmm2 orps %xmm4, %xmm2 movaps %xmm0, (%rdi) movaps %xmm2, (%rsi) To: movaps %xmm0, %xmm2 minps %xmm1, %xmm2 maxps %xmm0, %xmm1 movaps %xmm2, (%rdi) movaps %xmm1, (%rsi) The TODO comments show that we're limiting this transform only to vectors and only to bitcasts because we need to improve other transforms or risk creating worse codegen. Differential Revision: http://reviews.llvm.org/D21190 llvm-svn: 273011
Diffstat (limited to 'llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp')
-rw-r--r--llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp48
1 files changed, 35 insertions, 13 deletions
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp b/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
index 6b56bc6ff04..7c3ef99d227 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
@@ -116,8 +116,7 @@ static Constant *GetSelectFoldableConstant(Instruction *I) {
}
}
-/// Here we have (select c, TI, FI), and we know that TI and FI
-/// have the same opcode and only one use each. Try to simplify this.
+/// We have (select c, TI, FI), and we know that TI and FI have the same opcode.
Instruction *InstCombiner::FoldSelectOpOp(SelectInst &SI, Instruction *TI,
Instruction *FI) {
// If this is a cast from the same type, merge.
@@ -129,10 +128,30 @@ Instruction *InstCombiner::FoldSelectOpOp(SelectInst &SI, Instruction *TI,
// The select condition may be a vector. We may only change the operand
// type if the vector width remains the same (and matches the condition).
Type *CondTy = SI.getCondition()->getType();
- if (CondTy->isVectorTy() &&
- (!FIOpndTy->isVectorTy() ||
- CondTy->getVectorNumElements() != FIOpndTy->getVectorNumElements()))
+ if (CondTy->isVectorTy()) {
+ if (!FIOpndTy->isVectorTy())
+ return nullptr;
+ if (CondTy->getVectorNumElements() != FIOpndTy->getVectorNumElements())
+ return nullptr;
+
+ // TODO: If the backend knew how to deal with casts better, we could
+ // remove this limitation. For now, there's too much potential to create
+ // worse codegen by promoting the select ahead of size-altering casts
+ // (PR28160).
+ //
+ // Note that ValueTracking's matchSelectPattern() looks through casts
+ // without checking 'hasOneUse' when it matches min/max patterns, so this
+ // transform may end up happening anyway.
+ if (TI->getOpcode() != Instruction::BitCast &&
+ (!TI->hasOneUse() || !FI->hasOneUse()))
+ return nullptr;
+
+ } else if (!TI->hasOneUse() || !FI->hasOneUse()) {
+ // TODO: The one-use restrictions for a scalar select could be eased if
+ // the fold of a select in visitLoadInst() was enhanced to match a pattern
+ // that includes a cast.
return nullptr;
+ }
// Fold this by inserting a select from the input values.
Value *NewSI = Builder->CreateSelect(SI.getCondition(), TI->getOperand(0),
@@ -141,8 +160,13 @@ Instruction *InstCombiner::FoldSelectOpOp(SelectInst &SI, Instruction *TI,
TI->getType());
}
- // Only handle binary operators here.
- if (!isa<BinaryOperator>(TI))
+ // TODO: This function ends awkwardly in unreachable - fix to be more normal.
+
+ // Only handle binary operators with one-use here. As with the cast case
+ // above, it may be possible to relax the one-use constraint, but that needs
+ // be examined carefully since it may not reduce the total number of
+ // instructions.
+ if (!isa<BinaryOperator>(TI) || !TI->hasOneUse() || !FI->hasOneUse())
return nullptr;
// Figure out if the operations have any operands in common.
@@ -1056,14 +1080,12 @@ Instruction *InstCombiner::visitSelectInst(SelectInst &SI) {
if (Instruction *Add = foldAddSubSelect(SI, *Builder))
return Add;
+ // Turn (select C, (op X, Y), (op X, Z)) -> (op X, (select C, Y, Z))
auto *TI = dyn_cast<Instruction>(TrueVal);
auto *FI = dyn_cast<Instruction>(FalseVal);
- if (TI && FI && TI->hasOneUse() && FI->hasOneUse()) {
- // Turn (select C, (op X, Y), (op X, Z)) -> (op X, (select C, Y, Z))
- if (TI->getOpcode() == FI->getOpcode())
- if (Instruction *IV = FoldSelectOpOp(SI, TI, FI))
- return IV;
- }
+ if (TI && FI && TI->getOpcode() == FI->getOpcode())
+ if (Instruction *IV = FoldSelectOpOp(SI, TI, FI))
+ return IV;
// See if we can fold the select into one of our operands.
if (SI.getType()->isIntOrIntVectorTy() || SI.getType()->isFPOrFPVectorTy()) {
OpenPOWER on IntegriCloud