summaryrefslogtreecommitdiffstats
path: root/llvm/lib
diff options
context:
space:
mode:
author: Sanjay Patel <spatel@rotateright.com> 2018-06-28 17:48:04 +0000
committer: Sanjay Patel <spatel@rotateright.com> 2018-06-28 17:48:04 +0000
commit: 57bda365bfcee4708ea9ccde1d2a5e1fbf61c5ec (patch)
tree: f0eb1482011a03e89e429a821add0f11beb26187 /llvm/lib
parent: dafa198c96d29b5cb3694494a3cb01da72745772 (diff)
download: bcm5719-llvm-57bda365bfcee4708ea9ccde1d2a5e1fbf61c5ec.tar.gz
          bcm5719-llvm-57bda365bfcee4708ea9ccde1d2a5e1fbf61c5ec.zip
[InstCombine] allow shl+mul combos with shuffle (select) fold (PR37806)
This is an enhancement to D48401 that was discussed in:
https://bugs.llvm.org/show_bug.cgi?id=37806

We can convert a shift-left-by-constant into a multiply (we canonicalize IR in the other direction because that's generally better, of course). This allows us to remove the shuffle as we do in the regular opcodes-are-the-same cases.

This requires a small hack to make sure we don't introduce any extra poison:
https://rise4fun.com/Alive/ZGv

Other examples of opcodes where this would work are add+sub and fadd+fsub, but we already canonicalize those subs into adds, so there's nothing to do for those cases AFAICT. There are planned enhancements for opcode transforms such as or -> add.

Note that there's a different fold needed if we've already managed to simplify away a binop as seen in the test based on PR37806, but we manage to get that one case here because this fold is positioned above the demanded elements fold currently.

Differential Revision: https://reviews.llvm.org/D48485

llvm-svn: 335888
Diffstat (limited to 'llvm/lib')
-rw-r--r-- llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp 34
1 files changed, 29 insertions, 5 deletions
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp b/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
index fec6baeb69c..458b0f02e15 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
@@ -1164,10 +1164,33 @@ static Instruction *foldSelectShuffles(ShuffleVectorInst &Shuf) {
else
return nullptr;
- // TODO: There are potential folds where the opcodes do not match (mul+shl).
- if (B0->getOpcode() != B1->getOpcode())
+ // We need matching binops to fold the lanes together.
+ BinaryOperator::BinaryOps Opc0 = B0->getOpcode();
+ BinaryOperator::BinaryOps Opc1 = B1->getOpcode();
+ bool DropNSW = false;
+ if (ConstantsAreOp1 && Opc0 != Opc1) {
+ // If we have multiply and shift-left-by-constant, convert the shift:
+ // shl X, C --> mul X, 1 << C
+ // TODO: We drop "nsw" if shift is converted into multiply because it may
+ // not be correct when the shift amount is BitWidth - 1. We could examine
+ // each vector element to determine if it is safe to keep that flag.
+ if (Opc0 == Instruction::Mul && Opc1 == Instruction::Shl) {
+ C1 = ConstantExpr::getShl(ConstantInt::get(C1->getType(), 1), C1);
+ Opc1 = Instruction::Mul;
+ DropNSW = true;
+ } else if (Opc0 == Instruction::Shl && Opc1 == Instruction::Mul) {
+ C0 = ConstantExpr::getShl(ConstantInt::get(C0->getType(), 1), C0);
+ Opc0 = Instruction::Mul;
+ DropNSW = true;
+ }
+ }
+
+ if (Opc0 != Opc1)
return nullptr;
+ // The opcodes must be the same. Use a new name to make that clear.
+ BinaryOperator::BinaryOps BOpc = Opc0;
+
// Remove a binop and the shuffle by rearranging the constant:
// shuffle (op X, C0), (op X, C1), M --> op X, C'
// shuffle (op C0, X), (op C1, X), M --> op C', X
@@ -1179,13 +1202,14 @@ static Instruction *foldSelectShuffles(ShuffleVectorInst &Shuf) {
if (B0->isIntDivRem())
NewC = getSafeVectorConstantForIntDivRem(NewC);
- BinaryOperator::BinaryOps Opc = B0->getOpcode();
- Instruction *NewBO = ConstantsAreOp1 ? BinaryOperator::Create(Opc, X, NewC) :
- BinaryOperator::Create(Opc, NewC, X);
+ Instruction *NewBO = ConstantsAreOp1 ? BinaryOperator::Create(BOpc, X, NewC) :
+ BinaryOperator::Create(BOpc, NewC, X);
// Flags are intersected from the 2 source binops.
NewBO->copyIRFlags(B0);
NewBO->andIRFlags(B1);
+ if (DropNSW)
+ NewBO->setHasNoSignedWrap(false);
return NewBO;
}
OpenPOWER on IntegriCloud