diff options
author | Sanjay Patel <spatel@rotateright.com> | 2015-03-24 20:36:42 +0000 |
---|---|---|
committer | Sanjay Patel <spatel@rotateright.com> | 2015-03-24 20:36:42 +0000 |
commit | 43a87fdc795646eec1df55b7792797008cc6520b (patch) | |
tree | 107b3144ad465345b781c72f742c85baa6cc419d /llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp | |
parent | 1a94ccbec82acabc40739108d2e69474686599fe (diff) | |
download | bcm5719-llvm-43a87fdc795646eec1df55b7792797008cc6520b.tar.gz bcm5719-llvm-43a87fdc795646eec1df55b7792797008cc6520b.zip |
[X86, AVX] instcombine vperm2 intrinsics with zero inputs into shuffles
This is the IR optimizer follow-on patch for D8563: the x86 backend patch
that converts this kind of shuffle back into a vperm2.
This is also a continuation of the transform that started in D8486.
In the review of D8486, Andrea suggested that vperm2 intrinsics whose control
byte sets one of the zero-mask bits could also be converted into a single
shuffle, by using a zero vector as the corresponding shuffle input.
This patch implements that suggestion.
Differential Revision: http://reviews.llvm.org/D8567
llvm-svn: 233110
Diffstat (limited to 'llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp')
-rw-r--r-- | llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp | 70 |
1 files changed, 42 insertions, 28 deletions
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp index b59c9f5d910..8f7825a8664 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp @@ -204,7 +204,7 @@ static Value *SimplifyX86vperm2(const IntrinsicInst &II, InstCombiner::BuilderTy &Builder) { if (auto CInt = dyn_cast<ConstantInt>(II.getArgOperand(2))) { VectorType *VecTy = cast<VectorType>(II.getType()); - uint8_t Imm = CInt->getZExtValue(); + ConstantAggregateZero *ZeroVector = ConstantAggregateZero::get(VecTy); // The immediate permute control byte looks like this: // [1:0] - select 128 bits from sources for low half of destination @@ -213,37 +213,51 @@ static Value *SimplifyX86vperm2(const IntrinsicInst &II, // [5:4] - select 128 bits from sources for high half of destination // [6] - ignore // [7] - zero high half of destination - - if ((Imm & 0x88) == 0x88) { - // If both zero mask bits are set, this was just a weird way to - // generate a zero vector. - return ConstantAggregateZero::get(VecTy); - } - // TODO: If a single zero bit is set, replace one of the source operands - // with a zero vector and use the same mask generation logic as below. + uint8_t Imm = CInt->getZExtValue(); + + bool LowHalfZero = Imm & 0x08; + bool HighHalfZero = Imm & 0x80; - if ((Imm & 0x88) == 0x00) { - // If neither zero mask bit is set, this is a simple shuffle. - unsigned NumElts = VecTy->getNumElements(); - unsigned HalfSize = NumElts / 2; - unsigned HalfBegin; - SmallVector<int, 8> ShuffleMask(NumElts); + // If both zero mask bits are set, this was just a weird way to + // generate a zero vector. + if (LowHalfZero && HighHalfZero) + return ZeroVector; - // Permute low half of result. - HalfBegin = (Imm & 0x3) * HalfSize; - for (unsigned i = 0; i != HalfSize; ++i) - ShuffleMask[i] = HalfBegin + i; + // If 0 or 1 zero mask bits are set, this is a simple shuffle. 
+ unsigned NumElts = VecTy->getNumElements(); + unsigned HalfSize = NumElts / 2; + SmallVector<int, 8> ShuffleMask(NumElts); + + // The high bit of the selection field chooses the 1st or 2nd operand. + bool LowInputSelect = Imm & 0x02; + bool HighInputSelect = Imm & 0x20; - // Permute high half of result. - HalfBegin = ((Imm >> 4) & 0x3) * HalfSize; - for (unsigned i = HalfSize; i != NumElts; ++i) - ShuffleMask[i] = HalfBegin + i - HalfSize; - - Value *Op0 = II.getArgOperand(0); - Value *Op1 = II.getArgOperand(1); - return Builder.CreateShuffleVector(Op0, Op1, ShuffleMask); - } + // The low bit of the selection field chooses the low or high half + // of the selected operand. + bool LowHalfSelect = Imm & 0x01; + bool HighHalfSelect = Imm & 0x10; + + // Determine which operand(s) are actually in use for this instruction. + Value *V0 = LowInputSelect ? II.getArgOperand(1) : II.getArgOperand(0); + Value *V1 = HighInputSelect ? II.getArgOperand(1) : II.getArgOperand(0); + + // If needed, replace operands based on zero mask. + V0 = LowHalfZero ? ZeroVector : V0; + V1 = HighHalfZero ? ZeroVector : V1; + + // Permute low half of result. + unsigned StartIndex = LowHalfSelect ? HalfSize : 0; + for (unsigned i = 0; i < HalfSize; ++i) + ShuffleMask[i] = StartIndex + i; + + // Permute high half of result. + StartIndex = HighHalfSelect ? HalfSize : 0; + StartIndex += NumElts; + for (unsigned i = 0; i < HalfSize; ++i) + ShuffleMask[i + HalfSize] = StartIndex + i; + + return Builder.CreateShuffleVector(V0, V1, ShuffleMask); } return nullptr; } |