diff options
author | Sanjay Patel <spatel@rotateright.com> | 2015-03-24 20:36:42 +0000 |
---|---|---|
committer | Sanjay Patel <spatel@rotateright.com> | 2015-03-24 20:36:42 +0000 |
commit | 43a87fdc795646eec1df55b7792797008cc6520b (patch) | |
tree | 107b3144ad465345b781c72f742c85baa6cc419d /llvm | |
parent | 1a94ccbec82acabc40739108d2e69474686599fe (diff) | |
download | bcm5719-llvm-43a87fdc795646eec1df55b7792797008cc6520b.tar.gz bcm5719-llvm-43a87fdc795646eec1df55b7792797008cc6520b.zip |
[X86, AVX] instcombine vperm2 intrinsics with zero inputs into shuffles
This is the IR optimizer follow-on patch for D8563: the x86 backend patch
that converts this kind of shuffle back into a vperm2.
This is also a continuation of the transform that started in D8486.
In that patch, Andrea suggested that we could convert vperm2 intrinsics that
use zero masks into a single shuffle.
This is an implementation of that suggestion.
Differential Revision: http://reviews.llvm.org/D8567
llvm-svn: 233110
Diffstat (limited to 'llvm')
-rw-r--r-- | llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp | 70 | ||||
-rw-r--r-- | llvm/test/Transforms/InstCombine/x86-vperm2.ll | 37 |
2 files changed, 68 insertions, 39 deletions
```diff
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
index b59c9f5d910..8f7825a8664 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
@@ -204,7 +204,7 @@ static Value *SimplifyX86vperm2(const IntrinsicInst &II,
                                 InstCombiner::BuilderTy &Builder) {
   if (auto CInt = dyn_cast<ConstantInt>(II.getArgOperand(2))) {
     VectorType *VecTy = cast<VectorType>(II.getType());
-    uint8_t Imm = CInt->getZExtValue();
+    ConstantAggregateZero *ZeroVector = ConstantAggregateZero::get(VecTy);
 
     // The immediate permute control byte looks like this:
     //    [1:0] - select 128 bits from sources for low half of destination
@@ -213,37 +213,51 @@ static Value *SimplifyX86vperm2(const IntrinsicInst &II,
     //    [5:4] - select 128 bits from sources for high half of destination
     //    [6]   - ignore
     //    [7]   - zero high half of destination
-
-    if ((Imm & 0x88) == 0x88) {
-      // If both zero mask bits are set, this was just a weird way to
-      // generate a zero vector.
-      return ConstantAggregateZero::get(VecTy);
-    }
 
-    // TODO: If a single zero bit is set, replace one of the source operands
-    // with a zero vector and use the same mask generation logic as below.
+    uint8_t Imm = CInt->getZExtValue();
+
+    bool LowHalfZero = Imm & 0x08;
+    bool HighHalfZero = Imm & 0x80;
 
-    if ((Imm & 0x88) == 0x00) {
-      // If neither zero mask bit is set, this is a simple shuffle.
-      unsigned NumElts = VecTy->getNumElements();
-      unsigned HalfSize = NumElts / 2;
-      unsigned HalfBegin;
-      SmallVector<int, 8> ShuffleMask(NumElts);
+    // If both zero mask bits are set, this was just a weird way to
+    // generate a zero vector.
+    if (LowHalfZero && HighHalfZero)
+      return ZeroVector;
 
-      // Permute low half of result.
-      HalfBegin = (Imm & 0x3) * HalfSize;
-      for (unsigned i = 0; i != HalfSize; ++i)
-        ShuffleMask[i] = HalfBegin + i;
+    // If 0 or 1 zero mask bits are set, this is a simple shuffle.
+    unsigned NumElts = VecTy->getNumElements();
+    unsigned HalfSize = NumElts / 2;
+    SmallVector<int, 8> ShuffleMask(NumElts);
+
+    // The high bit of the selection field chooses the 1st or 2nd operand.
+    bool LowInputSelect = Imm & 0x02;
+    bool HighInputSelect = Imm & 0x20;
 
-      // Permute high half of result.
-      HalfBegin = ((Imm >> 4) & 0x3) * HalfSize;
-      for (unsigned i = HalfSize; i != NumElts; ++i)
-        ShuffleMask[i] = HalfBegin + i - HalfSize;
-
-      Value *Op0 = II.getArgOperand(0);
-      Value *Op1 = II.getArgOperand(1);
-      return Builder.CreateShuffleVector(Op0, Op1, ShuffleMask);
-    }
+    // The low bit of the selection field chooses the low or high half
+    // of the selected operand.
+    bool LowHalfSelect = Imm & 0x01;
+    bool HighHalfSelect = Imm & 0x10;
+
+    // Determine which operand(s) are actually in use for this instruction.
+    Value *V0 = LowInputSelect ? II.getArgOperand(1) : II.getArgOperand(0);
+    Value *V1 = HighInputSelect ? II.getArgOperand(1) : II.getArgOperand(0);
+
+    // If needed, replace operands based on zero mask.
+    V0 = LowHalfZero ? ZeroVector : V0;
+    V1 = HighHalfZero ? ZeroVector : V1;
+
+    // Permute low half of result.
+    unsigned StartIndex = LowHalfSelect ? HalfSize : 0;
+    for (unsigned i = 0; i < HalfSize; ++i)
+      ShuffleMask[i] = StartIndex + i;
+
+    // Permute high half of result.
+    StartIndex = HighHalfSelect ? HalfSize : 0;
+    StartIndex += NumElts;
+    for (unsigned i = 0; i < HalfSize; ++i)
+      ShuffleMask[i + HalfSize] = StartIndex + i;
+
+    return Builder.CreateShuffleVector(V0, V1, ShuffleMask);
   }
   return nullptr;
 }
diff --git a/llvm/test/Transforms/InstCombine/x86-vperm2.ll b/llvm/test/Transforms/InstCombine/x86-vperm2.ll
index 92cc4afefa7..864296dd189 100644
--- a/llvm/test/Transforms/InstCombine/x86-vperm2.ll
+++ b/llvm/test/Transforms/InstCombine/x86-vperm2.ll
@@ -76,7 +76,7 @@ define <4 x double> @perm2pd_0x02(<4 x double> %a0, <4 x double> %a1) {
   ret <4 x double> %res
 
 ; CHECK-LABEL: @perm2pd_0x02
-; CHECK-NEXT: %1 = shufflevector <4 x double> %a0, <4 x double> %a1, <4 x i32> <i32 4, i32 5, i32 0, i32 1>
+; CHECK-NEXT: %1 = shufflevector <4 x double> %a1, <4 x double> %a0, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
 ; CHECK-NEXT: ret <4 x double> %1
 }
 
@@ -85,7 +85,7 @@ define <4 x double> @perm2pd_0x03(<4 x double> %a0, <4 x double> %a1) {
   ret <4 x double> %res
 
 ; CHECK-LABEL: @perm2pd_0x03
-; CHECK-NEXT: %1 = shufflevector <4 x double> %a0, <4 x double> %a1, <4 x i32> <i32 6, i32 7, i32 0, i32 1>
+; CHECK-NEXT: %1 = shufflevector <4 x double> %a1, <4 x double> %a0, <4 x i32> <i32 2, i32 3, i32 4, i32 5>
 ; CHECK-NEXT: ret <4 x double> %1
 }
 
@@ -111,7 +111,7 @@ define <4 x double> @perm2pd_0x12(<4 x double> %a0, <4 x double> %a1) {
   ret <4 x double> %res
 
 ; CHECK-LABEL: @perm2pd_0x12
-; CHECK-NEXT: %1 = shufflevector <4 x double> %a0, <4 x double> %a1, <4 x i32> <i32 4, i32 5, i32 2, i32 3>
+; CHECK-NEXT: %1 = shufflevector <4 x double> %a1, <4 x double> %a0, <4 x i32> <i32 0, i32 1, i32 6, i32 7>
 ; CHECK-NEXT: ret <4 x double> %1
 }
 
@@ -120,7 +120,7 @@ define <4 x double> @perm2pd_0x13(<4 x double> %a0, <4 x double> %a1) {
   ret <4 x double> %res
 
 ; CHECK-LABEL: @perm2pd_0x13
-; CHECK-NEXT: %1 = shufflevector <4 x double> %a0, <4 x double> %a1, <4 x i32> <i32 6, i32 7, i32 2, i32 3>
+; CHECK-NEXT: %1 = shufflevector <4 x double> %a1, <4 x double> %a0, <4 x i32> <i32 2, i32 3, i32 6, i32 7>
 ; CHECK-NEXT: ret <4 x double> %1
 }
 
@@ -207,26 +207,41 @@ define <8 x float> @perm2ps_0x31(<8 x float> %a0, <8 x float> %a1) {
 }
 
 
-; Confirm that when a single zero mask bit is set, we do nothing.
+; Confirm that when a single zero mask bit is set, we replace a source vector with zeros.
+
+define <4 x double> @perm2pd_0x81(<4 x double> %a0, <4 x double> %a1) {
+  %res = call <4 x double> @llvm.x86.avx.vperm2f128.pd.256(<4 x double> %a0, <4 x double> %a1, i8 129)
+  ret <4 x double> %res
+
+; CHECK-LABEL: @perm2pd_0x81
+; CHECK-NEXT: shufflevector <4 x double> %a0, <4 x double> <double 0.0{{.*}}<4 x i32> <i32 2, i32 3, i32 4, i32 5>
+; CHECK-NEXT: ret <4 x double>
+}
 
 define <4 x double> @perm2pd_0x83(<4 x double> %a0, <4 x double> %a1) {
   %res = call <4 x double> @llvm.x86.avx.vperm2f128.pd.256(<4 x double> %a0, <4 x double> %a1, i8 131)
   ret <4 x double> %res
 
 ; CHECK-LABEL: @perm2pd_0x83
-; CHECK-NEXT: call <4 x double> @llvm.x86.avx.vperm2f128.pd.256(<4 x double> %a0, <4 x double> %a1, i8 -125)
+; CHECK-NEXT: shufflevector <4 x double> %a1, <4 x double> <double 0.0{{.*}}, <4 x i32> <i32 2, i32 3, i32 4, i32 5>
 ; CHECK-NEXT: ret <4 x double>
 }
 
+define <4 x double> @perm2pd_0x28(<4 x double> %a0, <4 x double> %a1) {
+  %res = call <4 x double> @llvm.x86.avx.vperm2f128.pd.256(<4 x double> %a0, <4 x double> %a1, i8 40)
+  ret <4 x double> %res
 
-; Confirm that when the other zero mask bit is set, we do nothing. Also confirm that an ignored bit has no effect.
+; CHECK-LABEL: @perm2pd_0x28
+; CHECK-NEXT: shufflevector <4 x double> <double 0.0{{.*}}, <4 x double> %a1, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
+; CHECK-NEXT: ret <4 x double>
+}
 
-define <4 x double> @perm2pd_0x48(<4 x double> %a0, <4 x double> %a1) {
-  %res = call <4 x double> @llvm.x86.avx.vperm2f128.pd.256(<4 x double> %a0, <4 x double> %a1, i8 72)
+define <4 x double> @perm2pd_0x08(<4 x double> %a0, <4 x double> %a1) {
+  %res = call <4 x double> @llvm.x86.avx.vperm2f128.pd.256(<4 x double> %a0, <4 x double> %a1, i8 8)
   ret <4 x double> %res
 
-; CHECK-LABEL: @perm2pd_0x48
-; CHECK-NEXT: call <4 x double> @llvm.x86.avx.vperm2f128.pd.256(<4 x double> %a0, <4 x double> %a1, i8 72)
+; CHECK-LABEL: @perm2pd_0x08
+; CHECK-NEXT: shufflevector <4 x double> <double 0.0{{.*}}, <4 x double> %a0, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
 ; CHECK-NEXT: ret <4 x double>
 }
 
```