diff options
author | Simon Pilgrim <llvm-dev@redking.me.uk> | 2016-05-01 20:22:42 +0000 |
---|---|---|
committer | Simon Pilgrim <llvm-dev@redking.me.uk> | 2016-05-01 20:22:42 +0000 |
commit | eeacc40e27a75d07f360a58b18f1bf1bb3fcd4de (patch) | |
tree | c9a808f0c1ef87ab5d4757e836f6920a34f08d8d /llvm | |
parent | cc7f567b6a465d5940774b2d78b270f5ced93187 (diff) | |
download | bcm5719-llvm-eeacc40e27a75d07f360a58b18f1bf1bb3fcd4de.tar.gz bcm5719-llvm-eeacc40e27a75d07f360a58b18f1bf1bb3fcd4de.zip |
[InstCombine][SSE] Added support to the VPERMILVAR shuffle combine to accept UNDEF elements.
llvm-svn: 268204
Diffstat (limited to 'llvm')
-rw-r--r-- | llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp | 47 | ||||
-rw-r--r-- | llvm/test/Transforms/InstCombine/x86-avx.ll | 16 |
2 files changed, 35 insertions, 28 deletions
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp index bb7f260b2ec..25f4c768fcd 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp @@ -604,7 +604,7 @@ static Value *simplifyX86pshufb(const IntrinsicInst &II, "Unexpected number of elements in shuffle mask!"); // Construct a shuffle mask from constant integers or UNDEFs. - Constant *Indexes[32] = { NULL }; + Constant *Indexes[32] = {NULL}; // Each byte in the shuffle control mask forms an index to permute the // corresponding byte in the destination operand. @@ -644,39 +644,46 @@ static Value *simplifyX86vpermilvar(const IntrinsicInst &II, if (!V) return nullptr; - unsigned Size = cast<VectorType>(V->getType())->getNumElements(); - assert(Size == 8 || Size == 4 || Size == 2); + auto *MaskEltTy = Type::getInt32Ty(II.getContext()); + unsigned NumElts = cast<VectorType>(V->getType())->getNumElements(); + assert(NumElts == 8 || NumElts == 4 || NumElts == 2); - // Initialize the resulting shuffle mask to all zeroes. - uint32_t Indexes[8] = { 0 }; + // Construct a shuffle mask from constant integers or UNDEFs. + Constant *Indexes[8] = {NULL}; // The intrinsics only read one or two bits, clear the rest. - for (unsigned I = 0; I < Size; ++I) { + for (unsigned I = 0; I < NumElts; ++I) { Constant *COp = V->getAggregateElement(I); - if (!COp || !isa<ConstantInt>(COp)) + if (!COp || (!isa<UndefValue>(COp) && !isa<ConstantInt>(COp))) return nullptr; - int32_t Index = cast<ConstantInt>(COp)->getValue().getZExtValue() & 0x3; + if (isa<UndefValue>(COp)) { + Indexes[I] = UndefValue::get(MaskEltTy); + continue; + } + + APInt Index = cast<ConstantInt>(COp)->getValue(); + Index = Index.zextOrTrunc(32).getLoBits(2); // The PD variants uses bit 1 to select per-lane element index, so // shift down to convert to generic shuffle mask index. 
if (II.getIntrinsicID() == Intrinsic::x86_avx_vpermilvar_pd || II.getIntrinsicID() == Intrinsic::x86_avx_vpermilvar_pd_256) - Index >>= 1; - Indexes[I] = Index; - } + Index = Index.lshr(1); - // The _256 variants are a bit trickier since the mask bits always index - // into the corresponding 128 half. In order to convert to a generic - // shuffle, we have to make that explicit. - if (II.getIntrinsicID() == Intrinsic::x86_avx_vpermilvar_ps_256 || - II.getIntrinsicID() == Intrinsic::x86_avx_vpermilvar_pd_256) { - for (unsigned I = Size / 2; I < Size; ++I) - Indexes[I] += Size / 2; + // The _256 variants are a bit trickier since the mask bits always index + // into the corresponding 128 half. In order to convert to a generic + // shuffle, we have to make that explicit. + if ((II.getIntrinsicID() == Intrinsic::x86_avx_vpermilvar_ps_256 || + II.getIntrinsicID() == Intrinsic::x86_avx_vpermilvar_pd_256) && + ((NumElts / 2) <= I)) { + Index += APInt(32, NumElts / 2); + } + + Indexes[I] = ConstantInt::get(MaskEltTy, Index); } - auto ShuffleMask = - ConstantDataVector::get(V->getContext(), makeArrayRef(Indexes, Size)); + auto ShuffleMask = ConstantVector::get(makeArrayRef(Indexes, NumElts)); auto V1 = II.getArgOperand(0); auto V2 = UndefValue::get(V1->getType()); return Builder.CreateShuffleVector(V1, V2, ShuffleMask); diff --git a/llvm/test/Transforms/InstCombine/x86-avx.ll b/llvm/test/Transforms/InstCombine/x86-avx.ll index cb20038fae7..6590642deeb 100644 --- a/llvm/test/Transforms/InstCombine/x86-avx.ll +++ b/llvm/test/Transforms/InstCombine/x86-avx.ll @@ -117,8 +117,8 @@ define <4 x double> @test_vpermilvar_pd_256(<4 x double> %v) { define <4 x float> @undef_test_vpermilvar_ps(<4 x float> %v) { ; CHECK-LABEL: @undef_test_vpermilvar_ps( -; CHECK-NEXT: [[A:%.*]] = tail call <4 x float> @llvm.x86.avx.vpermilvar.ps(<4 x float> %v, <4 x i32> <i32 undef, i32 2, i32 1, i32 undef>) -; CHECK-NEXT: ret <4 x float> [[A]] +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x float> %v, 
<4 x float> undef, <4 x i32> <i32 undef, i32 2, i32 1, i32 undef> +; CHECK-NEXT: ret <4 x float> [[TMP1]] ; %a = tail call <4 x float> @llvm.x86.avx.vpermilvar.ps(<4 x float> %v, <4 x i32> <i32 undef, i32 2, i32 1, i32 undef>) ret <4 x float> %a @@ -126,8 +126,8 @@ define <4 x float> @undef_test_vpermilvar_ps(<4 x float> %v) { define <8 x float> @undef_test_vpermilvar_ps_256(<8 x float> %v) { ; CHECK-LABEL: @undef_test_vpermilvar_ps_256( -; CHECK-NEXT: [[A:%.*]] = tail call <8 x float> @llvm.x86.avx.vpermilvar.ps.256(<8 x float> %v, <8 x i32> <i32 undef, i32 6, i32 5, i32 undef, i32 3, i32 2, i32 1, i32 0>) -; CHECK-NEXT: ret <8 x float> [[A]] +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x float> %v, <8 x float> undef, <8 x i32> <i32 undef, i32 2, i32 1, i32 undef, i32 7, i32 6, i32 5, i32 4> +; CHECK-NEXT: ret <8 x float> [[TMP1]] ; %a = tail call <8 x float> @llvm.x86.avx.vpermilvar.ps.256(<8 x float> %v, <8 x i32> <i32 undef, i32 6, i32 5, i32 undef, i32 3, i32 2, i32 1, i32 0>) ret <8 x float> %a @@ -135,8 +135,8 @@ define <8 x float> @undef_test_vpermilvar_ps_256(<8 x float> %v) { define <2 x double> @undef_test_vpermilvar_pd(<2 x double> %v) { ; CHECK-LABEL: @undef_test_vpermilvar_pd( -; CHECK-NEXT: [[A:%.*]] = tail call <2 x double> @llvm.x86.avx.vpermilvar.pd(<2 x double> %v, <2 x i64> <i64 undef, i64 0>) -; CHECK-NEXT: ret <2 x double> [[A]] +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <2 x double> %v, <2 x double> undef, <2 x i32> <i32 undef, i32 0> +; CHECK-NEXT: ret <2 x double> [[TMP1]] ; %a = tail call <2 x double> @llvm.x86.avx.vpermilvar.pd(<2 x double> %v, <2 x i64> <i64 undef, i64 0>) ret <2 x double> %a @@ -144,8 +144,8 @@ define <2 x double> @undef_test_vpermilvar_pd(<2 x double> %v) { define <4 x double> @undef_test_vpermilvar_pd_256(<4 x double> %v) { ; CHECK-LABEL: @undef_test_vpermilvar_pd_256( -; CHECK-NEXT: [[A:%.*]] = tail call <4 x double> @llvm.x86.avx.vpermilvar.pd.256(<4 x double> %v, <4 x i64> <i64 undef, i64 1, i64 2, i64 undef>) 
-; CHECK-NEXT: ret <4 x double> [[A]] +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x double> %v, <4 x double> undef, <4 x i32> <i32 undef, i32 0, i32 3, i32 undef> +; CHECK-NEXT: ret <4 x double> [[TMP1]] ; %a = tail call <4 x double> @llvm.x86.avx.vpermilvar.pd.256(<4 x double> %v, <4 x i64> <i64 undef, i64 1, i64 2, i64 undef>) ret <4 x double> %a |