diff options
-rw-r--r-- | llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp | 21 | ||||
-rw-r--r-- | llvm/test/Transforms/InstCombine/x86-avx2.ll | 10 |
2 files changed, 18 insertions, 13 deletions
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp index 25f4c768fcd..67fdaced7c4 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp @@ -696,25 +696,30 @@ static Value *simplifyX86vpermv(const IntrinsicInst &II, if (!V) return nullptr; - VectorType *VecTy = cast<VectorType>(II.getType()); + auto *VecTy = cast<VectorType>(II.getType()); + auto *MaskEltTy = Type::getInt32Ty(II.getContext()); unsigned Size = VecTy->getNumElements(); assert(Size == 8 && "Unexpected shuffle mask size"); - // Initialize the resulting shuffle mask to all zeroes. - uint32_t Indexes[8] = {0}; + // Construct a shuffle mask from constant integers or UNDEFs. + Constant *Indexes[8] = {NULL}; for (unsigned I = 0; I < Size; ++I) { Constant *COp = V->getAggregateElement(I); - if (!COp || !isa<ConstantInt>(COp)) + if (!COp || (!isa<UndefValue>(COp) && !isa<ConstantInt>(COp))) return nullptr; + if (isa<UndefValue>(COp)) { + Indexes[I] = UndefValue::get(MaskEltTy); + continue; + } + APInt Index = cast<ConstantInt>(COp)->getValue(); - Index = Index.getLoBits(3); - Indexes[I] = (uint32_t)Index.getZExtValue(); + Index = Index.zextOrTrunc(32).getLoBits(3); + Indexes[I] = ConstantInt::get(MaskEltTy, Index); } - auto ShuffleMask = - ConstantDataVector::get(II.getContext(), makeArrayRef(Indexes, Size)); + auto ShuffleMask = ConstantVector::get(makeArrayRef(Indexes, Size)); auto V1 = II.getArgOperand(0); auto V2 = UndefValue::get(VecTy); return Builder.CreateShuffleVector(V1, V2, ShuffleMask); diff --git a/llvm/test/Transforms/InstCombine/x86-avx2.ll b/llvm/test/Transforms/InstCombine/x86-avx2.ll index 8d1fd89f642..4c13b4c6ae7 100644 --- a/llvm/test/Transforms/InstCombine/x86-avx2.ll +++ b/llvm/test/Transforms/InstCombine/x86-avx2.ll @@ -61,12 +61,12 @@ define <8 x float> @shuffle_test_vpermps(<8 x float> %a0) { ret <8 x float> %a } -; FIXME: Verify that instcombine is able to fold constant shuffles with undef mask elements. +; Verify that instcombine is able to fold constant shuffles with undef mask elements. define <8 x i32> @undef_test_vpermd(<8 x i32> %a0) { ; CHECK-LABEL: @undef_test_vpermd( -; CHECK-NEXT: [[A:%.*]] = tail call <8 x i32> @llvm.x86.avx2.permd(<8 x i32> [[A:%.*]]0, <8 x i32> <i32 undef, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>) -; CHECK-NEXT: ret <8 x i32> [[A]] +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x i32> %a0, <8 x i32> undef, <8 x i32> <i32 undef, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> +; CHECK-NEXT: ret <8 x i32> [[TMP1]] ; %a = tail call <8 x i32> @llvm.x86.avx2.permd(<8 x i32> %a0, <8 x i32> <i32 undef, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>) ret <8 x i32> %a @@ -74,8 +74,8 @@ define <8 x i32> @undef_test_vpermd(<8 x i32> %a0) { define <8 x float> @undef_test_vpermps(<8 x float> %a0) { ; CHECK-LABEL: @undef_test_vpermps( -; CHECK-NEXT: [[A:%.*]] = tail call <8 x float> @llvm.x86.avx2.permps(<8 x float> [[A:%.*]]0, <8 x i32> <i32 undef, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>) -; CHECK-NEXT: ret <8 x float> [[A]] +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x float> %a0, <8 x float> undef, <8 x i32> <i32 undef, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> +; CHECK-NEXT: ret <8 x float> [[TMP1]] ; %a = tail call <8 x float> @llvm.x86.avx2.permps(<8 x float> %a0, <8 x i32> <i32 undef, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>) ret <8 x float> %a |