summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp33
-rw-r--r--llvm/test/Transforms/InstCombine/x86-pshufb.ll10
2 files changed, 21 insertions, 22 deletions
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
index 307bb9f9806..bb7f260b2ec 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
@@ -597,21 +597,27 @@ static Value *simplifyX86pshufb(const IntrinsicInst &II,
if (!V)
return nullptr;
- auto *VTy = cast<VectorType>(V->getType());
- unsigned NumElts = VTy->getNumElements();
+ auto *VecTy = cast<VectorType>(II.getType());
+ auto *MaskEltTy = Type::getInt32Ty(II.getContext());
+ unsigned NumElts = VecTy->getNumElements();
assert((NumElts == 16 || NumElts == 32) &&
"Unexpected number of elements in shuffle mask!");
- // Initialize the resulting shuffle mask to all zeroes.
- uint32_t Indexes[32] = {0};
+ // Construct a shuffle mask from constant integers or UNDEFs.
+ Constant *Indexes[32] = { NULL };
// Each byte in the shuffle control mask forms an index to permute the
// corresponding byte in the destination operand.
for (unsigned I = 0; I < NumElts; ++I) {
Constant *COp = V->getAggregateElement(I);
- if (!COp || !isa<ConstantInt>(COp))
+ if (!COp || (!isa<UndefValue>(COp) && !isa<ConstantInt>(COp)))
return nullptr;
+ if (isa<UndefValue>(COp)) {
+ Indexes[I] = UndefValue::get(MaskEltTy);
+ continue;
+ }
+
int8_t Index = cast<ConstantInt>(COp)->getValue().getZExtValue();
// If the most significant bit (bit[7]) of each byte of the shuffle
@@ -619,20 +625,15 @@ static Value *simplifyX86pshufb(const IntrinsicInst &II,
// The zero vector is in the right-hand side of the resulting
// shufflevector.
- // The value of each index is the least significant 4 bits of the
- // shuffle control byte.
- Indexes[I] = (Index < 0) ? NumElts : Index & 0xF;
+ // The value of each index for the high 128-bit lane is the least
+ // significant 4 bits of the respective shuffle control byte.
+ Index = ((Index < 0) ? NumElts : Index & 0x0F) + (I & 0xF0);
+ Indexes[I] = ConstantInt::get(MaskEltTy, Index);
}
- // The value of each index for the high 128-bit lane is the least
- // significant 4 bits of the respective shuffle control byte.
- for (unsigned I = 16; I < NumElts; ++I)
- Indexes[I] += I & 0xF0;
-
- auto ShuffleMask =
- ConstantDataVector::get(V->getContext(), makeArrayRef(Indexes, NumElts));
+ auto ShuffleMask = ConstantVector::get(makeArrayRef(Indexes, NumElts));
auto V1 = II.getArgOperand(0);
- auto V2 = Constant::getNullValue(II.getType());
+ auto V2 = Constant::getNullValue(VecTy);
return Builder.CreateShuffleVector(V1, V2, ShuffleMask);
}
diff --git a/llvm/test/Transforms/InstCombine/x86-pshufb.ll b/llvm/test/Transforms/InstCombine/x86-pshufb.ll
index 4ad3e1f7ddc..3ada4fbd166 100644
--- a/llvm/test/Transforms/InstCombine/x86-pshufb.ll
+++ b/llvm/test/Transforms/InstCombine/x86-pshufb.ll
@@ -288,7 +288,7 @@ define <32 x i8> @permute3_avx2(<32 x i8> %InVec) {
define <16 x i8> @fold_with_undef_elts(<16 x i8> %InVec) {
; CHECK-LABEL: @fold_with_undef_elts(
-; CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> %InVec, <16 x i8> <i8 0, i8 -128, i8 undef, i8 -128, i8 1, i8 -128, i8 undef, i8 -128, i8 2, i8 -128, i8 undef, i8 -128, i8 3, i8 -128, i8 undef, i8 -128>)
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <16 x i8> %InVec, <16 x i8> <i8 0, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef>, <16 x i32> <i32 0, i32 16, i32 undef, i32 16, i32 1, i32 16, i32 undef, i32 16, i32 2, i32 16, i32 undef, i32 16, i32 3, i32 16, i32 undef, i32 16>
; CHECK-NEXT: ret <16 x i8> [[TMP1]]
;
%1 = tail call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> %InVec, <16 x i8> <i8 0, i8 -128, i8 undef, i8 -128, i8 1, i8 -128, i8 undef, i8 -128, i8 2, i8 -128, i8 undef, i8 -128, i8 3, i8 -128, i8 undef, i8 -128>)
@@ -297,7 +297,7 @@ define <16 x i8> @fold_with_undef_elts(<16 x i8> %InVec) {
define <32 x i8> @fold_with_undef_elts_avx2(<32 x i8> %InVec) {
; CHECK-LABEL: @fold_with_undef_elts_avx2(
-; CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8> %InVec, <32 x i8> <i8 0, i8 -128, i8 undef, i8 -128, i8 1, i8 -128, i8 undef, i8 -128, i8 2, i8 -128, i8 undef, i8 -128, i8 3, i8 -128, i8 undef, i8 -128, i8 0, i8 -128, i8 undef, i8 -128, i8 1, i8 -128, i8 undef, i8 -128, i8 2, i8 -128, i8 undef, i8 -128, i8 3, i8 -128, i8 undef, i8 -128>)
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <32 x i8> %InVec, <32 x i8> <i8 0, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 0, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef>, <32 x i32> <i32 0, i32 32, i32 undef, i32 32, i32 1, i32 32, i32 undef, i32 32, i32 2, i32 32, i32 undef, i32 32, i32 3, i32 32, i32 undef, i32 32, i32 16, i32 48, i32 undef, i32 48, i32 17, i32 48, i32 undef, i32 48, i32 18, i32 48, i32 undef, i32 48, i32 19, i32 48, i32 undef, i32 48>
; CHECK-NEXT: ret <32 x i8> [[TMP1]]
;
%1 = tail call <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8> %InVec, <32 x i8> <i8 0, i8 -128, i8 undef, i8 -128, i8 1, i8 -128, i8 undef, i8 -128, i8 2, i8 -128, i8 undef, i8 -128, i8 3, i8 -128, i8 undef, i8 -128, i8 0, i8 -128, i8 undef, i8 -128, i8 1, i8 -128, i8 undef, i8 -128, i8 2, i8 -128, i8 undef, i8 -128, i8 3, i8 -128, i8 undef, i8 -128>)
@@ -306,8 +306,7 @@ define <32 x i8> @fold_with_undef_elts_avx2(<32 x i8> %InVec) {
define <16 x i8> @fold_with_allundef_elts(<16 x i8> %InVec) {
; CHECK-LABEL: @fold_with_allundef_elts(
-; CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> %InVec, <16 x i8> undef)
-; CHECK-NEXT: ret <16 x i8> [[TMP1]]
+; CHECK-NEXT: ret <16 x i8> undef
;
%1 = tail call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> %InVec, <16 x i8> undef)
ret <16 x i8> %1
@@ -315,8 +314,7 @@ define <16 x i8> @fold_with_allundef_elts(<16 x i8> %InVec) {
define <32 x i8> @fold_with_allundef_elts_avx2(<32 x i8> %InVec) {
; CHECK-LABEL: @fold_with_allundef_elts_avx2(
-; CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8> %InVec, <32 x i8> undef)
-; CHECK-NEXT: ret <32 x i8> [[TMP1]]
+; CHECK-NEXT: ret <32 x i8> undef
;
%1 = tail call <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8> %InVec, <32 x i8> undef)
ret <32 x i8> %1
OpenPOWER on IntegriCloud