diff options
-rw-r--r-- | llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp | 55 | ||||
-rw-r--r-- | llvm/test/Transforms/InstCombine/x86-vector-shifts.ll | 309 |
2 files changed, 264 insertions, 100 deletions
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp index 668789b516d..de413c42348 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp @@ -200,33 +200,56 @@ Instruction *InstCombiner::SimplifyMemSet(MemSetInst *MI) { static Value *SimplifyX86immshift(const IntrinsicInst &II, InstCombiner::BuilderTy &Builder, bool ShiftLeft) { - // Simplify if count is constant. To 0 if >= BitWidth, - // otherwise to shl/lshr. - auto CDV = dyn_cast<ConstantDataVector>(II.getArgOperand(1)); - auto CInt = dyn_cast<ConstantInt>(II.getArgOperand(1)); - if (!CDV && !CInt) + // Simplify if count is constant. + auto Arg1 = II.getArgOperand(1); + auto CAZ = dyn_cast<ConstantAggregateZero>(Arg1); + auto CDV = dyn_cast<ConstantDataVector>(Arg1); + auto CInt = dyn_cast<ConstantInt>(Arg1); + if (!CAZ && !CDV && !CInt) return nullptr; - ConstantInt *Count; - if (CDV) - Count = cast<ConstantInt>(CDV->getElementAsConstant(0)); - else - Count = CInt; + + APInt Count(64, 0); + if (CDV) { + // SSE2/AVX2 uses all the first 64-bits of the 128-bit vector + // operand to compute the shift amount. + auto VT = cast<VectorType>(CDV->getType()); + unsigned BitWidth = VT->getElementType()->getPrimitiveSizeInBits(); + assert((64 % BitWidth) == 0 && "Unexpected packed shift size"); + unsigned NumSubElts = 64 / BitWidth; + + // Concatenate the sub-elements to create the 64-bit value. + for (unsigned i = 0; i != NumSubElts; ++i) { + unsigned SubEltIdx = (NumSubElts - 1) - i; + auto SubElt = cast<ConstantInt>(CDV->getElementAsConstant(SubEltIdx)); + Count = Count.shl(BitWidth); + Count |= SubElt->getValue().zextOrTrunc(64); + } + } + else if (CInt) + Count = CInt->getValue(); auto Vec = II.getArgOperand(0); auto VT = cast<VectorType>(Vec->getType()); auto SVT = VT->getElementType(); - if (Count->getZExtValue() > (SVT->getPrimitiveSizeInBits() - 1)) - return ConstantAggregateZero::get(VT); - unsigned VWidth = VT->getNumElements(); + unsigned BitWidth = SVT->getPrimitiveSizeInBits(); + + // If shift-by-zero then just return the original value. + if (Count == 0) + return Vec; + + // Handle cases when Shift >= BitWidth - just return zero. + if (Count.uge(BitWidth)) + return ConstantAggregateZero::get(VT); // Get a constant vector of the same type as the first operand. - auto VTCI = ConstantInt::get(VT->getElementType(), Count->getZExtValue()); + auto ShiftAmt = ConstantInt::get(SVT, Count.zextOrTrunc(BitWidth)); + auto ShiftVec = Builder.CreateVectorSplat(VWidth, ShiftAmt); if (ShiftLeft) - return Builder.CreateShl(Vec, Builder.CreateVectorSplat(VWidth, VTCI)); + return Builder.CreateShl(Vec, ShiftVec); - return Builder.CreateLShr(Vec, Builder.CreateVectorSplat(VWidth, VTCI)); + return Builder.CreateLShr(Vec, ShiftVec); } static Value *SimplifyX86extend(const IntrinsicInst &II, diff --git a/llvm/test/Transforms/InstCombine/x86-vector-shifts.ll b/llvm/test/Transforms/InstCombine/x86-vector-shifts.ll index 2bf428ef2ff..fdfad4900f9 100644 --- a/llvm/test/Transforms/InstCombine/x86-vector-shifts.ll +++ b/llvm/test/Transforms/InstCombine/x86-vector-shifts.ll @@ -7,132 +7,132 @@ target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" define <8 x i16> @sse2_psrli_w_0(<8 x i16> %v) nounwind readnone uwtable {
; CHECK-LABEL: @sse2_psrli_w_0
-; CHECK: ret <8 x i16> %v
+; CHECK-NEXT: ret <8 x i16> %v
%1 = tail call <8 x i16> @llvm.x86.sse2.psrli.w(<8 x i16> %v, i32 0)
ret <8 x i16> %1
}
define <8 x i16> @sse2_psrli_w_15(<8 x i16> %v) nounwind readnone uwtable {
; CHECK-LABEL: @sse2_psrli_w_15
-; CHECK: %1 = lshr <8 x i16> %v, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
-; CHECK: ret <8 x i16> %1
+; CHECK-NEXT: %1 = lshr <8 x i16> %v, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
+; CHECK-NEXT: ret <8 x i16> %1
%1 = tail call <8 x i16> @llvm.x86.sse2.psrli.w(<8 x i16> %v, i32 15)
ret <8 x i16> %1
}
define <8 x i16> @sse2_psrli_w_64(<8 x i16> %v) nounwind readnone uwtable {
; CHECK-LABEL: @sse2_psrli_w_64
-; CHECK: ret <8 x i16> zeroinitializer
+; CHECK-NEXT: ret <8 x i16> zeroinitializer
%1 = tail call <8 x i16> @llvm.x86.sse2.psrli.w(<8 x i16> %v, i32 64)
ret <8 x i16> %1
}
define <4 x i32> @sse2_psrli_d_0(<4 x i32> %v) nounwind readnone uwtable {
; CHECK-LABEL: @sse2_psrli_d_0
-; CHECK: ret <4 x i32> %v
+; CHECK-NEXT: ret <4 x i32> %v
%1 = tail call <4 x i32> @llvm.x86.sse2.psrli.d(<4 x i32> %v, i32 0)
ret <4 x i32> %1
}
define <4 x i32> @sse2_psrli_d_15(<4 x i32> %v) nounwind readnone uwtable {
; CHECK-LABEL: @sse2_psrli_d_15
-; CHECK: %1 = lshr <4 x i32> %v, <i32 15, i32 15, i32 15, i32 15>
-; CHECK: ret <4 x i32> %1
+; CHECK-NEXT: %1 = lshr <4 x i32> %v, <i32 15, i32 15, i32 15, i32 15>
+; CHECK-NEXT: ret <4 x i32> %1
%1 = tail call <4 x i32> @llvm.x86.sse2.psrli.d(<4 x i32> %v, i32 15)
ret <4 x i32> %1
}
define <4 x i32> @sse2_psrli_d_64(<4 x i32> %v) nounwind readnone uwtable {
; CHECK-LABEL: @sse2_psrli_d_64
-; CHECK: ret <4 x i32> zeroinitializer
+; CHECK-NEXT: ret <4 x i32> zeroinitializer
%1 = tail call <4 x i32> @llvm.x86.sse2.psrli.d(<4 x i32> %v, i32 64)
ret <4 x i32> %1
}
define <2 x i64> @sse2_psrli_q_0(<2 x i64> %v) nounwind readnone uwtable {
; CHECK-LABEL: @sse2_psrli_q_0
-; CHECK: ret <2 x i64> %v
+; CHECK-NEXT: ret <2 x i64> %v
%1 = tail call <2 x i64> @llvm.x86.sse2.psrli.q(<2 x i64> %v, i32 0)
ret <2 x i64> %1
}
define <2 x i64> @sse2_psrli_q_15(<2 x i64> %v) nounwind readnone uwtable {
; CHECK-LABEL: @sse2_psrli_q_15
-; CHECK: %1 = lshr <2 x i64> %v, <i64 15, i64 15>
-; CHECK: ret <2 x i64> %1
+; CHECK-NEXT: %1 = lshr <2 x i64> %v, <i64 15, i64 15>
+; CHECK-NEXT: ret <2 x i64> %1
%1 = tail call <2 x i64> @llvm.x86.sse2.psrli.q(<2 x i64> %v, i32 15)
ret <2 x i64> %1
}
define <2 x i64> @sse2_psrli_q_64(<2 x i64> %v) nounwind readnone uwtable {
; CHECK-LABEL: @sse2_psrli_q_64
-; CHECK: ret <2 x i64> zeroinitializer
+; CHECK-NEXT: ret <2 x i64> zeroinitializer
%1 = tail call <2 x i64> @llvm.x86.sse2.psrli.q(<2 x i64> %v, i32 64)
ret <2 x i64> %1
}
define <16 x i16> @avx2_psrli_w_0(<16 x i16> %v) nounwind readnone uwtable {
; CHECK-LABEL: @avx2_psrli_w_0
-; CHECK: ret <16 x i16> %v
+; CHECK-NEXT: ret <16 x i16> %v
%1 = tail call <16 x i16> @llvm.x86.avx2.psrli.w(<16 x i16> %v, i32 0)
ret <16 x i16> %1
}
define <16 x i16> @avx2_psrli_w_15(<16 x i16> %v) nounwind readnone uwtable {
; CHECK-LABEL: @avx2_psrli_w_15
-; CHECK: %1 = lshr <16 x i16> %v, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
-; CHECK: ret <16 x i16> %1
+; CHECK-NEXT: %1 = lshr <16 x i16> %v, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
+; CHECK-NEXT: ret <16 x i16> %1
%1 = tail call <16 x i16> @llvm.x86.avx2.psrli.w(<16 x i16> %v, i32 15)
ret <16 x i16> %1
}
define <16 x i16> @avx2_psrli_w_64(<16 x i16> %v) nounwind readnone uwtable {
; CHECK-LABEL: @avx2_psrli_w_64
-; CHECK: ret <16 x i16> zeroinitializer
+; CHECK-NEXT: ret <16 x i16> zeroinitializer
%1 = tail call <16 x i16> @llvm.x86.avx2.psrli.w(<16 x i16> %v, i32 64)
ret <16 x i16> %1
}
define <8 x i32> @avx2_psrli_d_0(<8 x i32> %v) nounwind readnone uwtable {
; CHECK-LABEL: @avx2_psrli_d_0
-; CHECK: ret <8 x i32> %v
+; CHECK-NEXT: ret <8 x i32> %v
%1 = tail call <8 x i32> @llvm.x86.avx2.psrli.d(<8 x i32> %v, i32 0)
ret <8 x i32> %1
}
define <8 x i32> @avx2_psrli_d_15(<8 x i32> %v) nounwind readnone uwtable {
; CHECK-LABEL: @avx2_psrli_d_15
-; CHECK: %1 = lshr <8 x i32> %v, <i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15>
-; CHECK: ret <8 x i32> %1
+; CHECK-NEXT: %1 = lshr <8 x i32> %v, <i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15>
+; CHECK-NEXT: ret <8 x i32> %1
%1 = tail call <8 x i32> @llvm.x86.avx2.psrli.d(<8 x i32> %v, i32 15)
ret <8 x i32> %1
}
define <8 x i32> @avx2_psrli_d_64(<8 x i32> %v) nounwind readnone uwtable {
; CHECK-LABEL: @avx2_psrli_d_64
-; CHECK: ret <8 x i32> zeroinitializer
+; CHECK-NEXT: ret <8 x i32> zeroinitializer
%1 = tail call <8 x i32> @llvm.x86.avx2.psrli.d(<8 x i32> %v, i32 64)
ret <8 x i32> %1
}
define <4 x i64> @avx2_psrli_q_0(<4 x i64> %v) nounwind readnone uwtable {
; CHECK-LABEL: @avx2_psrli_q_0
-; CHECK: ret <4 x i64> %v
+; CHECK-NEXT: ret <4 x i64> %v
%1 = tail call <4 x i64> @llvm.x86.avx2.psrli.q(<4 x i64> %v, i32 0)
ret <4 x i64> %1
}
define <4 x i64> @avx2_psrli_q_15(<4 x i64> %v) nounwind readnone uwtable {
; CHECK-LABEL: @avx2_psrli_q_15
-; CHECK: %1 = lshr <4 x i64> %v, <i64 15, i64 15, i64 15, i64 15>
-; CHECK: ret <4 x i64> %1
+; CHECK-NEXT: %1 = lshr <4 x i64> %v, <i64 15, i64 15, i64 15, i64 15>
+; CHECK-NEXT: ret <4 x i64> %1
%1 = tail call <4 x i64> @llvm.x86.avx2.psrli.q(<4 x i64> %v, i32 15)
ret <4 x i64> %1
}
define <4 x i64> @avx2_psrli_q_64(<4 x i64> %v) nounwind readnone uwtable {
; CHECK-LABEL: @avx2_psrli_q_64
-; CHECK: ret <4 x i64> zeroinitializer
+; CHECK-NEXT: ret <4 x i64> zeroinitializer
%1 = tail call <4 x i64> @llvm.x86.avx2.psrli.q(<4 x i64> %v, i32 64)
ret <4 x i64> %1
}
@@ -143,132 +143,132 @@ define <4 x i64> @avx2_psrli_q_64(<4 x i64> %v) nounwind readnone uwtable { define <8 x i16> @sse2_pslli_w_0(<8 x i16> %v) nounwind readnone uwtable {
; CHECK-LABEL: @sse2_pslli_w_0
-; CHECK: ret <8 x i16> %v
+; CHECK-NEXT: ret <8 x i16> %v
%1 = tail call <8 x i16> @llvm.x86.sse2.pslli.w(<8 x i16> %v, i32 0)
ret <8 x i16> %1
}
define <8 x i16> @sse2_pslli_w_15(<8 x i16> %v) nounwind readnone uwtable {
; CHECK-LABEL: @sse2_pslli_w_15
-; CHECK: %1 = shl <8 x i16> %v, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
-; CHECK: ret <8 x i16> %1
+; CHECK-NEXT: %1 = shl <8 x i16> %v, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
+; CHECK-NEXT: ret <8 x i16> %1
%1 = tail call <8 x i16> @llvm.x86.sse2.pslli.w(<8 x i16> %v, i32 15)
ret <8 x i16> %1
}
define <8 x i16> @sse2_pslli_w_64(<8 x i16> %v) nounwind readnone uwtable {
; CHECK-LABEL: @sse2_pslli_w_64
-; CHECK: ret <8 x i16> zeroinitializer
+; CHECK-NEXT: ret <8 x i16> zeroinitializer
%1 = tail call <8 x i16> @llvm.x86.sse2.pslli.w(<8 x i16> %v, i32 64)
ret <8 x i16> %1
}
define <4 x i32> @sse2_pslli_d_0(<4 x i32> %v) nounwind readnone uwtable {
; CHECK-LABEL: @sse2_pslli_d_0
-; CHECK: ret <4 x i32> %v
+; CHECK-NEXT: ret <4 x i32> %v
%1 = tail call <4 x i32> @llvm.x86.sse2.pslli.d(<4 x i32> %v, i32 0)
ret <4 x i32> %1
}
define <4 x i32> @sse2_pslli_d_15(<4 x i32> %v) nounwind readnone uwtable {
; CHECK-LABEL: @sse2_pslli_d_15
-; CHECK: %1 = shl <4 x i32> %v, <i32 15, i32 15, i32 15, i32 15>
-; CHECK: ret <4 x i32> %1
+; CHECK-NEXT: %1 = shl <4 x i32> %v, <i32 15, i32 15, i32 15, i32 15>
+; CHECK-NEXT: ret <4 x i32> %1
%1 = tail call <4 x i32> @llvm.x86.sse2.pslli.d(<4 x i32> %v, i32 15)
ret <4 x i32> %1
}
define <4 x i32> @sse2_pslli_d_64(<4 x i32> %v) nounwind readnone uwtable {
; CHECK-LABEL: @sse2_pslli_d_64
-; CHECK: ret <4 x i32> zeroinitializer
+; CHECK-NEXT: ret <4 x i32> zeroinitializer
%1 = tail call <4 x i32> @llvm.x86.sse2.pslli.d(<4 x i32> %v, i32 64)
ret <4 x i32> %1
}
define <2 x i64> @sse2_pslli_q_0(<2 x i64> %v) nounwind readnone uwtable {
; CHECK-LABEL: @sse2_pslli_q_0
-; CHECK: ret <2 x i64> %v
+; CHECK-NEXT: ret <2 x i64> %v
%1 = tail call <2 x i64> @llvm.x86.sse2.pslli.q(<2 x i64> %v, i32 0)
ret <2 x i64> %1
}
define <2 x i64> @sse2_pslli_q_15(<2 x i64> %v) nounwind readnone uwtable {
; CHECK-LABEL: @sse2_pslli_q_15
-; CHECK: %1 = shl <2 x i64> %v, <i64 15, i64 15>
-; CHECK: ret <2 x i64> %1
+; CHECK-NEXT: %1 = shl <2 x i64> %v, <i64 15, i64 15>
+; CHECK-NEXT: ret <2 x i64> %1
%1 = tail call <2 x i64> @llvm.x86.sse2.pslli.q(<2 x i64> %v, i32 15)
ret <2 x i64> %1
}
define <2 x i64> @sse2_pslli_q_64(<2 x i64> %v) nounwind readnone uwtable {
; CHECK-LABEL: @sse2_pslli_q_64
-; CHECK: ret <2 x i64> zeroinitializer
+; CHECK-NEXT: ret <2 x i64> zeroinitializer
%1 = tail call <2 x i64> @llvm.x86.sse2.pslli.q(<2 x i64> %v, i32 64)
ret <2 x i64> %1
}
define <16 x i16> @avx2_pslli_w_0(<16 x i16> %v) nounwind readnone uwtable {
; CHECK-LABEL: @avx2_pslli_w_0
-; CHECK: ret <16 x i16> %v
+; CHECK-NEXT: ret <16 x i16> %v
%1 = tail call <16 x i16> @llvm.x86.avx2.pslli.w(<16 x i16> %v, i32 0)
ret <16 x i16> %1
}
define <16 x i16> @avx2_pslli_w_15(<16 x i16> %v) nounwind readnone uwtable {
; CHECK-LABEL: @avx2_pslli_w_15
-; CHECK: %1 = shl <16 x i16> %v, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
-; CHECK: ret <16 x i16> %1
+; CHECK-NEXT: %1 = shl <16 x i16> %v, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
+; CHECK-NEXT: ret <16 x i16> %1
%1 = tail call <16 x i16> @llvm.x86.avx2.pslli.w(<16 x i16> %v, i32 15)
ret <16 x i16> %1
}
define <16 x i16> @avx2_pslli_w_64(<16 x i16> %v) nounwind readnone uwtable {
; CHECK-LABEL: @avx2_pslli_w_64
-; CHECK: ret <16 x i16> zeroinitializer
+; CHECK-NEXT: ret <16 x i16> zeroinitializer
%1 = tail call <16 x i16> @llvm.x86.avx2.pslli.w(<16 x i16> %v, i32 64)
ret <16 x i16> %1
}
define <8 x i32> @avx2_pslli_d_0(<8 x i32> %v) nounwind readnone uwtable {
; CHECK-LABEL: @avx2_pslli_d_0
-; CHECK: ret <8 x i32> %v
+; CHECK-NEXT: ret <8 x i32> %v
%1 = tail call <8 x i32> @llvm.x86.avx2.pslli.d(<8 x i32> %v, i32 0)
ret <8 x i32> %1
}
define <8 x i32> @avx2_pslli_d_15(<8 x i32> %v) nounwind readnone uwtable {
; CHECK-LABEL: @avx2_pslli_d_15
-; CHECK: %1 = shl <8 x i32> %v, <i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15>
-; CHECK: ret <8 x i32> %1
+; CHECK-NEXT: %1 = shl <8 x i32> %v, <i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15>
+; CHECK-NEXT: ret <8 x i32> %1
%1 = tail call <8 x i32> @llvm.x86.avx2.pslli.d(<8 x i32> %v, i32 15)
ret <8 x i32> %1
}
define <8 x i32> @avx2_pslli_d_64(<8 x i32> %v) nounwind readnone uwtable {
; CHECK-LABEL: @avx2_pslli_d_64
-; CHECK: ret <8 x i32> zeroinitializer
+; CHECK-NEXT: ret <8 x i32> zeroinitializer
%1 = tail call <8 x i32> @llvm.x86.avx2.pslli.d(<8 x i32> %v, i32 64)
ret <8 x i32> %1
}
define <4 x i64> @avx2_pslli_q_0(<4 x i64> %v) nounwind readnone uwtable {
; CHECK-LABEL: @avx2_pslli_q_0
-; CHECK: ret <4 x i64> %v
+; CHECK-NEXT: ret <4 x i64> %v
%1 = tail call <4 x i64> @llvm.x86.avx2.pslli.q(<4 x i64> %v, i32 0)
ret <4 x i64> %1
}
define <4 x i64> @avx2_pslli_q_15(<4 x i64> %v) nounwind readnone uwtable {
; CHECK-LABEL: @avx2_pslli_q_15
-; CHECK: %1 = shl <4 x i64> %v, <i64 15, i64 15, i64 15, i64 15>
-; CHECK: ret <4 x i64> %1
+; CHECK-NEXT: %1 = shl <4 x i64> %v, <i64 15, i64 15, i64 15, i64 15>
+; CHECK-NEXT: ret <4 x i64> %1
%1 = tail call <4 x i64> @llvm.x86.avx2.pslli.q(<4 x i64> %v, i32 15)
ret <4 x i64> %1
}
define <4 x i64> @avx2_pslli_q_64(<4 x i64> %v) nounwind readnone uwtable {
; CHECK-LABEL: @avx2_pslli_q_64
-; CHECK: ret <4 x i64> zeroinitializer
+; CHECK-NEXT: ret <4 x i64> zeroinitializer
%1 = tail call <4 x i64> @llvm.x86.avx2.pslli.q(<4 x i64> %v, i32 64)
ret <4 x i64> %1
}
@@ -277,92 +277,162 @@ define <4 x i64> @avx2_pslli_q_64(<4 x i64> %v) nounwind readnone uwtable { ; LSHR - Constant Vector
;
+define <8 x i16> @sse2_psrl_w_0(<8 x i16> %v) nounwind readnone uwtable {
+; CHECK-LABEL: @sse2_psrl_w_0
+; CHECK-NEXT: ret <8 x i16> %v
+ %1 = tail call <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16> %v, <8 x i16> zeroinitializer)
+ ret <8 x i16> %1
+}
+
define <8 x i16> @sse2_psrl_w_15(<8 x i16> %v) nounwind readnone uwtable {
; CHECK-LABEL: @sse2_psrl_w_15
-; CHECK: %1 = lshr <8 x i16> %v, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
-; CHECK: ret <8 x i16> %1
+; CHECK-NEXT: %1 = lshr <8 x i16> %v, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
+; CHECK-NEXT: ret <8 x i16> %1
%1 = tail call <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16> %v, <8 x i16> <i16 15, i16 0, i16 0, i16 0, i16 9999, i16 9999, i16 9999, i16 9999>)
ret <8 x i16> %1
}
+define <8 x i16> @sse2_psrl_w_15_splat(<8 x i16> %v) nounwind readnone uwtable {
+; CHECK-LABEL: @sse2_psrl_w_15_splat
+; CHECK-NEXT: ret <8 x i16> zeroinitializer
+ %1 = tail call <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16> %v, <8 x i16> <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>)
+ ret <8 x i16> %1
+}
+
define <8 x i16> @sse2_psrl_w_64(<8 x i16> %v) nounwind readnone uwtable {
; CHECK-LABEL: @sse2_psrl_w_64
-; CHECK: ret <8 x i16> zeroinitializer
+; CHECK-NEXT: ret <8 x i16> zeroinitializer
%1 = tail call <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16> %v, <8 x i16> <i16 64, i16 0, i16 0, i16 0, i16 9999, i16 9999, i16 9999, i16 9999>)
ret <8 x i16> %1
}
+define <4 x i32> @sse2_psrl_d_0(<4 x i32> %v) nounwind readnone uwtable {
+; CHECK-LABEL: @sse2_psrl_d_0
+; CHECK-NEXT: ret <4 x i32> %v
+ %1 = tail call <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32> %v, <4 x i32> zeroinitializer)
+ ret <4 x i32> %1
+}
+
define <4 x i32> @sse2_psrl_d_15(<4 x i32> %v) nounwind readnone uwtable {
; CHECK-LABEL: @sse2_psrl_d_15
-; CHECK: %1 = lshr <4 x i32> %v, <i32 15, i32 15, i32 15, i32 15>
-; CHECK: ret <4 x i32> %1
+; CHECK-NEXT: %1 = lshr <4 x i32> %v, <i32 15, i32 15, i32 15, i32 15>
+; CHECK-NEXT: ret <4 x i32> %1
%1 = tail call <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32> %v, <4 x i32> <i32 15, i32 0, i32 9999, i32 9999>)
ret <4 x i32> %1
}
+define <4 x i32> @sse2_psrl_d_15_splat(<4 x i32> %v) nounwind readnone uwtable {
+; CHECK-LABEL: @sse2_psrl_d_15_splat
+; CHECK-NEXT: ret <4 x i32> zeroinitializer
+ %1 = tail call <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32> %v, <4 x i32> <i32 15, i32 15, i32 15, i32 15>)
+ ret <4 x i32> %1
+}
+
define <4 x i32> @sse2_psrl_d_64(<4 x i32> %v) nounwind readnone uwtable {
; CHECK-LABEL: @sse2_psrl_d_64
-; CHECK: ret <4 x i32> zeroinitializer
+; CHECK-NEXT: ret <4 x i32> zeroinitializer
%1 = tail call <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32> %v, <4 x i32> <i32 64, i32 0, i32 9999, i32 9999>)
ret <4 x i32> %1
}
+define <2 x i64> @sse2_psrl_q_0(<2 x i64> %v) nounwind readnone uwtable {
+; CHECK-LABEL: @sse2_psrl_q_0
+; CHECK-NEXT: ret <2 x i64> %v
+ %1 = tail call <2 x i64> @llvm.x86.sse2.psrl.q(<2 x i64> %v, <2 x i64> zeroinitializer)
+ ret <2 x i64> %1
+}
+
define <2 x i64> @sse2_psrl_q_15(<2 x i64> %v) nounwind readnone uwtable {
; CHECK-LABEL: @sse2_psrl_q_15
-; CHECK: %1 = lshr <2 x i64> %v, <i64 15, i64 15>
-; CHECK: ret <2 x i64> %1
+; CHECK-NEXT: %1 = lshr <2 x i64> %v, <i64 15, i64 15>
+; CHECK-NEXT: ret <2 x i64> %1
%1 = tail call <2 x i64> @llvm.x86.sse2.psrl.q(<2 x i64> %v, <2 x i64> <i64 15, i64 9999>)
ret <2 x i64> %1
}
define <2 x i64> @sse2_psrl_q_64(<2 x i64> %v) nounwind readnone uwtable {
; CHECK-LABEL: @sse2_psrl_q_64
-; CHECK: ret <2 x i64> zeroinitializer
+; CHECK-NEXT: ret <2 x i64> zeroinitializer
%1 = tail call <2 x i64> @llvm.x86.sse2.psrl.q(<2 x i64> %v, <2 x i64> <i64 64, i64 9999>)
ret <2 x i64> %1
}
+define <16 x i16> @avx2_psrl_w_0(<16 x i16> %v) nounwind readnone uwtable {
+; CHECK-LABEL: @avx2_psrl_w_0
+; CHECK-NEXT: ret <16 x i16> %v
+ %1 = tail call <16 x i16> @llvm.x86.avx2.psrl.w(<16 x i16> %v, <8 x i16> zeroinitializer)
+ ret <16 x i16> %1
+}
+
define <16 x i16> @avx2_psrl_w_15(<16 x i16> %v) nounwind readnone uwtable {
; CHECK-LABEL: @avx2_psrl_w_15
-; CHECK: %1 = lshr <16 x i16> %v, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
-; CHECK: ret <16 x i16> %1
+; CHECK-NEXT: %1 = lshr <16 x i16> %v, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
+; CHECK-NEXT: ret <16 x i16> %1
%1 = tail call <16 x i16> @llvm.x86.avx2.psrl.w(<16 x i16> %v, <8 x i16> <i16 15, i16 0, i16 0, i16 0, i16 9999, i16 9999, i16 9999, i16 9999>)
ret <16 x i16> %1
}
+define <16 x i16> @avx2_psrl_w_15_splat(<16 x i16> %v) nounwind readnone uwtable {
+; CHECK-LABEL: @avx2_psrl_w_15_splat
+; CHECK-NEXT: ret <16 x i16> zeroinitializer
+ %1 = tail call <16 x i16> @llvm.x86.avx2.psrl.w(<16 x i16> %v, <8 x i16> <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>)
+ ret <16 x i16> %1
+}
+
define <16 x i16> @avx2_psrl_w_64(<16 x i16> %v) nounwind readnone uwtable {
; CHECK-LABEL: @avx2_psrl_w_64
-; CHECK: ret <16 x i16> zeroinitializer
+; CHECK-NEXT: ret <16 x i16> zeroinitializer
%1 = tail call <16 x i16> @llvm.x86.avx2.psrl.w(<16 x i16> %v, <8 x i16> <i16 64, i16 0, i16 0, i16 0, i16 9999, i16 9999, i16 9999, i16 9999>)
ret <16 x i16> %1
}
+define <8 x i32> @avx2_psrl_d_0(<8 x i32> %v) nounwind readnone uwtable {
+; CHECK-LABEL: @avx2_psrl_d_0
+; CHECK-NEXT: ret <8 x i32> %v
+ %1 = tail call <8 x i32> @llvm.x86.avx2.psrl.d(<8 x i32> %v, <4 x i32> zeroinitializer)
+ ret <8 x i32> %1
+}
+
define <8 x i32> @avx2_psrl_d_15(<8 x i32> %v) nounwind readnone uwtable {
; CHECK-LABEL: @avx2_psrl_d_15
-; CHECK: %1 = lshr <8 x i32> %v, <i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15>
-; CHECK: ret <8 x i32> %1
+; CHECK-NEXT: %1 = lshr <8 x i32> %v, <i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15>
+; CHECK-NEXT: ret <8 x i32> %1
%1 = tail call <8 x i32> @llvm.x86.avx2.psrl.d(<8 x i32> %v, <4 x i32> <i32 15, i32 0, i32 9999, i32 9999>)
ret <8 x i32> %1
}
+define <8 x i32> @avx2_psrl_d_15_splat(<8 x i32> %v) nounwind readnone uwtable {
+; CHECK-LABEL: @avx2_psrl_d_15_splat
+; CHECK-NEXT: ret <8 x i32> zeroinitializer
+ %1 = tail call <8 x i32> @llvm.x86.avx2.psrl.d(<8 x i32> %v, <4 x i32> <i32 15, i32 15, i32 15, i32 15>)
+ ret <8 x i32> %1
+}
+
define <8 x i32> @avx2_psrl_d_64(<8 x i32> %v) nounwind readnone uwtable {
; CHECK-LABEL: @avx2_psrl_d_64
-; CHECK: ret <8 x i32> zeroinitializer
+; CHECK-NEXT: ret <8 x i32> zeroinitializer
%1 = tail call <8 x i32> @llvm.x86.avx2.psrl.d(<8 x i32> %v, <4 x i32> <i32 64, i32 0, i32 9999, i32 9999>)
ret <8 x i32> %1
}
+define <4 x i64> @avx2_psrl_q_0(<4 x i64> %v) nounwind readnone uwtable {
+; CHECK-LABEL: @avx2_psrl_q_0
+; CHECK-NEXT: ret <4 x i64> %v
+ %1 = tail call <4 x i64> @llvm.x86.avx2.psrl.q(<4 x i64> %v, <2 x i64> zeroinitializer)
+ ret <4 x i64> %1
+}
+
define <4 x i64> @avx2_psrl_q_15(<4 x i64> %v) nounwind readnone uwtable {
; CHECK-LABEL: @avx2_psrl_q_15
-; CHECK: %1 = lshr <4 x i64> %v, <i64 15, i64 15, i64 15, i64 15>
-; CHECK: ret <4 x i64> %1
+; CHECK-NEXT: %1 = lshr <4 x i64> %v, <i64 15, i64 15, i64 15, i64 15>
+; CHECK-NEXT: ret <4 x i64> %1
%1 = tail call <4 x i64> @llvm.x86.avx2.psrl.q(<4 x i64> %v, <2 x i64> <i64 15, i64 9999>)
ret <4 x i64> %1
}
define <4 x i64> @avx2_psrl_q_64(<4 x i64> %v) nounwind readnone uwtable {
; CHECK-LABEL: @avx2_psrl_q_64
-; CHECK: ret <4 x i64> zeroinitializer
+; CHECK-NEXT: ret <4 x i64> zeroinitializer
%1 = tail call <4 x i64> @llvm.x86.avx2.psrl.q(<4 x i64> %v, <2 x i64> <i64 64, i64 9999>)
ret <4 x i64> %1
}
@@ -371,92 +441,162 @@ define <4 x i64> @avx2_psrl_q_64(<4 x i64> %v) nounwind readnone uwtable { ; SHL - Constant Vector
;
+define <8 x i16> @sse2_psll_w_0(<8 x i16> %v) nounwind readnone uwtable {
+; CHECK-LABEL: @sse2_psll_w_0
+; CHECK-NEXT: ret <8 x i16> %v
+ %1 = tail call <8 x i16> @llvm.x86.sse2.psll.w(<8 x i16> %v, <8 x i16> zeroinitializer)
+ ret <8 x i16> %1
+}
+
define <8 x i16> @sse2_psll_w_15(<8 x i16> %v) nounwind readnone uwtable {
; CHECK-LABEL: @sse2_psll_w_15
-; CHECK: %1 = shl <8 x i16> %v, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
-; CHECK: ret <8 x i16> %1
+; CHECK-NEXT: %1 = shl <8 x i16> %v, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
+; CHECK-NEXT: ret <8 x i16> %1
%1 = tail call <8 x i16> @llvm.x86.sse2.psll.w(<8 x i16> %v, <8 x i16> <i16 15, i16 0, i16 0, i16 0, i16 9999, i16 9999, i16 9999, i16 9999>)
ret <8 x i16> %1
}
+define <8 x i16> @sse2_psll_w_15_splat(<8 x i16> %v) nounwind readnone uwtable {
+; CHECK-LABEL: @sse2_psll_w_15_splat
+; CHECK-NEXT: ret <8 x i16> zeroinitializer
+ %1 = tail call <8 x i16> @llvm.x86.sse2.psll.w(<8 x i16> %v, <8 x i16> <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>)
+ ret <8 x i16> %1
+}
+
define <8 x i16> @sse2_psll_w_64(<8 x i16> %v) nounwind readnone uwtable {
; CHECK-LABEL: @sse2_psll_w_64
-; CHECK: ret <8 x i16> zeroinitializer
+; CHECK-NEXT: ret <8 x i16> zeroinitializer
%1 = tail call <8 x i16> @llvm.x86.sse2.psll.w(<8 x i16> %v, <8 x i16> <i16 64, i16 0, i16 0, i16 0, i16 9999, i16 9999, i16 9999, i16 9999>)
ret <8 x i16> %1
}
+define <4 x i32> @sse2_psll_d_0(<4 x i32> %v) nounwind readnone uwtable {
+; CHECK-LABEL: @sse2_psll_d_0
+; CHECK-NEXT: ret <4 x i32> %v
+ %1 = tail call <4 x i32> @llvm.x86.sse2.psll.d(<4 x i32> %v, <4 x i32> zeroinitializer)
+ ret <4 x i32> %1
+}
+
define <4 x i32> @sse2_psll_d_15(<4 x i32> %v) nounwind readnone uwtable {
; CHECK-LABEL: @sse2_psll_d_15
-; CHECK: %1 = shl <4 x i32> %v, <i32 15, i32 15, i32 15, i32 15>
-; CHECK: ret <4 x i32> %1
+; CHECK-NEXT: %1 = shl <4 x i32> %v, <i32 15, i32 15, i32 15, i32 15>
+; CHECK-NEXT: ret <4 x i32> %1
%1 = tail call <4 x i32> @llvm.x86.sse2.psll.d(<4 x i32> %v, <4 x i32> <i32 15, i32 0, i32 9999, i32 9999>)
ret <4 x i32> %1
}
+define <4 x i32> @sse2_psll_d_15_splat(<4 x i32> %v) nounwind readnone uwtable {
+; CHECK-LABEL: @sse2_psll_d_15_splat
+; CHECK-NEXT: ret <4 x i32> zeroinitializer
+ %1 = tail call <4 x i32> @llvm.x86.sse2.psll.d(<4 x i32> %v, <4 x i32> <i32 15, i32 15, i32 15, i32 15>)
+ ret <4 x i32> %1
+}
+
define <4 x i32> @sse2_psll_d_64(<4 x i32> %v) nounwind readnone uwtable {
; CHECK-LABEL: @sse2_psll_d_64
-; CHECK: ret <4 x i32> zeroinitializer
+; CHECK-NEXT: ret <4 x i32> zeroinitializer
%1 = tail call <4 x i32> @llvm.x86.sse2.psll.d(<4 x i32> %v, <4 x i32> <i32 64, i32 0, i32 9999, i32 9999>)
ret <4 x i32> %1
}
+define <2 x i64> @sse2_psll_q_0(<2 x i64> %v) nounwind readnone uwtable {
+; CHECK-LABEL: @sse2_psll_q_0
+; CHECK-NEXT: ret <2 x i64> %v
+ %1 = tail call <2 x i64> @llvm.x86.sse2.psll.q(<2 x i64> %v, <2 x i64> zeroinitializer)
+ ret <2 x i64> %1
+}
+
define <2 x i64> @sse2_psll_q_15(<2 x i64> %v) nounwind readnone uwtable {
; CHECK-LABEL: @sse2_psll_q_15
-; CHECK: %1 = shl <2 x i64> %v, <i64 15, i64 15>
-; CHECK: ret <2 x i64> %1
+; CHECK-NEXT: %1 = shl <2 x i64> %v, <i64 15, i64 15>
+; CHECK-NEXT: ret <2 x i64> %1
%1 = tail call <2 x i64> @llvm.x86.sse2.psll.q(<2 x i64> %v, <2 x i64> <i64 15, i64 9999>)
ret <2 x i64> %1
}
define <2 x i64> @sse2_psll_q_64(<2 x i64> %v) nounwind readnone uwtable {
; CHECK-LABEL: @sse2_psll_q_64
-; CHECK: ret <2 x i64> zeroinitializer
+; CHECK-NEXT: ret <2 x i64> zeroinitializer
%1 = tail call <2 x i64> @llvm.x86.sse2.psll.q(<2 x i64> %v, <2 x i64> <i64 64, i64 9999>)
ret <2 x i64> %1
}
+define <16 x i16> @avx2_psll_w_0(<16 x i16> %v) nounwind readnone uwtable {
+; CHECK-LABEL: @avx2_psll_w_0
+; CHECK-NEXT: ret <16 x i16> %v
+ %1 = tail call <16 x i16> @llvm.x86.avx2.psll.w(<16 x i16> %v, <8 x i16> zeroinitializer)
+ ret <16 x i16> %1
+}
+
define <16 x i16> @avx2_psll_w_15(<16 x i16> %v) nounwind readnone uwtable {
; CHECK-LABEL: @avx2_psll_w_15
-; CHECK: %1 = shl <16 x i16> %v, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
-; CHECK: ret <16 x i16> %1
+; CHECK-NEXT: %1 = shl <16 x i16> %v, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
+; CHECK-NEXT: ret <16 x i16> %1
%1 = tail call <16 x i16> @llvm.x86.avx2.psll.w(<16 x i16> %v, <8 x i16> <i16 15, i16 0, i16 0, i16 0, i16 9999, i16 9999, i16 9999, i16 9999>)
ret <16 x i16> %1
}
+define <16 x i16> @avx2_psll_w_15_splat(<16 x i16> %v) nounwind readnone uwtable {
+; CHECK-LABEL: @avx2_psll_w_15_splat
+; CHECK-NEXT: ret <16 x i16> zeroinitializer
+ %1 = tail call <16 x i16> @llvm.x86.avx2.psll.w(<16 x i16> %v, <8 x i16> <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>)
+ ret <16 x i16> %1
+}
+
define <16 x i16> @avx2_psll_w_64(<16 x i16> %v) nounwind readnone uwtable {
; CHECK-LABEL: @avx2_psll_w_64
-; CHECK: ret <16 x i16> zeroinitializer
+; CHECK-NEXT: ret <16 x i16> zeroinitializer
%1 = tail call <16 x i16> @llvm.x86.avx2.psll.w(<16 x i16> %v, <8 x i16> <i16 64, i16 0, i16 0, i16 0, i16 9999, i16 9999, i16 9999, i16 9999>)
ret <16 x i16> %1
}
+define <8 x i32> @avx2_psll_d_0(<8 x i32> %v) nounwind readnone uwtable {
+; CHECK-LABEL: @avx2_psll_d_0
+; CHECK-NEXT: ret <8 x i32> %v
+ %1 = tail call <8 x i32> @llvm.x86.avx2.psll.d(<8 x i32> %v, <4 x i32> zeroinitializer)
+ ret <8 x i32> %1
+}
+
define <8 x i32> @avx2_psll_d_15(<8 x i32> %v) nounwind readnone uwtable {
; CHECK-LABEL: @avx2_psll_d_15
-; CHECK: %1 = shl <8 x i32> %v, <i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15>
-; CHECK: ret <8 x i32> %1
+; CHECK-NEXT: %1 = shl <8 x i32> %v, <i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15>
+; CHECK-NEXT: ret <8 x i32> %1
%1 = tail call <8 x i32> @llvm.x86.avx2.psll.d(<8 x i32> %v, <4 x i32> <i32 15, i32 0, i32 9999, i32 9999>)
ret <8 x i32> %1
}
+define <8 x i32> @avx2_psll_d_15_splat(<8 x i32> %v) nounwind readnone uwtable {
+; CHECK-LABEL: @avx2_psll_d_15_splat
+; CHECK-NEXT: ret <8 x i32> zeroinitializer
+ %1 = tail call <8 x i32> @llvm.x86.avx2.psll.d(<8 x i32> %v, <4 x i32> <i32 15, i32 15, i32 15, i32 15>)
+ ret <8 x i32> %1
+}
+
define <8 x i32> @avx2_psll_d_64(<8 x i32> %v) nounwind readnone uwtable {
; CHECK-LABEL: @avx2_psll_d_64
-; CHECK: ret <8 x i32> zeroinitializer
+; CHECK-NEXT: ret <8 x i32> zeroinitializer
%1 = tail call <8 x i32> @llvm.x86.avx2.psll.d(<8 x i32> %v, <4 x i32> <i32 64, i32 0, i32 9999, i32 9999>)
ret <8 x i32> %1
}
+define <4 x i64> @avx2_psll_q_0(<4 x i64> %v) nounwind readnone uwtable {
+; CHECK-LABEL: @avx2_psll_q_0
+; CHECK-NEXT: ret <4 x i64> %v
+ %1 = tail call <4 x i64> @llvm.x86.avx2.psll.q(<4 x i64> %v, <2 x i64> zeroinitializer)
+ ret <4 x i64> %1
+}
+
define <4 x i64> @avx2_psll_q_15(<4 x i64> %v) nounwind readnone uwtable {
; CHECK-LABEL: @avx2_psll_q_15
-; CHECK: %1 = shl <4 x i64> %v, <i64 15, i64 15, i64 15, i64 15>
-; CHECK: ret <4 x i64> %1
+; CHECK-NEXT: %1 = shl <4 x i64> %v, <i64 15, i64 15, i64 15, i64 15>
+; CHECK-NEXT: ret <4 x i64> %1
%1 = tail call <4 x i64> @llvm.x86.avx2.psll.q(<4 x i64> %v, <2 x i64> <i64 15, i64 9999>)
ret <4 x i64> %1
}
define <4 x i64> @avx2_psll_q_64(<4 x i64> %v) nounwind readnone uwtable {
; CHECK-LABEL: @avx2_psll_q_64
-; CHECK: ret <4 x i64> zeroinitializer
+; CHECK-NEXT: ret <4 x i64> zeroinitializer
%1 = tail call <4 x i64> @llvm.x86.avx2.psll.q(<4 x i64> %v, <2 x i64> <i64 64, i64 9999>)
ret <4 x i64> %1
}
@@ -660,6 +800,7 @@ declare <8 x i16> @llvm.x86.sse2.pslli.w(<8 x i16>, i32) #1 declare <2 x i64> @llvm.x86.sse2.psll.q(<2 x i64>, <2 x i64>) #1
declare <4 x i32> @llvm.x86.sse2.psll.d(<4 x i32>, <4 x i32>) #1
declare <8 x i16> @llvm.x86.sse2.psll.w(<8 x i16>, <8 x i16>) #1
+
declare <4 x i64> @llvm.x86.avx2.psrli.q(<4 x i64>, i32) #1
declare <8 x i32> @llvm.x86.avx2.psrli.d(<8 x i32>, i32) #1
declare <16 x i16> @llvm.x86.avx2.psrli.w(<16 x i16>, i32) #1
|