summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorSimon Pilgrim <llvm-dev@redking.me.uk>2016-05-25 08:59:18 +0000
committerSimon Pilgrim <llvm-dev@redking.me.uk>2016-05-25 08:59:18 +0000
commit4298d06d0f2595fe192985eb5fbfd890e4146a2c (patch)
tree86c2d7cef6337ca02a97bb6bc1716eaec72f27a6
parent12e322a8cfb1e043ea8d22a226de2f953b5c3f2a (diff)
downloadbcm5719-llvm-4298d06d0f2595fe192985eb5fbfd890e4146a2c.tar.gz
bcm5719-llvm-4298d06d0f2595fe192985eb5fbfd890e4146a2c.zip
[X86][SSE] Replace (V)CVTDQ2PD(Y) and (V)CVTPS2PD(Y) lossless conversion intrinsics with generic IR
Follow-up to the D20528 clang patch: this removes the (V)CVTDQ2PD(Y) and (V)CVTPS2PD(Y) LLVM intrinsics and auto-upgrades calls to them to generic sitofp/fpext IR instead. Differential Revision: http://reviews.llvm.org/D20568 llvm-svn: 270678
-rw-r--r--llvm/include/llvm/IR/IntrinsicsX86.td8
-rw-r--r--llvm/lib/IR/AutoUpgrade.cpp27
-rw-r--r--llvm/lib/Target/X86/X86InstrSSE.td44
-rw-r--r--llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp2
-rw-r--r--llvm/test/CodeGen/X86/avx-intrinsics-x86-upgrade.ll80
-rw-r--r--llvm/test/CodeGen/X86/avx-intrinsics-x86.ll76
-rw-r--r--llvm/test/CodeGen/X86/sse2-intrinsics-x86-upgrade.ll40
-rw-r--r--llvm/test/CodeGen/X86/sse2-intrinsics-x86.ll36
8 files changed, 170 insertions, 143 deletions
diff --git a/llvm/include/llvm/IR/IntrinsicsX86.td b/llvm/include/llvm/IR/IntrinsicsX86.td
index e118beb87a1..6ebf02d8997 100644
--- a/llvm/include/llvm/IR/IntrinsicsX86.td
+++ b/llvm/include/llvm/IR/IntrinsicsX86.td
@@ -485,8 +485,6 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
// Conversion ops
let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
- def int_x86_sse2_cvtdq2pd : GCCBuiltin<"__builtin_ia32_cvtdq2pd">,
- Intrinsic<[llvm_v2f64_ty], [llvm_v4i32_ty], [IntrNoMem]>;
def int_x86_sse2_cvtdq2ps : GCCBuiltin<"__builtin_ia32_cvtdq2ps">,
Intrinsic<[llvm_v4f32_ty], [llvm_v4i32_ty], [IntrNoMem]>;
def int_x86_sse2_cvtpd2dq : GCCBuiltin<"__builtin_ia32_cvtpd2dq">,
@@ -499,8 +497,6 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
Intrinsic<[llvm_v4i32_ty], [llvm_v4f32_ty], [IntrNoMem]>;
def int_x86_sse2_cvttps2dq : GCCBuiltin<"__builtin_ia32_cvttps2dq">,
Intrinsic<[llvm_v4i32_ty], [llvm_v4f32_ty], [IntrNoMem]>;
- def int_x86_sse2_cvtps2pd : GCCBuiltin<"__builtin_ia32_cvtps2pd">,
- Intrinsic<[llvm_v2f64_ty], [llvm_v4f32_ty], [IntrNoMem]>;
def int_x86_sse2_cvtsd2si : GCCBuiltin<"__builtin_ia32_cvtsd2si">,
Intrinsic<[llvm_i32_ty], [llvm_v2f64_ty], [IntrNoMem]>;
def int_x86_sse2_cvtsd2si64 : GCCBuiltin<"__builtin_ia32_cvtsd2si64">,
@@ -1762,16 +1758,12 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
// Vector convert
let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
- def int_x86_avx_cvtdq2_pd_256 : GCCBuiltin<"__builtin_ia32_cvtdq2pd256">,
- Intrinsic<[llvm_v4f64_ty], [llvm_v4i32_ty], [IntrNoMem]>;
def int_x86_avx_cvtdq2_ps_256 : GCCBuiltin<"__builtin_ia32_cvtdq2ps256">,
Intrinsic<[llvm_v8f32_ty], [llvm_v8i32_ty], [IntrNoMem]>;
def int_x86_avx_cvt_pd2_ps_256 : GCCBuiltin<"__builtin_ia32_cvtpd2ps256">,
Intrinsic<[llvm_v4f32_ty], [llvm_v4f64_ty], [IntrNoMem]>;
def int_x86_avx_cvt_ps2dq_256 : GCCBuiltin<"__builtin_ia32_cvtps2dq256">,
Intrinsic<[llvm_v8i32_ty], [llvm_v8f32_ty], [IntrNoMem]>;
- def int_x86_avx_cvt_ps2_pd_256 : GCCBuiltin<"__builtin_ia32_cvtps2pd256">,
- Intrinsic<[llvm_v4f64_ty], [llvm_v4f32_ty], [IntrNoMem]>;
def int_x86_avx_cvtt_pd2dq_256 : GCCBuiltin<"__builtin_ia32_cvttpd2dq256">,
Intrinsic<[llvm_v4i32_ty], [llvm_v4f64_ty], [IntrNoMem]>;
def int_x86_avx_cvt_pd2dq_256 : GCCBuiltin<"__builtin_ia32_cvtpd2dq256">,
diff --git a/llvm/lib/IR/AutoUpgrade.cpp b/llvm/lib/IR/AutoUpgrade.cpp
index ff7588e7fcf..aff89775a5a 100644
--- a/llvm/lib/IR/AutoUpgrade.cpp
+++ b/llvm/lib/IR/AutoUpgrade.cpp
@@ -178,6 +178,10 @@ static bool UpgradeIntrinsicFunction1(Function *F, Function *&NewFn) {
Name.startswith("x86.avx2.pbroadcast") ||
Name.startswith("x86.avx.vpermil.") ||
Name.startswith("x86.sse41.pmovsx") ||
+ Name == "x86.sse2.cvtdq2pd" ||
+ Name == "x86.sse2.cvtps2pd" ||
+ Name == "x86.avx.cvtdq2.pd.256" ||
+ Name == "x86.avx.cvt.ps2.pd.256" ||
Name == "x86.avx.vinsertf128.pd.256" ||
Name == "x86.avx.vinsertf128.ps.256" ||
Name == "x86.avx.vinsertf128.si.256" ||
@@ -397,6 +401,29 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
"pcmpgt");
// need to sign extend since icmp returns vector of i1
Rep = Builder.CreateSExt(Rep, CI->getType(), "");
+ } else if (Name == "llvm.x86.sse2.cvtdq2pd" ||
+ Name == "llvm.x86.sse2.cvtps2pd" ||
+ Name == "llvm.x86.avx.cvtdq2.pd.256" ||
+ Name == "llvm.x86.avx.cvt.ps2.pd.256") {
+ // Lossless i32/float to double conversion.
+ // Extract the bottom elements if necessary and convert to double vector.
+ Value *Src = CI->getArgOperand(0);
+ VectorType *SrcTy = cast<VectorType>(Src->getType());
+ VectorType *DstTy = cast<VectorType>(CI->getType());
+ Rep = CI->getArgOperand(0);
+
+ unsigned NumDstElts = DstTy->getNumElements();
+ if (NumDstElts < SrcTy->getNumElements()) {
+ assert(NumDstElts == 2 && "Unexpected vector size");
+ const int ShuffleMask[2] = { 0, 1 };
+ Rep = Builder.CreateShuffleVector(Rep, UndefValue::get(SrcTy), ShuffleMask);
+ }
+
+ bool Int2Double = (StringRef::npos != Name.find("cvtdq2"));
+ if (Int2Double)
+ Rep = Builder.CreateSIToFP(Rep, DstTy, "cvtdq2pd");
+ else
+ Rep = Builder.CreateFPExt(Rep, DstTy, "cvtps2pd");
} else if (Name == "llvm.x86.avx.movnt.dq.256" ||
Name == "llvm.x86.avx.movnt.ps.256" ||
Name == "llvm.x86.avx.movnt.pd.256") {
diff --git a/llvm/lib/Target/X86/X86InstrSSE.td b/llvm/lib/Target/X86/X86InstrSSE.td
index 71565df351c..f5a845b08d9 100644
--- a/llvm/lib/Target/X86/X86InstrSSE.td
+++ b/llvm/lib/Target/X86/X86InstrSSE.td
@@ -2163,30 +2163,24 @@ def CVTTPD2DQrm : PDI<0xE6, MRMSrcMem, (outs VR128:$dst),(ins f128mem:$src),
let Predicates = [HasAVX] in {
// SSE2 instructions without OpSize prefix
def VCVTPS2PDrr : I<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
- "vcvtps2pd\t{$src, $dst|$dst, $src}",
- [(set VR128:$dst, (int_x86_sse2_cvtps2pd VR128:$src))],
- IIC_SSE_CVT_PD_RR>, PS, VEX, Sched<[WriteCvtF2F]>;
+ "vcvtps2pd\t{$src, $dst|$dst, $src}",
+ [], IIC_SSE_CVT_PD_RR>, PS, VEX, Sched<[WriteCvtF2F]>;
def VCVTPS2PDrm : I<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f64mem:$src),
"vcvtps2pd\t{$src, $dst|$dst, $src}",
[(set VR128:$dst, (v2f64 (extloadv2f32 addr:$src)))],
IIC_SSE_CVT_PD_RM>, PS, VEX, Sched<[WriteCvtF2FLd]>;
def VCVTPS2PDYrr : I<0x5A, MRMSrcReg, (outs VR256:$dst), (ins VR128:$src),
"vcvtps2pd\t{$src, $dst|$dst, $src}",
- [(set VR256:$dst,
- (int_x86_avx_cvt_ps2_pd_256 VR128:$src))],
- IIC_SSE_CVT_PD_RR>, PS, VEX, VEX_L, Sched<[WriteCvtF2F]>;
+ [], IIC_SSE_CVT_PD_RR>, PS, VEX, VEX_L, Sched<[WriteCvtF2F]>;
def VCVTPS2PDYrm : I<0x5A, MRMSrcMem, (outs VR256:$dst), (ins f128mem:$src),
"vcvtps2pd\t{$src, $dst|$dst, $src}",
- [(set VR256:$dst,
- (int_x86_avx_cvt_ps2_pd_256 (loadv4f32 addr:$src)))],
- IIC_SSE_CVT_PD_RM>, PS, VEX, VEX_L, Sched<[WriteCvtF2FLd]>;
+ [], IIC_SSE_CVT_PD_RM>, PS, VEX, VEX_L, Sched<[WriteCvtF2FLd]>;
}
let Predicates = [UseSSE2] in {
def CVTPS2PDrr : I<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
- "cvtps2pd\t{$src, $dst|$dst, $src}",
- [(set VR128:$dst, (int_x86_sse2_cvtps2pd VR128:$src))],
- IIC_SSE_CVT_PD_RR>, PS, Sched<[WriteCvtF2F]>;
+ "cvtps2pd\t{$src, $dst|$dst, $src}",
+ [], IIC_SSE_CVT_PD_RR>, PS, Sched<[WriteCvtF2F]>;
def CVTPS2PDrm : I<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f64mem:$src),
"cvtps2pd\t{$src, $dst|$dst, $src}",
[(set VR128:$dst, (v2f64 (extloadv2f32 addr:$src)))],
@@ -2197,24 +2191,17 @@ def CVTPS2PDrm : I<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f64mem:$src),
let Predicates = [HasAVX] in {
let hasSideEffects = 0, mayLoad = 1 in
def VCVTDQ2PDrm : S2SI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src),
- "vcvtdq2pd\t{$src, $dst|$dst, $src}",
- []>, VEX, Sched<[WriteCvtI2FLd]>;
+ "vcvtdq2pd\t{$src, $dst|$dst, $src}",
+ []>, VEX, Sched<[WriteCvtI2FLd]>;
def VCVTDQ2PDrr : S2SI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
- "vcvtdq2pd\t{$src, $dst|$dst, $src}",
- [(set VR128:$dst,
- (int_x86_sse2_cvtdq2pd VR128:$src))]>, VEX,
- Sched<[WriteCvtI2F]>;
+ "vcvtdq2pd\t{$src, $dst|$dst, $src}",
+ []>, VEX, Sched<[WriteCvtI2F]>;
def VCVTDQ2PDYrm : S2SI<0xE6, MRMSrcMem, (outs VR256:$dst), (ins i128mem:$src),
- "vcvtdq2pd\t{$src, $dst|$dst, $src}",
- [(set VR256:$dst,
- (int_x86_avx_cvtdq2_pd_256
- (bitconvert (loadv2i64 addr:$src))))]>, VEX, VEX_L,
- Sched<[WriteCvtI2FLd]>;
+ "vcvtdq2pd\t{$src, $dst|$dst, $src}",
+ []>, VEX, VEX_L, Sched<[WriteCvtI2FLd]>;
def VCVTDQ2PDYrr : S2SI<0xE6, MRMSrcReg, (outs VR256:$dst), (ins VR128:$src),
- "vcvtdq2pd\t{$src, $dst|$dst, $src}",
- [(set VR256:$dst,
- (int_x86_avx_cvtdq2_pd_256 VR128:$src))]>, VEX, VEX_L,
- Sched<[WriteCvtI2F]>;
+ "vcvtdq2pd\t{$src, $dst|$dst, $src}",
+ []>, VEX, VEX_L, Sched<[WriteCvtI2F]>;
}
let hasSideEffects = 0, mayLoad = 1 in
@@ -2222,8 +2209,7 @@ def CVTDQ2PDrm : S2SI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src),
"cvtdq2pd\t{$src, $dst|$dst, $src}", [],
IIC_SSE_CVT_PD_RR>, Sched<[WriteCvtI2FLd]>;
def CVTDQ2PDrr : S2SI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
- "cvtdq2pd\t{$src, $dst|$dst, $src}",
- [(set VR128:$dst, (int_x86_sse2_cvtdq2pd VR128:$src))],
+ "cvtdq2pd\t{$src, $dst|$dst, $src}", [],
IIC_SSE_CVT_PD_RM>, Sched<[WriteCvtI2F]>;
// AVX register conversion intrinsics
diff --git a/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp b/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp
index 4e075cadfeb..fb6ec5feb53 100644
--- a/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp
+++ b/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp
@@ -2347,8 +2347,6 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
case llvm::Intrinsic::x86_sse_cvttss2si:
handleVectorConvertIntrinsic(I, 1);
break;
- case llvm::Intrinsic::x86_sse2_cvtdq2pd:
- case llvm::Intrinsic::x86_sse2_cvtps2pd:
case llvm::Intrinsic::x86_sse_cvtps2pi:
case llvm::Intrinsic::x86_sse_cvttps2pi:
handleVectorConvertIntrinsic(I, 2);
diff --git a/llvm/test/CodeGen/X86/avx-intrinsics-x86-upgrade.ll b/llvm/test/CodeGen/X86/avx-intrinsics-x86-upgrade.ll
index 05442b7b747..862e9378afe 100644
--- a/llvm/test/CodeGen/X86/avx-intrinsics-x86-upgrade.ll
+++ b/llvm/test/CodeGen/X86/avx-intrinsics-x86-upgrade.ll
@@ -245,3 +245,83 @@ define <2 x i64> @test_x86_sse41_pmovsxwq(<8 x i16> %a0) {
ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.sse41.pmovsxwq(<8 x i16>) nounwind readnone
+
+
+define <2 x double> @test_x86_sse2_cvtdq2pd(<4 x i32> %a0) {
+; AVX-LABEL: test_x86_sse2_cvtdq2pd:
+; AVX: ## BB#0:
+; AVX-NEXT: vcvtdq2pd %xmm0, %xmm0
+; AVX-NEXT: retl
+;
+; AVX512VL-LABEL: test_x86_sse2_cvtdq2pd:
+; AVX512VL: ## BB#0:
+; AVX512VL-NEXT: vcvtdq2pd %xmm0, %xmm0
+; AVX512VL-NEXT: retl
+; CHECK-LABEL: test_x86_sse2_cvtdq2pd:
+; CHECK: ## BB#0:
+; CHECK-NEXT: vcvtdq2pd %xmm0, %xmm0
+; CHECK-NEXT: retl
+ %res = call <2 x double> @llvm.x86.sse2.cvtdq2pd(<4 x i32> %a0) ; <<2 x double>> [#uses=1]
+ ret <2 x double> %res
+}
+declare <2 x double> @llvm.x86.sse2.cvtdq2pd(<4 x i32>) nounwind readnone
+
+
+define <4 x double> @test_x86_avx_cvtdq2_pd_256(<4 x i32> %a0) {
+; AVX-LABEL: test_x86_avx_cvtdq2_pd_256:
+; AVX: ## BB#0:
+; AVX-NEXT: vcvtdq2pd %xmm0, %ymm0
+; AVX-NEXT: retl
+;
+; AVX512VL-LABEL: test_x86_avx_cvtdq2_pd_256:
+; AVX512VL: ## BB#0:
+; AVX512VL-NEXT: vcvtdq2pd %xmm0, %ymm0
+; AVX512VL-NEXT: retl
+; CHECK-LABEL: test_x86_avx_cvtdq2_pd_256:
+; CHECK: ## BB#0:
+; CHECK-NEXT: vcvtdq2pd %xmm0, %ymm0
+; CHECK-NEXT: retl
+ %res = call <4 x double> @llvm.x86.avx.cvtdq2.pd.256(<4 x i32> %a0) ; <<4 x double>> [#uses=1]
+ ret <4 x double> %res
+}
+declare <4 x double> @llvm.x86.avx.cvtdq2.pd.256(<4 x i32>) nounwind readnone
+
+
+define <2 x double> @test_x86_sse2_cvtps2pd(<4 x float> %a0) {
+; AVX-LABEL: test_x86_sse2_cvtps2pd:
+; AVX: ## BB#0:
+; AVX-NEXT: vcvtps2pd %xmm0, %xmm0
+; AVX-NEXT: retl
+;
+; AVX512VL-LABEL: test_x86_sse2_cvtps2pd:
+; AVX512VL: ## BB#0:
+; AVX512VL-NEXT: vcvtps2pd %xmm0, %xmm0
+; AVX512VL-NEXT: retl
+; CHECK-LABEL: test_x86_sse2_cvtps2pd:
+; CHECK: ## BB#0:
+; CHECK-NEXT: vcvtps2pd %xmm0, %xmm0
+; CHECK-NEXT: retl
+ %res = call <2 x double> @llvm.x86.sse2.cvtps2pd(<4 x float> %a0) ; <<2 x double>> [#uses=1]
+ ret <2 x double> %res
+}
+declare <2 x double> @llvm.x86.sse2.cvtps2pd(<4 x float>) nounwind readnone
+
+
+define <4 x double> @test_x86_avx_cvt_ps2_pd_256(<4 x float> %a0) {
+; AVX-LABEL: test_x86_avx_cvt_ps2_pd_256:
+; AVX: ## BB#0:
+; AVX-NEXT: vcvtps2pd %xmm0, %ymm0
+; AVX-NEXT: retl
+;
+; AVX512VL-LABEL: test_x86_avx_cvt_ps2_pd_256:
+; AVX512VL: ## BB#0:
+; AVX512VL-NEXT: vcvtps2pd %xmm0, %ymm0
+; AVX512VL-NEXT: retl
+; CHECK-LABEL: test_x86_avx_cvt_ps2_pd_256:
+; CHECK: ## BB#0:
+; CHECK-NEXT: vcvtps2pd %xmm0, %ymm0
+; CHECK-NEXT: retl
+ %res = call <4 x double> @llvm.x86.avx.cvt.ps2.pd.256(<4 x float> %a0) ; <<4 x double>> [#uses=1]
+ ret <4 x double> %res
+}
+declare <4 x double> @llvm.x86.avx.cvt.ps2.pd.256(<4 x float>) nounwind readnone
diff --git a/llvm/test/CodeGen/X86/avx-intrinsics-x86.ll b/llvm/test/CodeGen/X86/avx-intrinsics-x86.ll
index a70414f48ed..ac8be0fa2e2 100644
--- a/llvm/test/CodeGen/X86/avx-intrinsics-x86.ll
+++ b/llvm/test/CodeGen/X86/avx-intrinsics-x86.ll
@@ -274,22 +274,6 @@ define i32 @test_x86_sse2_comineq_sd(<2 x double> %a0, <2 x double> %a1) {
declare i32 @llvm.x86.sse2.comineq.sd(<2 x double>, <2 x double>) nounwind readnone
-define <2 x double> @test_x86_sse2_cvtdq2pd(<4 x i32> %a0) {
-; AVX-LABEL: test_x86_sse2_cvtdq2pd:
-; AVX: ## BB#0:
-; AVX-NEXT: vcvtdq2pd %xmm0, %xmm0
-; AVX-NEXT: retl
-;
-; AVX512VL-LABEL: test_x86_sse2_cvtdq2pd:
-; AVX512VL: ## BB#0:
-; AVX512VL-NEXT: vcvtdq2pd %xmm0, %xmm0
-; AVX512VL-NEXT: retl
- %res = call <2 x double> @llvm.x86.sse2.cvtdq2pd(<4 x i32> %a0) ; <<2 x double>> [#uses=1]
- ret <2 x double> %res
-}
-declare <2 x double> @llvm.x86.sse2.cvtdq2pd(<4 x i32>) nounwind readnone
-
-
define <4 x float> @test_x86_sse2_cvtdq2ps(<4 x i32> %a0) {
; AVX-LABEL: test_x86_sse2_cvtdq2ps:
; AVX: ## BB#0:
@@ -354,22 +338,6 @@ define <4 x i32> @test_x86_sse2_cvtps2dq(<4 x float> %a0) {
declare <4 x i32> @llvm.x86.sse2.cvtps2dq(<4 x float>) nounwind readnone
-define <2 x double> @test_x86_sse2_cvtps2pd(<4 x float> %a0) {
-; AVX-LABEL: test_x86_sse2_cvtps2pd:
-; AVX: ## BB#0:
-; AVX-NEXT: vcvtps2pd %xmm0, %xmm0
-; AVX-NEXT: retl
-;
-; AVX512VL-LABEL: test_x86_sse2_cvtps2pd:
-; AVX512VL: ## BB#0:
-; AVX512VL-NEXT: vcvtps2pd %xmm0, %xmm0
-; AVX512VL-NEXT: retl
- %res = call <2 x double> @llvm.x86.sse2.cvtps2pd(<4 x float> %a0) ; <<2 x double>> [#uses=1]
- ret <2 x double> %res
-}
-declare <2 x double> @llvm.x86.sse2.cvtps2pd(<4 x float>) nounwind readnone
-
-
define i32 @test_x86_sse2_cvtsd2si(<2 x double> %a0) {
; AVX-LABEL: test_x86_sse2_cvtsd2si:
; AVX: ## BB#0:
@@ -1258,14 +1226,14 @@ define void @test_x86_sse2_storeu_dq(i8* %a0, <16 x i8> %a1) {
; AVX-LABEL: test_x86_sse2_storeu_dq:
; AVX: ## BB#0:
; AVX-NEXT: movl {{[0-9]+}}(%esp), %eax
-; AVX-NEXT: vpaddb LCPI76_0, %xmm0, %xmm0
+; AVX-NEXT: vpaddb LCPI74_0, %xmm0, %xmm0
; AVX-NEXT: vmovdqu %xmm0, (%eax)
; AVX-NEXT: retl
;
; AVX512VL-LABEL: test_x86_sse2_storeu_dq:
; AVX512VL: ## BB#0:
; AVX512VL-NEXT: movl {{[0-9]+}}(%esp), %eax
-; AVX512VL-NEXT: vpaddb LCPI76_0, %xmm0, %xmm0
+; AVX512VL-NEXT: vpaddb LCPI74_0, %xmm0, %xmm0
; AVX512VL-NEXT: vmovdqu %xmm0, (%eax)
; AVX512VL-NEXT: retl
%a2 = add <16 x i8> %a1, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
@@ -3569,22 +3537,6 @@ define <4 x i32> @test_x86_avx_cvt_pd2dq_256(<4 x double> %a0) {
declare <4 x i32> @llvm.x86.avx.cvt.pd2dq.256(<4 x double>) nounwind readnone
-define <4 x double> @test_x86_avx_cvt_ps2_pd_256(<4 x float> %a0) {
-; AVX-LABEL: test_x86_avx_cvt_ps2_pd_256:
-; AVX: ## BB#0:
-; AVX-NEXT: vcvtps2pd %xmm0, %ymm0
-; AVX-NEXT: retl
-;
-; AVX512VL-LABEL: test_x86_avx_cvt_ps2_pd_256:
-; AVX512VL: ## BB#0:
-; AVX512VL-NEXT: vcvtps2pd %xmm0, %ymm0
-; AVX512VL-NEXT: retl
- %res = call <4 x double> @llvm.x86.avx.cvt.ps2.pd.256(<4 x float> %a0) ; <<4 x double>> [#uses=1]
- ret <4 x double> %res
-}
-declare <4 x double> @llvm.x86.avx.cvt.ps2.pd.256(<4 x float>) nounwind readnone
-
-
define <8 x i32> @test_x86_avx_cvt_ps2dq_256(<8 x float> %a0) {
; AVX-LABEL: test_x86_avx_cvt_ps2dq_256:
; AVX: ## BB#0:
@@ -3601,22 +3553,6 @@ define <8 x i32> @test_x86_avx_cvt_ps2dq_256(<8 x float> %a0) {
declare <8 x i32> @llvm.x86.avx.cvt.ps2dq.256(<8 x float>) nounwind readnone
-define <4 x double> @test_x86_avx_cvtdq2_pd_256(<4 x i32> %a0) {
-; AVX-LABEL: test_x86_avx_cvtdq2_pd_256:
-; AVX: ## BB#0:
-; AVX-NEXT: vcvtdq2pd %xmm0, %ymm0
-; AVX-NEXT: retl
-;
-; AVX512VL-LABEL: test_x86_avx_cvtdq2_pd_256:
-; AVX512VL: ## BB#0:
-; AVX512VL-NEXT: vcvtdq2pd %xmm0, %ymm0
-; AVX512VL-NEXT: retl
- %res = call <4 x double> @llvm.x86.avx.cvtdq2.pd.256(<4 x i32> %a0) ; <<4 x double>> [#uses=1]
- ret <4 x double> %res
-}
-declare <4 x double> @llvm.x86.avx.cvtdq2.pd.256(<4 x i32>) nounwind readnone
-
-
define <8 x float> @test_x86_avx_cvtdq2_ps_256(<8 x i32> %a0) {
; AVX-LABEL: test_x86_avx_cvtdq2_ps_256:
; AVX: ## BB#0:
@@ -4190,7 +4126,7 @@ define void @test_x86_avx_storeu_dq_256(i8* %a0, <32 x i8> %a1) {
; AVX512VL-LABEL: test_x86_avx_storeu_dq_256:
; AVX512VL: ## BB#0:
; AVX512VL-NEXT: movl {{[0-9]+}}(%esp), %eax
-; AVX512VL-NEXT: vpaddb LCPI235_0, %ymm0, %ymm0
+; AVX512VL-NEXT: vpaddb LCPI231_0, %ymm0, %ymm0
; AVX512VL-NEXT: vmovdqu %ymm0, (%eax)
; AVX512VL-NEXT: retl
%a2 = add <32 x i8> %a1, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
@@ -4431,7 +4367,7 @@ define <4 x double> @test_x86_avx_vpermilvar_pd_256_2(<4 x double> %a0) {
;
; AVX512VL-LABEL: test_x86_avx_vpermilvar_pd_256_2:
; AVX512VL: ## BB#0:
-; AVX512VL-NEXT: vpermilpd LCPI249_0, %ymm0, %ymm0
+; AVX512VL-NEXT: vpermilpd LCPI245_0, %ymm0, %ymm0
; AVX512VL-NEXT: retl
%res = call <4 x double> @llvm.x86.avx.vpermilvar.pd.256(<4 x double> %a0, <4 x i64> <i64 2, i64 0, i64 0, i64 2>) ; <<4 x double>> [#uses=1]
ret <4 x double> %res
@@ -4923,7 +4859,7 @@ define void @movnt_dq(i8* %p, <2 x i64> %a1) nounwind {
; AVX-LABEL: movnt_dq:
; AVX: ## BB#0:
; AVX-NEXT: movl {{[0-9]+}}(%esp), %eax
-; AVX-NEXT: vpaddq LCPI276_0, %xmm0, %xmm0
+; AVX-NEXT: vpaddq LCPI272_0, %xmm0, %xmm0
; AVX-NEXT: vmovntdq %ymm0, (%eax)
; AVX-NEXT: vzeroupper
; AVX-NEXT: retl
@@ -4931,7 +4867,7 @@ define void @movnt_dq(i8* %p, <2 x i64> %a1) nounwind {
; AVX512VL-LABEL: movnt_dq:
; AVX512VL: ## BB#0:
; AVX512VL-NEXT: movl {{[0-9]+}}(%esp), %eax
-; AVX512VL-NEXT: vpaddq LCPI276_0, %xmm0, %xmm0
+; AVX512VL-NEXT: vpaddq LCPI272_0, %xmm0, %xmm0
; AVX512VL-NEXT: vmovntdq %ymm0, (%eax)
; AVX512VL-NEXT: retl
%a2 = add <2 x i64> %a1, <i64 1, i64 1>
diff --git a/llvm/test/CodeGen/X86/sse2-intrinsics-x86-upgrade.ll b/llvm/test/CodeGen/X86/sse2-intrinsics-x86-upgrade.ll
index 32825a67c37..1725e8f8c2b 100644
--- a/llvm/test/CodeGen/X86/sse2-intrinsics-x86-upgrade.ll
+++ b/llvm/test/CodeGen/X86/sse2-intrinsics-x86-upgrade.ll
@@ -44,6 +44,46 @@ define <2 x i64> @test_x86_sse2_psrl_dq(<2 x i64> %a0) {
declare <2 x i64> @llvm.x86.sse2.psrl.dq(<2 x i64>, i32) nounwind readnone
+define <2 x double> @test_x86_sse2_cvtdq2pd(<4 x i32> %a0) {
+; SSE-LABEL: test_x86_sse2_cvtdq2pd:
+; SSE: ## BB#0:
+; SSE-NEXT: cvtdq2pd %xmm0, %xmm0
+; SSE-NEXT: retl
+;
+; KNL-LABEL: test_x86_sse2_cvtdq2pd:
+; KNL: ## BB#0:
+; KNL-NEXT: vcvtdq2pd %xmm0, %xmm0
+; KNL-NEXT: retl
+; CHECK-LABEL: test_x86_sse2_cvtdq2pd:
+; CHECK: ## BB#0:
+; CHECK-NEXT: cvtdq2pd %xmm0, %xmm0
+; CHECK-NEXT: retl
+ %res = call <2 x double> @llvm.x86.sse2.cvtdq2pd(<4 x i32> %a0) ; <<2 x double>> [#uses=1]
+ ret <2 x double> %res
+}
+declare <2 x double> @llvm.x86.sse2.cvtdq2pd(<4 x i32>) nounwind readnone
+
+
+define <2 x double> @test_x86_sse2_cvtps2pd(<4 x float> %a0) {
+; SSE-LABEL: test_x86_sse2_cvtps2pd:
+; SSE: ## BB#0:
+; SSE-NEXT: cvtps2pd %xmm0, %xmm0
+; SSE-NEXT: retl
+;
+; KNL-LABEL: test_x86_sse2_cvtps2pd:
+; KNL: ## BB#0:
+; KNL-NEXT: vcvtps2pd %xmm0, %xmm0
+; KNL-NEXT: retl
+; CHECK-LABEL: test_x86_sse2_cvtps2pd:
+; CHECK: ## BB#0:
+; CHECK-NEXT: cvtps2pd %xmm0, %xmm0
+; CHECK-NEXT: retl
+ %res = call <2 x double> @llvm.x86.sse2.cvtps2pd(<4 x float> %a0) ; <<2 x double>> [#uses=1]
+ ret <2 x double> %res
+}
+declare <2 x double> @llvm.x86.sse2.cvtps2pd(<4 x float>) nounwind readnone
+
+
define void @test_x86_sse2_storel_dq(i8* %a0, <4 x i32> %a1) {
; CHECK-LABEL: test_x86_sse2_storel_dq:
; CHECK: ## BB#0:
diff --git a/llvm/test/CodeGen/X86/sse2-intrinsics-x86.ll b/llvm/test/CodeGen/X86/sse2-intrinsics-x86.ll
index ca43cdf8d8c..25f73055091 100644
--- a/llvm/test/CodeGen/X86/sse2-intrinsics-x86.ll
+++ b/llvm/test/CodeGen/X86/sse2-intrinsics-x86.ll
@@ -178,22 +178,6 @@ define i32 @test_x86_sse2_comineq_sd(<2 x double> %a0, <2 x double> %a1) {
declare i32 @llvm.x86.sse2.comineq.sd(<2 x double>, <2 x double>) nounwind readnone
-define <2 x double> @test_x86_sse2_cvtdq2pd(<4 x i32> %a0) {
-; SSE-LABEL: test_x86_sse2_cvtdq2pd:
-; SSE: ## BB#0:
-; SSE-NEXT: cvtdq2pd %xmm0, %xmm0
-; SSE-NEXT: retl
-;
-; KNL-LABEL: test_x86_sse2_cvtdq2pd:
-; KNL: ## BB#0:
-; KNL-NEXT: vcvtdq2pd %xmm0, %xmm0
-; KNL-NEXT: retl
- %res = call <2 x double> @llvm.x86.sse2.cvtdq2pd(<4 x i32> %a0) ; <<2 x double>> [#uses=1]
- ret <2 x double> %res
-}
-declare <2 x double> @llvm.x86.sse2.cvtdq2pd(<4 x i32>) nounwind readnone
-
-
define <4 x float> @test_x86_sse2_cvtdq2ps(<4 x i32> %a0) {
; SSE-LABEL: test_x86_sse2_cvtdq2ps:
; SSE: ## BB#0:
@@ -258,22 +242,6 @@ define <4 x i32> @test_x86_sse2_cvtps2dq(<4 x float> %a0) {
declare <4 x i32> @llvm.x86.sse2.cvtps2dq(<4 x float>) nounwind readnone
-define <2 x double> @test_x86_sse2_cvtps2pd(<4 x float> %a0) {
-; SSE-LABEL: test_x86_sse2_cvtps2pd:
-; SSE: ## BB#0:
-; SSE-NEXT: cvtps2pd %xmm0, %xmm0
-; SSE-NEXT: retl
-;
-; KNL-LABEL: test_x86_sse2_cvtps2pd:
-; KNL: ## BB#0:
-; KNL-NEXT: vcvtps2pd %xmm0, %xmm0
-; KNL-NEXT: retl
- %res = call <2 x double> @llvm.x86.sse2.cvtps2pd(<4 x float> %a0) ; <<2 x double>> [#uses=1]
- ret <2 x double> %res
-}
-declare <2 x double> @llvm.x86.sse2.cvtps2pd(<4 x float>) nounwind readnone
-
-
define i32 @test_x86_sse2_cvtsd2si(<2 x double> %a0) {
; SSE-LABEL: test_x86_sse2_cvtsd2si:
; SSE: ## BB#0:
@@ -1162,14 +1130,14 @@ define void @test_x86_sse2_storeu_dq(i8* %a0, <16 x i8> %a1) {
; SSE-LABEL: test_x86_sse2_storeu_dq:
; SSE: ## BB#0:
; SSE-NEXT: movl {{[0-9]+}}(%esp), %eax
-; SSE-NEXT: paddb LCPI70_0, %xmm0
+; SSE-NEXT: paddb LCPI68_0, %xmm0
; SSE-NEXT: movdqu %xmm0, (%eax)
; SSE-NEXT: retl
;
; KNL-LABEL: test_x86_sse2_storeu_dq:
; KNL: ## BB#0:
; KNL-NEXT: movl {{[0-9]+}}(%esp), %eax
-; KNL-NEXT: vpaddb LCPI70_0, %xmm0, %xmm0
+; KNL-NEXT: vpaddb LCPI68_0, %xmm0, %xmm0
; KNL-NEXT: vmovdqu %xmm0, (%eax)
; KNL-NEXT: retl
%a2 = add <16 x i8> %a1, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
OpenPOWER on IntegriCloud