author    Craig Topper <craig.topper@gmail.com>  2016-05-30 23:15:56 +0000
committer Craig Topper <craig.topper@gmail.com>  2016-05-30 23:15:56 +0000
commit    8287fd8abde6228ebab32953e85c78e75ce0fd30 (patch)
tree      4794e9ba319afa3c9da01ea5f248b5b57f4ddb86 /llvm/test
parent    424b5ee8f7d8139b3893fd8577062c0d4482b52a (diff)
[X86] Remove SSE/AVX unaligned store intrinsics as clang no longer uses them. Auto upgrade to native unaligned store instructions.
llvm-svn: 271236
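
For reference, the auto-upgrade rewrites calls to the removed intrinsics into ordinary unaligned stores in the IR. A minimal before/after sketch (the names %p and %v are illustrative and not taken from this commit; the align 1 store matches the pattern checked in the MemorySanitizer test update below):

    ; before: call to the removed unaligned-store intrinsic
    call void @llvm.x86.sse.storeu.ps(i8* %p, <4 x float> %v)

    ; after auto-upgrade: cast the pointer and emit a native store with align 1
    %cast = bitcast i8* %p to <4 x float>*
    store <4 x float> %v, <4 x float>* %cast, align 1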
Diffstat (limited to 'llvm/test')
-rw-r--r--  llvm/test/CodeGen/X86/avx-intrinsics-x86-upgrade.ll      |  95
-rw-r--r--  llvm/test/CodeGen/X86/avx-intrinsics-x86.ll              | 144
-rw-r--r--  llvm/test/CodeGen/X86/avx2-intrinsics-x86-upgrade.ll     |  16
-rw-r--r--  llvm/test/CodeGen/X86/avx2-intrinsics-x86.ll             |  23
-rw-r--r--  llvm/test/CodeGen/X86/sse-intrinsics-x86-upgrade.ll      |  27
-rw-r--r--  llvm/test/CodeGen/X86/sse-intrinsics-x86.ll              |  18
-rw-r--r--  llvm/test/CodeGen/X86/sse2-intrinsics-x86-upgrade.ll     |  31
-rw-r--r--  llvm/test/CodeGen/X86/sse2-intrinsics-x86.ll             |  48
-rw-r--r--  llvm/test/Instrumentation/MemorySanitizer/msan_basic.ll  |   2
9 files changed, 173 insertions(+), 231 deletions(-)
diff --git a/llvm/test/CodeGen/X86/avx-intrinsics-x86-upgrade.ll b/llvm/test/CodeGen/X86/avx-intrinsics-x86-upgrade.ll
index f3a3b774e7a..bc89f7c1eb2 100644
--- a/llvm/test/CodeGen/X86/avx-intrinsics-x86-upgrade.ll
+++ b/llvm/test/CodeGen/X86/avx-intrinsics-x86-upgrade.ll
@@ -355,3 +355,98 @@ define <4 x double> @test_x86_avx_cvt_ps2_pd_256(<4 x float> %a0) {
ret <4 x double> %res
}
declare <4 x double> @llvm.x86.avx.cvt.ps2.pd.256(<4 x float>) nounwind readnone
+
+
+define void @test_x86_sse2_storeu_dq(i8* %a0, <16 x i8> %a1) {
+ ; add operation forces the execution domain.
+; CHECK-LABEL: test_x86_sse2_storeu_dq:
+; CHECK: ## BB#0:
+; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
+; CHECK-NEXT: vpaddb LCPI32_0, %xmm0, %xmm0
+; CHECK-NEXT: vmovdqu %xmm0, (%eax)
+; CHECK-NEXT: retl
+ %a2 = add <16 x i8> %a1, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
+ call void @llvm.x86.sse2.storeu.dq(i8* %a0, <16 x i8> %a2)
+ ret void
+}
+declare void @llvm.x86.sse2.storeu.dq(i8*, <16 x i8>) nounwind
+
+
+define void @test_x86_sse2_storeu_pd(i8* %a0, <2 x double> %a1) {
+ ; fadd operation forces the execution domain.
+; CHECK-LABEL: test_x86_sse2_storeu_pd:
+; CHECK: ## BB#0:
+; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
+; CHECK-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero
+; CHECK-NEXT: vpslldq {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,zero,zero,xmm1[0,1,2,3,4,5,6,7]
+; CHECK-NEXT: vaddpd %xmm1, %xmm0, %xmm0
+; CHECK-NEXT: vmovupd %xmm0, (%eax)
+; CHECK-NEXT: retl
+ %a2 = fadd <2 x double> %a1, <double 0x0, double 0x4200000000000000>
+ call void @llvm.x86.sse2.storeu.pd(i8* %a0, <2 x double> %a2)
+ ret void
+}
+declare void @llvm.x86.sse2.storeu.pd(i8*, <2 x double>) nounwind
+
+
+define void @test_x86_sse_storeu_ps(i8* %a0, <4 x float> %a1) {
+; CHECK-LABEL: test_x86_sse_storeu_ps:
+; CHECK: ## BB#0:
+; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
+; CHECK-NEXT: vmovups %xmm0, (%eax)
+; CHECK-NEXT: retl
+ call void @llvm.x86.sse.storeu.ps(i8* %a0, <4 x float> %a1)
+ ret void
+}
+declare void @llvm.x86.sse.storeu.ps(i8*, <4 x float>) nounwind
+
+
+define void @test_x86_avx_storeu_dq_256(i8* %a0, <32 x i8> %a1) {
+ ; FIXME: unfortunately the execution domain fix pass changes this to vmovups and its hard to force with no 256-bit integer instructions
+ ; add operation forces the execution domain.
+; CHECK-LABEL: test_x86_avx_storeu_dq_256:
+; CHECK: ## BB#0:
+; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
+; CHECK-NEXT: vextractf128 $1, %ymm0, %xmm1
+; CHECK-NEXT: vmovdqa {{.*#+}} xmm2 = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
+; CHECK-NEXT: vpaddb %xmm2, %xmm1, %xmm1
+; CHECK-NEXT: vpaddb %xmm2, %xmm0, %xmm0
+; CHECK-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; CHECK-NEXT: vmovups %ymm0, (%eax)
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retl
+ %a2 = add <32 x i8> %a1, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
+ call void @llvm.x86.avx.storeu.dq.256(i8* %a0, <32 x i8> %a2)
+ ret void
+}
+declare void @llvm.x86.avx.storeu.dq.256(i8*, <32 x i8>) nounwind
+
+
+define void @test_x86_avx_storeu_pd_256(i8* %a0, <4 x double> %a1) {
+ ; add operation forces the execution domain.
+; CHECK-LABEL: test_x86_avx_storeu_pd_256:
+; CHECK: ## BB#0:
+; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
+; CHECK-NEXT: vxorpd %ymm1, %ymm1, %ymm1
+; CHECK-NEXT: vaddpd %ymm1, %ymm0, %ymm0
+; CHECK-NEXT: vmovupd %ymm0, (%eax)
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retl
+ %a2 = fadd <4 x double> %a1, <double 0x0, double 0x0, double 0x0, double 0x0>
+ call void @llvm.x86.avx.storeu.pd.256(i8* %a0, <4 x double> %a2)
+ ret void
+}
+declare void @llvm.x86.avx.storeu.pd.256(i8*, <4 x double>) nounwind
+
+
+define void @test_x86_avx_storeu_ps_256(i8* %a0, <8 x float> %a1) {
+; CHECK-LABEL: test_x86_avx_storeu_ps_256:
+; CHECK: ## BB#0:
+; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
+; CHECK-NEXT: vmovups %ymm0, (%eax)
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retl
+ call void @llvm.x86.avx.storeu.ps.256(i8* %a0, <8 x float> %a1)
+ ret void
+}
+declare void @llvm.x86.avx.storeu.ps.256(i8*, <8 x float>) nounwind
diff --git a/llvm/test/CodeGen/X86/avx-intrinsics-x86.ll b/llvm/test/CodeGen/X86/avx-intrinsics-x86.ll
index 84f8f3cd150..f5e1f3e210e 100644
--- a/llvm/test/CodeGen/X86/avx-intrinsics-x86.ll
+++ b/llvm/test/CodeGen/X86/avx-intrinsics-x86.ll
@@ -1221,54 +1221,6 @@ define <2 x double> @test_x86_sse2_sqrt_sd(<2 x double> %a0) {
declare <2 x double> @llvm.x86.sse2.sqrt.sd(<2 x double>) nounwind readnone
-define void @test_x86_sse2_storeu_dq(i8* %a0, <16 x i8> %a1) {
- ; add operation forces the execution domain.
-; AVX-LABEL: test_x86_sse2_storeu_dq:
-; AVX: ## BB#0:
-; AVX-NEXT: movl {{[0-9]+}}(%esp), %eax
-; AVX-NEXT: vpaddb LCPI74_0, %xmm0, %xmm0
-; AVX-NEXT: vmovdqu %xmm0, (%eax)
-; AVX-NEXT: retl
-;
-; AVX512VL-LABEL: test_x86_sse2_storeu_dq:
-; AVX512VL: ## BB#0:
-; AVX512VL-NEXT: movl {{[0-9]+}}(%esp), %eax
-; AVX512VL-NEXT: vpaddb LCPI74_0, %xmm0, %xmm0
-; AVX512VL-NEXT: vmovdqu %xmm0, (%eax)
-; AVX512VL-NEXT: retl
- %a2 = add <16 x i8> %a1, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
- call void @llvm.x86.sse2.storeu.dq(i8* %a0, <16 x i8> %a2)
- ret void
-}
-declare void @llvm.x86.sse2.storeu.dq(i8*, <16 x i8>) nounwind
-
-
-define void @test_x86_sse2_storeu_pd(i8* %a0, <2 x double> %a1) {
- ; fadd operation forces the execution domain.
-; AVX-LABEL: test_x86_sse2_storeu_pd:
-; AVX: ## BB#0:
-; AVX-NEXT: movl {{[0-9]+}}(%esp), %eax
-; AVX-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero
-; AVX-NEXT: vpslldq {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,zero,zero,xmm1[0,1,2,3,4,5,6,7]
-; AVX-NEXT: vaddpd %xmm1, %xmm0, %xmm0
-; AVX-NEXT: vmovupd %xmm0, (%eax)
-; AVX-NEXT: retl
-;
-; AVX512VL-LABEL: test_x86_sse2_storeu_pd:
-; AVX512VL: ## BB#0:
-; AVX512VL-NEXT: movl {{[0-9]+}}(%esp), %eax
-; AVX512VL-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero
-; AVX512VL-NEXT: vpslldq {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,zero,zero,xmm1[0,1,2,3,4,5,6,7]
-; AVX512VL-NEXT: vaddpd %xmm1, %xmm0, %xmm0
-; AVX512VL-NEXT: vmovups %xmm0, (%eax)
-; AVX512VL-NEXT: retl
- %a2 = fadd <2 x double> %a1, <double 0x0, double 0x4200000000000000>
- call void @llvm.x86.sse2.storeu.pd(i8* %a0, <2 x double> %a2)
- ret void
-}
-declare void @llvm.x86.sse2.storeu.pd(i8*, <2 x double>) nounwind
-
-
define <2 x double> @test_x86_sse2_sub_sd(<2 x double> %a0, <2 x double> %a1) {
; AVX-LABEL: test_x86_sse2_sub_sd:
; AVX: ## BB#0:
@@ -2802,24 +2754,6 @@ define void @test_x86_sse_stmxcsr(i8* %a0) {
declare void @llvm.x86.sse.stmxcsr(i8*) nounwind
-define void @test_x86_sse_storeu_ps(i8* %a0, <4 x float> %a1) {
-; AVX-LABEL: test_x86_sse_storeu_ps:
-; AVX: ## BB#0:
-; AVX-NEXT: movl {{[0-9]+}}(%esp), %eax
-; AVX-NEXT: vmovups %xmm0, (%eax)
-; AVX-NEXT: retl
-;
-; AVX512VL-LABEL: test_x86_sse_storeu_ps:
-; AVX512VL: ## BB#0:
-; AVX512VL-NEXT: movl {{[0-9]+}}(%esp), %eax
-; AVX512VL-NEXT: vmovups %xmm0, (%eax)
-; AVX512VL-NEXT: retl
- call void @llvm.x86.sse.storeu.ps(i8* %a0, <4 x float> %a1)
- ret void
-}
-declare void @llvm.x86.sse.storeu.ps(i8*, <4 x float>) nounwind
-
-
define <4 x float> @test_x86_sse_sub_ss(<4 x float> %a0, <4 x float> %a1) {
; AVX-LABEL: test_x86_sse_sub_ss:
; AVX: ## BB#0:
@@ -4012,78 +3946,6 @@ define <8 x float> @test_x86_avx_sqrt_ps_256(<8 x float> %a0) {
declare <8 x float> @llvm.x86.avx.sqrt.ps.256(<8 x float>) nounwind readnone
-define void @test_x86_avx_storeu_dq_256(i8* %a0, <32 x i8> %a1) {
- ; FIXME: unfortunately the execution domain fix pass changes this to vmovups and its hard to force with no 256-bit integer instructions
- ; add operation forces the execution domain.
-; AVX-LABEL: test_x86_avx_storeu_dq_256:
-; AVX: ## BB#0:
-; AVX-NEXT: movl {{[0-9]+}}(%esp), %eax
-; AVX-NEXT: vextractf128 $1, %ymm0, %xmm1
-; AVX-NEXT: vmovdqa {{.*#+}} xmm2 = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
-; AVX-NEXT: vpaddb %xmm2, %xmm1, %xmm1
-; AVX-NEXT: vpaddb %xmm2, %xmm0, %xmm0
-; AVX-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
-; AVX-NEXT: vmovups %ymm0, (%eax)
-; AVX-NEXT: vzeroupper
-; AVX-NEXT: retl
-;
-; AVX512VL-LABEL: test_x86_avx_storeu_dq_256:
-; AVX512VL: ## BB#0:
-; AVX512VL-NEXT: movl {{[0-9]+}}(%esp), %eax
-; AVX512VL-NEXT: vpaddb LCPI225_0, %ymm0, %ymm0
-; AVX512VL-NEXT: vmovdqu %ymm0, (%eax)
-; AVX512VL-NEXT: retl
- %a2 = add <32 x i8> %a1, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
- call void @llvm.x86.avx.storeu.dq.256(i8* %a0, <32 x i8> %a2)
- ret void
-}
-declare void @llvm.x86.avx.storeu.dq.256(i8*, <32 x i8>) nounwind
-
-
-define void @test_x86_avx_storeu_pd_256(i8* %a0, <4 x double> %a1) {
- ; add operation forces the execution domain.
-; AVX-LABEL: test_x86_avx_storeu_pd_256:
-; AVX: ## BB#0:
-; AVX-NEXT: movl {{[0-9]+}}(%esp), %eax
-; AVX-NEXT: vxorpd %ymm1, %ymm1, %ymm1
-; AVX-NEXT: vaddpd %ymm1, %ymm0, %ymm0
-; AVX-NEXT: vmovupd %ymm0, (%eax)
-; AVX-NEXT: vzeroupper
-; AVX-NEXT: retl
-;
-; AVX512VL-LABEL: test_x86_avx_storeu_pd_256:
-; AVX512VL: ## BB#0:
-; AVX512VL-NEXT: movl {{[0-9]+}}(%esp), %eax
-; AVX512VL-NEXT: vpxord %ymm1, %ymm1, %ymm1
-; AVX512VL-NEXT: vaddpd %ymm1, %ymm0, %ymm0
-; AVX512VL-NEXT: vmovups %ymm0, (%eax)
-; AVX512VL-NEXT: retl
- %a2 = fadd <4 x double> %a1, <double 0x0, double 0x0, double 0x0, double 0x0>
- call void @llvm.x86.avx.storeu.pd.256(i8* %a0, <4 x double> %a2)
- ret void
-}
-declare void @llvm.x86.avx.storeu.pd.256(i8*, <4 x double>) nounwind
-
-
-define void @test_x86_avx_storeu_ps_256(i8* %a0, <8 x float> %a1) {
-; AVX-LABEL: test_x86_avx_storeu_ps_256:
-; AVX: ## BB#0:
-; AVX-NEXT: movl {{[0-9]+}}(%esp), %eax
-; AVX-NEXT: vmovups %ymm0, (%eax)
-; AVX-NEXT: vzeroupper
-; AVX-NEXT: retl
-;
-; AVX512VL-LABEL: test_x86_avx_storeu_ps_256:
-; AVX512VL: ## BB#0:
-; AVX512VL-NEXT: movl {{[0-9]+}}(%esp), %eax
-; AVX512VL-NEXT: vmovups %ymm0, (%eax)
-; AVX512VL-NEXT: retl
- call void @llvm.x86.avx.storeu.ps.256(i8* %a0, <8 x float> %a1)
- ret void
-}
-declare void @llvm.x86.avx.storeu.ps.256(i8*, <8 x float>) nounwind
-
-
define <4 x double> @test_x86_avx_vbroadcastf128_pd_256(i8* %a0) {
; AVX-LABEL: test_x86_avx_vbroadcastf128_pd_256:
; AVX: ## BB#0:
@@ -4271,7 +4133,7 @@ define <4 x double> @test_x86_avx_vpermilvar_pd_256_2(<4 x double> %a0) {
;
; AVX512VL-LABEL: test_x86_avx_vpermilvar_pd_256_2:
; AVX512VL: ## BB#0:
-; AVX512VL-NEXT: vpermilpd LCPI239_0, %ymm0, %ymm0
+; AVX512VL-NEXT: vpermilpd LCPI233_0, %ymm0, %ymm0
; AVX512VL-NEXT: retl
%res = call <4 x double> @llvm.x86.avx.vpermilvar.pd.256(<4 x double> %a0, <4 x i64> <i64 2, i64 0, i64 0, i64 2>) ; <<4 x double>> [#uses=1]
ret <4 x double> %res
@@ -4763,7 +4625,7 @@ define void @movnt_dq(i8* %p, <2 x i64> %a1) nounwind {
; AVX-LABEL: movnt_dq:
; AVX: ## BB#0:
; AVX-NEXT: movl {{[0-9]+}}(%esp), %eax
-; AVX-NEXT: vpaddq LCPI266_0, %xmm0, %xmm0
+; AVX-NEXT: vpaddq LCPI260_0, %xmm0, %xmm0
; AVX-NEXT: vmovntdq %ymm0, (%eax)
; AVX-NEXT: vzeroupper
; AVX-NEXT: retl
@@ -4771,7 +4633,7 @@ define void @movnt_dq(i8* %p, <2 x i64> %a1) nounwind {
; AVX512VL-LABEL: movnt_dq:
; AVX512VL: ## BB#0:
; AVX512VL-NEXT: movl {{[0-9]+}}(%esp), %eax
-; AVX512VL-NEXT: vpaddq LCPI266_0, %xmm0, %xmm0
+; AVX512VL-NEXT: vpaddq LCPI260_0, %xmm0, %xmm0
; AVX512VL-NEXT: vmovntdq %ymm0, (%eax)
; AVX512VL-NEXT: retl
%a2 = add <2 x i64> %a1, <i64 1, i64 1>
diff --git a/llvm/test/CodeGen/X86/avx2-intrinsics-x86-upgrade.ll b/llvm/test/CodeGen/X86/avx2-intrinsics-x86-upgrade.ll
index b537a700852..2c7d055fe2a 100644
--- a/llvm/test/CodeGen/X86/avx2-intrinsics-x86-upgrade.ll
+++ b/llvm/test/CodeGen/X86/avx2-intrinsics-x86-upgrade.ll
@@ -365,3 +365,19 @@ define <4 x i64> @test_x86_avx2_pmovzxwq(<8 x i16> %a0) {
ret <4 x i64> %res
}
declare <4 x i64> @llvm.x86.avx2.pmovzxwq(<8 x i16>) nounwind readnone
+
+; This is checked here because the execution dependency fix pass makes it hard to test in AVX mode since we don't have 256-bit integer instructions
+define void @test_x86_avx_storeu_dq_256(i8* %a0, <32 x i8> %a1) {
+ ; add operation forces the execution domain.
+; CHECK-LABEL: test_x86_avx_storeu_dq_256:
+; CHECK: ## BB#0:
+; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
+; CHECK-NEXT: vpaddb LCPI33_0, %ymm0, %ymm0
+; CHECK-NEXT: vmovdqu %ymm0, (%eax)
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retl
+ %a2 = add <32 x i8> %a1, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
+ call void @llvm.x86.avx.storeu.dq.256(i8* %a0, <32 x i8> %a2)
+ ret void
+}
+declare void @llvm.x86.avx.storeu.dq.256(i8*, <32 x i8>) nounwind
diff --git a/llvm/test/CodeGen/X86/avx2-intrinsics-x86.ll b/llvm/test/CodeGen/X86/avx2-intrinsics-x86.ll
index b5c4dbcb777..820a87aeab1 100644
--- a/llvm/test/CodeGen/X86/avx2-intrinsics-x86.ll
+++ b/llvm/test/CodeGen/X86/avx2-intrinsics-x86.ll
@@ -1475,29 +1475,6 @@ define <8 x i32> @test_x86_avx2_psrav_d_256(<8 x i32> %a0, <8 x i32> %a1) {
}
declare <8 x i32> @llvm.x86.avx2.psrav.d.256(<8 x i32>, <8 x i32>) nounwind readnone
-; This is checked here because the execution dependency fix pass makes it hard to test in AVX mode since we don't have 256-bit integer instructions
-define void @test_x86_avx_storeu_dq_256(i8* %a0, <32 x i8> %a1) {
- ; add operation forces the execution domain.
-; AVX2-LABEL: test_x86_avx_storeu_dq_256:
-; AVX2: ## BB#0:
-; AVX2-NEXT: movl {{[0-9]+}}(%esp), %eax
-; AVX2-NEXT: vpaddb LCPI91_0, %ymm0, %ymm0
-; AVX2-NEXT: vmovdqu %ymm0, (%eax)
-; AVX2-NEXT: vzeroupper
-; AVX2-NEXT: retl
-;
-; AVX512VL-LABEL: test_x86_avx_storeu_dq_256:
-; AVX512VL: ## BB#0:
-; AVX512VL-NEXT: movl {{[0-9]+}}(%esp), %eax
-; AVX512VL-NEXT: vpaddb LCPI91_0, %ymm0, %ymm0
-; AVX512VL-NEXT: vmovdqu %ymm0, (%eax)
-; AVX512VL-NEXT: retl
- %a2 = add <32 x i8> %a1, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
- call void @llvm.x86.avx.storeu.dq.256(i8* %a0, <32 x i8> %a2)
- ret void
-}
-declare void @llvm.x86.avx.storeu.dq.256(i8*, <32 x i8>) nounwind
-
define <2 x double> @test_x86_avx2_gather_d_pd(<2 x double> %a0, i8* %a1, <4 x i32> %idx, <2 x double> %mask) {
; AVX2-LABEL: test_x86_avx2_gather_d_pd:
; AVX2: ## BB#0:
diff --git a/llvm/test/CodeGen/X86/sse-intrinsics-x86-upgrade.ll b/llvm/test/CodeGen/X86/sse-intrinsics-x86-upgrade.ll
new file mode 100644
index 00000000000..2900c277f12
--- /dev/null
+++ b/llvm/test/CodeGen/X86/sse-intrinsics-x86-upgrade.ll
@@ -0,0 +1,27 @@
+; NOTE: Assertions have been autogenerated by update_llc_test_checks.py
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: llc < %s -mtriple=i686-apple-darwin -mattr=+sse2 | FileCheck %s
+
+define void @test_x86_sse_storeu_ps(i8* %a0, <4 x float> %a1) {
+; SSE-LABEL: test_x86_sse_storeu_ps:
+; SSE: ## BB#0:
+; SSE-NEXT: movl {{[0-9]+}}(%esp), %eax
+; SSE-NEXT: movups %xmm0, (%eax)
+; SSE-NEXT: retl
+;
+; KNL-LABEL: test_x86_sse_storeu_ps:
+; KNL: ## BB#0:
+; KNL-NEXT: movl {{[0-9]+}}(%esp), %eax
+; KNL-NEXT: vmovups %xmm0, (%eax)
+; KNL-NEXT: retl
+; CHECK-LABEL: test_x86_sse_storeu_ps:
+; CHECK: ## BB#0:
+; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
+; CHECK-NEXT: movups %xmm0, (%eax)
+; CHECK-NEXT: retl
+ call void @llvm.x86.sse.storeu.ps(i8* %a0, <4 x float> %a1)
+ ret void
+}
+declare void @llvm.x86.sse.storeu.ps(i8*, <4 x float>) nounwind
+
+
diff --git a/llvm/test/CodeGen/X86/sse-intrinsics-x86.ll b/llvm/test/CodeGen/X86/sse-intrinsics-x86.ll
index 86b52419a39..c346064e7aa 100644
--- a/llvm/test/CodeGen/X86/sse-intrinsics-x86.ll
+++ b/llvm/test/CodeGen/X86/sse-intrinsics-x86.ll
@@ -474,24 +474,6 @@ define void @test_x86_sse_stmxcsr(i8* %a0) {
declare void @llvm.x86.sse.stmxcsr(i8*) nounwind
-define void @test_x86_sse_storeu_ps(i8* %a0, <4 x float> %a1) {
-; SSE-LABEL: test_x86_sse_storeu_ps:
-; SSE: ## BB#0:
-; SSE-NEXT: movl {{[0-9]+}}(%esp), %eax
-; SSE-NEXT: movups %xmm0, (%eax)
-; SSE-NEXT: retl
-;
-; KNL-LABEL: test_x86_sse_storeu_ps:
-; KNL: ## BB#0:
-; KNL-NEXT: movl {{[0-9]+}}(%esp), %eax
-; KNL-NEXT: vmovups %xmm0, (%eax)
-; KNL-NEXT: retl
- call void @llvm.x86.sse.storeu.ps(i8* %a0, <4 x float> %a1)
- ret void
-}
-declare void @llvm.x86.sse.storeu.ps(i8*, <4 x float>) nounwind
-
-
define <4 x float> @test_x86_sse_sub_ss(<4 x float> %a0, <4 x float> %a1) {
; SSE-LABEL: test_x86_sse_sub_ss:
; SSE: ## BB#0:
diff --git a/llvm/test/CodeGen/X86/sse2-intrinsics-x86-upgrade.ll b/llvm/test/CodeGen/X86/sse2-intrinsics-x86-upgrade.ll
index 1725e8f8c2b..42d7c26d42b 100644
--- a/llvm/test/CodeGen/X86/sse2-intrinsics-x86-upgrade.ll
+++ b/llvm/test/CodeGen/X86/sse2-intrinsics-x86-upgrade.ll
@@ -96,4 +96,35 @@ define void @test_x86_sse2_storel_dq(i8* %a0, <4 x i32> %a1) {
declare void @llvm.x86.sse2.storel.dq(i8*, <4 x i32>) nounwind
+define void @test_x86_sse2_storeu_dq(i8* %a0, <16 x i8> %a1) {
+ ; add operation forces the execution domain.
+; CHECK-LABEL: test_x86_sse2_storeu_dq:
+; CHECK: ## BB#0:
+; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
+; CHECK-NEXT: paddb LCPI7_0, %xmm0
+; CHECK-NEXT: movdqu %xmm0, (%eax)
+; CHECK-NEXT: retl
+ %a2 = add <16 x i8> %a1, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
+ call void @llvm.x86.sse2.storeu.dq(i8* %a0, <16 x i8> %a2)
+ ret void
+}
+declare void @llvm.x86.sse2.storeu.dq(i8*, <16 x i8>) nounwind
+
+
+define void @test_x86_sse2_storeu_pd(i8* %a0, <2 x double> %a1) {
+ ; fadd operation forces the execution domain.
+; CHECK-LABEL: test_x86_sse2_storeu_pd:
+; CHECK: ## BB#0:
+; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
+; CHECK-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero
+; CHECK-NEXT: pslldq {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,zero,zero,xmm1[0,1,2,3,4,5,6,7]
+; CHECK-NEXT: addpd %xmm0, %xmm1
+; CHECK-NEXT: movupd %xmm1, (%eax)
+; CHECK-NEXT: retl
+ %a2 = fadd <2 x double> %a1, <double 0x0, double 0x4200000000000000>
+ call void @llvm.x86.sse2.storeu.pd(i8* %a0, <2 x double> %a2)
+ ret void
+}
+declare void @llvm.x86.sse2.storeu.pd(i8*, <2 x double>) nounwind
+
diff --git a/llvm/test/CodeGen/X86/sse2-intrinsics-x86.ll b/llvm/test/CodeGen/X86/sse2-intrinsics-x86.ll
index 25f73055091..d06ef2807e9 100644
--- a/llvm/test/CodeGen/X86/sse2-intrinsics-x86.ll
+++ b/llvm/test/CodeGen/X86/sse2-intrinsics-x86.ll
@@ -1125,54 +1125,6 @@ define <2 x double> @test_x86_sse2_sqrt_sd(<2 x double> %a0) {
declare <2 x double> @llvm.x86.sse2.sqrt.sd(<2 x double>) nounwind readnone
-define void @test_x86_sse2_storeu_dq(i8* %a0, <16 x i8> %a1) {
- ; add operation forces the execution domain.
-; SSE-LABEL: test_x86_sse2_storeu_dq:
-; SSE: ## BB#0:
-; SSE-NEXT: movl {{[0-9]+}}(%esp), %eax
-; SSE-NEXT: paddb LCPI68_0, %xmm0
-; SSE-NEXT: movdqu %xmm0, (%eax)
-; SSE-NEXT: retl
-;
-; KNL-LABEL: test_x86_sse2_storeu_dq:
-; KNL: ## BB#0:
-; KNL-NEXT: movl {{[0-9]+}}(%esp), %eax
-; KNL-NEXT: vpaddb LCPI68_0, %xmm0, %xmm0
-; KNL-NEXT: vmovdqu %xmm0, (%eax)
-; KNL-NEXT: retl
- %a2 = add <16 x i8> %a1, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
- call void @llvm.x86.sse2.storeu.dq(i8* %a0, <16 x i8> %a2)
- ret void
-}
-declare void @llvm.x86.sse2.storeu.dq(i8*, <16 x i8>) nounwind
-
-
-define void @test_x86_sse2_storeu_pd(i8* %a0, <2 x double> %a1) {
- ; fadd operation forces the execution domain.
-; SSE-LABEL: test_x86_sse2_storeu_pd:
-; SSE: ## BB#0:
-; SSE-NEXT: movl {{[0-9]+}}(%esp), %eax
-; SSE-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero
-; SSE-NEXT: pslldq {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,zero,zero,xmm1[0,1,2,3,4,5,6,7]
-; SSE-NEXT: addpd %xmm0, %xmm1
-; SSE-NEXT: movupd %xmm1, (%eax)
-; SSE-NEXT: retl
-;
-; KNL-LABEL: test_x86_sse2_storeu_pd:
-; KNL: ## BB#0:
-; KNL-NEXT: movl {{[0-9]+}}(%esp), %eax
-; KNL-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero
-; KNL-NEXT: vpslldq {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,zero,zero,xmm1[0,1,2,3,4,5,6,7]
-; KNL-NEXT: vaddpd %xmm1, %xmm0, %xmm0
-; KNL-NEXT: vmovupd %xmm0, (%eax)
-; KNL-NEXT: retl
- %a2 = fadd <2 x double> %a1, <double 0x0, double 0x4200000000000000>
- call void @llvm.x86.sse2.storeu.pd(i8* %a0, <2 x double> %a2)
- ret void
-}
-declare void @llvm.x86.sse2.storeu.pd(i8*, <2 x double>) nounwind
-
-
define <2 x double> @test_x86_sse2_sub_sd(<2 x double> %a0, <2 x double> %a1) {
; SSE-LABEL: test_x86_sse2_sub_sd:
; SSE: ## BB#0:
diff --git a/llvm/test/Instrumentation/MemorySanitizer/msan_basic.ll b/llvm/test/Instrumentation/MemorySanitizer/msan_basic.ll
index 91e2a9087a9..4b208d64427 100644
--- a/llvm/test/Instrumentation/MemorySanitizer/msan_basic.ll
+++ b/llvm/test/Instrumentation/MemorySanitizer/msan_basic.ll
@@ -631,7 +631,7 @@ declare void @llvm.x86.sse.storeu.ps(i8*, <4 x float>) nounwind
; CHECK-NOT: br
; CHECK-NOT: = or
; CHECK: store <4 x i32> {{.*}} align 1
-; CHECK: call void @llvm.x86.sse.storeu.ps
+; CHECK: store <4 x float> %{{.*}}, <4 x float>* %{{.*}}, align 1{{$}}
; CHECK: ret void