author    Simon Pilgrim <llvm-dev@redking.me.uk>  2016-06-03 10:30:54 +0000
committer Simon Pilgrim <llvm-dev@redking.me.uk>  2016-06-03 10:30:54 +0000
commit    960ca812eda970b91a7fd70bbf6e853e979c38b3
tree      8f149ffcd8121cdf13e5ef57d3eb60b2cae1d687 /llvm/test
parent    05ef1c940fa27615a619e9e945c5867c4c8f0574
[X86] Added nontemporal scalar store tests
llvm-svn: 271656
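
For reference, each new test reduces to a scalar store carrying !nontemporal metadata. The hunks below reference the metadata node !1, whose definition (the usual !{i32 1} non-temporal marker) lives elsewhere in the file and is not part of this diff. A minimal standalone sketch of the pattern, using a hypothetical !0 node, would be:

define void @nt_zero_i32(i32* %dst) {
  ; A scalar zero store marked non-temporal; on x86-64 this is expected to
  ; lower to xorl + movnti rather than going through an XMM register.
  store i32 0, i32* %dst, align 1, !nontemporal !0
  ret void
}

!0 = !{i32 1}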
Diffstat (limited to 'llvm/test')
-rw-r--r--  llvm/test/CodeGen/X86/nontemporal-2.ll | 172
1 file changed, 172 insertions(+), 0 deletions(-)
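
The diff below omits the file's RUN lines, which drive the SSE, AVX, and VLX check prefixes. An illustrative set of RUN lines (the exact -mtriple/-mattr flags are assumptions, not taken from the file) would look like:

; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.2   | FileCheck %s --check-prefix=SSE
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx      | FileCheck %s --check-prefix=AVX
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vl | FileCheck %s --check-prefix=VLX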
diff --git a/llvm/test/CodeGen/X86/nontemporal-2.ll b/llvm/test/CodeGen/X86/nontemporal-2.ll
index d893260ff95..a2cb74fd99e 100644
--- a/llvm/test/CodeGen/X86/nontemporal-2.ll
+++ b/llvm/test/CodeGen/X86/nontemporal-2.ll
@@ -9,6 +9,98 @@
; Make sure that we generate non-temporal stores for the test cases below.
; We use xorps for zeroing, so domain information isn't available anymore.
+; Scalar versions (zeroing means we can do this even for fp types).
+
+define void @test_zero_f32(float* %dst) {
+; SSE-LABEL: test_zero_f32:
+; SSE: # BB#0:
+; SSE-NEXT: xorl %eax, %eax
+; SSE-NEXT: movntil %eax, (%rdi)
+; SSE-NEXT: retq
+;
+; AVX-LABEL: test_zero_f32:
+; AVX: # BB#0:
+; AVX-NEXT: xorl %eax, %eax
+; AVX-NEXT: movntil %eax, (%rdi)
+; AVX-NEXT: retq
+;
+; VLX-LABEL: test_zero_f32:
+; VLX: # BB#0:
+; VLX-NEXT: xorl %eax, %eax
+; VLX-NEXT: movntil %eax, (%rdi)
+; VLX-NEXT: retq
+ store float zeroinitializer, float* %dst, align 1, !nontemporal !1
+ ret void
+}
+
+define void @test_zero_i32(i32* %dst) {
+; SSE-LABEL: test_zero_i32:
+; SSE: # BB#0:
+; SSE-NEXT: xorl %eax, %eax
+; SSE-NEXT: movntil %eax, (%rdi)
+; SSE-NEXT: retq
+;
+; AVX-LABEL: test_zero_i32:
+; AVX: # BB#0:
+; AVX-NEXT: xorl %eax, %eax
+; AVX-NEXT: movntil %eax, (%rdi)
+; AVX-NEXT: retq
+;
+; VLX-LABEL: test_zero_i32:
+; VLX: # BB#0:
+; VLX-NEXT: xorl %eax, %eax
+; VLX-NEXT: movntil %eax, (%rdi)
+; VLX-NEXT: retq
+ store i32 zeroinitializer, i32* %dst, align 1, !nontemporal !1
+ ret void
+}
+
+define void @test_zero_f64(double* %dst) {
+; SSE-LABEL: test_zero_f64:
+; SSE: # BB#0:
+; SSE-NEXT: xorl %eax, %eax
+; SSE-NEXT: movntiq %rax, (%rdi)
+; SSE-NEXT: retq
+;
+; AVX-LABEL: test_zero_f64:
+; AVX: # BB#0:
+; AVX-NEXT: xorl %eax, %eax
+; AVX-NEXT: movntiq %rax, (%rdi)
+; AVX-NEXT: retq
+;
+; VLX-LABEL: test_zero_f64:
+; VLX: # BB#0:
+; VLX-NEXT: xorl %eax, %eax
+; VLX-NEXT: movntiq %rax, (%rdi)
+; VLX-NEXT: retq
+ store double zeroinitializer, double* %dst, align 1, !nontemporal !1
+ ret void
+}
+
+define void @test_zero_i64(i64* %dst) {
+; SSE-LABEL: test_zero_i64:
+; SSE: # BB#0:
+; SSE-NEXT: xorl %eax, %eax
+; SSE-NEXT: movntiq %rax, (%rdi)
+; SSE-NEXT: retq
+;
+; AVX-LABEL: test_zero_i64:
+; AVX: # BB#0:
+; AVX-NEXT: xorl %eax, %eax
+; AVX-NEXT: movntiq %rax, (%rdi)
+; AVX-NEXT: retq
+;
+; VLX-LABEL: test_zero_i64:
+; VLX: # BB#0:
+; VLX-NEXT: xorl %eax, %eax
+; VLX-NEXT: movntiq %rax, (%rdi)
+; VLX-NEXT: retq
+ store i64 zeroinitializer, i64* %dst, align 1, !nontemporal !1
+ ret void
+}
+
+; And now XMM versions.
+
define void @test_zero_v4f32(<4 x float>* %dst) {
; SSE-LABEL: test_zero_v4f32:
; SSE: # BB#0:
@@ -291,6 +383,86 @@ define void @test_zero_v32i8(<32 x i8>* %dst) {
; Check that we also handle arguments. Here the type survives longer.
+; Scalar versions.
+
+define void @test_arg_f32(float %arg, float* %dst) {
+; SSE-LABEL: test_arg_f32:
+; SSE: # BB#0:
+; SSE-NEXT: movss %xmm0, (%rdi)
+; SSE-NEXT: retq
+;
+; AVX-LABEL: test_arg_f32:
+; AVX: # BB#0:
+; AVX-NEXT: vmovss %xmm0, (%rdi)
+; AVX-NEXT: retq
+;
+; VLX-LABEL: test_arg_f32:
+; VLX: # BB#0:
+; VLX-NEXT: vmovss %xmm0, (%rdi)
+; VLX-NEXT: retq
+ store float %arg, float* %dst, align 1, !nontemporal !1
+ ret void
+}
+
+define void @test_arg_i32(i32 %arg, i32* %dst) {
+; SSE-LABEL: test_arg_i32:
+; SSE: # BB#0:
+; SSE-NEXT: movntil %edi, (%rsi)
+; SSE-NEXT: retq
+;
+; AVX-LABEL: test_arg_i32:
+; AVX: # BB#0:
+; AVX-NEXT: movntil %edi, (%rsi)
+; AVX-NEXT: retq
+;
+; VLX-LABEL: test_arg_i32:
+; VLX: # BB#0:
+; VLX-NEXT: movntil %edi, (%rsi)
+; VLX-NEXT: retq
+ store i32 %arg, i32* %dst, align 1, !nontemporal !1
+ ret void
+}
+
+define void @test_arg_f64(double %arg, double* %dst) {
+; SSE-LABEL: test_arg_f64:
+; SSE: # BB#0:
+; SSE-NEXT: movsd %xmm0, (%rdi)
+; SSE-NEXT: retq
+;
+; AVX-LABEL: test_arg_f64:
+; AVX: # BB#0:
+; AVX-NEXT: vmovsd %xmm0, (%rdi)
+; AVX-NEXT: retq
+;
+; VLX-LABEL: test_arg_f64:
+; VLX: # BB#0:
+; VLX-NEXT: vmovsd %xmm0, (%rdi)
+; VLX-NEXT: retq
+ store double %arg, double* %dst, align 1, !nontemporal !1
+ ret void
+}
+
+define void @test_arg_i64(i64 %arg, i64* %dst) {
+; SSE-LABEL: test_arg_i64:
+; SSE: # BB#0:
+; SSE-NEXT: movntiq %rdi, (%rsi)
+; SSE-NEXT: retq
+;
+; AVX-LABEL: test_arg_i64:
+; AVX: # BB#0:
+; AVX-NEXT: movntiq %rdi, (%rsi)
+; AVX-NEXT: retq
+;
+; VLX-LABEL: test_arg_i64:
+; VLX: # BB#0:
+; VLX-NEXT: movntiq %rdi, (%rsi)
+; VLX-NEXT: retq
+ store i64 %arg, i64* %dst, align 1, !nontemporal !1
+ ret void
+}
+
+; And now XMM versions.
+
define void @test_arg_v4f32(<4 x float> %arg, <4 x float>* %dst) {
; SSE-LABEL: test_arg_v4f32:
; SSE: # BB#0: