author    Simon Pilgrim <llvm-dev@redking.me.uk>  2016-07-23 16:19:17 +0000
committer Simon Pilgrim <llvm-dev@redking.me.uk>  2016-07-23 16:19:17 +0000
commit    b9e47a8cd0c648422d401d6620130bf06c60fc6f (patch)
tree      3bd6af698f89675ac4f1c87f2a1b156d37149398 /llvm/test/CodeGen
parent    8aa6f34455695092ac1046a70386f8a7bf98054a (diff)
download  bcm5719-llvm-b9e47a8cd0c648422d401d6620130bf06c60fc6f.tar.gz
          bcm5719-llvm-b9e47a8cd0c648422d401d6620130bf06c60fc6f.zip
[X86][SSE] Added tests where we should be trying to widen a load+splat into a broadcast
llvm-svn: 276527
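
As a hedged illustration (not part of this commit), the widened form these tests are hoping for can be sketched by hand in IR: instead of loading the full vector and splatting its low two 32-bit lanes, narrow the load to a single 64-bit element and splat that, which AVX2 can lower to a single vpbroadcastq from memory. The function name below is made up for illustration, and it assumes the narrowed load is legal at this address:

; Hand-written sketch of the widened load+splat; names are illustrative only.
define <4 x i32> @splat_as_broadcast(<4 x i32>* %ptr) {
  %p64  = bitcast <4 x i32>* %ptr to i64*
  %elt  = load i64, i64* %p64                            ; load only the low 8 bytes
  %vec  = insertelement <2 x i64> undef, i64 %elt, i32 0
  %bcst = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> zeroinitializer
  %ret  = bitcast <2 x i64> %bcst to <4 x i32>           ; same bits as the <0,1,0,1> splat
  ret <4 x i32> %ret
}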
Diffstat (limited to 'llvm/test/CodeGen')
-rw-r--r--  llvm/test/CodeGen/X86/widened-broadcast.ll  149
1 file changed, 149 insertions, 0 deletions
diff --git a/llvm/test/CodeGen/X86/widened-broadcast.ll b/llvm/test/CodeGen/X86/widened-broadcast.ll
new file mode 100644
index 00000000000..0c71db5abef
--- /dev/null
+++ b/llvm/test/CodeGen/X86/widened-broadcast.ll
@@ -0,0 +1,149 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefix=SSE --check-prefix=SSE2
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.2 | FileCheck %s --check-prefix=SSE --check-prefix=SSE42
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefix=AVX --check-prefix=AVX1
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefix=AVX --check-prefix=AVX2
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f | FileCheck %s --check-prefix=AVX --check-prefix=AVX512 --check-prefix=AVX512F
+
+; Widened shuffle broadcast loads
+
+define <4 x i32> @load_splat_4i32_4i32_0101(<4 x i32>* %ptr) nounwind uwtable readnone ssp {
+; SSE-LABEL: load_splat_4i32_4i32_0101:
+; SSE: # BB#0: # %entry
+; SSE-NEXT: pshufd {{.*#+}} xmm0 = mem[0,1,0,1]
+; SSE-NEXT: retq
+;
+; AVX1-LABEL: load_splat_4i32_4i32_0101:
+; AVX1: # BB#0: # %entry
+; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = mem[0,1,0,1]
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: load_splat_4i32_4i32_0101:
+; AVX2: # BB#0: # %entry
+; AVX2-NEXT: vpbroadcastq (%rdi), %xmm0
+; AVX2-NEXT: retq
+;
+; AVX512-LABEL: load_splat_4i32_4i32_0101:
+; AVX512: # BB#0: # %entry
+; AVX512-NEXT: vpbroadcastq (%rdi), %xmm0
+; AVX512-NEXT: retq
+entry:
+ %ld = load <4 x i32>, <4 x i32>* %ptr
+ %ret = shufflevector <4 x i32> %ld, <4 x i32> undef, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
+ ret <4 x i32> %ret
+}
+
+define <8 x i32> @load_splat_8i32_4i32_01010101(<4 x i32>* %ptr) nounwind uwtable readnone ssp {
+; SSE-LABEL: load_splat_8i32_4i32_01010101:
+; SSE: # BB#0: # %entry
+; SSE-NEXT: pshufd {{.*#+}} xmm0 = mem[0,1,0,1]
+; SSE-NEXT: movdqa %xmm0, %xmm1
+; SSE-NEXT: retq
+;
+; AVX1-LABEL: load_splat_8i32_4i32_01010101:
+; AVX1: # BB#0: # %entry
+; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = mem[0,1,0,1]
+; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: load_splat_8i32_4i32_01010101:
+; AVX2: # BB#0: # %entry
+; AVX2-NEXT: vmovaps (%rdi), %xmm0
+; AVX2-NEXT: vbroadcastsd %xmm0, %ymm0
+; AVX2-NEXT: retq
+;
+; AVX512-LABEL: load_splat_8i32_4i32_01010101:
+; AVX512: # BB#0: # %entry
+; AVX512-NEXT: vmovaps (%rdi), %xmm0
+; AVX512-NEXT: vbroadcastsd %xmm0, %ymm0
+; AVX512-NEXT: retq
+entry:
+ %ld = load <4 x i32>, <4 x i32>* %ptr
+ %ret = shufflevector <4 x i32> %ld, <4 x i32> undef, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
+ ret <8 x i32> %ret
+}
+
+define <8 x i32> @load_splat_8i32_8i32_01010101(<8 x i32>* %ptr) nounwind uwtable readnone ssp {
+; SSE-LABEL: load_splat_8i32_8i32_01010101:
+; SSE: # BB#0: # %entry
+; SSE-NEXT: pshufd {{.*#+}} xmm0 = mem[0,1,0,1]
+; SSE-NEXT: movdqa %xmm0, %xmm1
+; SSE-NEXT: retq
+;
+; AVX1-LABEL: load_splat_8i32_8i32_01010101:
+; AVX1: # BB#0: # %entry
+; AVX1-NEXT: vmovapd (%rdi), %ymm0
+; AVX1-NEXT: vmovddup {{.*#+}} xmm0 = xmm0[0,0]
+; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: load_splat_8i32_8i32_01010101:
+; AVX2: # BB#0: # %entry
+; AVX2-NEXT: vbroadcastsd (%rdi), %ymm0
+; AVX2-NEXT: retq
+;
+; AVX512-LABEL: load_splat_8i32_8i32_01010101:
+; AVX512: # BB#0: # %entry
+; AVX512-NEXT: vbroadcastsd (%rdi), %ymm0
+; AVX512-NEXT: retq
+entry:
+ %ld = load <8 x i32>, <8 x i32>* %ptr
+ %ret = shufflevector <8 x i32> %ld, <8 x i32> undef, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
+ ret <8 x i32> %ret
+}
+
+define <8 x i16> @load_splat_8i16_8i16_01010101(<8 x i16>* %ptr) nounwind uwtable readnone ssp {
+; SSE-LABEL: load_splat_8i16_8i16_01010101:
+; SSE: # BB#0: # %entry
+; SSE-NEXT: pshufd {{.*#+}} xmm0 = mem[0,0,0,0]
+; SSE-NEXT: retq
+;
+; AVX1-LABEL: load_splat_8i16_8i16_01010101:
+; AVX1: # BB#0: # %entry
+; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = mem[0,0,0,0]
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: load_splat_8i16_8i16_01010101:
+; AVX2: # BB#0: # %entry
+; AVX2-NEXT: vbroadcastss (%rdi), %xmm0
+; AVX2-NEXT: retq
+;
+; AVX512-LABEL: load_splat_8i16_8i16_01010101:
+; AVX512: # BB#0: # %entry
+; AVX512-NEXT: vbroadcastss (%rdi), %xmm0
+; AVX512-NEXT: retq
+entry:
+ %ld = load <8 x i16>, <8 x i16>* %ptr
+ %ret = shufflevector <8 x i16> %ld, <8 x i16> undef, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
+ ret <8 x i16> %ret
+}
+
+define <16 x i16> @load_splat_16i16_16i16_01010101(<16 x i16>* %ptr) nounwind uwtable readnone ssp {
+; SSE-LABEL: load_splat_16i16_16i16_01010101:
+; SSE: # BB#0: # %entry
+; SSE-NEXT: pshufd {{.*#+}} xmm0 = mem[0,0,0,0]
+; SSE-NEXT: movdqa %xmm0, %xmm1
+; SSE-NEXT: retq
+;
+; AVX1-LABEL: load_splat_16i16_16i16_01010101:
+; AVX1: # BB#0: # %entry
+; AVX1-NEXT: vmovaps (%rdi), %ymm0
+; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,0,0,0]
+; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: load_splat_16i16_16i16_01010101:
+; AVX2: # BB#0: # %entry
+; AVX2-NEXT: vbroadcastss (%rdi), %ymm0
+; AVX2-NEXT: retq
+;
+; AVX512-LABEL: load_splat_16i16_16i16_01010101:
+; AVX512: # BB#0: # %entry
+; AVX512-NEXT: vbroadcastss (%rdi), %ymm0
+; AVX512-NEXT: retq
+entry:
+ %ld = load <16 x i16>, <16 x i16>* %ptr
+ %ret = shufflevector <16 x i16> %ld, <16 x i16> undef, <16 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
+ ret <16 x i16> %ret
+}
+