summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Simon Pilgrim <llvm-dev@redking.me.uk> 2015-12-07 09:09:54 +0000
committer: Simon Pilgrim <llvm-dev@redking.me.uk> 2015-12-07 09:09:54 +0000
commit: 12301b08144683eddde15e918a839411a26e38c6 (patch)
tree: 30042545318d6c196ce777952d8cfc30edbee3bf
parent: 1a01c1502737a8e41aa98d450a37985f72e325fe (diff)
download: bcm5719-llvm-12301b08144683eddde15e918a839411a26e38c6.tar.gz
download: bcm5719-llvm-12301b08144683eddde15e918a839411a26e38c6.zip
[X86][AVX] Added tests to load+broadcast non-zero'th vector elements
Baseline for an upcoming patch for PR23022. (llvm-svn: 254898)
-rw-r--r-- llvm/test/CodeGen/X86/avx-vbroadcast.ll 153
-rw-r--r-- llvm/test/CodeGen/X86/avx2-vbroadcast.ll 225
2 files changed, 375 insertions, 3 deletions
diff --git a/llvm/test/CodeGen/X86/avx-vbroadcast.ll b/llvm/test/CodeGen/X86/avx-vbroadcast.ll
index bfc9149b107..5c0f43da876 100644
--- a/llvm/test/CodeGen/X86/avx-vbroadcast.ll
+++ b/llvm/test/CodeGen/X86/avx-vbroadcast.ll
@@ -102,6 +102,159 @@ entry:
ret <4 x i32> %vecinit6.i
}
+; FIXME: Pointer adjusted broadcasts - splats of a non-zero element of a loaded vector (baseline codegen for PR23022)
+
+define <4 x i32> @load_splat_4i32_4i32_1111(<4 x i32>* %ptr) nounwind uwtable readonly ssp { ; readonly (not readnone): the body loads through %ptr
+; CHECK-LABEL: load_splat_4i32_4i32_1111:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vpshufd {{.*#+}} xmm0 = mem[1,1,1,1]
+; CHECK-NEXT: retq
+entry:
+ %ld = load <4 x i32>, <4 x i32>* %ptr ; load the whole <4 x i32> source vector
+ %ret = shufflevector <4 x i32> %ld, <4 x i32> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1> ; splat element 1 across all 4 result lanes
+ ret <4 x i32> %ret
+}
+
+define <8 x i32> @load_splat_8i32_4i32_33333333(<4 x i32>* %ptr) nounwind uwtable readonly ssp { ; readonly (not readnone): the body loads through %ptr
+; CHECK-LABEL: load_splat_8i32_4i32_33333333:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vpermilps {{.*#+}} xmm0 = mem[3,3,3,3]
+; CHECK-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
+; CHECK-NEXT: retq
+entry:
+ %ld = load <4 x i32>, <4 x i32>* %ptr ; load the whole <4 x i32> source vector
+ %ret = shufflevector <4 x i32> %ld, <4 x i32> undef, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3> ; splat element 3 across all 8 lanes of the wider result
+ ret <8 x i32> %ret
+}
+
+define <8 x i32> @load_splat_8i32_8i32_55555555(<8 x i32>* %ptr) nounwind uwtable readonly ssp { ; readonly (not readnone): the body loads through %ptr
+; CHECK-LABEL: load_splat_8i32_8i32_55555555:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vmovaps (%rdi), %ymm0
+; CHECK-NEXT: vextractf128 $1, %ymm0, %xmm0
+; CHECK-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[1,1,1,1]
+; CHECK-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
+; CHECK-NEXT: retq
+entry:
+ %ld = load <8 x i32>, <8 x i32>* %ptr ; load the whole <8 x i32> source vector
+ %ret = shufflevector <8 x i32> %ld, <8 x i32> undef, <8 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5> ; splat element 5 (element 1 of the upper 128-bit half) across all 8 lanes
+ ret <8 x i32> %ret
+}
+
+define <4 x float> @load_splat_4f32_4f32_1111(<4 x float>* %ptr) nounwind uwtable readonly ssp { ; readonly (not readnone): the body loads through %ptr
+; CHECK-LABEL: load_splat_4f32_4f32_1111:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vpermilps {{.*#+}} xmm0 = mem[1,1,1,1]
+; CHECK-NEXT: retq
+entry:
+ %ld = load <4 x float>, <4 x float>* %ptr ; load the whole <4 x float> source vector
+ %ret = shufflevector <4 x float> %ld, <4 x float> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1> ; splat element 1 across all 4 result lanes
+ ret <4 x float> %ret
+}
+
+define <8 x float> @load_splat_8f32_4f32_33333333(<4 x float>* %ptr) nounwind uwtable readonly ssp { ; readonly (not readnone): the body loads through %ptr
+; CHECK-LABEL: load_splat_8f32_4f32_33333333:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vpermilps {{.*#+}} xmm0 = mem[3,3,3,3]
+; CHECK-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
+; CHECK-NEXT: retq
+entry:
+ %ld = load <4 x float>, <4 x float>* %ptr ; load the whole <4 x float> source vector
+ %ret = shufflevector <4 x float> %ld, <4 x float> undef, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3> ; splat element 3 across all 8 lanes of the wider result
+ ret <8 x float> %ret
+}
+
+define <8 x float> @load_splat_8f32_8f32_55555555(<8 x float>* %ptr) nounwind uwtable readonly ssp { ; readonly (not readnone): the body loads through %ptr
+; CHECK-LABEL: load_splat_8f32_8f32_55555555:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vmovaps (%rdi), %ymm0
+; CHECK-NEXT: vextractf128 $1, %ymm0, %xmm0
+; CHECK-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[1,1,1,1]
+; CHECK-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
+; CHECK-NEXT: retq
+entry:
+ %ld = load <8 x float>, <8 x float>* %ptr ; load the whole <8 x float> source vector
+ %ret = shufflevector <8 x float> %ld, <8 x float> undef, <8 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5> ; splat element 5 (element 1 of the upper 128-bit half) across all 8 lanes
+ ret <8 x float> %ret
+}
+
+define <2 x i64> @load_splat_2i64_2i64_1111(<2 x i64>* %ptr) nounwind uwtable readonly ssp { ; readonly (not readnone): the body loads through %ptr
+; CHECK-LABEL: load_splat_2i64_2i64_1111:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vpshufd {{.*#+}} xmm0 = mem[2,3,2,3]
+; CHECK-NEXT: retq
+entry:
+ %ld = load <2 x i64>, <2 x i64>* %ptr ; load the whole <2 x i64> source vector
+ %ret = shufflevector <2 x i64> %ld, <2 x i64> undef, <2 x i32> <i32 1, i32 1> ; splat element 1 into both lanes (the mask is 2 wide despite the _1111 name suffix)
+ ret <2 x i64> %ret
+}
+
+define <4 x i64> @load_splat_4i64_2i64_1111(<2 x i64>* %ptr) nounwind uwtable readonly ssp { ; readonly (not readnone): the body loads through %ptr
+; CHECK-LABEL: load_splat_4i64_2i64_1111:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vmovaps (%rdi), %xmm0
+; CHECK-NEXT: vmovhlps {{.*#+}} xmm0 = xmm0[1,1]
+; CHECK-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
+; CHECK-NEXT: retq
+entry:
+ %ld = load <2 x i64>, <2 x i64>* %ptr ; load the whole <2 x i64> source vector
+ %ret = shufflevector <2 x i64> %ld, <2 x i64> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1> ; splat element 1 across all 4 lanes of the wider result
+ ret <4 x i64> %ret
+}
+
+define <4 x i64> @load_splat_4i64_4i64_2222(<4 x i64>* %ptr) nounwind uwtable readonly ssp { ; readonly (not readnone): the body loads through %ptr
+; CHECK-LABEL: load_splat_4i64_4i64_2222:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vmovapd (%rdi), %ymm0
+; CHECK-NEXT: vextractf128 $1, %ymm0, %xmm0
+; CHECK-NEXT: vmovddup {{.*#+}} xmm0 = xmm0[0,0]
+; CHECK-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
+; CHECK-NEXT: retq
+entry:
+ %ld = load <4 x i64>, <4 x i64>* %ptr ; load the whole <4 x i64> source vector
+ %ret = shufflevector <4 x i64> %ld, <4 x i64> undef, <4 x i32> <i32 2, i32 2, i32 2, i32 2> ; splat element 2 (element 0 of the upper 128-bit half) across all 4 lanes
+ ret <4 x i64> %ret
+}
+
+define <2 x double> @load_splat_2f64_2f64_1111(<2 x double>* %ptr) nounwind uwtable readonly ssp { ; readonly (not readnone): the body loads through %ptr
+; CHECK-LABEL: load_splat_2f64_2f64_1111:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vmovaps (%rdi), %xmm0
+; CHECK-NEXT: vmovhlps {{.*#+}} xmm0 = xmm0[1,1]
+; CHECK-NEXT: retq
+entry:
+ %ld = load <2 x double>, <2 x double>* %ptr ; load the whole <2 x double> source vector
+ %ret = shufflevector <2 x double> %ld, <2 x double> undef, <2 x i32> <i32 1, i32 1> ; splat element 1 into both lanes (the mask is 2 wide despite the _1111 name suffix)
+ ret <2 x double> %ret
+}
+
+define <4 x double> @load_splat_4f64_2f64_1111(<2 x double>* %ptr) nounwind uwtable readonly ssp { ; readonly (not readnone): the body loads through %ptr
+; CHECK-LABEL: load_splat_4f64_2f64_1111:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vmovaps (%rdi), %xmm0
+; CHECK-NEXT: vmovhlps {{.*#+}} xmm0 = xmm0[1,1]
+; CHECK-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
+; CHECK-NEXT: retq
+entry:
+ %ld = load <2 x double>, <2 x double>* %ptr ; load the whole <2 x double> source vector
+ %ret = shufflevector <2 x double> %ld, <2 x double> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1> ; splat element 1 across all 4 lanes of the wider result
+ ret <4 x double> %ret
+}
+
+define <4 x double> @load_splat_4f64_4f64_2222(<4 x double>* %ptr) nounwind uwtable readonly ssp { ; readonly (not readnone): the body loads through %ptr
+; CHECK-LABEL: load_splat_4f64_4f64_2222:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vmovapd (%rdi), %ymm0
+; CHECK-NEXT: vextractf128 $1, %ymm0, %xmm0
+; CHECK-NEXT: vmovddup {{.*#+}} xmm0 = xmm0[0,0]
+; CHECK-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
+; CHECK-NEXT: retq
+entry:
+ %ld = load <4 x double>, <4 x double>* %ptr ; load the whole <4 x double> source vector
+ %ret = shufflevector <4 x double> %ld, <4 x double> undef, <4 x i32> <i32 2, i32 2, i32 2, i32 2> ; splat element 2 (element 0 of the upper 128-bit half) across all 4 lanes
+ ret <4 x double> %ret
+}
+
; Unsupported vbroadcasts
define <2 x i64> @G(i64* %ptr) nounwind uwtable readnone ssp {
diff --git a/llvm/test/CodeGen/X86/avx2-vbroadcast.ll b/llvm/test/CodeGen/X86/avx2-vbroadcast.ll
index 418707cdc23..186f5087365 100644
--- a/llvm/test/CodeGen/X86/avx2-vbroadcast.ll
+++ b/llvm/test/CodeGen/X86/avx2-vbroadcast.ll
@@ -172,6 +172,225 @@ entry:
ret <4 x i64> %q3
}
+; FIXME: Pointer adjusted broadcasts - splats of a non-zero element of a loaded vector (baseline codegen for PR23022)
+
+define <16 x i8> @load_splat_16i8_16i8_1111111111111111(<16 x i8>* %ptr) nounwind uwtable readonly ssp { ; readonly (not readnone): the body loads through %ptr
+; CHECK-LABEL: load_splat_16i8_16i8_1111111111111111:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vmovdqa (%rdi), %xmm0
+; CHECK-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
+; CHECK-NEXT: retq
+entry:
+ %ld = load <16 x i8>, <16 x i8>* %ptr ; load the whole <16 x i8> source vector
+ %ret = shufflevector <16 x i8> %ld, <16 x i8> undef, <16 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1> ; splat element 1 across all 16 result lanes
+ ret <16 x i8> %ret
+}
+
+define <32 x i8> @load_splat_32i8_16i8_11111111111111111111111111111111(<16 x i8>* %ptr) nounwind uwtable readonly ssp { ; readonly (not readnone): the body loads through %ptr
+; CHECK-LABEL: load_splat_32i8_16i8_11111111111111111111111111111111:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vmovdqa (%rdi), %xmm0
+; CHECK-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
+; CHECK-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0
+; CHECK-NEXT: retq
+entry:
+ %ld = load <16 x i8>, <16 x i8>* %ptr ; load the whole <16 x i8> source vector
+ %ret = shufflevector <16 x i8> %ld, <16 x i8> undef, <32 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1> ; splat element 1 across all 32 lanes of the wider result
+ ret <32 x i8> %ret
+}
+
+define <32 x i8> @load_splat_32i8_32i8_11111111111111111111111111111111(<32 x i8>* %ptr) nounwind uwtable readonly ssp { ; readonly (not readnone): the body loads through %ptr
+; CHECK-LABEL: load_splat_32i8_32i8_11111111111111111111111111111111:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vmovdqa (%rdi), %ymm0
+; CHECK-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
+; CHECK-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0
+; CHECK-NEXT: retq
+entry:
+ %ld = load <32 x i8>, <32 x i8>* %ptr ; load the whole <32 x i8> source vector
+ %ret = shufflevector <32 x i8> %ld, <32 x i8> undef, <32 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1> ; splat element 1 across all 32 result lanes
+ ret <32 x i8> %ret
+}
+
+define <8 x i16> @load_splat_8i16_8i16_11111111(<8 x i16>* %ptr) nounwind uwtable readonly ssp { ; readonly (not readnone): the body loads through %ptr
+; CHECK-LABEL: load_splat_8i16_8i16_11111111:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vmovdqa (%rdi), %xmm0
+; CHECK-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[2,3,2,3,2,3,2,3,2,3,2,3,2,3,2,3]
+; CHECK-NEXT: retq
+entry:
+ %ld = load <8 x i16>, <8 x i16>* %ptr ; load the whole <8 x i16> source vector
+ %ret = shufflevector <8 x i16> %ld, <8 x i16> undef, <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1> ; splat element 1 across all 8 result lanes
+ ret <8 x i16> %ret
+}
+
+define <16 x i16> @load_splat_16i16_8i16_1111111111111111(<8 x i16>* %ptr) nounwind uwtable readonly ssp { ; readonly (not readnone): the body loads through %ptr
+; CHECK-LABEL: load_splat_16i16_8i16_1111111111111111:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vmovdqa (%rdi), %xmm0
+; CHECK-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[2,3,2,3,2,3,2,3,2,3,2,3,2,3,2,3]
+; CHECK-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0
+; CHECK-NEXT: retq
+entry:
+ %ld = load <8 x i16>, <8 x i16>* %ptr ; load the whole <8 x i16> source vector
+ %ret = shufflevector <8 x i16> %ld, <8 x i16> undef, <16 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1> ; splat element 1 across all 16 lanes of the wider result
+ ret <16 x i16> %ret
+}
+
+define <16 x i16> @load_splat_16i16_16i16_1111111111111111(<16 x i16>* %ptr) nounwind uwtable readonly ssp { ; readonly (not readnone): the body loads through %ptr
+; CHECK-LABEL: load_splat_16i16_16i16_1111111111111111:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vmovdqa (%rdi), %ymm0
+; CHECK-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[2,3,2,3,2,3,2,3,2,3,2,3,2,3,2,3]
+; CHECK-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0
+; CHECK-NEXT: retq
+entry:
+ %ld = load <16 x i16>, <16 x i16>* %ptr ; load the whole <16 x i16> source vector
+ %ret = shufflevector <16 x i16> %ld, <16 x i16> undef, <16 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1> ; splat element 1 across all 16 result lanes
+ ret <16 x i16> %ret
+}
+
+define <4 x i32> @load_splat_4i32_4i32_1111(<4 x i32>* %ptr) nounwind uwtable readonly ssp { ; readonly (not readnone): the body loads through %ptr
+; CHECK-LABEL: load_splat_4i32_4i32_1111:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vpshufd {{.*#+}} xmm0 = mem[1,1,1,1]
+; CHECK-NEXT: retq
+entry:
+ %ld = load <4 x i32>, <4 x i32>* %ptr ; load the whole <4 x i32> source vector
+ %ret = shufflevector <4 x i32> %ld, <4 x i32> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1> ; splat element 1 across all 4 result lanes
+ ret <4 x i32> %ret
+}
+
+define <8 x i32> @load_splat_8i32_4i32_33333333(<4 x i32>* %ptr) nounwind uwtable readonly ssp { ; readonly (not readnone): the body loads through %ptr
+; CHECK-LABEL: load_splat_8i32_4i32_33333333:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vmovdqa (%rdi), %xmm0
+; CHECK-NEXT: vpbroadcastd LCPI15_0(%rip), %ymm1
+; CHECK-NEXT: vpermd %ymm0, %ymm1, %ymm0
+; CHECK-NEXT: retq
+entry:
+ %ld = load <4 x i32>, <4 x i32>* %ptr ; load the whole <4 x i32> source vector
+ %ret = shufflevector <4 x i32> %ld, <4 x i32> undef, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3> ; splat element 3 across all 8 lanes of the wider result
+ ret <8 x i32> %ret
+}
+
+define <8 x i32> @load_splat_8i32_8i32_55555555(<8 x i32>* %ptr) nounwind uwtable readonly ssp { ; readonly (not readnone): the body loads through %ptr
+; CHECK-LABEL: load_splat_8i32_8i32_55555555:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vpbroadcastd LCPI16_0(%rip), %ymm0
+; CHECK-NEXT: vpermd (%rdi), %ymm0, %ymm0
+; CHECK-NEXT: retq
+entry:
+ %ld = load <8 x i32>, <8 x i32>* %ptr ; load the whole <8 x i32> source vector
+ %ret = shufflevector <8 x i32> %ld, <8 x i32> undef, <8 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5> ; splat element 5 across all 8 result lanes
+ ret <8 x i32> %ret
+}
+
+define <4 x float> @load_splat_4f32_4f32_1111(<4 x float>* %ptr) nounwind uwtable readonly ssp { ; readonly (not readnone): the body loads through %ptr
+; CHECK-LABEL: load_splat_4f32_4f32_1111:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vpermilps {{.*#+}} xmm0 = mem[1,1,1,1]
+; CHECK-NEXT: retq
+entry:
+ %ld = load <4 x float>, <4 x float>* %ptr ; load the whole <4 x float> source vector
+ %ret = shufflevector <4 x float> %ld, <4 x float> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1> ; splat element 1 across all 4 result lanes
+ ret <4 x float> %ret
+}
+
+define <8 x float> @load_splat_8f32_4f32_33333333(<4 x float>* %ptr) nounwind uwtable readonly ssp { ; readonly (not readnone): the body loads through %ptr
+; CHECK-LABEL: load_splat_8f32_4f32_33333333:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vmovaps (%rdi), %xmm0
+; CHECK-NEXT: vbroadcastss LCPI18_0(%rip), %ymm1
+; CHECK-NEXT: vpermps %ymm0, %ymm1, %ymm0
+; CHECK-NEXT: retq
+entry:
+ %ld = load <4 x float>, <4 x float>* %ptr ; load the whole <4 x float> source vector
+ %ret = shufflevector <4 x float> %ld, <4 x float> undef, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3> ; splat element 3 across all 8 lanes of the wider result
+ ret <8 x float> %ret
+}
+
+define <8 x float> @load_splat_8f32_8f32_55555555(<8 x float>* %ptr) nounwind uwtable readonly ssp { ; readonly (not readnone): the body loads through %ptr
+; CHECK-LABEL: load_splat_8f32_8f32_55555555:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vbroadcastss LCPI19_0(%rip), %ymm0
+; CHECK-NEXT: vpermps (%rdi), %ymm0, %ymm0
+; CHECK-NEXT: retq
+entry:
+ %ld = load <8 x float>, <8 x float>* %ptr ; load the whole <8 x float> source vector
+ %ret = shufflevector <8 x float> %ld, <8 x float> undef, <8 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5> ; splat element 5 across all 8 result lanes
+ ret <8 x float> %ret
+}
+
+define <2 x i64> @load_splat_2i64_2i64_1111(<2 x i64>* %ptr) nounwind uwtable readonly ssp { ; readonly (not readnone): the body loads through %ptr
+; CHECK-LABEL: load_splat_2i64_2i64_1111:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vpshufd {{.*#+}} xmm0 = mem[2,3,2,3]
+; CHECK-NEXT: retq
+entry:
+ %ld = load <2 x i64>, <2 x i64>* %ptr ; load the whole <2 x i64> source vector
+ %ret = shufflevector <2 x i64> %ld, <2 x i64> undef, <2 x i32> <i32 1, i32 1> ; splat element 1 into both lanes (the mask is 2 wide despite the _1111 name suffix)
+ ret <2 x i64> %ret
+}
+
+define <4 x i64> @load_splat_4i64_2i64_1111(<2 x i64>* %ptr) nounwind uwtable readonly ssp { ; readonly (not readnone): the body loads through %ptr
+; CHECK-LABEL: load_splat_4i64_2i64_1111:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vmovdqa (%rdi), %xmm0
+; CHECK-NEXT: vpermq {{.*#+}} ymm0 = ymm0[1,1,1,1]
+; CHECK-NEXT: retq
+entry:
+ %ld = load <2 x i64>, <2 x i64>* %ptr ; load the whole <2 x i64> source vector
+ %ret = shufflevector <2 x i64> %ld, <2 x i64> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1> ; splat element 1 across all 4 lanes of the wider result
+ ret <4 x i64> %ret
+}
+
+define <4 x i64> @load_splat_4i64_4i64_2222(<4 x i64>* %ptr) nounwind uwtable readonly ssp { ; readonly (not readnone): the body loads through %ptr
+; CHECK-LABEL: load_splat_4i64_4i64_2222:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vpermq {{.*#+}} ymm0 = mem[2,2,2,2]
+; CHECK-NEXT: retq
+entry:
+ %ld = load <4 x i64>, <4 x i64>* %ptr ; load the whole <4 x i64> source vector
+ %ret = shufflevector <4 x i64> %ld, <4 x i64> undef, <4 x i32> <i32 2, i32 2, i32 2, i32 2> ; splat element 2 across all 4 result lanes
+ ret <4 x i64> %ret
+}
+
+define <2 x double> @load_splat_2f64_2f64_1111(<2 x double>* %ptr) nounwind uwtable readonly ssp { ; readonly (not readnone): the body loads through %ptr
+; CHECK-LABEL: load_splat_2f64_2f64_1111:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vmovaps (%rdi), %xmm0
+; CHECK-NEXT: vmovhlps {{.*#+}} xmm0 = xmm0[1,1]
+; CHECK-NEXT: retq
+entry:
+ %ld = load <2 x double>, <2 x double>* %ptr ; load the whole <2 x double> source vector
+ %ret = shufflevector <2 x double> %ld, <2 x double> undef, <2 x i32> <i32 1, i32 1> ; splat element 1 into both lanes (the mask is 2 wide despite the _1111 name suffix)
+ ret <2 x double> %ret
+}
+
+define <4 x double> @load_splat_4f64_2f64_1111(<2 x double>* %ptr) nounwind uwtable readonly ssp { ; readonly (not readnone): the body loads through %ptr
+; CHECK-LABEL: load_splat_4f64_2f64_1111:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vmovapd (%rdi), %xmm0
+; CHECK-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[1,1,1,1]
+; CHECK-NEXT: retq
+entry:
+ %ld = load <2 x double>, <2 x double>* %ptr ; load the whole <2 x double> source vector
+ %ret = shufflevector <2 x double> %ld, <2 x double> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1> ; splat element 1 across all 4 lanes of the wider result
+ ret <4 x double> %ret
+}
+
+define <4 x double> @load_splat_4f64_4f64_2222(<4 x double>* %ptr) nounwind uwtable readonly ssp { ; readonly (not readnone): the body loads through %ptr
+; CHECK-LABEL: load_splat_4f64_4f64_2222:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vpermpd {{.*#+}} ymm0 = mem[2,2,2,2]
+; CHECK-NEXT: retq
+entry:
+ %ld = load <4 x double>, <4 x double>* %ptr ; load the whole <4 x double> source vector
+ %ret = shufflevector <4 x double> %ld, <4 x double> undef, <4 x i32> <i32 2, i32 2, i32 2, i32 2> ; splat element 2 across all 4 result lanes
+ ret <4 x double> %ret
+}
+
; make sure that we still don't support broadcast double into 128-bit vector
; this used to crash
define <2 x double> @I(double* %ptr) nounwind uwtable readnone ssp {
@@ -242,13 +461,13 @@ define void @crash() nounwind alwaysinline {
; CHECK: ## BB#0: ## %WGLoopsEntry
; CHECK-NEXT: xorl %eax, %eax
; CHECK-NEXT: testb %al, %al
-; CHECK-NEXT: je LBB13_1
+; CHECK-NEXT: je LBB31_1
; CHECK-NEXT: ## BB#2: ## %ret
; CHECK-NEXT: retq
; CHECK-NEXT: .align 4, 0x90
-; CHECK-NEXT: LBB13_1: ## %footer349VF
+; CHECK-NEXT: LBB31_1: ## %footer349VF
; CHECK-NEXT: ## =>This Inner Loop Header: Depth=1
-; CHECK-NEXT: jmp LBB13_1
+; CHECK-NEXT: jmp LBB31_1
WGLoopsEntry:
br i1 undef, label %ret, label %footer329VF
OpenPOWER on IntegriCloud