diff options
-rw-r--r-- | llvm/test/CodeGen/X86/widened-broadcast.ll | 98 |
1 files changed, 98 insertions, 0 deletions
; Reviewer notes (text ahead of the `diff --git` header is not part of any hunk):
;
; This patch appends three functions to widened-broadcast.ll under the
; `; PR34394` marker:
;   load_splat_4i32_2i32_0101, load_splat_8i32_2i32_0101,
;   load_splat_16i32_2i32_0101
; Each one loads a <2 x i32> from %vp and uses shufflevector with a repeating
; <0, 1, 0, 1, ...> mask to produce a <4 x i32>, <8 x i32> or <16 x i32> result.
;
; The `; SSE`, `; AVX1`, `; AVX2` and `; AVX512` lines are FileCheck assertions
; on the generated assembly. The RUN lines that map these prefixes to llc
; options are outside this hunk - TODO confirm against the top of the file.
; NOTE(review): the `{{.*#+}}` patterns look script-generated (presumably
; utils/update_llc_test_checks.py); regenerate rather than hand-edit - confirm
; against the file header.
;
; As recorded here, the AVX2/AVX512 checks for the <4 x i32> case expect a
; single `vpbroadcastq (%rdi), %xmm0`, while the <8 x i32> and <16 x i32>
; cases expect a vpmovzxdq load followed by shuffle/permute instructions.
; NOTE(review): presumably that longer sequence is what PR34394 tracks -
; verify against the bug report.
;
; Hunk arithmetic: 3 context lines + 98 added lines = 101, matching the
; `@@ -560,3 +560,101 @@` header.
diff --git a/llvm/test/CodeGen/X86/widened-broadcast.ll b/llvm/test/CodeGen/X86/widened-broadcast.ll
index bd38d901cc8..1588078a390 100644
--- a/llvm/test/CodeGen/X86/widened-broadcast.ll
+++ b/llvm/test/CodeGen/X86/widened-broadcast.ll
@@ -560,3 +560,101 @@ entry:
   %ret = shufflevector <16 x float> %ld, <16 x float> undef, <8 x i32> <i32 8, i32 9, i32 8, i32 9, i32 8, i32 9, i32 8, i32 9>
   ret <8 x float> %ret
 }
+
+; PR34394
+define <4 x i32> @load_splat_4i32_2i32_0101(<2 x i32>* %vp) {
+; SSE-LABEL: load_splat_4i32_2i32_0101:
+; SSE:       # %bb.0:
+; SSE-NEXT:    movq {{.*#+}} xmm0 = mem[0],zero
+; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
+; SSE-NEXT:    retq
+;
+; AVX1-LABEL: load_splat_4i32_2i32_0101:
+; AVX1:       # %bb.0:
+; AVX1-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
+; AVX1-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[0,1,0,1]
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: load_splat_4i32_2i32_0101:
+; AVX2:       # %bb.0:
+; AVX2-NEXT:    vpbroadcastq (%rdi), %xmm0
+; AVX2-NEXT:    retq
+;
+; AVX512-LABEL: load_splat_4i32_2i32_0101:
+; AVX512:       # %bb.0:
+; AVX512-NEXT:    vpbroadcastq (%rdi), %xmm0
+; AVX512-NEXT:    retq
+  %vec = load <2 x i32>, <2 x i32>* %vp
+  %res = shufflevector <2 x i32> %vec, <2 x i32> undef, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
+  ret <4 x i32> %res
+}
+
+define <8 x i32> @load_splat_8i32_2i32_0101(<2 x i32>* %vp) {
+; SSE-LABEL: load_splat_8i32_2i32_0101:
+; SSE:       # %bb.0:
+; SSE-NEXT:    movq {{.*#+}} xmm0 = mem[0],zero
+; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
+; SSE-NEXT:    movdqa %xmm0, %xmm1
+; SSE-NEXT:    retq
+;
+; AVX1-LABEL: load_splat_8i32_2i32_0101:
+; AVX1:       # %bb.0:
+; AVX1-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
+; AVX1-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[0,1,0,1]
+; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: load_splat_8i32_2i32_0101:
+; AVX2:       # %bb.0:
+; AVX2-NEXT:    vpmovzxdq {{.*#+}} xmm0 = mem[0],zero,mem[1],zero
+; AVX2-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
+; AVX2-NEXT:    vpbroadcastq %xmm0, %ymm0
+; AVX2-NEXT:    retq
+;
+; AVX512-LABEL: load_splat_8i32_2i32_0101:
+; AVX512:       # %bb.0:
+; AVX512-NEXT:    vpmovzxdq {{.*#+}} xmm0 = mem[0],zero,mem[1],zero
+; AVX512-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
+; AVX512-NEXT:    vpbroadcastq %xmm0, %ymm0
+; AVX512-NEXT:    retq
+  %vec = load <2 x i32>, <2 x i32>* %vp
+  %res = shufflevector <2 x i32> %vec, <2 x i32> undef, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
+  ret <8 x i32> %res
+}
+
+define <16 x i32> @load_splat_16i32_2i32_0101(<2 x i32>* %vp) {
+; SSE-LABEL: load_splat_16i32_2i32_0101:
+; SSE:       # %bb.0:
+; SSE-NEXT:    movq {{.*#+}} xmm0 = mem[0],zero
+; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
+; SSE-NEXT:    movdqa %xmm0, %xmm1
+; SSE-NEXT:    movdqa %xmm0, %xmm2
+; SSE-NEXT:    movdqa %xmm0, %xmm3
+; SSE-NEXT:    retq
+;
+; AVX1-LABEL: load_splat_16i32_2i32_0101:
+; AVX1:       # %bb.0:
+; AVX1-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
+; AVX1-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[0,1,0,1]
+; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
+; AVX1-NEXT:    vmovaps %ymm0, %ymm1
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: load_splat_16i32_2i32_0101:
+; AVX2:       # %bb.0:
+; AVX2-NEXT:    vpmovzxdq {{.*#+}} xmm0 = mem[0],zero,mem[1],zero
+; AVX2-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
+; AVX2-NEXT:    vpbroadcastq %xmm0, %ymm0
+; AVX2-NEXT:    vmovdqa %ymm0, %ymm1
+; AVX2-NEXT:    retq
+;
+; AVX512-LABEL: load_splat_16i32_2i32_0101:
+; AVX512:       # %bb.0:
+; AVX512-NEXT:    vpmovzxdq {{.*#+}} xmm0 = mem[0],zero,mem[1],zero
+; AVX512-NEXT:    vmovdqa64 {{.*#+}} zmm1 = [0,2,0,2,0,2,0,2,0,2,0,2,0,2,0,2]
+; AVX512-NEXT:    vpermd %zmm0, %zmm1, %zmm0
+; AVX512-NEXT:    retq
+  %vec = load <2 x i32>, <2 x i32>* %vp
+  %res = shufflevector <2 x i32> %vec, <2 x i32> undef, <16 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
+  ret <16 x i32> %res
+}
|