diff options
author | Simon Pilgrim <llvm-dev@redking.me.uk> | 2016-04-16 20:30:59 +0000 |
---|---|---|
committer | Simon Pilgrim <llvm-dev@redking.me.uk> | 2016-04-16 20:30:59 +0000 |
commit | b75744ceae3cdebdd88fe6c2f12d508a611eec7c (patch) | |
tree | 7277cc34c2cf041f0c445943e14361b916b2c42c | |
parent | 1663e7a472a48a69cedb3ca26f4885425f2648bb (diff) | |
download | bcm5719-llvm-b75744ceae3cdebdd88fe6c2f12d508a611eec7c.tar.gz bcm5719-llvm-b75744ceae3cdebdd88fe6c2f12d508a611eec7c.zip |
[X86][AVX] Add shuffle combine tests for MOVDDUP/MOVSHDUP/MOVSLDUP
Adds tests for the 128-, 256- and 512-bit implementations (some are not yet supported by combineX86ShuffleChain).
llvm-svn: 266535
-rw-r--r-- | llvm/test/CodeGen/X86/vector-shuffle-combining-avx.ll | 91 | ||||
-rw-r--r-- | llvm/test/CodeGen/X86/vector-shuffle-combining-avx512bw.ll | 30 |
2 files changed, 121 insertions, 0 deletions
```diff
diff --git a/llvm/test/CodeGen/X86/vector-shuffle-combining-avx.ll b/llvm/test/CodeGen/X86/vector-shuffle-combining-avx.ll
index 469a08b150d..cbc4fc0dee7 100644
--- a/llvm/test/CodeGen/X86/vector-shuffle-combining-avx.ll
+++ b/llvm/test/CodeGen/X86/vector-shuffle-combining-avx.ll
@@ -28,6 +28,51 @@ define <4 x float> @combine_vpermilvar_4f32_identity(<4 x float> %a0) {
 ret <4 x float> %2
 }
 
+define <4 x float> @combine_vpermilvar_4f32_movddup(<4 x float> %a0) {
+; ALL-LABEL: combine_vpermilvar_4f32_movddup:
+; ALL: # BB#0:
+; ALL-NEXT: vmovddup {{.*#+}} xmm0 = xmm0[0,0]
+; ALL-NEXT: retq
+ %1 = tail call <4 x float> @llvm.x86.avx.vpermilvar.ps(<4 x float> %a0, <4 x i32> <i32 0, i32 1, i32 0, i32 1>)
+ ret <4 x float> %1
+}
+
+define <4 x float> @combine_vpermilvar_4f32_movshdup(<4 x float> %a0) {
+; ALL-LABEL: combine_vpermilvar_4f32_movshdup:
+; ALL: # BB#0:
+; ALL-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
+; ALL-NEXT: retq
+ %1 = tail call <4 x float> @llvm.x86.avx.vpermilvar.ps(<4 x float> %a0, <4 x i32> <i32 1, i32 1, i32 3, i32 3>)
+ ret <4 x float> %1
+}
+
+define <4 x float> @combine_vpermilvar_4f32_movsldup(<4 x float> %a0) {
+; ALL-LABEL: combine_vpermilvar_4f32_movsldup:
+; ALL: # BB#0:
+; ALL-NEXT: vmovsldup {{.*#+}} xmm0 = xmm0[0,0,2,2]
+; ALL-NEXT: retq
+ %1 = tail call <4 x float> @llvm.x86.avx.vpermilvar.ps(<4 x float> %a0, <4 x i32> <i32 0, i32 0, i32 2, i32 2>)
+ ret <4 x float> %1
+}
+
+define <4 x float> @combine_vpermilvar_4f32_unpckh(<4 x float> %a0) {
+; ALL-LABEL: combine_vpermilvar_4f32_unpckh:
+; ALL: # BB#0:
+; ALL-NEXT: vunpckhps {{.*#+}} xmm0 = xmm0[2,2,3,3]
+; ALL-NEXT: retq
+ %1 = tail call <4 x float> @llvm.x86.avx.vpermilvar.ps(<4 x float> %a0, <4 x i32> <i32 2, i32 2, i32 3, i32 3>)
+ ret <4 x float> %1
+}
+
+define <4 x float> @combine_vpermilvar_4f32_unpckl(<4 x float> %a0) {
+; ALL-LABEL: combine_vpermilvar_4f32_unpckl:
+; ALL: # BB#0:
+; ALL-NEXT: vunpcklps {{.*#+}} xmm0 = xmm0[0,0,1,1]
+; ALL-NEXT: retq
+ %1 = tail call <4 x float> @llvm.x86.avx.vpermilvar.ps(<4 x float> %a0, <4 x i32> <i32 0, i32 0, i32 1, i32 1>)
+ ret <4 x float> %1
+}
+
 define <8 x float> @combine_vpermilvar_8f32_identity(<8 x float> %a0) {
 ; ALL-LABEL: combine_vpermilvar_8f32_identity:
 ; ALL: # BB#0:
@@ -37,6 +82,33 @@ define <8 x float> @combine_vpermilvar_8f32_identity(<8 x float> %a0) {
 ret <8 x float> %2
 }
 
+define <8 x float> @combine_vpermilvar_8f32_movddup(<8 x float> %a0) {
+; ALL-LABEL: combine_vpermilvar_8f32_movddup:
+; ALL: # BB#0:
+; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,1,0,1,4,5,4,5]
+; ALL-NEXT: retq
+ %1 = tail call <8 x float> @llvm.x86.avx.vpermilvar.ps.256(<8 x float> %a0, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 4, i32 5, i32 4, i32 5>)
+ ret <8 x float> %1
+}
+
+define <8 x float> @combine_vpermilvar_8f32_movshdup(<8 x float> %a0) {
+; ALL-LABEL: combine_vpermilvar_8f32_movshdup:
+; ALL: # BB#0:
+; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[1,1,3,3,5,5,7,7]
+; ALL-NEXT: retq
+ %1 = tail call <8 x float> @llvm.x86.avx.vpermilvar.ps.256(<8 x float> %a0, <8 x i32> <i32 1, i32 1, i32 3, i32 3, i32 5, i32 5, i32 7, i32 7>)
+ ret <8 x float> %1
+}
+
+define <8 x float> @combine_vpermilvar_8f32_movsldup(<8 x float> %a0) {
+; ALL-LABEL: combine_vpermilvar_8f32_movsldup:
+; ALL: # BB#0:
+; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,0,2,2,4,4,6,6]
+; ALL-NEXT: retq
+ %1 = tail call <8 x float> @llvm.x86.avx.vpermilvar.ps.256(<8 x float> %a0, <8 x i32> <i32 0, i32 0, i32 2, i32 2, i32 4, i32 4, i32 6, i32 6>)
+ ret <8 x float> %1
+}
+
 define <2 x double> @combine_vpermilvar_2f64_identity(<2 x double> %a0) {
 ; ALL-LABEL: combine_vpermilvar_2f64_identity:
 ; ALL: # BB#0:
@@ -46,6 +118,16 @@ define <2 x double> @combine_vpermilvar_2f64_identity(<2 x double> %a0) {
 ret <2 x double> %2
 }
 
+define <2 x double> @combine_vpermilvar_2f64_movddup(<2 x double> %a0) {
+; ALL-LABEL: combine_vpermilvar_2f64_movddup:
+; ALL: # BB#0:
+; ALL-NEXT: vxorpd %xmm1, %xmm1, %xmm1
+; ALL-NEXT: vpermilpd %xmm1, %xmm0, %xmm0
+; ALL-NEXT: retq
+ %1 = tail call <2 x double> @llvm.x86.avx.vpermilvar.pd(<2 x double> %a0, <2 x i64> <i64 0, i64 0>)
+ ret <2 x double> %1
+}
+
 define <4 x double> @combine_vpermilvar_4f64_identity(<4 x double> %a0) {
 ; ALL-LABEL: combine_vpermilvar_4f64_identity:
 ; ALL: # BB#0:
@@ -55,6 +137,15 @@ define <4 x double> @combine_vpermilvar_4f64_identity(<4 x double> %a0) {
 ret <4 x double> %2
 }
 
+define <4 x double> @combine_vpermilvar_4f64_movddup(<4 x double> %a0) {
+; ALL-LABEL: combine_vpermilvar_4f64_movddup:
+; ALL: # BB#0:
+; ALL-NEXT: vpermilpd {{.*#+}} ymm0 = ymm0[0,0,2,2]
+; ALL-NEXT: retq
+ %1 = tail call <4 x double> @llvm.x86.avx.vpermilvar.pd.256(<4 x double> %a0, <4 x i64> <i64 0, i64 0, i64 4, i64 4>)
+ ret <4 x double> %1
+}
+
 define <4 x float> @combine_vpermilvar_4f32_4stage(<4 x float> %a0) {
 ; ALL-LABEL: combine_vpermilvar_4f32_4stage:
 ; ALL: # BB#0:
diff --git a/llvm/test/CodeGen/X86/vector-shuffle-combining-avx512bw.ll b/llvm/test/CodeGen/X86/vector-shuffle-combining-avx512bw.ll
index e88ce08473e..93f6939b9eb 100644
--- a/llvm/test/CodeGen/X86/vector-shuffle-combining-avx512bw.ll
+++ b/llvm/test/CodeGen/X86/vector-shuffle-combining-avx512bw.ll
@@ -19,6 +19,16 @@ define <8 x double> @combine_vpermt2var_8f64_identity(<8 x double> %x0, <8 x dou
 ret <8 x double> %res1
 }
 
+define <8 x double> @combine_vpermt2var_8f64_movddup(<8 x double> %x0, <8 x double> %x1) {
+; CHECK-LABEL: combine_vpermt2var_8f64_movddup:
+; CHECK: # BB#0:
+; CHECK-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,0,2,2,4,4,6,6]
+; CHECK-NEXT: vpermt2pd %zmm1, %zmm2, %zmm0
+; CHECK-NEXT: retq
+ %res0 = call <8 x double> @llvm.x86.avx512.maskz.vpermt2var.pd.512(<8 x i64> <i64 0, i64 0, i64 2, i64 2, i64 4, i64 4, i64 6, i64 6>, <8 x double> %x0, <8 x double> %x1, i8 -1)
+ ret <8 x double> %res0
+}
+
 define <8 x i64> @combine_vpermt2var_8i64_identity(<8 x i64> %x0, <8 x i64> %x1) {
 ; CHECK-LABEL: combine_vpermt2var_8i64_identity:
 ; CHECK: # BB#0:
@@ -37,6 +47,26 @@ define <16 x float> @combine_vpermt2var_16f32_identity(<16 x float> %x0, <16 x f
 ret <16 x float> %res1
 }
 
+define <16 x float> @combine_vpermt2var_16f32_vmovshdup(<16 x float> %x0, <16 x float> %x1) {
+; CHECK-LABEL: combine_vpermt2var_16f32_vmovshdup:
+; CHECK: # BB#0:
+; CHECK-NEXT: vmovdqa32 {{.*#+}} zmm2 = [1,1,3,3,5,5,7,7,9,9,11,11,13,13,15,15]
+; CHECK-NEXT: vpermt2ps %zmm1, %zmm2, %zmm0
+; CHECK-NEXT: retq
+ %res0 = call <16 x float> @llvm.x86.avx512.maskz.vpermt2var.ps.512(<16 x i32> <i32 1, i32 1, i32 3, i32 3, i32 5, i32 5, i32 7, i32 7, i32 9, i32 9, i32 11, i32 11, i32 13, i32 13, i32 15, i32 15>, <16 x float> %x0, <16 x float> %x1, i16 -1)
+ ret <16 x float> %res0
+}
+
+define <16 x float> @combine_vpermt2var_16f32_vmovsldup(<16 x float> %x0, <16 x float> %x1) {
+; CHECK-LABEL: combine_vpermt2var_16f32_vmovsldup:
+; CHECK: # BB#0:
+; CHECK-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,0,2,2,4,4,6,6,8,8,10,10,12,12,14,14]
+; CHECK-NEXT: vpermt2ps %zmm1, %zmm2, %zmm0
+; CHECK-NEXT: retq
+ %res0 = call <16 x float> @llvm.x86.avx512.maskz.vpermt2var.ps.512(<16 x i32> <i32 0, i32 0, i32 2, i32 2, i32 4, i32 4, i32 6, i32 6, i32 8, i32 8, i32 10, i32 10, i32 12, i32 12, i32 14, i32 14>, <16 x float> %x0, <16 x float> %x1, i16 -1)
+ ret <16 x float> %res0
+}
+
 define <16 x i32> @combine_vpermt2var_16i32_identity(<16 x i32> %x0, <16 x i32> %x1) {
 ; CHECK-LABEL: combine_vpermt2var_16i32_identity:
 ; CHECK: # BB#0:
```