diff options
Diffstat (limited to 'llvm/test')
| -rw-r--r-- | llvm/test/CodeGen/X86/avx-intrinsics-x86-upgrade.ll | 63 | ||||
| -rw-r--r-- | llvm/test/CodeGen/X86/avx-intrinsics-x86.ll | 36 | ||||
| -rw-r--r-- | llvm/test/CodeGen/X86/avx2-intrinsics-x86-upgrade.ll | 19 | ||||
| -rw-r--r-- | llvm/test/CodeGen/X86/avx2-intrinsics-x86.ll | 40 | ||||
| -rw-r--r-- | llvm/test/CodeGen/X86/x86-vperm2.ll | 229 | ||||
| -rw-r--r-- | llvm/test/Transforms/InstCombine/X86/x86-vperm2.ll | 313 |
6 files changed, 323 insertions, 377 deletions
diff --git a/llvm/test/CodeGen/X86/avx-intrinsics-x86-upgrade.ll b/llvm/test/CodeGen/X86/avx-intrinsics-x86-upgrade.ll index 4e015c2da85..7ff43f60bba 100644 --- a/llvm/test/CodeGen/X86/avx-intrinsics-x86-upgrade.ll +++ b/llvm/test/CodeGen/X86/avx-intrinsics-x86-upgrade.ll @@ -762,3 +762,66 @@ define <8 x float> @test_x86_avx_vpermil_ps_256(<8 x float> %a0) { ret <8 x float> %res } declare <8 x float> @llvm.x86.avx.vpermil.ps.256(<8 x float>, i8) nounwind readnone + + +define <4 x double> @test_x86_avx_vperm2f128_pd_256(<4 x double> %a0, <4 x double> %a1) { +; CHECK-LABEL: test_x86_avx_vperm2f128_pd_256: +; CHECK: # BB#0: +; CHECK-NEXT: vperm2f128 $33, %ymm0, %ymm1, %ymm0 # encoding: [0xc4,0xe3,0x75,0x06,0xc0,0x21] +; CHECK-NEXT: # ymm0 = ymm1[2,3],ymm0[0,1] +; CHECK-NEXT: retl # encoding: [0xc3] +; X86-LABEL: test_x86_avx_vperm2f128_pd_256: +; X86: # BB#0: +; X86-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm1[2,3],ymm0[0,1] +; X86-NEXT: retl +; +; X64-LABEL: test_x86_avx_vperm2f128_pd_256: +; X64: # BB#0: +; X64-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm1[2,3],ymm0[0,1] +; X64-NEXT: retq + %res = call <4 x double> @llvm.x86.avx.vperm2f128.pd.256(<4 x double> %a0, <4 x double> %a1, i8 3) ; <<4 x double>> [#uses=1] + ret <4 x double> %res +} +declare <4 x double> @llvm.x86.avx.vperm2f128.pd.256(<4 x double>, <4 x double>, i8) nounwind readnone + + +define <8 x float> @test_x86_avx_vperm2f128_ps_256(<8 x float> %a0, <8 x float> %a1) { +; CHECK-LABEL: test_x86_avx_vperm2f128_ps_256: +; CHECK: # BB#0: +; CHECK-NEXT: vperm2f128 $33, %ymm0, %ymm1, %ymm0 # encoding: [0xc4,0xe3,0x75,0x06,0xc0,0x21] +; CHECK-NEXT: # ymm0 = ymm1[2,3],ymm0[0,1] +; CHECK-NEXT: retl # encoding: [0xc3] +; X86-LABEL: test_x86_avx_vperm2f128_ps_256: +; X86: # BB#0: +; X86-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm1[2,3],ymm0[0,1] +; X86-NEXT: retl +; +; X64-LABEL: test_x86_avx_vperm2f128_ps_256: +; X64: # BB#0: +; X64-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm1[2,3],ymm0[0,1] +; X64-NEXT: retq + %res = call <8 x float> @llvm.x86.avx.vperm2f128.ps.256(<8 x float> %a0, <8 x float> %a1, i8 3) ; <<8 x float>> [#uses=1] + ret <8 x float> %res +} +declare <8 x float> @llvm.x86.avx.vperm2f128.ps.256(<8 x float>, <8 x float>, i8) nounwind readnone + + +define <8 x i32> @test_x86_avx_vperm2f128_si_256(<8 x i32> %a0, <8 x i32> %a1) { +; CHECK-LABEL: test_x86_avx_vperm2f128_si_256: +; CHECK: # BB#0: +; CHECK-NEXT: vperm2f128 $33, %ymm0, %ymm1, %ymm0 # encoding: [0xc4,0xe3,0x75,0x06,0xc0,0x21] +; CHECK-NEXT: # ymm0 = ymm1[2,3],ymm0[0,1] +; CHECK-NEXT: retl # encoding: [0xc3] +; X86-LABEL: test_x86_avx_vperm2f128_si_256: +; X86: # BB#0: +; X86-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm1[2,3],ymm0[0,1] +; X86-NEXT: retl +; +; X64-LABEL: test_x86_avx_vperm2f128_si_256: +; X64: # BB#0: +; X64-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm1[2,3],ymm0[0,1] +; X64-NEXT: retq + %res = call <8 x i32> @llvm.x86.avx.vperm2f128.si.256(<8 x i32> %a0, <8 x i32> %a1, i8 3) ; <<8 x i32>> [#uses=1] + ret <8 x i32> %res +} +declare <8 x i32> @llvm.x86.avx.vperm2f128.si.256(<8 x i32>, <8 x i32>, i8) nounwind readnone diff --git a/llvm/test/CodeGen/X86/avx-intrinsics-x86.ll b/llvm/test/CodeGen/X86/avx-intrinsics-x86.ll index 79c6b1f3237..4e65790bf3d 100644 --- a/llvm/test/CodeGen/X86/avx-intrinsics-x86.ll +++ b/llvm/test/CodeGen/X86/avx-intrinsics-x86.ll @@ -612,42 +612,6 @@ define <8 x float> @test_x86_avx_sqrt_ps_256(<8 x float> %a0) { declare <8 x float> @llvm.x86.avx.sqrt.ps.256(<8 x float>) nounwind readnone -define <4 x double> @test_x86_avx_vperm2f128_pd_256(<4 x double> %a0, <4 x double> %a1) { -; CHECK-LABEL: test_x86_avx_vperm2f128_pd_256: -; CHECK: # BB#0: -; CHECK-NEXT: vperm2f128 $3, %ymm1, %ymm0, %ymm0 # encoding: [0xc4,0xe3,0x7d,0x06,0xc1,0x03] -; CHECK-NEXT: # ymm0 = ymm1[2,3],ymm0[0,1] -; CHECK-NEXT: retl # encoding: [0xc3] - %res = call <4 x double> @llvm.x86.avx.vperm2f128.pd.256(<4 x double> %a0, <4 x double> %a1, i8 3) ; <<4 x double>> [#uses=1] - ret <4 x double> %res -} -declare <4 x double> @llvm.x86.avx.vperm2f128.pd.256(<4 x double>, <4 x double>, i8) nounwind readnone - - -define <8 x float> @test_x86_avx_vperm2f128_ps_256(<8 x float> %a0, <8 x float> %a1) { -; CHECK-LABEL: test_x86_avx_vperm2f128_ps_256: -; CHECK: # BB#0: -; CHECK-NEXT: vperm2f128 $3, %ymm1, %ymm0, %ymm0 # encoding: [0xc4,0xe3,0x7d,0x06,0xc1,0x03] -; CHECK-NEXT: # ymm0 = ymm1[2,3],ymm0[0,1] -; CHECK-NEXT: retl # encoding: [0xc3] - %res = call <8 x float> @llvm.x86.avx.vperm2f128.ps.256(<8 x float> %a0, <8 x float> %a1, i8 3) ; <<8 x float>> [#uses=1] - ret <8 x float> %res -} -declare <8 x float> @llvm.x86.avx.vperm2f128.ps.256(<8 x float>, <8 x float>, i8) nounwind readnone - - -define <8 x i32> @test_x86_avx_vperm2f128_si_256(<8 x i32> %a0, <8 x i32> %a1) { -; CHECK-LABEL: test_x86_avx_vperm2f128_si_256: -; CHECK: # BB#0: -; CHECK-NEXT: vperm2f128 $3, %ymm1, %ymm0, %ymm0 # encoding: [0xc4,0xe3,0x7d,0x06,0xc1,0x03] -; CHECK-NEXT: # ymm0 = ymm1[2,3],ymm0[0,1] -; CHECK-NEXT: retl # encoding: [0xc3] - %res = call <8 x i32> @llvm.x86.avx.vperm2f128.si.256(<8 x i32> %a0, <8 x i32> %a1, i8 3) ; <<8 x i32>> [#uses=1] - ret <8 x i32> %res -} -declare <8 x i32> @llvm.x86.avx.vperm2f128.si.256(<8 x i32>, <8 x i32>, i8) nounwind readnone - - define <2 x double> @test_x86_avx_vpermilvar_pd(<2 x double> %a0, <2 x i64> %a1) { ; AVX-LABEL: test_x86_avx_vpermilvar_pd: ; AVX: # BB#0: diff --git a/llvm/test/CodeGen/X86/avx2-intrinsics-x86-upgrade.ll b/llvm/test/CodeGen/X86/avx2-intrinsics-x86-upgrade.ll index 8f75311db10..c7babe086f0 100644 --- a/llvm/test/CodeGen/X86/avx2-intrinsics-x86-upgrade.ll +++ b/llvm/test/CodeGen/X86/avx2-intrinsics-x86-upgrade.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc < %s -mtriple=i686-apple-darwin -mattr=avx2 | FileCheck %s -; RUN: llc < %s -mtriple=i686-apple-darwin -mattr=+avx512f,+avx512bw,+avx512vl | FileCheck %s +; RUN: llc < %s -mtriple=i686-apple-darwin -mattr=avx2 | FileCheck %s --check-prefix=CHECK --check-prefix=AVX2 +; RUN: llc < %s -mtriple=i686-apple-darwin -mattr=+avx512f,+avx512bw,+avx512vl | FileCheck %s --check-prefix=CHECK --check-prefix=AVX512 define <16 x i16> @test_x86_avx2_pblendw(<16 x i16> %a0, <16 x i16> %a1) { ; CHECK-LABEL: test_x86_avx2_pblendw: @@ -565,3 +565,18 @@ define <16 x i16> @test_x86_avx2_pabs_w(<16 x i16> %a0) { } declare <16 x i16> @llvm.x86.avx2.pabs.w(<16 x i16>) nounwind readnone + +define <4 x i64> @test_x86_avx2_vperm2i128(<4 x i64> %a0, <4 x i64> %a1) { +; AVX2-LABEL: test_x86_avx2_vperm2i128: +; AVX2: ## BB#0: +; AVX2-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3,0,1] +; AVX2-NEXT: retl +; +; AVX512-LABEL: test_x86_avx2_vperm2i128: +; AVX512: ## BB#0: +; AVX512-NEXT: vpermq {{.*#+}} ymm0 = ymm0[2,3,0,1] +; AVX512-NEXT: retl + %res = call <4 x i64> @llvm.x86.avx2.vperm2i128(<4 x i64> %a0, <4 x i64> %a1, i8 1) ; <<4 x i64>> [#uses=1] + ret <4 x i64> %res +} +declare <4 x i64> @llvm.x86.avx2.vperm2i128(<4 x i64>, <4 x i64>, i8) nounwind readonly diff --git a/llvm/test/CodeGen/X86/avx2-intrinsics-x86.ll b/llvm/test/CodeGen/X86/avx2-intrinsics-x86.ll index 2eaa2dc1ae7..77039a13ed5 100644 --- a/llvm/test/CodeGen/X86/avx2-intrinsics-x86.ll +++ b/llvm/test/CodeGen/X86/avx2-intrinsics-x86.ll @@ -1002,18 +1002,6 @@ define <8 x float> @test_x86_avx2_permps(<8 x float> %a0, <8 x i32> %a1) { declare <8 x float> @llvm.x86.avx2.permps(<8 x float>, <8 x i32>) nounwind readonly -define <4 x i64> @test_x86_avx2_vperm2i128(<4 x i64> %a0, <4 x i64> %a1) { -; CHECK-LABEL: test_x86_avx2_vperm2i128: -; CHECK: ## BB#0: -; CHECK-NEXT: vperm2f128 $1, %ymm1, %ymm0, %ymm0 ## encoding: [0xc4,0xe3,0x7d,0x06,0xc1,0x01] -; CHECK-NEXT: ## ymm0 = ymm0[2,3,0,1] -; CHECK-NEXT: retl ## encoding: [0xc3] - %res = call <4 x i64> @llvm.x86.avx2.vperm2i128(<4 x i64> %a0, <4 x i64> %a1, i8 1) ; <<4 x i64>> [#uses=1] - ret <4 x i64> %res -} -declare <4 x i64> @llvm.x86.avx2.vperm2i128(<4 x i64>, <4 x i64>, i8) nounwind readonly - - define <2 x i64> @test_x86_avx2_maskload_q(i8* %a0, <2 x i64> %a1) { ; CHECK-LABEL: test_x86_avx2_maskload_q: ; CHECK: ## BB#0: @@ -1259,18 +1247,18 @@ define <4 x i32> @test_x86_avx2_psrav_d_const(<4 x i32> %a0, <4 x i32> %a1) { ; AVX2: ## BB#0: ; AVX2-NEXT: vmovdqa {{.*#+}} xmm0 = [2,9,4294967284,23] ; AVX2-NEXT: ## encoding: [0xc5,0xf9,0x6f,0x05,A,A,A,A] -; AVX2-NEXT: ## fixup A - offset: 4, value: LCPI85_0, kind: FK_Data_4 -; AVX2-NEXT: vpsravd LCPI85_1, %xmm0, %xmm0 ## encoding: [0xc4,0xe2,0x79,0x46,0x05,A,A,A,A] -; AVX2-NEXT: ## fixup A - offset: 5, value: LCPI85_1, kind: FK_Data_4 +; AVX2-NEXT: ## fixup A - offset: 4, value: LCPI84_0, kind: FK_Data_4 +; AVX2-NEXT: vpsravd LCPI84_1, %xmm0, %xmm0 ## encoding: [0xc4,0xe2,0x79,0x46,0x05,A,A,A,A] +; AVX2-NEXT: ## fixup A - offset: 5, value: LCPI84_1, kind: FK_Data_4 ; AVX2-NEXT: retl ## encoding: [0xc3] ; ; AVX512VL-LABEL: test_x86_avx2_psrav_d_const: ; AVX512VL: ## BB#0: -; AVX512VL-NEXT: vmovdqa LCPI85_0, %xmm0 ## EVEX TO VEX Compression xmm0 = [2,9,4294967284,23] +; AVX512VL-NEXT: vmovdqa LCPI84_0, %xmm0 ## EVEX TO VEX Compression xmm0 = [2,9,4294967284,23] ; AVX512VL-NEXT: ## encoding: [0xc5,0xf9,0x6f,0x05,A,A,A,A] -; AVX512VL-NEXT: ## fixup A - offset: 4, value: LCPI85_0, kind: FK_Data_4 -; AVX512VL-NEXT: vpsravd LCPI85_1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x46,0x05,A,A,A,A] -; AVX512VL-NEXT: ## fixup A - offset: 5, value: LCPI85_1, kind: FK_Data_4 +; AVX512VL-NEXT: ## fixup A - offset: 4, value: LCPI84_0, kind: FK_Data_4 +; AVX512VL-NEXT: vpsravd LCPI84_1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x46,0x05,A,A,A,A] +; AVX512VL-NEXT: ## fixup A - offset: 5, value: LCPI84_1, kind: FK_Data_4 ; AVX512VL-NEXT: retl ## encoding: [0xc3] %res = call <4 x i32> @llvm.x86.avx2.psrav.d(<4 x i32> <i32 2, i32 9, i32 -12, i32 23>, <4 x i32> <i32 1, i32 18, i32 35, i32 52>) ret <4 x i32> %res @@ -1296,18 +1284,18 @@ define <8 x i32> @test_x86_avx2_psrav_d_256_const(<8 x i32> %a0, <8 x i32> %a1) ; AVX2: ## BB#0: ; AVX2-NEXT: vmovdqa {{.*#+}} ymm0 = [2,9,4294967284,23,4294967270,37,4294967256,51] ; AVX2-NEXT: ## encoding: [0xc5,0xfd,0x6f,0x05,A,A,A,A] -; AVX2-NEXT: ## fixup A - offset: 4, value: LCPI87_0, kind: FK_Data_4 -; AVX2-NEXT: vpsravd LCPI87_1, %ymm0, %ymm0 ## encoding: [0xc4,0xe2,0x7d,0x46,0x05,A,A,A,A] -; AVX2-NEXT: ## fixup A - offset: 5, value: LCPI87_1, kind: FK_Data_4 +; AVX2-NEXT: ## fixup A - offset: 4, value: LCPI86_0, kind: FK_Data_4 +; AVX2-NEXT: vpsravd LCPI86_1, %ymm0, %ymm0 ## encoding: [0xc4,0xe2,0x7d,0x46,0x05,A,A,A,A] +; AVX2-NEXT: ## fixup A - offset: 5, value: LCPI86_1, kind: FK_Data_4 ; AVX2-NEXT: retl ## encoding: [0xc3] ; ; AVX512VL-LABEL: test_x86_avx2_psrav_d_256_const: ; AVX512VL: ## BB#0: -; AVX512VL-NEXT: vmovdqa LCPI87_0, %ymm0 ## EVEX TO VEX Compression ymm0 = [2,9,4294967284,23,4294967270,37,4294967256,51] +; AVX512VL-NEXT: vmovdqa LCPI86_0, %ymm0 ## EVEX TO VEX Compression ymm0 = [2,9,4294967284,23,4294967270,37,4294967256,51] ; AVX512VL-NEXT: ## encoding: [0xc5,0xfd,0x6f,0x05,A,A,A,A] -; AVX512VL-NEXT: ## fixup A - offset: 4, value: LCPI87_0, kind: FK_Data_4 -; AVX512VL-NEXT: vpsravd LCPI87_1, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x46,0x05,A,A,A,A] -; AVX512VL-NEXT: ## fixup A - offset: 5, value: LCPI87_1, kind: FK_Data_4 +; AVX512VL-NEXT: ## fixup A - offset: 4, value: LCPI86_0, kind: FK_Data_4 +; AVX512VL-NEXT: vpsravd LCPI86_1, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x46,0x05,A,A,A,A] +; AVX512VL-NEXT: ## fixup A - offset: 5, value: LCPI86_1, kind: FK_Data_4 ; AVX512VL-NEXT: retl ## encoding: [0xc3] %res = call <8 x i32> @llvm.x86.avx2.psrav.d.256(<8 x i32> <i32 2, i32 9, i32 -12, i32 23, i32 -26, i32 37, i32 -40, i32 51>, <8 x i32> <i32 1, i32 18, i32 35, i32 52, i32 69, i32 15, i32 32, i32 49>) ret <8 x i32> %res diff --git a/llvm/test/CodeGen/X86/x86-vperm2.ll b/llvm/test/CodeGen/X86/x86-vperm2.ll new file mode 100644 index 00000000000..4558afafd83 --- /dev/null +++ b/llvm/test/CodeGen/X86/x86-vperm2.ll @@ -0,0 +1,229 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=i686-apple-darwin -mattr=avx2 -show-mc-encoding | FileCheck %s --check-prefix=CHECK --check-prefix=AVX2 + +; Test cases derived from the possible immediate values of the vperm2f128 intrinsics. + +define <4 x double> @perm2pd_0x00(<4 x double> %a0, <4 x double> %a1) { +; CHECK-LABEL: perm2pd_0x00: +; CHECK: ## BB#0: +; CHECK-NEXT: vperm2f128 $0, %ymm0, %ymm0, %ymm0 ## encoding: [0xc4,0xe3,0x7d,0x06,0xc0,0x00] +; CHECK-NEXT: ## ymm0 = ymm0[0,1,0,1] +; CHECK-NEXT: retl ## encoding: [0xc3] + %1 = shufflevector <4 x double> %a0, <4 x double> %a0, <4 x i32> <i32 0, i32 1, i32 4, i32 5> + ret <4 x double> %1 +} + +define <4 x double> @perm2pd_0x01(<4 x double> %a0, <4 x double> %a1) { +; CHECK-LABEL: perm2pd_0x01: +; CHECK: ## BB#0: +; CHECK-NEXT: vperm2f128 $1, %ymm0, %ymm0, %ymm0 ## encoding: [0xc4,0xe3,0x7d,0x06,0xc0,0x01] +; CHECK-NEXT: ## ymm0 = ymm0[2,3,0,1] +; CHECK-NEXT: retl ## encoding: [0xc3] + %1 = shufflevector <4 x double> %a0, <4 x double> %a0, <4 x i32> <i32 2, i32 3, i32 4, i32 5> + ret <4 x double> %1 +} + +define <4 x double> @perm2pd_0x02(<4 x double> %a0, <4 x double> %a1) { +; CHECK-LABEL: perm2pd_0x02: +; CHECK: ## BB#0: +; CHECK-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 ## encoding: [0xc4,0xe3,0x75,0x18,0xc0,0x01] +; CHECK-NEXT: retl ## encoding: [0xc3] + %1 = shufflevector <4 x double> %a1, <4 x double> %a0, <4 x i32> <i32 0, i32 1, i32 4, i32 5> + ret <4 x double> %1 +} + +define <4 x double> @perm2pd_0x03(<4 x double> %a0, <4 x double> %a1) { +; CHECK-LABEL: perm2pd_0x03: +; CHECK: ## BB#0: +; CHECK-NEXT: vperm2f128 $33, %ymm0, %ymm1, %ymm0 ## encoding: [0xc4,0xe3,0x75,0x06,0xc0,0x21] +; CHECK-NEXT: ## ymm0 = ymm1[2,3],ymm0[0,1] +; CHECK-NEXT: retl ## encoding: [0xc3] + %1 = shufflevector <4 x double> %a1, <4 x double> %a0, <4 x i32> <i32 2, i32 3, i32 4, i32 5> + ret <4 x double> %1 +} + +define <4 x double> @perm2pd_0x10(<4 x double> %a0, <4 x double> %a1) { +; CHECK-LABEL: perm2pd_0x10: +; CHECK: ## BB#0: +; CHECK-NEXT: retl ## encoding: [0xc3] + %1 = shufflevector <4 x double> %a0, <4 x double> %a0, <4 x i32> <i32 0, i32 1, i32 6, i32 7> + ret <4 x double> %1 +} + +define <4 x double> @perm2pd_0x11(<4 x double> %a0, <4 x double> %a1) { +; CHECK-LABEL: perm2pd_0x11: +; CHECK: ## BB#0: +; CHECK-NEXT: vperm2f128 $17, %ymm0, %ymm0, %ymm0 ## encoding: [0xc4,0xe3,0x7d,0x06,0xc0,0x11] +; CHECK-NEXT: ## ymm0 = ymm0[2,3,2,3] +; CHECK-NEXT: retl ## encoding: [0xc3] + %1 = shufflevector <4 x double> %a0, <4 x double> %a0, <4 x i32> <i32 2, i32 3, i32 6, i32 7> + ret <4 x double> %1 +} + +define <4 x double> @perm2pd_0x12(<4 x double> %a0, <4 x double> %a1) { +; CHECK-LABEL: perm2pd_0x12: +; CHECK: ## BB#0: +; CHECK-NEXT: vblendpd $12, %ymm0, %ymm1, %ymm0 ## encoding: [0xc4,0xe3,0x75,0x0d,0xc0,0x0c] +; CHECK-NEXT: ## ymm0 = ymm1[0,1],ymm0[2,3] +; CHECK-NEXT: retl ## encoding: [0xc3] + %1 = shufflevector <4 x double> %a1, <4 x double> %a0, <4 x i32> <i32 0, i32 1, i32 6, i32 7> + ret <4 x double> %1 +} + +define <4 x double> @perm2pd_0x13(<4 x double> %a0, <4 x double> %a1) { +; CHECK-LABEL: perm2pd_0x13: +; CHECK: ## BB#0: +; CHECK-NEXT: vperm2f128 $49, %ymm0, %ymm1, %ymm0 ## encoding: [0xc4,0xe3,0x75,0x06,0xc0,0x31] +; CHECK-NEXT: ## ymm0 = ymm1[2,3],ymm0[2,3] +; CHECK-NEXT: retl ## encoding: [0xc3] + %1 = shufflevector <4 x double> %a1, <4 x double> %a0, <4 x i32> <i32 2, i32 3, i32 6, i32 7> + ret <4 x double> %1 +} + +define <4 x double> @perm2pd_0x20(<4 x double> %a0, <4 x double> %a1) { +; CHECK-LABEL: perm2pd_0x20: +; CHECK: ## BB#0: +; CHECK-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 ## encoding: [0xc4,0xe3,0x7d,0x18,0xc1,0x01] +; CHECK-NEXT: retl ## encoding: [0xc3] + %1 = shufflevector <4 x double> %a0, <4 x double> %a1, <4 x i32> <i32 0, i32 1, i32 4, i32 5> + ret <4 x double> %1 +} + +define <4 x double> @perm2pd_0x21(<4 x double> %a0, <4 x double> %a1) { +; CHECK-LABEL: perm2pd_0x21: +; CHECK: ## BB#0: +; CHECK-NEXT: vperm2f128 $33, %ymm1, %ymm0, %ymm0 ## encoding: [0xc4,0xe3,0x7d,0x06,0xc1,0x21] +; CHECK-NEXT: ## ymm0 = ymm0[2,3],ymm1[0,1] +; CHECK-NEXT: retl ## encoding: [0xc3] + %1 = shufflevector <4 x double> %a0, <4 x double> %a1, <4 x i32> <i32 2, i32 3, i32 4, i32 5> + ret <4 x double> %1 +} + +define <4 x double> @perm2pd_0x22(<4 x double> %a0, <4 x double> %a1) { +; CHECK-LABEL: perm2pd_0x22: +; CHECK: ## BB#0: +; CHECK-NEXT: vperm2f128 $0, %ymm0, %ymm1, %ymm0 ## encoding: [0xc4,0xe3,0x75,0x06,0xc0,0x00] +; CHECK-NEXT: ## ymm0 = ymm1[0,1,0,1] +; CHECK-NEXT: retl ## encoding: [0xc3] + %1 = shufflevector <4 x double> %a1, <4 x double> %a1, <4 x i32> <i32 0, i32 1, i32 4, i32 5> + ret <4 x double> %1 +} + +define <4 x double> @perm2pd_0x23(<4 x double> %a0, <4 x double> %a1) { +; CHECK-LABEL: perm2pd_0x23: +; CHECK: ## BB#0: +; CHECK-NEXT: vperm2f128 $1, %ymm0, %ymm1, %ymm0 ## encoding: [0xc4,0xe3,0x75,0x06,0xc0,0x01] +; CHECK-NEXT: ## ymm0 = ymm1[2,3,0,1] +; CHECK-NEXT: retl ## encoding: [0xc3] + %1 = shufflevector <4 x double> %a1, <4 x double> %a1, <4 x i32> <i32 2, i32 3, i32 4, i32 5> + ret <4 x double> %1 +} + +define <4 x double> @perm2pd_0x30(<4 x double> %a0, <4 x double> %a1) { +; CHECK-LABEL: perm2pd_0x30: +; CHECK: ## BB#0: +; CHECK-NEXT: vblendpd $12, %ymm1, %ymm0, %ymm0 ## encoding: [0xc4,0xe3,0x7d,0x0d,0xc1,0x0c] +; CHECK-NEXT: ## ymm0 = ymm0[0,1],ymm1[2,3] +; CHECK-NEXT: retl ## encoding: [0xc3] + %1 = shufflevector <4 x double> %a0, <4 x double> %a1, <4 x i32> <i32 0, i32 1, i32 6, i32 7> + ret <4 x double> %1 +} + +define <4 x double> @perm2pd_0x31(<4 x double> %a0, <4 x double> %a1) { +; CHECK-LABEL: perm2pd_0x31: +; CHECK: ## BB#0: +; CHECK-NEXT: vperm2f128 $49, %ymm1, %ymm0, %ymm0 ## encoding: [0xc4,0xe3,0x7d,0x06,0xc1,0x31] +; CHECK-NEXT: ## ymm0 = ymm0[2,3],ymm1[2,3] +; CHECK-NEXT: retl ## encoding: [0xc3] + %1 = shufflevector <4 x double> %a0, <4 x double> %a1, <4 x i32> <i32 2, i32 3, i32 6, i32 7> + ret <4 x double> %1 +} + +define <4 x double> @perm2pd_0x32(<4 x double> %a0, <4 x double> %a1) { +; CHECK-LABEL: perm2pd_0x32: +; CHECK: ## BB#0: +; CHECK-NEXT: vmovaps %ymm1, %ymm0 ## encoding: [0xc5,0xfc,0x28,0xc1] +; CHECK-NEXT: retl ## encoding: [0xc3] + %1 = shufflevector <4 x double> %a1, <4 x double> %a1, <4 x i32> <i32 0, i32 1, i32 6, i32 7> + ret <4 x double> %1 +} + +define <4 x double> @perm2pd_0x33(<4 x double> %a0, <4 x double> %a1) { +; CHECK-LABEL: perm2pd_0x33: +; CHECK: ## BB#0: +; CHECK-NEXT: vperm2f128 $17, %ymm0, %ymm1, %ymm0 ## encoding: [0xc4,0xe3,0x75,0x06,0xc0,0x11] +; CHECK-NEXT: ## ymm0 = ymm1[2,3,2,3] +; CHECK-NEXT: retl ## encoding: [0xc3] + %1 = shufflevector <4 x double> %a1, <4 x double> %a1, <4 x i32> <i32 2, i32 3, i32 6, i32 7> + ret <4 x double> %1 +} + +define <8 x float> @perm2ps_0x31(<8 x float> %a0, <8 x float> %a1) { +; CHECK-LABEL: perm2ps_0x31: +; CHECK: ## BB#0: +; CHECK-NEXT: vperm2f128 $49, %ymm1, %ymm0, %ymm0 ## encoding: [0xc4,0xe3,0x7d,0x06,0xc1,0x31] +; CHECK-NEXT: ## ymm0 = ymm0[2,3],ymm1[2,3] +; CHECK-NEXT: retl ## encoding: [0xc3] + %1 = shufflevector <8 x float> %a0, <8 x float> %a1, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 12, i32 13, i32 14, i32 15> + ret <8 x float> %1 +} + +define <4 x i64> @perm2i_0x33(<4 x i64> %a0, <4 x i64> %a1) { +; CHECK-LABEL: perm2i_0x33: +; CHECK: ## BB#0: +; CHECK-NEXT: vperm2f128 $17, %ymm0, %ymm1, %ymm0 ## encoding: [0xc4,0xe3,0x75,0x06,0xc0,0x11] +; CHECK-NEXT: ## ymm0 = ymm1[2,3,2,3] +; CHECK-NEXT: retl ## encoding: [0xc3] + %1 = shufflevector <4 x i64> %a1, <4 x i64> %a1, <4 x i32> <i32 2, i32 3, i32 6, i32 7> + ret <4 x i64> %1 +} + +define <4 x double> @perm2pd_0x81(<4 x double> %a0, <4 x double> %a1) { +; CHECK-LABEL: perm2pd_0x81: +; CHECK: ## BB#0: +; CHECK-NEXT: vperm2f128 $129, %ymm0, %ymm0, %ymm0 ## encoding: [0xc4,0xe3,0x7d,0x06,0xc0,0x81] +; CHECK-NEXT: ## ymm0 = ymm0[2,3],zero,zero +; CHECK-NEXT: retl ## encoding: [0xc3] + %1 = shufflevector <4 x double> %a0, <4 x double> zeroinitializer, <4 x i32> <i32 2, i32 3, i32 4, i32 5> + ret <4 x double> %1 +} + +define <4 x double> @perm2pd_0x83(<4 x double> %a0, <4 x double> %a1) { +; CHECK-LABEL: perm2pd_0x83: +; CHECK: ## BB#0: +; CHECK-NEXT: vperm2f128 $129, %ymm0, %ymm1, %ymm0 ## encoding: [0xc4,0xe3,0x75,0x06,0xc0,0x81] +; CHECK-NEXT: ## ymm0 = ymm1[2,3],zero,zero +; CHECK-NEXT: retl ## encoding: [0xc3] + %1 = shufflevector <4 x double> %a1, <4 x double> zeroinitializer, <4 x i32> <i32 2, i32 3, i32 4, i32 5> + ret <4 x double> %1 +} + +define <4 x double> @perm2pd_0x28(<4 x double> %a0, <4 x double> %a1) { +; CHECK-LABEL: perm2pd_0x28: +; CHECK: ## BB#0: +; CHECK-NEXT: vperm2f128 $40, %ymm1, %ymm0, %ymm0 ## encoding: [0xc4,0xe3,0x7d,0x06,0xc1,0x28] +; CHECK-NEXT: ## ymm0 = zero,zero,ymm1[0,1] +; CHECK-NEXT: retl ## encoding: [0xc3] + %1 = shufflevector <4 x double> zeroinitializer, <4 x double> %a1, <4 x i32> <i32 0, i32 1, i32 4, i32 5> + ret <4 x double> %1 +} + +define <4 x double> @perm2pd_0x08(<4 x double> %a0, <4 x double> %a1) { +; CHECK-LABEL: perm2pd_0x08: +; CHECK: ## BB#0: +; CHECK-NEXT: vperm2f128 $40, %ymm0, %ymm0, %ymm0 ## encoding: [0xc4,0xe3,0x7d,0x06,0xc0,0x28] +; CHECK-NEXT: ## ymm0 = zero,zero,ymm0[0,1] +; CHECK-NEXT: retl ## encoding: [0xc3] + %1 = shufflevector <4 x double> zeroinitializer, <4 x double> %a0, <4 x i32> <i32 0, i32 1, i32 4, i32 5> + ret <4 x double> %1 +} + +define <4 x i64> @perm2i_0x28(<4 x i64> %a0, <4 x i64> %a1) { +; CHECK-LABEL: perm2i_0x28: +; CHECK: ## BB#0: +; CHECK-NEXT: vperm2f128 $40, %ymm1, %ymm0, %ymm0 ## encoding: [0xc4,0xe3,0x7d,0x06,0xc1,0x28] +; CHECK-NEXT: ## ymm0 = zero,zero,ymm1[0,1] +; CHECK-NEXT: retl ## encoding: [0xc3] + %1 = shufflevector <4 x i64> zeroinitializer, <4 x i64> %a1, <4 x i32> <i32 0, i32 1, i32 4, i32 5> + ret <4 x i64> %1 +} diff --git a/llvm/test/Transforms/InstCombine/X86/x86-vperm2.ll b/llvm/test/Transforms/InstCombine/X86/x86-vperm2.ll deleted file mode 100644 index 84f69aa25d2..00000000000 --- a/llvm/test/Transforms/InstCombine/X86/x86-vperm2.ll +++ /dev/null @@ -1,313 +0,0 @@ -; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt < %s -instcombine -S | FileCheck %s - -; This should never happen, but make sure we don't crash handling a non-constant immediate byte. - -define <4 x double> @perm2pd_non_const_imm(<4 x double> %a0, <4 x double> %a1, i8 %b) { -; CHECK-LABEL: @perm2pd_non_const_imm( -; CHECK-NEXT: [[RES:%.*]] = call <4 x double> @llvm.x86.avx.vperm2f128.pd.256(<4 x double> %a0, <4 x double> %a1, i8 %b) -; CHECK-NEXT: ret <4 x double> [[RES]] -; - %res = call <4 x double> @llvm.x86.avx.vperm2f128.pd.256(<4 x double> %a0, <4 x double> %a1, i8 %b) - ret <4 x double> %res - -} - - -; In the following 4 tests, both zero mask bits of the immediate are set. - -define <4 x double> @perm2pd_0x88(<4 x double> %a0, <4 x double> %a1) { -; CHECK-LABEL: @perm2pd_0x88( -; CHECK-NEXT: ret <4 x double> zeroinitializer -; - %res = call <4 x double> @llvm.x86.avx.vperm2f128.pd.256(<4 x double> %a0, <4 x double> %a1, i8 136) - ret <4 x double> %res - -} - -define <8 x float> @perm2ps_0x88(<8 x float> %a0, <8 x float> %a1) { -; CHECK-LABEL: @perm2ps_0x88( -; CHECK-NEXT: ret <8 x float> zeroinitializer -; - %res = call <8 x float> @llvm.x86.avx.vperm2f128.ps.256(<8 x float> %a0, <8 x float> %a1, i8 136) - ret <8 x float> %res - -} - -define <8 x i32> @perm2si_0x88(<8 x i32> %a0, <8 x i32> %a1) { -; CHECK-LABEL: @perm2si_0x88( -; CHECK-NEXT: ret <8 x i32> zeroinitializer -; - %res = call <8 x i32> @llvm.x86.avx.vperm2f128.si.256(<8 x i32> %a0, <8 x i32> %a1, i8 136) - ret <8 x i32> %res - -} - -define <4 x i64> @perm2i_0x88(<4 x i64> %a0, <4 x i64> %a1) { -; CHECK-LABEL: @perm2i_0x88( -; CHECK-NEXT: ret <4 x i64> zeroinitializer -; - %res = call <4 x i64> @llvm.x86.avx2.vperm2i128(<4 x i64> %a0, <4 x i64> %a1, i8 136) - ret <4 x i64> %res - -} - - -; The other control bits are ignored when zero mask bits of the immediate are set. - -define <4 x double> @perm2pd_0xff(<4 x double> %a0, <4 x double> %a1) { -; CHECK-LABEL: @perm2pd_0xff( -; CHECK-NEXT: ret <4 x double> zeroinitializer -; - %res = call <4 x double> @llvm.x86.avx.vperm2f128.pd.256(<4 x double> %a0, <4 x double> %a1, i8 255) - ret <4 x double> %res - -} - - -; The following 16 tests are simple shuffles, except for 2 cases where we can just return one of the -; source vectors. Verify that we generate the right shuffle masks and undef source operand where possible.. - -define <4 x double> @perm2pd_0x00(<4 x double> %a0, <4 x double> %a1) { -; CHECK-LABEL: @perm2pd_0x00( -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x double> %a0, <4 x double> undef, <4 x i32> <i32 0, i32 1, i32 0, i32 1> -; CHECK-NEXT: ret <4 x double> [[TMP1]] -; - %res = call <4 x double> @llvm.x86.avx.vperm2f128.pd.256(<4 x double> %a0, <4 x double> %a1, i8 0) - ret <4 x double> %res - -} - -define <4 x double> @perm2pd_0x01(<4 x double> %a0, <4 x double> %a1) { -; CHECK-LABEL: @perm2pd_0x01( -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x double> %a0, <4 x double> undef, <4 x i32> <i32 2, i32 3, i32 0, i32 1> -; CHECK-NEXT: ret <4 x double> [[TMP1]] -; - %res = call <4 x double> @llvm.x86.avx.vperm2f128.pd.256(<4 x double> %a0, <4 x double> %a1, i8 1) - ret <4 x double> %res - -} - -define <4 x double> @perm2pd_0x02(<4 x double> %a0, <4 x double> %a1) { -; CHECK-LABEL: @perm2pd_0x02( -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x double> %a1, <4 x double> %a0, <4 x i32> <i32 0, i32 1, i32 4, i32 5> -; CHECK-NEXT: ret <4 x double> [[TMP1]] -; - %res = call <4 x double> @llvm.x86.avx.vperm2f128.pd.256(<4 x double> %a0, <4 x double> %a1, i8 2) - ret <4 x double> %res - -} - -define <4 x double> @perm2pd_0x03(<4 x double> %a0, <4 x double> %a1) { -; CHECK-LABEL: @perm2pd_0x03( -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x double> %a1, <4 x double> %a0, <4 x i32> <i32 2, i32 3, i32 4, i32 5> -; CHECK-NEXT: ret <4 x double> [[TMP1]] -; - %res = call <4 x double> @llvm.x86.avx.vperm2f128.pd.256(<4 x double> %a0, <4 x double> %a1, i8 3) - ret <4 x double> %res - -} - -define <4 x double> @perm2pd_0x10(<4 x double> %a0, <4 x double> %a1) { -; CHECK-LABEL: @perm2pd_0x10( -; CHECK-NEXT: ret <4 x double> %a0 -; - %res = call <4 x double> @llvm.x86.avx.vperm2f128.pd.256(<4 x double> %a0, <4 x double> %a1, i8 16) - ret <4 x double> %res - -} - -define <4 x double> @perm2pd_0x11(<4 x double> %a0, <4 x double> %a1) { -; CHECK-LABEL: @perm2pd_0x11( -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x double> %a0, <4 x double> undef, <4 x i32> <i32 2, i32 3, i32 2, i32 3> -; CHECK-NEXT: ret <4 x double> [[TMP1]] -; - %res = call <4 x double> @llvm.x86.avx.vperm2f128.pd.256(<4 x double> %a0, <4 x double> %a1, i8 17) - ret <4 x double> %res - -} - -define <4 x double> @perm2pd_0x12(<4 x double> %a0, <4 x double> %a1) { -; CHECK-LABEL: @perm2pd_0x12( -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x double> %a1, <4 x double> %a0, <4 x i32> <i32 0, i32 1, i32 6, i32 7> -; CHECK-NEXT: ret <4 x double> [[TMP1]] -; - %res = call <4 x double> @llvm.x86.avx.vperm2f128.pd.256(<4 x double> %a0, <4 x double> %a1, i8 18) - ret <4 x double> %res - -} - -define <4 x double> @perm2pd_0x13(<4 x double> %a0, <4 x double> %a1) { -; CHECK-LABEL: @perm2pd_0x13( -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x double> %a1, <4 x double> %a0, <4 x i32> <i32 2, i32 3, i32 6, i32 7> -; CHECK-NEXT: ret <4 x double> [[TMP1]] -; - %res = call <4 x double> @llvm.x86.avx.vperm2f128.pd.256(<4 x double> %a0, <4 x double> %a1, i8 19) - ret <4 x double> %res - -} - -define <4 x double> @perm2pd_0x20(<4 x double> %a0, <4 x double> %a1) { -; CHECK-LABEL: @perm2pd_0x20( -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x double> %a0, <4 x double> %a1, <4 x i32> <i32 0, i32 1, i32 4, i32 5> -; CHECK-NEXT: ret <4 x double> [[TMP1]] -; - %res = call <4 x double> @llvm.x86.avx.vperm2f128.pd.256(<4 x double> %a0, <4 x double> %a1, i8 32) - ret <4 x double> %res - -} - -define <4 x double> @perm2pd_0x21(<4 x double> %a0, <4 x double> %a1) { -; CHECK-LABEL: @perm2pd_0x21( -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x double> %a0, <4 x double> %a1, <4 x i32> <i32 2, i32 3, i32 4, i32 5> -; CHECK-NEXT: ret <4 x double> [[TMP1]] -; - %res = call <4 x double> @llvm.x86.avx.vperm2f128.pd.256(<4 x double> %a0, <4 x double> %a1, i8 33) - ret <4 x double> %res - -} - -define <4 x double> @perm2pd_0x22(<4 x double> %a0, <4 x double> %a1) { -; CHECK-LABEL: @perm2pd_0x22( -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x double> %a1, <4 x double> undef, <4 x i32> <i32 0, i32 1, i32 0, i32 1> -; CHECK-NEXT: ret <4 x double> [[TMP1]] -; - %res = call <4 x double> @llvm.x86.avx.vperm2f128.pd.256(<4 x double> %a0, <4 x double> %a1, i8 34) - ret <4 x double> %res - -} - -define <4 x double> @perm2pd_0x23(<4 x double> %a0, <4 x double> %a1) { -; CHECK-LABEL: @perm2pd_0x23( -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x double> %a1, <4 x double> undef, <4 x i32> <i32 2, i32 3, i32 0, i32 1> -; CHECK-NEXT: ret <4 x double> [[TMP1]] -; - %res = call <4 x double> @llvm.x86.avx.vperm2f128.pd.256(<4 x double> %a0, <4 x double> %a1, i8 35) - ret <4 x double> %res - -} - -define <4 x double> @perm2pd_0x30(<4 x double> %a0, <4 x double> %a1) { -; CHECK-LABEL: @perm2pd_0x30( -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x double> %a0, <4 x double> %a1, <4 x i32> <i32 0, i32 1, i32 6, i32 7> -; CHECK-NEXT: ret <4 x double> [[TMP1]] -; - %res = call <4 x double> @llvm.x86.avx.vperm2f128.pd.256(<4 x double> %a0, <4 x double> %a1, i8 48) - ret <4 x double> %res - -} - -define <4 x double> @perm2pd_0x31(<4 x double> %a0, <4 x double> %a1) { -; CHECK-LABEL: @perm2pd_0x31( -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x double> %a0, <4 x double> %a1, <4 x i32> <i32 2, i32 3, i32 6, i32 7> -; CHECK-NEXT: ret <4 x double> [[TMP1]] -; - %res = call <4 x double> @llvm.x86.avx.vperm2f128.pd.256(<4 x double> %a0, <4 x double> %a1, i8 49) - ret <4 x double> %res - -} - -define <4 x double> @perm2pd_0x32(<4 x double> %a0, <4 x double> %a1) { -; CHECK-LABEL: @perm2pd_0x32( -; CHECK-NEXT: ret <4 x double> %a1 -; - %res = call <4 x double> @llvm.x86.avx.vperm2f128.pd.256(<4 x double> %a0, <4 x double> %a1, i8 50) - ret <4 x double> %res - -} - -define <4 x double> @perm2pd_0x33(<4 x double> %a0, <4 x double> %a1) { -; CHECK-LABEL: @perm2pd_0x33( -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x double> %a1, <4 x double> undef, <4 x i32> <i32 2, i32 3, i32 2, i32 3> -; CHECK-NEXT: ret <4 x double> [[TMP1]] -; - %res = call <4 x double> @llvm.x86.avx.vperm2f128.pd.256(<4 x double> %a0, <4 x double> %a1, i8 51) - ret <4 x double> %res - -} - -; Confirm that a mask for 32-bit elements is also correct. - -define <8 x float> @perm2ps_0x31(<8 x float> %a0, <8 x float> %a1) { -; CHECK-LABEL: @perm2ps_0x31( -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x float> %a0, <8 x float> %a1, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 12, i32 13, i32 14, i32 15> -; CHECK-NEXT: ret <8 x float> [[TMP1]] -; - %res = call <8 x float> @llvm.x86.avx.vperm2f128.ps.256(<8 x float> %a0, <8 x float> %a1, i8 49) - ret <8 x float> %res - -} - - -; Confirm that the AVX2 version works the same. - -define <4 x i64> @perm2i_0x33(<4 x i64> %a0, <4 x i64> %a1) { -; CHECK-LABEL: @perm2i_0x33( -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i64> %a1, <4 x i64> undef, <4 x i32> <i32 2, i32 3, i32 2, i32 3> -; CHECK-NEXT: ret <4 x i64> [[TMP1]] -; - %res = call <4 x i64> @llvm.x86.avx2.vperm2i128(<4 x i64> %a0, <4 x i64> %a1, i8 51) - ret <4 x i64> %res - -} - - -; Confirm that when a single zero mask bit is set, we replace a source vector with zeros. - -define <4 x double> @perm2pd_0x81(<4 x double> %a0, <4 x double> %a1) { -; CHECK-LABEL: @perm2pd_0x81( -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x double> %a0, <4 x double> <double 0.000000e+00, double 0.000000e+00, double undef, double undef>, <4 x i32> <i32 2, i32 3, i32 4, i32 5> -; CHECK-NEXT: ret <4 x double> [[TMP1]] -; - %res = call <4 x double> @llvm.x86.avx.vperm2f128.pd.256(<4 x double> %a0, <4 x double> %a1, i8 129) - ret <4 x double> %res - -} - -define <4 x double> @perm2pd_0x83(<4 x double> %a0, <4 x double> %a1) { -; CHECK-LABEL: @perm2pd_0x83( -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x double> %a1, <4 x double> <double 0.000000e+00, double 0.000000e+00, double undef, double undef>, <4 x i32> <i32 2, i32 3, i32 4, i32 5> -; CHECK-NEXT: ret <4 x double> [[TMP1]] -; - %res = call <4 x double> @llvm.x86.avx.vperm2f128.pd.256(<4 x double> %a0, <4 x double> %a1, i8 131) - ret <4 x double> %res - -} - -define <4 x double> @perm2pd_0x28(<4 x double> %a0, <4 x double> %a1) { -; CHECK-LABEL: @perm2pd_0x28( -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x double> <double 0.000000e+00, double 0.000000e+00, double undef, double undef>, <4 x double> %a1, <4 x i32> <i32 0, i32 1, i32 4, i32 5> -; CHECK-NEXT: ret <4 x double> [[TMP1]] -; - %res = call <4 x double> @llvm.x86.avx.vperm2f128.pd.256(<4 x double> %a0, <4 x double> %a1, i8 40) - ret <4 x double> %res - -} - -define <4 x double> @perm2pd_0x08(<4 x double> %a0, <4 x double> %a1) { -; CHECK-LABEL: @perm2pd_0x08( -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x double> <double 0.000000e+00, double 0.000000e+00, double undef, double undef>, <4 x double> %a0, <4 x i32> <i32 0, i32 1, i32 4, i32 5> -; CHECK-NEXT: ret <4 x double> [[TMP1]] -; - %res = call <4 x double> @llvm.x86.avx.vperm2f128.pd.256(<4 x double> %a0, <4 x double> %a1, i8 8) - ret <4 x double> %res - -} - -; Check one more with the AVX2 version. - -define <4 x i64> @perm2i_0x28(<4 x i64> %a0, <4 x i64> %a1) { -; CHECK-LABEL: @perm2i_0x28( -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i64> <i64 0, i64 0, i64 undef, i64 undef>, <4 x i64> %a1, <4 x i32> <i32 0, i32 1, i32 4, i32 5> -; CHECK-NEXT: ret <4 x i64> [[TMP1]] -; - %res = call <4 x i64> @llvm.x86.avx2.vperm2i128(<4 x i64> %a0, <4 x i64> %a1, i8 40) - ret <4 x i64> %res - -} - -declare <4 x double> @llvm.x86.avx.vperm2f128.pd.256(<4 x double>, <4 x double>, i8) nounwind readnone -declare <8 x float> @llvm.x86.avx.vperm2f128.ps.256(<8 x float>, <8 x float>, i8) nounwind readnone -declare <8 x i32> @llvm.x86.avx.vperm2f128.si.256(<8 x i32>, <8 x i32>, i8) nounwind readnone -declare <4 x i64> @llvm.x86.avx2.vperm2i128(<4 x i64>, <4 x i64>, i8) nounwind readnone - |

