diff options
Diffstat (limited to 'llvm/test/CodeGen/X86')
| -rw-r--r-- | llvm/test/CodeGen/X86/avx2-intrinsics-x86-upgrade.ll | 120 | ||||
| -rw-r--r-- | llvm/test/CodeGen/X86/avx2-intrinsics-x86.ll | 88 | ||||
| -rw-r--r-- | llvm/test/CodeGen/X86/stack-folding-int-avx2.ll | 6 |
3 files changed, 123 insertions, 91 deletions
diff --git a/llvm/test/CodeGen/X86/avx2-intrinsics-x86-upgrade.ll b/llvm/test/CodeGen/X86/avx2-intrinsics-x86-upgrade.ll index a30d8371775..36b6da5ef96 100644 --- a/llvm/test/CodeGen/X86/avx2-intrinsics-x86-upgrade.ll +++ b/llvm/test/CodeGen/X86/avx2-intrinsics-x86-upgrade.ll @@ -83,3 +83,123 @@ define <4 x i64> @test_x86_avx2_vinserti128(<4 x i64> %a0, <2 x i64> %a1) { } declare <4 x i64> @llvm.x86.avx2.vinserti128(<4 x i64>, <2 x i64>, i8) nounwind readnone + +define <4 x double> @test_x86_avx2_vbroadcast_sd_pd_256(<2 x double> %a0) { + ; CHECK-LABEL: test_x86_avx2_vbroadcast_sd_pd_256: + ; CHECK: ## BB#0: + ; CHECK-NEXT: vbroadcastsd %xmm0, %ymm0 + ; CHECK-NEXT: retl + %res = call <4 x double> @llvm.x86.avx2.vbroadcast.sd.pd.256(<2 x double> %a0) + ret <4 x double> %res +} +declare <4 x double> @llvm.x86.avx2.vbroadcast.sd.pd.256(<2 x double>) nounwind readonly + + +define <4 x float> @test_x86_avx2_vbroadcast_ss_ps(<4 x float> %a0) { + ; CHECK-LABEL: test_x86_avx2_vbroadcast_ss_ps: + ; CHECK: ## BB#0: + ; CHECK-NEXT: vbroadcastss %xmm0, %xmm0 + ; CHECK-NEXT: retl + %res = call <4 x float> @llvm.x86.avx2.vbroadcast.ss.ps(<4 x float> %a0) + ret <4 x float> %res +} +declare <4 x float> @llvm.x86.avx2.vbroadcast.ss.ps(<4 x float>) nounwind readonly + + +define <8 x float> @test_x86_avx2_vbroadcast_ss_ps_256(<4 x float> %a0) { + ; CHECK-LABEL: test_x86_avx2_vbroadcast_ss_ps_256: + ; CHECK: ## BB#0: + ; CHECK-NEXT: vbroadcastss %xmm0, %ymm0 + ; CHECK-NEXT: retl + %res = call <8 x float> @llvm.x86.avx2.vbroadcast.ss.ps.256(<4 x float> %a0) + ret <8 x float> %res +} +declare <8 x float> @llvm.x86.avx2.vbroadcast.ss.ps.256(<4 x float>) nounwind readonly + + +define <16 x i8> @test_x86_avx2_pbroadcastb_128(<16 x i8> %a0) { +; CHECK-LABEL: test_x86_avx2_pbroadcastb_128: +; CHECK: ## BB#0: +; CHECK-NEXT: vpbroadcastb %xmm0, %xmm0 +; CHECK-NEXT: retl + %res = call <16 x i8> @llvm.x86.avx2.pbroadcastb.128(<16 x i8> %a0) + ret <16 x i8> %res +} +declare <16 x i8> @llvm.x86.avx2.pbroadcastb.128(<16 x i8>) nounwind readonly + + +define <32 x i8> @test_x86_avx2_pbroadcastb_256(<16 x i8> %a0) { +; CHECK-LABEL: test_x86_avx2_pbroadcastb_256: +; CHECK: ## BB#0: +; CHECK-NEXT: vpbroadcastb %xmm0, %ymm0 +; CHECK-NEXT: retl + %res = call <32 x i8> @llvm.x86.avx2.pbroadcastb.256(<16 x i8> %a0) + ret <32 x i8> %res +} +declare <32 x i8> @llvm.x86.avx2.pbroadcastb.256(<16 x i8>) nounwind readonly + + +define <8 x i16> @test_x86_avx2_pbroadcastw_128(<8 x i16> %a0) { +; CHECK-LABEL: test_x86_avx2_pbroadcastw_128: +; CHECK: ## BB#0: +; CHECK-NEXT: vpbroadcastw %xmm0, %xmm0 +; CHECK-NEXT: retl + %res = call <8 x i16> @llvm.x86.avx2.pbroadcastw.128(<8 x i16> %a0) + ret <8 x i16> %res +} +declare <8 x i16> @llvm.x86.avx2.pbroadcastw.128(<8 x i16>) nounwind readonly + + +define <16 x i16> @test_x86_avx2_pbroadcastw_256(<8 x i16> %a0) { +; CHECK-LABEL: test_x86_avx2_pbroadcastw_256: +; CHECK: ## BB#0: +; CHECK-NEXT: vpbroadcastw %xmm0, %ymm0 +; CHECK-NEXT: retl + %res = call <16 x i16> @llvm.x86.avx2.pbroadcastw.256(<8 x i16> %a0) + ret <16 x i16> %res +} +declare <16 x i16> @llvm.x86.avx2.pbroadcastw.256(<8 x i16>) nounwind readonly + + +define <4 x i32> @test_x86_avx2_pbroadcastd_128(<4 x i32> %a0) { +; CHECK-LABEL: test_x86_avx2_pbroadcastd_128: +; CHECK: ## BB#0: +; CHECK-NEXT: vbroadcastss %xmm0, %xmm0 +; CHECK-NEXT: retl + %res = call <4 x i32> @llvm.x86.avx2.pbroadcastd.128(<4 x i32> %a0) + ret <4 x i32> %res +} +declare <4 x i32> @llvm.x86.avx2.pbroadcastd.128(<4 x i32>) nounwind readonly + + +define <8 x i32> @test_x86_avx2_pbroadcastd_256(<4 x i32> %a0) { +; CHECK-LABEL: test_x86_avx2_pbroadcastd_256: +; CHECK: ## BB#0: +; CHECK-NEXT: vbroadcastss %xmm0, %ymm0 +; CHECK-NEXT: retl + %res = call <8 x i32> @llvm.x86.avx2.pbroadcastd.256(<4 x i32> %a0) + ret <8 x i32> %res +} +declare <8 x i32> @llvm.x86.avx2.pbroadcastd.256(<4 x i32>) nounwind readonly + + +define <2 x i64> @test_x86_avx2_pbroadcastq_128(<2 x i64> %a0) { +; CHECK-LABEL: test_x86_avx2_pbroadcastq_128: +; CHECK: ## BB#0: +; CHECK-NEXT: vpbroadcastq %xmm0, %xmm0 +; CHECK-NEXT: retl + %res = call <2 x i64> @llvm.x86.avx2.pbroadcastq.128(<2 x i64> %a0) + ret <2 x i64> %res +} +declare <2 x i64> @llvm.x86.avx2.pbroadcastq.128(<2 x i64>) nounwind readonly + + +define <4 x i64> @test_x86_avx2_pbroadcastq_256(<2 x i64> %a0) { +; CHECK-LABEL: test_x86_avx2_pbroadcastq_256: +; CHECK: ## BB#0: +; CHECK-NEXT: vbroadcastsd %xmm0, %ymm0 +; CHECK-NEXT: retl + %res = call <4 x i64> @llvm.x86.avx2.pbroadcastq.256(<2 x i64> %a0) + ret <4 x i64> %res +} +declare <4 x i64> @llvm.x86.avx2.pbroadcastq.256(<2 x i64>) nounwind readonly diff --git a/llvm/test/CodeGen/X86/avx2-intrinsics-x86.ll b/llvm/test/CodeGen/X86/avx2-intrinsics-x86.ll index 5b607afef91..3b2a009f271 100644 --- a/llvm/test/CodeGen/X86/avx2-intrinsics-x86.ll +++ b/llvm/test/CodeGen/X86/avx2-intrinsics-x86.ll @@ -641,30 +641,6 @@ define <4 x i64> @test_x86_avx2_pmul.dq(<8 x i32> %a0, <8 x i32> %a1) { declare <4 x i64> @llvm.x86.avx2.pmul.dq(<8 x i32>, <8 x i32>) nounwind readnone -define <4 x double> @test_x86_avx2_vbroadcast_sd_pd_256(<2 x double> %a0) { - ; CHECK: vbroadcastsd - %res = call <4 x double> @llvm.x86.avx2.vbroadcast.sd.pd.256(<2 x double> %a0) ; <<4 x double>> [#uses=1] - ret <4 x double> %res -} -declare <4 x double> @llvm.x86.avx2.vbroadcast.sd.pd.256(<2 x double>) nounwind readonly - - -define <4 x float> @test_x86_avx2_vbroadcast_ss_ps(<4 x float> %a0) { - ; CHECK: vbroadcastss - %res = call <4 x float> @llvm.x86.avx2.vbroadcast.ss.ps(<4 x float> %a0) ; <<4 x float>> [#uses=1] - ret <4 x float> %res -} -declare <4 x float> @llvm.x86.avx2.vbroadcast.ss.ps(<4 x float>) nounwind readonly - - -define <8 x float> @test_x86_avx2_vbroadcast_ss_ps_256(<4 x float> %a0) { - ; CHECK: vbroadcastss - %res = call <8 x float> @llvm.x86.avx2.vbroadcast.ss.ps.256(<4 x float> %a0) ; <<8 x float>> [#uses=1] - ret <8 x float> %res -} -declare <8 x float> @llvm.x86.avx2.vbroadcast.ss.ps.256(<4 x float>) nounwind readonly - - define <4 x i32> @test_x86_avx2_pblendd_128(<4 x i32> %a0, <4 x i32> %a1) { ; CHECK: vpblendd %res = call <4 x i32> @llvm.x86.avx2.pblendd.128(<4 x i32> %a0, <4 x i32> %a1, i8 7) ; <<4 x i32>> [#uses=1] @@ -681,70 +657,6 @@ define <8 x i32> @test_x86_avx2_pblendd_256(<8 x i32> %a0, <8 x i32> %a1) { declare <8 x i32> @llvm.x86.avx2.pblendd.256(<8 x i32>, <8 x i32>, i8) nounwind readnone -define <16 x i8> @test_x86_avx2_pbroadcastb_128(<16 x i8> %a0) { - ; CHECK: vpbroadcastb - %res = call <16 x i8> @llvm.x86.avx2.pbroadcastb.128(<16 x i8> %a0) ; <<16 x i8>> [#uses=1] - ret <16 x i8> %res -} -declare <16 x i8> @llvm.x86.avx2.pbroadcastb.128(<16 x i8>) nounwind readonly - - -define <32 x i8> @test_x86_avx2_pbroadcastb_256(<16 x i8> %a0) { - ; CHECK: vpbroadcastb - %res = call <32 x i8> @llvm.x86.avx2.pbroadcastb.256(<16 x i8> %a0) ; <<32 x i8>> [#uses=1] - ret <32 x i8> %res -} -declare <32 x i8> @llvm.x86.avx2.pbroadcastb.256(<16 x i8>) nounwind readonly - - -define <8 x i16> @test_x86_avx2_pbroadcastw_128(<8 x i16> %a0) { - ; CHECK: vpbroadcastw - %res = call <8 x i16> @llvm.x86.avx2.pbroadcastw.128(<8 x i16> %a0) ; <<8 x i16>> [#uses=1] - ret <8 x i16> %res -} -declare <8 x i16> @llvm.x86.avx2.pbroadcastw.128(<8 x i16>) nounwind readonly - - -define <16 x i16> @test_x86_avx2_pbroadcastw_256(<8 x i16> %a0) { - ; CHECK: vpbroadcastw - %res = call <16 x i16> @llvm.x86.avx2.pbroadcastw.256(<8 x i16> %a0) ; <<16 x i16>> [#uses=1] - ret <16 x i16> %res -} -declare <16 x i16> @llvm.x86.avx2.pbroadcastw.256(<8 x i16>) nounwind readonly - - -define <4 x i32> @test_x86_avx2_pbroadcastd_128(<4 x i32> %a0) { - ; CHECK: vbroadcastss - %res = call <4 x i32> @llvm.x86.avx2.pbroadcastd.128(<4 x i32> %a0) ; <<4 x i32>> [#uses=1] - ret <4 x i32> %res -} -declare <4 x i32> @llvm.x86.avx2.pbroadcastd.128(<4 x i32>) nounwind readonly - - -define <8 x i32> @test_x86_avx2_pbroadcastd_256(<4 x i32> %a0) { - ; CHECK: vbroadcastss {{[^,]+}}, %ymm{{[0-9]+}} - %res = call <8 x i32> @llvm.x86.avx2.pbroadcastd.256(<4 x i32> %a0) ; <<8 x i32>> [#uses=1] - ret <8 x i32> %res -} -declare <8 x i32> @llvm.x86.avx2.pbroadcastd.256(<4 x i32>) nounwind readonly - - -define <2 x i64> @test_x86_avx2_pbroadcastq_128(<2 x i64> %a0) { - ; CHECK: vpbroadcastq - %res = call <2 x i64> @llvm.x86.avx2.pbroadcastq.128(<2 x i64> %a0) ; <<2 x i64>> [#uses=1] - ret <2 x i64> %res -} -declare <2 x i64> @llvm.x86.avx2.pbroadcastq.128(<2 x i64>) nounwind readonly - - -define <4 x i64> @test_x86_avx2_pbroadcastq_256(<2 x i64> %a0) { - ; CHECK: vbroadcastsd {{[^,]+}}, %ymm{{[0-9]+}} - %res = call <4 x i64> @llvm.x86.avx2.pbroadcastq.256(<2 x i64> %a0) ; <<4 x i64>> [#uses=1] - ret <4 x i64> %res -} -declare <4 x i64> @llvm.x86.avx2.pbroadcastq.256(<2 x i64>) nounwind readonly - - define <8 x i32> @test_x86_avx2_permd(<8 x i32> %a0, <8 x i32> %a1) { ; Check that the arguments are swapped between the intrinsic definition ; and its lowering. Indeed, the offsets are the first source in diff --git a/llvm/test/CodeGen/X86/stack-folding-int-avx2.ll b/llvm/test/CodeGen/X86/stack-folding-int-avx2.ll index 03241bbb741..bcfdfc57030 100644 --- a/llvm/test/CodeGen/X86/stack-folding-int-avx2.ll +++ b/llvm/test/CodeGen/X86/stack-folding-int-avx2.ll @@ -12,7 +12,7 @@ define <4 x double> @stack_fold_broadcastsd_ymm(<2 x double> %a0) { ;CHECK-LABEL: stack_fold_broadcastsd_ymm ;CHECK: vbroadcastsd {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() - %2 = call <4 x double> @llvm.x86.avx2.vbroadcast.sd.pd.256(<2 x double> %a0) + %2 = shufflevector <2 x double> %a0, <2 x double> undef, <4 x i32> zeroinitializer ; fadd forces execution domain %3 = fadd <4 x double> %2, <double 0x0, double 0x0, double 0x0, double 0x0> ret <4 x double> %3 @@ -23,7 +23,7 @@ define <4 x float> @stack_fold_broadcastss(<4 x float> %a0) { ;CHECK-LABEL: stack_fold_broadcastss ;CHECK: vbroadcastss {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() - %2 = call <4 x float> @llvm.x86.avx2.vbroadcast.ss.ps(<4 x float> %a0) + %2 = shufflevector <4 x float> %a0, <4 x float> undef, <4 x i32> zeroinitializer ; fadd forces execution domain %3 = fadd <4 x float> %2, <float 0x0, float 0x0, float 0x0, float 0x0> ret <4 x float> %3 @@ -34,7 +34,7 @@ define <8 x float> @stack_fold_broadcastss_ymm(<4 x float> %a0) { ;CHECK-LABEL: stack_fold_broadcastss_ymm ;CHECK: vbroadcastss {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() - %2 = call <8 x float> @llvm.x86.avx2.vbroadcast.ss.ps.256(<4 x float> %a0) + %2 = shufflevector <4 x float> %a0, <4 x float> undef, <8 x i32> zeroinitializer ; fadd forces execution domain %3 = fadd <8 x float> %2, <float 0x0, float 0x0, float 0x0, float 0x0, float 0x0, float 0x0, float 0x0, float 0x0> ret <8 x float> %3 |

