diff options
author | Craig Topper <craig.topper@intel.com> | 2017-11-13 08:07:33 +0000 |
---|---|---|
committer | Craig Topper <craig.topper@intel.com> | 2017-11-13 08:07:33 +0000 |
commit | 75d71540f862cb5ca4a033e7b6f0daa9aa60f132 (patch) | |
tree | 2f05c850e315f602a1b77855185325f0285b3988 /llvm/test/CodeGen/X86/avx512-scalarIntrinsics.ll | |
parent | c748455e51e10076c0353a5c8c083733bcba22d8 (diff) | |
download | bcm5719-llvm-75d71540f862cb5ca4a033e7b6f0daa9aa60f132.tar.gz bcm5719-llvm-75d71540f862cb5ca4a033e7b6f0daa9aa60f132.zip |
[X86] Use sse_load_f32/f64 to improve load folding of scalar vscalefss/sd, vrcp14ss/sd, vrsqrt14ss/sd instructions.
llvm-svn: 318022
Diffstat (limited to 'llvm/test/CodeGen/X86/avx512-scalarIntrinsics.ll')
-rw-r--r-- | llvm/test/CodeGen/X86/avx512-scalarIntrinsics.ll | 60 |
1 file changed, 60 insertions, 0 deletions
diff --git a/llvm/test/CodeGen/X86/avx512-scalarIntrinsics.ll b/llvm/test/CodeGen/X86/avx512-scalarIntrinsics.ll index 6a5333d2ba0..20e8b60c114 100644 --- a/llvm/test/CodeGen/X86/avx512-scalarIntrinsics.ll +++ b/llvm/test/CodeGen/X86/avx512-scalarIntrinsics.ll @@ -11,6 +11,16 @@ define <4 x float> @test_rsqrt14_ss(<4 x float> %a0) { %res = call <4 x float> @llvm.x86.avx512.rsqrt14.ss(<4 x float> %a0, <4 x float> %a0, <4 x float> zeroinitializer, i8 -1) ; ret <4 x float> %res } + +define <4 x float> @test_rsqrt14_ss_load(<4 x float> %a0, <4 x float>* %a1ptr) { +; CHECK-LABEL: test_rsqrt14_ss_load: +; CHECK: ## BB#0: +; CHECK-NEXT: vrsqrt14ss (%rdi), %xmm0, %xmm0 +; CHECK-NEXT: retq + %a1 = load <4 x float>, <4 x float>* %a1ptr + %res = call <4 x float> @llvm.x86.avx512.rsqrt14.ss(<4 x float> %a0, <4 x float> %a1, <4 x float> zeroinitializer, i8 -1) ; + ret <4 x float> %res +} declare <4 x float> @llvm.x86.avx512.rsqrt14.ss(<4 x float>, <4 x float>, <4 x float>, i8) nounwind readnone define <4 x float> @test_rcp14_ss(<4 x float> %a0) { @@ -21,6 +31,16 @@ define <4 x float> @test_rcp14_ss(<4 x float> %a0) { %res = call <4 x float> @llvm.x86.avx512.rcp14.ss(<4 x float> %a0, <4 x float> %a0, <4 x float> zeroinitializer, i8 -1) ; ret <4 x float> %res } + +define <4 x float> @test_rcp14_ss_load(<4 x float> %a0, <4 x float>* %a1ptr) { +; CHECK-LABEL: test_rcp14_ss_load: +; CHECK: ## BB#0: +; CHECK-NEXT: vrcp14ss (%rdi), %xmm0, %xmm0 +; CHECK-NEXT: retq + %a1 = load <4 x float>, <4 x float>* %a1ptr + %res = call <4 x float> @llvm.x86.avx512.rcp14.ss(<4 x float> %a0, <4 x float> %a1, <4 x float> zeroinitializer, i8 -1) ; + ret <4 x float> %res +} declare <4 x float> @llvm.x86.avx512.rcp14.ss(<4 x float>, <4 x float>, <4 x float>, i8) nounwind readnone define <2 x double> @test_rsqrt14_sd(<2 x double> %a0) { @@ -31,6 +51,16 @@ define <2 x double> @test_rsqrt14_sd(<2 x double> %a0) { %res = call <2 x double> @llvm.x86.avx512.rsqrt14.sd(<2 x double> %a0, <2 x double> %a0, <2 x 
double> zeroinitializer, i8 -1) ; ret <2 x double> %res } + +define <2 x double> @test_rsqrt14_sd_load(<2 x double> %a0, <2 x double>* %a1ptr) { +; CHECK-LABEL: test_rsqrt14_sd_load: +; CHECK: ## BB#0: +; CHECK-NEXT: vrsqrt14sd (%rdi), %xmm0, %xmm0 +; CHECK-NEXT: retq + %a1 = load <2 x double>, <2 x double>* %a1ptr + %res = call <2 x double> @llvm.x86.avx512.rsqrt14.sd(<2 x double> %a0, <2 x double> %a1, <2 x double> zeroinitializer, i8 -1) ; + ret <2 x double> %res +} declare <2 x double> @llvm.x86.avx512.rsqrt14.sd(<2 x double>, <2 x double>, <2 x double>, i8) nounwind readnone define <2 x double> @test_rcp14_sd(<2 x double> %a0) { @@ -42,6 +72,16 @@ define <2 x double> @test_rcp14_sd(<2 x double> %a0) { ret <2 x double> %res } + +define <2 x double> @test_rcp14_sd_load(<2 x double> %a0, <2 x double>* %a1ptr) { +; CHECK-LABEL: test_rcp14_sd_load: +; CHECK: ## BB#0: +; CHECK-NEXT: vrcp14sd (%rdi), %xmm0, %xmm0 +; CHECK-NEXT: retq + %a1 = load <2 x double>, <2 x double>* %a1ptr + %res = call <2 x double> @llvm.x86.avx512.rcp14.sd(<2 x double> %a0, <2 x double> %a1, <2 x double> zeroinitializer, i8 -1) ; + ret <2 x double> %res +} declare <2 x double> @llvm.x86.avx512.rcp14.sd(<2 x double>, <2 x double>, <2 x double>, i8) nounwind readnone declare <4 x float> @llvm.x86.avx512.mask.scalef.ss(<4 x float>, <4 x float>,<4 x float>, i8, i32) @@ -67,6 +107,16 @@ define <4 x float>@test_int_x86_avx512_mask_scalef_ss(<4 x float> %x0, <4 x floa ret <4 x float> %res2 } +define <4 x float>@test_int_x86_avx512_mask_scalef_ss_load(<4 x float> %x0, <4 x float>* %x1ptr) { +; CHECK-LABEL: test_int_x86_avx512_mask_scalef_ss_load: +; CHECK: ## BB#0: +; CHECK-NEXT: vscalefss (%rdi), %xmm0, %xmm0 +; CHECK-NEXT: retq + %x1 = load <4 x float>, <4 x float>* %x1ptr + %res = call <4 x float> @llvm.x86.avx512.mask.scalef.ss(<4 x float> %x0, <4 x float> %x1, <4 x float> undef, i8 -1, i32 4) + ret <4 x float> %res +} + declare <2 x double> @llvm.x86.avx512.mask.scalef.sd(<2 x double>, <2 x 
double>,<2 x double>, i8, i32) define <2 x double>@test_int_x86_avx512_mask_scalef_sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x3, i8 %x4) { ; SKX-LABEL: test_int_x86_avx512_mask_scalef_sd: @@ -89,3 +139,13 @@ define <2 x double>@test_int_x86_avx512_mask_scalef_sd(<2 x double> %x0, <2 x do %res2 = fadd <2 x double> %res, %res1 ret <2 x double> %res2 } + +define <2 x double>@test_int_x86_avx512_mask_scalef_sd_load(<2 x double> %x0, <2 x double>* %x1ptr) { +; CHECK-LABEL: test_int_x86_avx512_mask_scalef_sd_load: +; CHECK: ## BB#0: +; CHECK-NEXT: vscalefsd (%rdi), %xmm0, %xmm0 +; CHECK-NEXT: retq + %x1 = load <2 x double>, <2 x double>* %x1ptr + %res = call <2 x double> @llvm.x86.avx512.mask.scalef.sd(<2 x double> %x0, <2 x double> %x1, <2 x double> undef, i8 -1, i32 4) + ret <2 x double> %res +} |