diff options
author | Craig Topper <craig.topper@gmail.com> | 2016-12-18 07:54:23 +0000 |
---|---|---|
committer | Craig Topper <craig.topper@gmail.com> | 2016-12-18 07:54:23 +0000 |
commit | 7029db0eaa2255ea223dc49eedd82b2431da2d50 (patch) | |
tree | 6da2ce3b794321db6fee623994ca6289e4daeca7 /llvm/test | |
parent | c9318c6cf2104c5caa3d063f046121b9fdfb2728 (diff) | |
download | bcm5719-llvm-7029db0eaa2255ea223dc49eedd82b2431da2d50.tar.gz bcm5719-llvm-7029db0eaa2255ea223dc49eedd82b2431da2d50.zip |
[X86][SSE][AVX-512] Convert FAND/FOR/FXOR/FANDN nodes to integer operations if they are available. This will allow a bunch of patterns to be removed.
These nodes are only emitted when lowering FABS/FNEG/FNABS/FCOPYSIGN. Ideally we wouldn't create these nodes at all if SSE2 or higher is available, but it was simpler to convert them in a DAG combine.
For SSE2, AVX, and AVX-512 with DQI, this is not a functional change, as the execution domain fixing pass ensures the right domain is selected regardless of the ISD opcode.
For AVX-512 without DQI, we end up using integer instructions since the floating-point versions aren't available. However, we were already doing that for any logical operations in code that didn't come from FABS/FNEG/FNABS/FCOPYSIGN, so this seems no worse, and we now get the benefit of being able to fold broadcasts.
llvm-svn: 290060
Diffstat (limited to 'llvm/test')
-rw-r--r-- | llvm/test/CodeGen/X86/avx512-arith.ll | 3 | ||||
-rw-r--r-- | llvm/test/CodeGen/X86/pr13577.ll | 3 | ||||
-rw-r--r-- | llvm/test/CodeGen/X86/vec-copysign-avx512.ll | 43 | ||||
-rw-r--r-- | llvm/test/CodeGen/X86/vec_fabs.ll | 54 |
4 files changed, 58 insertions, 45 deletions
diff --git a/llvm/test/CodeGen/X86/avx512-arith.ll b/llvm/test/CodeGen/X86/avx512-arith.ll index 25892de7ed0..d399e808f7f 100644 --- a/llvm/test/CodeGen/X86/avx512-arith.ll +++ b/llvm/test/CodeGen/X86/avx512-arith.ll @@ -1011,8 +1011,7 @@ define <8 x float> @test_fxor_8f32(<8 x float> %a) { ; ; AVX512VL-LABEL: test_fxor_8f32: ; AVX512VL: ## BB#0: -; AVX512VL-NEXT: vbroadcastss {{.*}}(%rip), %ymm1 -; AVX512VL-NEXT: vxorps %ymm1, %ymm0, %ymm0 +; AVX512VL-NEXT: vpxord {{.*}}(%rip){1to8}, %ymm0, %ymm0 ; AVX512VL-NEXT: retq ; ; AVX512BW-LABEL: test_fxor_8f32: diff --git a/llvm/test/CodeGen/X86/pr13577.ll b/llvm/test/CodeGen/X86/pr13577.ll index a89a503fd49..1b1622513ea 100644 --- a/llvm/test/CodeGen/X86/pr13577.ll +++ b/llvm/test/CodeGen/X86/pr13577.ll @@ -30,10 +30,9 @@ declare x86_fp80 @copysignl(x86_fp80, x86_fp80) nounwind readnone define float @pr26070() { ; CHECK-LABEL: pr26070: ; CHECK: ## BB#0: -; CHECK-NEXT: andps {{.*}}(%rip), %xmm1 ; CHECK-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero ; CHECK-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,0,0,0] -; CHECK-NEXT: orps %xmm1, %xmm0 +; CHECK-NEXT: orps {{.*}}(%rip), %xmm0 ; CHECK-NEXT: retq ; %c = call float @copysignf(float 1.0, float undef) readnone diff --git a/llvm/test/CodeGen/X86/vec-copysign-avx512.ll b/llvm/test/CodeGen/X86/vec-copysign-avx512.ll index 53c89a872c8..73349a3c117 100644 --- a/llvm/test/CodeGen/X86/vec-copysign-avx512.ll +++ b/llvm/test/CodeGen/X86/vec-copysign-avx512.ll @@ -5,11 +5,9 @@ define <4 x float> @v4f32(<4 x float> %a, <4 x float> %b) nounwind { ; AVX512VL-LABEL: v4f32: ; AVX512VL: ## BB#0: -; AVX512VL-NEXT: vbroadcastss {{.*}}(%rip), %xmm2 -; AVX512VL-NEXT: vandps %xmm2, %xmm1, %xmm1 -; AVX512VL-NEXT: vbroadcastss {{.*}}(%rip), %xmm2 -; AVX512VL-NEXT: vandps %xmm2, %xmm0, %xmm0 -; AVX512VL-NEXT: vorps %xmm1, %xmm0, %xmm0 +; AVX512VL-NEXT: vpandd {{.*}}(%rip){1to4}, %xmm1, %xmm1 +; AVX512VL-NEXT: vpandd {{.*}}(%rip){1to4}, %xmm0, %xmm0 +; AVX512VL-NEXT: vporq %xmm1, %xmm0, %xmm0 ; 
AVX512VL-NEXT: retq ; ; AVX512VLDQ-LABEL: v4f32: @@ -25,11 +23,9 @@ define <4 x float> @v4f32(<4 x float> %a, <4 x float> %b) nounwind { define <8 x float> @v8f32(<8 x float> %a, <8 x float> %b) nounwind { ; AVX512VL-LABEL: v8f32: ; AVX512VL: ## BB#0: -; AVX512VL-NEXT: vbroadcastss {{.*}}(%rip), %ymm2 -; AVX512VL-NEXT: vandps %ymm2, %ymm1, %ymm1 -; AVX512VL-NEXT: vbroadcastss {{.*}}(%rip), %ymm2 -; AVX512VL-NEXT: vandps %ymm2, %ymm0, %ymm0 -; AVX512VL-NEXT: vorps %ymm1, %ymm0, %ymm0 +; AVX512VL-NEXT: vpandd {{.*}}(%rip){1to8}, %ymm1, %ymm1 +; AVX512VL-NEXT: vpandd {{.*}}(%rip){1to8}, %ymm0, %ymm0 +; AVX512VL-NEXT: vporq %ymm1, %ymm0, %ymm0 ; AVX512VL-NEXT: retq ; ; AVX512VLDQ-LABEL: v8f32: @@ -61,12 +57,19 @@ define <16 x float> @v16f32(<16 x float> %a, <16 x float> %b) nounwind { } define <2 x double> @v2f64(<2 x double> %a, <2 x double> %b) nounwind { -; CHECK-LABEL: v2f64: -; CHECK: ## BB#0: -; CHECK-NEXT: vandps {{.*}}(%rip), %xmm1, %xmm1 -; CHECK-NEXT: vandps {{.*}}(%rip), %xmm0, %xmm0 -; CHECK-NEXT: vorps %xmm1, %xmm0, %xmm0 -; CHECK-NEXT: retq +; AVX512VL-LABEL: v2f64: +; AVX512VL: ## BB#0: +; AVX512VL-NEXT: vpandq {{.*}}(%rip), %xmm1, %xmm1 +; AVX512VL-NEXT: vpandq {{.*}}(%rip), %xmm0, %xmm0 +; AVX512VL-NEXT: vporq %xmm1, %xmm0, %xmm0 +; AVX512VL-NEXT: retq +; +; AVX512VLDQ-LABEL: v2f64: +; AVX512VLDQ: ## BB#0: +; AVX512VLDQ-NEXT: vandps {{.*}}(%rip), %xmm1, %xmm1 +; AVX512VLDQ-NEXT: vandps {{.*}}(%rip), %xmm0, %xmm0 +; AVX512VLDQ-NEXT: vorps %xmm1, %xmm0, %xmm0 +; AVX512VLDQ-NEXT: retq %tmp = tail call <2 x double> @llvm.copysign.v2f64( <2 x double> %a, <2 x double> %b ) ret <2 x double> %tmp } @@ -74,11 +77,9 @@ define <2 x double> @v2f64(<2 x double> %a, <2 x double> %b) nounwind { define <4 x double> @v4f64(<4 x double> %a, <4 x double> %b) nounwind { ; AVX512VL-LABEL: v4f64: ; AVX512VL: ## BB#0: -; AVX512VL-NEXT: vbroadcastsd {{.*}}(%rip), %ymm2 -; AVX512VL-NEXT: vandps %ymm2, %ymm1, %ymm1 -; AVX512VL-NEXT: vbroadcastsd {{.*}}(%rip), %ymm2 -; 
AVX512VL-NEXT: vandps %ymm2, %ymm0, %ymm0 -; AVX512VL-NEXT: vorps %ymm1, %ymm0, %ymm0 +; AVX512VL-NEXT: vpandq {{.*}}(%rip){1to4}, %ymm1, %ymm1 +; AVX512VL-NEXT: vpandq {{.*}}(%rip){1to4}, %ymm0, %ymm0 +; AVX512VL-NEXT: vporq %ymm1, %ymm0, %ymm0 ; AVX512VL-NEXT: retq ; ; AVX512VLDQ-LABEL: v4f64: diff --git a/llvm/test/CodeGen/X86/vec_fabs.ll b/llvm/test/CodeGen/X86/vec_fabs.ll index 15941f7a8bc..e61f01ec0c6 100644 --- a/llvm/test/CodeGen/X86/vec_fabs.ll +++ b/llvm/test/CodeGen/X86/vec_fabs.ll @@ -10,15 +10,35 @@ ; 2013. define <2 x double> @fabs_v2f64(<2 x double> %p) { -; X32-LABEL: fabs_v2f64: -; X32: # BB#0: -; X32-NEXT: vandps {{\.LCPI.*}}, %xmm0, %xmm0 -; X32-NEXT: retl +; X32_AVX-LABEL: fabs_v2f64: +; X32_AVX: # BB#0: +; X32_AVX-NEXT: vandps {{\.LCPI.*}}, %xmm0, %xmm0 +; X32_AVX-NEXT: retl ; -; X64-LABEL: fabs_v2f64: -; X64: # BB#0: -; X64-NEXT: vandps {{.*}}(%rip), %xmm0, %xmm0 -; X64-NEXT: retq +; X32_AVX512VL-LABEL: fabs_v2f64: +; X32_AVX512VL: # BB#0: +; X32_AVX512VL-NEXT: vpandq {{\.LCPI.*}}, %xmm0, %xmm0 +; X32_AVX512VL-NEXT: retl +; +; X32_AVX512VLDQ-LABEL: fabs_v2f64: +; X32_AVX512VLDQ: # BB#0: +; X32_AVX512VLDQ-NEXT: vandps {{\.LCPI.*}}, %xmm0, %xmm0 +; X32_AVX512VLDQ-NEXT: retl +; +; X64_AVX-LABEL: fabs_v2f64: +; X64_AVX: # BB#0: +; X64_AVX-NEXT: vandps {{.*}}(%rip), %xmm0, %xmm0 +; X64_AVX-NEXT: retq +; +; X64_AVX512VL-LABEL: fabs_v2f64: +; X64_AVX512VL: # BB#0: +; X64_AVX512VL-NEXT: vpandq {{.*}}(%rip), %xmm0, %xmm0 +; X64_AVX512VL-NEXT: retq +; +; X64_AVX512VLDQ-LABEL: fabs_v2f64: +; X64_AVX512VLDQ: # BB#0: +; X64_AVX512VLDQ-NEXT: vandps {{.*}}(%rip), %xmm0, %xmm0 +; X64_AVX512VLDQ-NEXT: retq %t = call <2 x double> @llvm.fabs.v2f64(<2 x double> %p) ret <2 x double> %t } @@ -32,8 +52,7 @@ define <4 x float> @fabs_v4f32(<4 x float> %p) { ; ; X32_AVX512VL-LABEL: fabs_v4f32: ; X32_AVX512VL: # BB#0: -; X32_AVX512VL-NEXT: vbroadcastss {{\.LCPI.*}}, %xmm1 -; X32_AVX512VL-NEXT: vandps %xmm1, %xmm0, %xmm0 +; X32_AVX512VL-NEXT: vpandd {{\.LCPI.*}}{1to4}, 
%xmm0, %xmm0 ; X32_AVX512VL-NEXT: retl ; ; X32_AVX512VLDQ-LABEL: fabs_v4f32: @@ -48,8 +67,7 @@ define <4 x float> @fabs_v4f32(<4 x float> %p) { ; ; X64_AVX512VL-LABEL: fabs_v4f32: ; X64_AVX512VL: # BB#0: -; X64_AVX512VL-NEXT: vbroadcastss {{.*}}(%rip), %xmm1 -; X64_AVX512VL-NEXT: vandps %xmm1, %xmm0, %xmm0 +; X64_AVX512VL-NEXT: vpandd {{.*}}(%rip){1to4}, %xmm0, %xmm0 ; X64_AVX512VL-NEXT: retq ; ; X64_AVX512VLDQ-LABEL: fabs_v4f32: @@ -69,8 +87,7 @@ define <4 x double> @fabs_v4f64(<4 x double> %p) { ; ; X32_AVX512VL-LABEL: fabs_v4f64: ; X32_AVX512VL: # BB#0: -; X32_AVX512VL-NEXT: vbroadcastsd {{\.LCPI.*}}, %ymm1 -; X32_AVX512VL-NEXT: vandps %ymm1, %ymm0, %ymm0 +; X32_AVX512VL-NEXT: vpandq {{\.LCPI.*}}{1to4}, %ymm0, %ymm0 ; X32_AVX512VL-NEXT: retl ; ; X32_AVX512VLDQ-LABEL: fabs_v4f64: @@ -85,8 +102,7 @@ define <4 x double> @fabs_v4f64(<4 x double> %p) { ; ; X64_AVX512VL-LABEL: fabs_v4f64: ; X64_AVX512VL: # BB#0: -; X64_AVX512VL-NEXT: vbroadcastsd {{.*}}(%rip), %ymm1 -; X64_AVX512VL-NEXT: vandps %ymm1, %ymm0, %ymm0 +; X64_AVX512VL-NEXT: vpandq {{.*}}(%rip){1to4}, %ymm0, %ymm0 ; X64_AVX512VL-NEXT: retq ; ; X64_AVX512VLDQ-LABEL: fabs_v4f64: @@ -106,8 +122,7 @@ define <8 x float> @fabs_v8f32(<8 x float> %p) { ; ; X32_AVX512VL-LABEL: fabs_v8f32: ; X32_AVX512VL: # BB#0: -; X32_AVX512VL-NEXT: vbroadcastss {{\.LCPI.*}}, %ymm1 -; X32_AVX512VL-NEXT: vandps %ymm1, %ymm0, %ymm0 +; X32_AVX512VL-NEXT: vpandd {{\.LCPI.*}}{1to8}, %ymm0, %ymm0 ; X32_AVX512VL-NEXT: retl ; ; X32_AVX512VLDQ-LABEL: fabs_v8f32: @@ -122,8 +137,7 @@ define <8 x float> @fabs_v8f32(<8 x float> %p) { ; ; X64_AVX512VL-LABEL: fabs_v8f32: ; X64_AVX512VL: # BB#0: -; X64_AVX512VL-NEXT: vbroadcastss {{.*}}(%rip), %ymm1 -; X64_AVX512VL-NEXT: vandps %ymm1, %ymm0, %ymm0 +; X64_AVX512VL-NEXT: vpandd {{.*}}(%rip){1to8}, %ymm0, %ymm0 ; X64_AVX512VL-NEXT: retq ; ; X64_AVX512VLDQ-LABEL: fabs_v8f32: |