[X86][SSE][AVX-512] Convert FAND/FOR/FXOR/FANDN nodes to integer operations if they are available. This will allow a bunch of patterns to be removed.

These nodes are only emitted for lowering FABS/FNEG/FNABS/FCOPYSIGN. Ideally we just wouldn't create these nodes if SSE2 or higher is available, but it was simple to just convert them in DAG combine. For SSE2, AVX, and AVX-512 with DQI this is no functional change, as the execution domain fixing pass ensures the right domain is selected regardless of the ISD opcode. For AVX-512 without DQI we end up using integer instructions, since the floating point versions aren't available. But we were already doing that for any logical operations in code that didn't come from FABS/FNEG/FNABS/FCOPYSIGN, so this seems no worse. And we get the benefit of being able to fold broadcasts now.

llvm-svn: 290060
author: Craig Topper <craig.topper@gmail.com> 2016-12-18 07:54:23 +0000
committer: Craig Topper <craig.topper@gmail.com> 2016-12-18 07:54:23 +0000
commit: 7029db0eaa2255ea223dc49eedd82b2431da2d50 (patch)
tree: 6da2ce3b794321db6fee623994ca6289e4daeca7 /llvm/test
parent: c9318c6cf2104c5caa3d063f046121b9fdfb2728 (diff)
download: bcm5719-llvm-7029db0eaa2255ea223dc49eedd82b2431da2d50.tar.gz
bcm5719-llvm-7029db0eaa2255ea223dc49eedd82b2431da2d50.zip
4 files changed, 58 insertions, 45 deletions
diff --git a/llvm/test/CodeGen/X86/avx512-arith.ll b/llvm/test/CodeGen/X86/avx512-arith.ll
index 25892de7ed0..d399e808f7f 100644
--- a/llvm/test/CodeGen/X86/avx512-arith.ll
+++ b/llvm/test/CodeGen/X86/avx512-arith.ll
@@ -1011,8 +1011,7 @@ define <8 x float>  @test_fxor_8f32(<8 x float> %a) {
 ;
 ; AVX512VL-LABEL: test_fxor_8f32:
 ; AVX512VL:       ## BB#0:
-; AVX512VL-NEXT:    vbroadcastss {{.*}}(%rip), %ymm1
-; AVX512VL-NEXT:    vxorps %ymm1, %ymm0, %ymm0
+; AVX512VL-NEXT:    vpxord {{.*}}(%rip){1to8}, %ymm0, %ymm0
 ; AVX512VL-NEXT:    retq
 ;
 ; AVX512BW-LABEL: test_fxor_8f32:
diff --git a/llvm/test/CodeGen/X86/pr13577.ll b/llvm/test/CodeGen/X86/pr13577.ll
index a89a503fd49..1b1622513ea 100644
--- a/llvm/test/CodeGen/X86/pr13577.ll
+++ b/llvm/test/CodeGen/X86/pr13577.ll
@@ -30,10 +30,9 @@ declare x86_fp80 @copysignl(x86_fp80, x86_fp80) nounwind readnone
 define float @pr26070() {
 ; CHECK-LABEL: pr26070:
 ; CHECK:       ## BB#0:
-; CHECK-NEXT:    andps {{.*}}(%rip), %xmm1
 ; CHECK-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
 ; CHECK-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,0,0,0]
-; CHECK-NEXT:    orps %xmm1, %xmm0
+; CHECK-NEXT:    orps {{.*}}(%rip), %xmm0
 ; CHECK-NEXT:    retq
 ;
   %c = call float @copysignf(float 1.0, float undef) readnone
diff --git a/llvm/test/CodeGen/X86/vec-copysign-avx512.ll b/llvm/test/CodeGen/X86/vec-copysign-avx512.ll
index 53c89a872c8..73349a3c117 100644
--- a/llvm/test/CodeGen/X86/vec-copysign-avx512.ll
+++ b/llvm/test/CodeGen/X86/vec-copysign-avx512.ll
@@ -5,11 +5,9 @@
 define <4 x float> @v4f32(<4 x float> %a, <4 x float> %b) nounwind {
 ; AVX512VL-LABEL: v4f32:
 ; AVX512VL:       ## BB#0:
-; AVX512VL-NEXT:    vbroadcastss {{.*}}(%rip), %xmm2
-; AVX512VL-NEXT:    vandps %xmm2, %xmm1, %xmm1
-; AVX512VL-NEXT:    vbroadcastss {{.*}}(%rip), %xmm2
-; AVX512VL-NEXT:    vandps %xmm2, %xmm0, %xmm0
-; AVX512VL-NEXT:    vorps %xmm1, %xmm0, %xmm0
+; AVX512VL-NEXT:    vpandd {{.*}}(%rip){1to4}, %xmm1, %xmm1
+; AVX512VL-NEXT:    vpandd {{.*}}(%rip){1to4}, %xmm0, %xmm0
+; AVX512VL-NEXT:    vporq %xmm1, %xmm0, %xmm0
 ; AVX512VL-NEXT:    retq
 ;
 ; AVX512VLDQ-LABEL: v4f32:
@@ -25,11 +23,9 @@ define <4 x float> @v4f32(<4 x float> %a, <4 x float> %b) nounwind {
 define <8 x float> @v8f32(<8 x float> %a, <8 x float> %b) nounwind {
 ; AVX512VL-LABEL: v8f32:
 ; AVX512VL:       ## BB#0:
-; AVX512VL-NEXT:    vbroadcastss {{.*}}(%rip), %ymm2
-; AVX512VL-NEXT:    vandps %ymm2, %ymm1, %ymm1
-; AVX512VL-NEXT:    vbroadcastss {{.*}}(%rip), %ymm2
-; AVX512VL-NEXT:    vandps %ymm2, %ymm0, %ymm0
-; AVX512VL-NEXT:    vorps %ymm1, %ymm0, %ymm0
+; AVX512VL-NEXT:    vpandd {{.*}}(%rip){1to8}, %ymm1, %ymm1
+; AVX512VL-NEXT:    vpandd {{.*}}(%rip){1to8}, %ymm0, %ymm0
+; AVX512VL-NEXT:    vporq %ymm1, %ymm0, %ymm0
 ; AVX512VL-NEXT:    retq
 ;
 ; AVX512VLDQ-LABEL: v8f32:
@@ -61,12 +57,19 @@ define <16 x float> @v16f32(<16 x float> %a, <16 x float> %b) nounwind {
 }
 
 define <2 x double> @v2f64(<2 x double> %a, <2 x double> %b) nounwind {
-; CHECK-LABEL: v2f64:
-; CHECK:       ## BB#0:
-; CHECK-NEXT:    vandps {{.*}}(%rip), %xmm1, %xmm1
-; CHECK-NEXT:    vandps {{.*}}(%rip), %xmm0, %xmm0
-; CHECK-NEXT:    vorps %xmm1, %xmm0, %xmm0
-; CHECK-NEXT:    retq
+; AVX512VL-LABEL: v2f64:
+; AVX512VL:       ## BB#0:
+; AVX512VL-NEXT:    vpandq {{.*}}(%rip), %xmm1, %xmm1
+; AVX512VL-NEXT:    vpandq {{.*}}(%rip), %xmm0, %xmm0
+; AVX512VL-NEXT:    vporq %xmm1, %xmm0, %xmm0
+; AVX512VL-NEXT:    retq
+;
+; AVX512VLDQ-LABEL: v2f64:
+; AVX512VLDQ:       ## BB#0:
+; AVX512VLDQ-NEXT:    vandps {{.*}}(%rip), %xmm1, %xmm1
+; AVX512VLDQ-NEXT:    vandps {{.*}}(%rip), %xmm0, %xmm0
+; AVX512VLDQ-NEXT:    vorps %xmm1, %xmm0, %xmm0
+; AVX512VLDQ-NEXT:    retq
   %tmp = tail call <2 x double> @llvm.copysign.v2f64( <2 x double> %a, <2 x double> %b )
   ret <2 x double> %tmp
 }
@@ -74,11 +77,9 @@ define <2 x double> @v2f64(<2 x double> %a, <2 x double> %b) nounwind {
 define <4 x double> @v4f64(<4 x double> %a, <4 x double> %b) nounwind {
 ; AVX512VL-LABEL: v4f64:
 ; AVX512VL:       ## BB#0:
-; AVX512VL-NEXT:    vbroadcastsd {{.*}}(%rip), %ymm2
-; AVX512VL-NEXT:    vandps %ymm2, %ymm1, %ymm1
-; AVX512VL-NEXT:    vbroadcastsd {{.*}}(%rip), %ymm2
-; AVX512VL-NEXT:    vandps %ymm2, %ymm0, %ymm0
-; AVX512VL-NEXT:    vorps %ymm1, %ymm0, %ymm0
+; AVX512VL-NEXT:    vpandq {{.*}}(%rip){1to4}, %ymm1, %ymm1
+; AVX512VL-NEXT:    vpandq {{.*}}(%rip){1to4}, %ymm0, %ymm0
+; AVX512VL-NEXT:    vporq %ymm1, %ymm0, %ymm0
 ; AVX512VL-NEXT:    retq
 ;
 ; AVX512VLDQ-LABEL: v4f64:
diff --git a/llvm/test/CodeGen/X86/vec_fabs.ll b/llvm/test/CodeGen/X86/vec_fabs.ll
index 15941f7a8bc..e61f01ec0c6 100644
--- a/llvm/test/CodeGen/X86/vec_fabs.ll
+++ b/llvm/test/CodeGen/X86/vec_fabs.ll
@@ -10,15 +10,35 @@
 ; 2013.
 
 define <2 x double> @fabs_v2f64(<2 x double> %p) {
-; X32-LABEL: fabs_v2f64:
-; X32:       # BB#0:
-; X32-NEXT:    vandps {{\.LCPI.*}}, %xmm0, %xmm0
-; X32-NEXT:    retl
+; X32_AVX-LABEL: fabs_v2f64:
+; X32_AVX:       # BB#0:
+; X32_AVX-NEXT:    vandps {{\.LCPI.*}}, %xmm0, %xmm0
+; X32_AVX-NEXT:    retl
 ;
-; X64-LABEL: fabs_v2f64:
-; X64:       # BB#0:
-; X64-NEXT:    vandps {{.*}}(%rip), %xmm0, %xmm0
-; X64-NEXT:    retq
+; X32_AVX512VL-LABEL: fabs_v2f64:
+; X32_AVX512VL:       # BB#0:
+; X32_AVX512VL-NEXT:    vpandq {{\.LCPI.*}}, %xmm0, %xmm0
+; X32_AVX512VL-NEXT:    retl
+;
+; X32_AVX512VLDQ-LABEL: fabs_v2f64:
+; X32_AVX512VLDQ:       # BB#0:
+; X32_AVX512VLDQ-NEXT:    vandps {{\.LCPI.*}}, %xmm0, %xmm0
+; X32_AVX512VLDQ-NEXT:    retl
+;
+; X64_AVX-LABEL: fabs_v2f64:
+; X64_AVX:       # BB#0:
+; X64_AVX-NEXT:    vandps {{.*}}(%rip), %xmm0, %xmm0
+; X64_AVX-NEXT:    retq
+;
+; X64_AVX512VL-LABEL: fabs_v2f64:
+; X64_AVX512VL:       # BB#0:
+; X64_AVX512VL-NEXT:    vpandq {{.*}}(%rip), %xmm0, %xmm0
+; X64_AVX512VL-NEXT:    retq
+;
+; X64_AVX512VLDQ-LABEL: fabs_v2f64:
+; X64_AVX512VLDQ:       # BB#0:
+; X64_AVX512VLDQ-NEXT:    vandps {{.*}}(%rip), %xmm0, %xmm0
+; X64_AVX512VLDQ-NEXT:    retq
   %t = call <2 x double> @llvm.fabs.v2f64(<2 x double> %p)
   ret <2 x double> %t
 }
@@ -32,8 +52,7 @@ define <4 x float> @fabs_v4f32(<4 x float> %p) {
 ;
 ; X32_AVX512VL-LABEL: fabs_v4f32:
 ; X32_AVX512VL:       # BB#0:
-; X32_AVX512VL-NEXT:    vbroadcastss {{\.LCPI.*}}, %xmm1
-; X32_AVX512VL-NEXT:    vandps %xmm1, %xmm0, %xmm0
+; X32_AVX512VL-NEXT:    vpandd {{\.LCPI.*}}{1to4}, %xmm0, %xmm0
 ; X32_AVX512VL-NEXT:    retl
 ;
 ; X32_AVX512VLDQ-LABEL: fabs_v4f32:
@@ -48,8 +67,7 @@ define <4 x float> @fabs_v4f32(<4 x float> %p) {
 ;
 ; X64_AVX512VL-LABEL: fabs_v4f32:
 ; X64_AVX512VL:       # BB#0:
-; X64_AVX512VL-NEXT:    vbroadcastss {{.*}}(%rip), %xmm1
-; X64_AVX512VL-NEXT:    vandps %xmm1, %xmm0, %xmm0
+; X64_AVX512VL-NEXT:    vpandd {{.*}}(%rip){1to4}, %xmm0, %xmm0
 ; X64_AVX512VL-NEXT:    retq
 ;
 ; X64_AVX512VLDQ-LABEL: fabs_v4f32:
@@ -69,8 +87,7 @@ define <4 x double> @fabs_v4f64(<4 x double> %p) {
 ;
 ; X32_AVX512VL-LABEL: fabs_v4f64:
 ; X32_AVX512VL:       # BB#0:
-; X32_AVX512VL-NEXT:    vbroadcastsd {{\.LCPI.*}}, %ymm1
-; X32_AVX512VL-NEXT:    vandps %ymm1, %ymm0, %ymm0
+; X32_AVX512VL-NEXT:    vpandq {{\.LCPI.*}}{1to4}, %ymm0, %ymm0
 ; X32_AVX512VL-NEXT:    retl
 ;
 ; X32_AVX512VLDQ-LABEL: fabs_v4f64:
@@ -85,8 +102,7 @@ define <4 x double> @fabs_v4f64(<4 x double> %p) {
 ;
 ; X64_AVX512VL-LABEL: fabs_v4f64:
 ; X64_AVX512VL:       # BB#0:
-; X64_AVX512VL-NEXT:    vbroadcastsd {{.*}}(%rip), %ymm1
-; X64_AVX512VL-NEXT:    vandps %ymm1, %ymm0, %ymm0
+; X64_AVX512VL-NEXT:    vpandq {{.*}}(%rip){1to4}, %ymm0, %ymm0
 ; X64_AVX512VL-NEXT:    retq
 ;
 ; X64_AVX512VLDQ-LABEL: fabs_v4f64:
@@ -106,8 +122,7 @@ define <8 x float> @fabs_v8f32(<8 x float> %p) {
 ;
 ; X32_AVX512VL-LABEL: fabs_v8f32:
 ; X32_AVX512VL:       # BB#0:
-; X32_AVX512VL-NEXT:    vbroadcastss {{\.LCPI.*}}, %ymm1
-; X32_AVX512VL-NEXT:    vandps %ymm1, %ymm0, %ymm0
+; X32_AVX512VL-NEXT:    vpandd {{\.LCPI.*}}{1to8}, %ymm0, %ymm0
 ; X32_AVX512VL-NEXT:    retl
 ;
 ; X32_AVX512VLDQ-LABEL: fabs_v8f32:
@@ -122,8 +137,7 @@ define <8 x float> @fabs_v8f32(<8 x float> %p) {
 ;
 ; X64_AVX512VL-LABEL: fabs_v8f32:
 ; X64_AVX512VL:       # BB#0:
-; X64_AVX512VL-NEXT:    vbroadcastss {{.*}}(%rip), %ymm1
-; X64_AVX512VL-NEXT:    vandps %ymm1, %ymm0, %ymm0
+; X64_AVX512VL-NEXT:    vpandd {{.*}}(%rip){1to8}, %ymm0, %ymm0
 ; X64_AVX512VL-NEXT:    retq
 ;
 ; X64_AVX512VLDQ-LABEL: fabs_v8f32:
author	Craig Topper <craig.topper@gmail.com>	2016-12-18 07:54:23 +0000
committer	Craig Topper <craig.topper@gmail.com>	2016-12-18 07:54:23 +0000
commit	7029db0eaa2255ea223dc49eedd82b2431da2d50 (patch)
tree	6da2ce3b794321db6fee623994ca6289e4daeca7 /llvm/test
parent	c9318c6cf2104c5caa3d063f046121b9fdfb2728 (diff)
download	bcm5719-llvm-7029db0eaa2255ea223dc49eedd82b2431da2d50.tar.gz bcm5719-llvm-7029db0eaa2255ea223dc49eedd82b2431da2d50.zip