diff options
| author | Sanjay Patel <spatel@rotateright.com> | 2018-05-15 14:16:24 +0000 |
|---|---|---|
| committer | Sanjay Patel <spatel@rotateright.com> | 2018-05-15 14:16:24 +0000 |
| commit | 8652c53d291f26691e359c115d58574ddf742a0b (patch) | |
| tree | f2726f60023f82d65065de28025da482c5b5f545 /llvm/test/CodeGen/PowerPC | |
| parent | 891ebcdbaad113b81511af0cf232908c484d3cc6 (diff) | |
| download | bcm5719-llvm-8652c53d291f26691e359c115d58574ddf742a0b.tar.gz bcm5719-llvm-8652c53d291f26691e359c115d58574ddf742a0b.zip | |
[DAG] propagate FMF for all FPMathOperators
This is a simple hack based on what's proposed in D37686, but we can extend it if needed in follow-ups.
It gets us most of the FMF functionality that we want without adding any state bits to the flags. It
also intentionally leaves out non-FMF flags (nsw, etc.) to minimize the patch.
It should provide a superset of the functionality from D46563 - the extra tests show propagation and
codegen diffs for fcmp, vecreduce, and FP libcalls.
The PPC log2() test shows the limits of this most basic approach - we only applied 'afn' to the last
node created for the call. AFAIK, there aren't any libcall optimizations based on the flags currently,
so that shouldn't make any difference.
Differential Revision: https://reviews.llvm.org/D46854
llvm-svn: 332358
Diffstat (limited to 'llvm/test/CodeGen/PowerPC')
| -rw-r--r-- | llvm/test/CodeGen/PowerPC/fmf-propagation.ll | 22 |
1 file changed, 10 insertions, 12 deletions
diff --git a/llvm/test/CodeGen/PowerPC/fmf-propagation.ll b/llvm/test/CodeGen/PowerPC/fmf-propagation.ll index f2a1a528e17..b4fb412dda2 100644 --- a/llvm/test/CodeGen/PowerPC/fmf-propagation.ll +++ b/llvm/test/CodeGen/PowerPC/fmf-propagation.ll @@ -156,7 +156,7 @@ define float @fmul_fadd_fast2(float %x, float %y, float %z) { ; This is the minimum FMF needed for this transform - the FMA allows reassociation. ; FMFDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'fmul_fma_reassoc1:' -; FMFDEBUG: fma {{t[0-9]+}} +; FMFDEBUG: fma reassoc {{t[0-9]+}} ; FMFDEBUG: Type-legalized selection DAG: %bb.0 'fmul_fma_reassoc1:' ; GLOBALDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'fmul_fma_reassoc1:' @@ -192,7 +192,7 @@ define float @fmul_fma_reassoc1(float %x) { ; This shouldn't change anything - the intermediate fmul result is now also flagged. ; FMFDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'fmul_fma_reassoc2:' -; FMFDEBUG: fma {{t[0-9]+}} +; FMFDEBUG: fma reassoc {{t[0-9]+}} ; FMFDEBUG: Type-legalized selection DAG: %bb.0 'fmul_fma_reassoc2:' ; GLOBALDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'fmul_fma_reassoc2:' @@ -228,7 +228,7 @@ define float @fmul_fma_reassoc2(float %x) { ; The FMA is now fully 'fast'. This implies that reassociation is allowed. ; FMFDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'fmul_fma_fast1:' -; FMFDEBUG: fma {{t[0-9]+}} +; FMFDEBUG: fma nnan ninf nsz arcp contract afn reassoc {{t[0-9]+}} ; FMFDEBUG: Type-legalized selection DAG: %bb.0 'fmul_fma_fast1:' ; GLOBALDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'fmul_fma_fast1:' @@ -264,7 +264,7 @@ define float @fmul_fma_fast1(float %x) { ; This shouldn't change anything - the intermediate fmul result is now also flagged. 
; FMFDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'fmul_fma_fast2:' -; FMFDEBUG: fma {{t[0-9]+}} +; FMFDEBUG: fma nnan ninf nsz arcp contract afn reassoc {{t[0-9]+}} ; FMFDEBUG: Type-legalized selection DAG: %bb.0 'fmul_fma_fast2:' ; GLOBALDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'fmul_fma_fast2:' @@ -300,7 +300,7 @@ define float @fmul_fma_fast2(float %x) { ; Reduced precision for sqrt is allowed - should use estimate and NR iterations. ; FMFDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'sqrt_afn:' -; FMFDEBUG: fsqrt {{t[0-9]+}} +; FMFDEBUG: fsqrt afn {{t[0-9]+}} ; FMFDEBUG: Type-legalized selection DAG: %bb.0 'sqrt_afn:' ; GLOBALDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'sqrt_afn:' @@ -340,7 +340,7 @@ define float @sqrt_afn(float %x) { ; The call is now fully 'fast'. This implies that approximation is allowed. ; FMFDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'sqrt_fast:' -; FMFDEBUG: fsqrt {{t[0-9]+}} +; FMFDEBUG: fsqrt nnan ninf nsz arcp contract afn reassoc {{t[0-9]+}} ; FMFDEBUG: Type-legalized selection DAG: %bb.0 'sqrt_fast:' ; GLOBALDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'sqrt_fast:' @@ -391,10 +391,8 @@ define double @fcmp_nnan(double %a, double %y, double %z) { ; FMF-LABEL: fcmp_nnan: ; FMF: # %bb.0: ; FMF-NEXT: xxlxor 0, 0, 0 -; FMF-NEXT: fcmpu 0, 1, 1 -; FMF-NEXT: fcmpu 1, 1, 0 -; FMF-NEXT: cror 20, 4, 3 -; FMF-NEXT: bc 12, 20, .LBB12_2 +; FMF-NEXT: xscmpudp 0, 1, 0 +; FMF-NEXT: blt 0, .LBB12_2 ; FMF-NEXT: # %bb.1: ; FMF-NEXT: fmr 3, 2 ; FMF-NEXT: .LBB12_2: @@ -421,13 +419,13 @@ define double @fcmp_nnan(double %a, double %y, double %z) { ; FMFDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'log2_approx:' ; FMFDEBUG: ch,glue = PPCISD::CALL_NOP t11, TargetGlobalAddress:i64<double (double)* @log2> ; FMFDEBUG: ch,glue = callseq_end t15, TargetConstant:i64<32>, TargetConstant:i64<0>, t15:1 -; FMFDEBUG: f64,ch,glue = CopyFromReg t16, Register:f64 $f1, t16:1 +; FMFDEBUG: f64,ch,glue = CopyFromReg afn 
t16, Register:f64 $f1, t16:1 ; FMFDEBUG: Type-legalized selection DAG: %bb.0 'log2_approx:' ; GLOBALDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'log2_approx:' ; GLOBALDEBUG: ch,glue = PPCISD::CALL_NOP t11, TargetGlobalAddress:i64<double (double)* @log2> ; GLOBALDEBUG: ch,glue = callseq_end t15, TargetConstant:i64<32>, TargetConstant:i64<0>, t15:1 -; GLOBALDEBUG: f64,ch,glue = CopyFromReg t16, Register:f64 $f1, t16:1 +; GLOBALDEBUG: f64,ch,glue = CopyFromReg afn t16, Register:f64 $f1, t16:1 ; GLOBALDEBUG: Type-legalized selection DAG: %bb.0 'log2_approx:' declare double @log2(double) |

