summary | refs | log | tree | commit | diff | stats
path: root/llvm/test/CodeGen/PowerPC
diff options
context:
space:
mode:
author: Sanjay Patel <spatel@rotateright.com> 2018-05-15 14:16:24 +0000
committer: Sanjay Patel <spatel@rotateright.com> 2018-05-15 14:16:24 +0000
commit: 8652c53d291f26691e359c115d58574ddf742a0b (patch)
tree: f2726f60023f82d65065de28025da482c5b5f545 /llvm/test/CodeGen/PowerPC
parent: 891ebcdbaad113b81511af0cf232908c484d3cc6 (diff)
download: bcm5719-llvm-8652c53d291f26691e359c115d58574ddf742a0b.tar.gz
          bcm5719-llvm-8652c53d291f26691e359c115d58574ddf742a0b.zip
[DAG] propagate FMF for all FPMathOperators
This is a simple hack based on what's proposed in D37686, but we can extend it if needed in follow-ups. It gets us most of the FMF functionality that we want without adding any state bits to the flags. It also intentionally leaves out non-FMF flags (nsw, etc.) to minimize the patch.

It should provide a superset of the functionality from D46563 - the extra tests show propagation and codegen diffs for fcmp, vecreduce, and FP libcalls.

The PPC log2() test shows the limits of this most basic approach - we only applied 'afn' to the last node created for the call. AFAIK, there aren't any libcall optimizations based on the flags currently, so that shouldn't make any difference.

Differential Revision: https://reviews.llvm.org/D46854

llvm-svn: 332358
Diffstat (limited to 'llvm/test/CodeGen/PowerPC')
-rw-r--r-- llvm/test/CodeGen/PowerPC/fmf-propagation.ll | 22
1 file changed, 10 insertions, 12 deletions
diff --git a/llvm/test/CodeGen/PowerPC/fmf-propagation.ll b/llvm/test/CodeGen/PowerPC/fmf-propagation.ll
index f2a1a528e17..b4fb412dda2 100644
--- a/llvm/test/CodeGen/PowerPC/fmf-propagation.ll
+++ b/llvm/test/CodeGen/PowerPC/fmf-propagation.ll
@@ -156,7 +156,7 @@ define float @fmul_fadd_fast2(float %x, float %y, float %z) {
; This is the minimum FMF needed for this transform - the FMA allows reassociation.
; FMFDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'fmul_fma_reassoc1:'
-; FMFDEBUG: fma {{t[0-9]+}}
+; FMFDEBUG: fma reassoc {{t[0-9]+}}
; FMFDEBUG: Type-legalized selection DAG: %bb.0 'fmul_fma_reassoc1:'
; GLOBALDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'fmul_fma_reassoc1:'
@@ -192,7 +192,7 @@ define float @fmul_fma_reassoc1(float %x) {
; This shouldn't change anything - the intermediate fmul result is now also flagged.
; FMFDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'fmul_fma_reassoc2:'
-; FMFDEBUG: fma {{t[0-9]+}}
+; FMFDEBUG: fma reassoc {{t[0-9]+}}
; FMFDEBUG: Type-legalized selection DAG: %bb.0 'fmul_fma_reassoc2:'
; GLOBALDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'fmul_fma_reassoc2:'
@@ -228,7 +228,7 @@ define float @fmul_fma_reassoc2(float %x) {
; The FMA is now fully 'fast'. This implies that reassociation is allowed.
; FMFDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'fmul_fma_fast1:'
-; FMFDEBUG: fma {{t[0-9]+}}
+; FMFDEBUG: fma nnan ninf nsz arcp contract afn reassoc {{t[0-9]+}}
; FMFDEBUG: Type-legalized selection DAG: %bb.0 'fmul_fma_fast1:'
; GLOBALDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'fmul_fma_fast1:'
@@ -264,7 +264,7 @@ define float @fmul_fma_fast1(float %x) {
; This shouldn't change anything - the intermediate fmul result is now also flagged.
; FMFDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'fmul_fma_fast2:'
-; FMFDEBUG: fma {{t[0-9]+}}
+; FMFDEBUG: fma nnan ninf nsz arcp contract afn reassoc {{t[0-9]+}}
; FMFDEBUG: Type-legalized selection DAG: %bb.0 'fmul_fma_fast2:'
; GLOBALDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'fmul_fma_fast2:'
@@ -300,7 +300,7 @@ define float @fmul_fma_fast2(float %x) {
; Reduced precision for sqrt is allowed - should use estimate and NR iterations.
; FMFDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'sqrt_afn:'
-; FMFDEBUG: fsqrt {{t[0-9]+}}
+; FMFDEBUG: fsqrt afn {{t[0-9]+}}
; FMFDEBUG: Type-legalized selection DAG: %bb.0 'sqrt_afn:'
; GLOBALDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'sqrt_afn:'
@@ -340,7 +340,7 @@ define float @sqrt_afn(float %x) {
; The call is now fully 'fast'. This implies that approximation is allowed.
; FMFDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'sqrt_fast:'
-; FMFDEBUG: fsqrt {{t[0-9]+}}
+; FMFDEBUG: fsqrt nnan ninf nsz arcp contract afn reassoc {{t[0-9]+}}
; FMFDEBUG: Type-legalized selection DAG: %bb.0 'sqrt_fast:'
; GLOBALDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'sqrt_fast:'
@@ -391,10 +391,8 @@ define double @fcmp_nnan(double %a, double %y, double %z) {
; FMF-LABEL: fcmp_nnan:
; FMF: # %bb.0:
; FMF-NEXT: xxlxor 0, 0, 0
-; FMF-NEXT: fcmpu 0, 1, 1
-; FMF-NEXT: fcmpu 1, 1, 0
-; FMF-NEXT: cror 20, 4, 3
-; FMF-NEXT: bc 12, 20, .LBB12_2
+; FMF-NEXT: xscmpudp 0, 1, 0
+; FMF-NEXT: blt 0, .LBB12_2
; FMF-NEXT: # %bb.1:
; FMF-NEXT: fmr 3, 2
; FMF-NEXT: .LBB12_2:
@@ -421,13 +419,13 @@ define double @fcmp_nnan(double %a, double %y, double %z) {
; FMFDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'log2_approx:'
; FMFDEBUG: ch,glue = PPCISD::CALL_NOP t11, TargetGlobalAddress:i64<double (double)* @log2>
; FMFDEBUG: ch,glue = callseq_end t15, TargetConstant:i64<32>, TargetConstant:i64<0>, t15:1
-; FMFDEBUG: f64,ch,glue = CopyFromReg t16, Register:f64 $f1, t16:1
+; FMFDEBUG: f64,ch,glue = CopyFromReg afn t16, Register:f64 $f1, t16:1
; FMFDEBUG: Type-legalized selection DAG: %bb.0 'log2_approx:'
; GLOBALDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'log2_approx:'
; GLOBALDEBUG: ch,glue = PPCISD::CALL_NOP t11, TargetGlobalAddress:i64<double (double)* @log2>
; GLOBALDEBUG: ch,glue = callseq_end t15, TargetConstant:i64<32>, TargetConstant:i64<0>, t15:1
-; GLOBALDEBUG: f64,ch,glue = CopyFromReg t16, Register:f64 $f1, t16:1
+; GLOBALDEBUG: f64,ch,glue = CopyFromReg afn t16, Register:f64 $f1, t16:1
; GLOBALDEBUG: Type-legalized selection DAG: %bb.0 'log2_approx:'
declare double @log2(double)
OpenPOWER on IntegriCloud