summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
-rw-r--r-- llvm/lib/Target/NVPTX/NVPTXISelLowering.h | 2
-rw-r--r-- llvm/test/CodeGen/NVPTX/fast-math.ll | 44
2 files changed, 46 insertions, 0 deletions
diff --git a/llvm/lib/Target/NVPTX/NVPTXISelLowering.h b/llvm/lib/Target/NVPTX/NVPTXISelLowering.h
index f6494f6d37e..ab78d8a936b 100644
--- a/llvm/lib/Target/NVPTX/NVPTXISelLowering.h
+++ b/llvm/lib/Target/NVPTX/NVPTXISelLowering.h
@@ -530,6 +530,8 @@ public:
int &ExtraSteps, bool &UseOneConst,
bool Reciprocal) const override;
+ unsigned combineRepeatedFPDivisors() const override { return 2; }
+
bool allowFMA(MachineFunction &MF, CodeGenOpt::Level OptLevel) const;
bool allowUnsafeFPMath(MachineFunction &MF) const;
diff --git a/llvm/test/CodeGen/NVPTX/fast-math.ll b/llvm/test/CodeGen/NVPTX/fast-math.ll
index f925d67434c..56b1f88f3b2 100644
--- a/llvm/test/CodeGen/NVPTX/fast-math.ll
+++ b/llvm/test/CodeGen/NVPTX/fast-math.ll
@@ -117,5 +117,49 @@ define float @fcos_approx(float %a) #0 {
ret float %r
}
+; CHECK-LABEL: repeated_div_recip_allowed
+define float @repeated_div_recip_allowed(i1 %pred, float %a, float %b, float %divisor) {
+; CHECK: rcp.rn.f32
+; CHECK: mul.rn.f32
+; CHECK: mul.rn.f32
+ %x = fdiv arcp float %a, %divisor
+ %y = fdiv arcp float %b, %divisor
+ %z = select i1 %pred, float %x, float %y
+ ret float %z
+}
+
+; CHECK-LABEL: repeated_div_recip_allowed_ftz
+define float @repeated_div_recip_allowed_ftz(i1 %pred, float %a, float %b, float %divisor) #1 {
+; CHECK: rcp.rn.ftz.f32
+; CHECK: mul.rn.ftz.f32
+; CHECK: mul.rn.ftz.f32
+ %x = fdiv arcp float %a, %divisor
+ %y = fdiv arcp float %b, %divisor
+ %z = select i1 %pred, float %x, float %y
+ ret float %z
+}
+
+; CHECK-LABEL: repeated_div_fast
+define float @repeated_div_fast(i1 %pred, float %a, float %b, float %divisor) #0 {
+; CHECK: rcp.approx.f32
+; CHECK: mul.f32
+; CHECK: mul.f32
+ %x = fdiv float %a, %divisor
+ %y = fdiv float %b, %divisor
+ %z = select i1 %pred, float %x, float %y
+ ret float %z
+}
+
+; CHECK-LABEL: repeated_div_fast_ftz
+define float @repeated_div_fast_ftz(i1 %pred, float %a, float %b, float %divisor) #0 #1 {
+; CHECK: rcp.approx.ftz.f32
+; CHECK: mul.ftz.f32
+; CHECK: mul.ftz.f32
+ %x = fdiv float %a, %divisor
+ %y = fdiv float %b, %divisor
+ %z = select i1 %pred, float %x, float %y
+ ret float %z
+}
+
attributes #0 = { "unsafe-fp-math" = "true" }
attributes #1 = { "nvptx-f32ftz" = "true" }
OpenPOWER on IntegriCloud