7 files changed, 88 insertions, 10 deletions
diff --git a/llvm/include/llvm/CodeGen/ISDOpcodes.h b/llvm/include/llvm/CodeGen/ISDOpcodes.h
index 842f27f19e1..ec9c46140d7 100644
--- a/llvm/include/llvm/CodeGen/ISDOpcodes.h
+++ b/llvm/include/llvm/CodeGen/ISDOpcodes.h
@@ -550,11 +550,8 @@ namespace ISD {
     /// is often a storage-only type but has native conversions.
     FP16_TO_FP, FP_TO_FP16,
 
-    /// FNEG, FABS, FSQRT, FSIN, FCOS, FPOWI, FPOW,
-    /// FLOG, FLOG2, FLOG10, FEXP, FEXP2,
-    /// FCEIL, FTRUNC, FRINT, FNEARBYINT, FROUND, FFLOOR - Perform various unary
-    /// floating point operations. These are inspired by libm.
-    FNEG, FABS, FSQRT, FSIN, FCOS, FPOWI, FPOW,
+    /// Perform various unary floating-point operations inspired by libm.
+    FNEG, FABS, FSQRT, FCBRT, FSIN, FCOS, FPOWI, FPOW,
     FLOG, FLOG2, FLOG10, FEXP, FEXP2,
     FCEIL, FTRUNC, FRINT, FNEARBYINT, FROUND, FFLOOR,
     /// FMINNUM/FMAXNUM - Perform floating-point minimum or maximum on two
diff --git a/llvm/include/llvm/IR/RuntimeLibcalls.def b/llvm/include/llvm/IR/RuntimeLibcalls.def
index 840a00257a3..89005120cdc 100644
--- a/llvm/include/llvm/IR/RuntimeLibcalls.def
+++ b/llvm/include/llvm/IR/RuntimeLibcalls.def
@@ -128,6 +128,11 @@ HANDLE_LIBCALL(SQRT_F64, "sqrt")
 HANDLE_LIBCALL(SQRT_F80, "sqrtl")
 HANDLE_LIBCALL(SQRT_F128, "sqrtl")
 HANDLE_LIBCALL(SQRT_PPCF128, "sqrtl")
+HANDLE_LIBCALL(CBRT_F32, "cbrtf")
+HANDLE_LIBCALL(CBRT_F64, "cbrt")
+HANDLE_LIBCALL(CBRT_F80, "cbrtl")
+HANDLE_LIBCALL(CBRT_F128, "cbrtl")
+HANDLE_LIBCALL(CBRT_PPCF128, "cbrtl")
 HANDLE_LIBCALL(LOG_F32, "logf")
 HANDLE_LIBCALL(LOG_F64, "log")
 HANDLE_LIBCALL(LOG_F80, "logl")
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 20e749ba569..4ef16cb7186 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -11571,6 +11571,34 @@ SDValue DAGCombiner::visitFPOW(SDNode *N) {
   if (!ExponentC)
     return SDValue();
 
+  // Try to convert x ** (1/3) into cube root.
+  // TODO: Handle the various flavors of long double.
+  // TODO: Since we're approximating, we don't need an exact 1/3 exponent.
+  //       Some range near 1/3 should be fine.
+  EVT VT = N->getValueType(0);
+  if ((VT == MVT::f32 && ExponentC->getValueAPF().isExactlyValue(1.0f/3.0f)) ||
+      (VT == MVT::f64 && ExponentC->getValueAPF().isExactlyValue(1.0/3.0))) {
+    // pow(-0.0, 1/3) = +0.0; cbrt(-0.0) = -0.0.
+    // pow(-inf, 1/3) = +inf; cbrt(-inf) = -inf.
+    // pow(-val, 1/3) =  nan; cbrt(-val) = -num.
+    // For regular numbers, rounding may cause the results to differ.
+    // Therefore, we require { nsz ninf nnan afn } for this transform.
+    // TODO: We could select out the special cases if we don't have nsz/ninf.
+    SDNodeFlags Flags = N->getFlags();
+    if (!Flags.hasNoSignedZeros() || !Flags.hasNoInfs() || !Flags.hasNoNaNs() ||
+        !Flags.hasApproximateFuncs())
+      return SDValue();
+
+    // Do not create a cbrt() libcall if the target does not have it, and do not
+    // turn a pow that has lowering support into a cbrt() libcall.
+    if (!DAG.getLibInfo().has(LibFunc_cbrt) ||
+        (!DAG.getTargetLoweringInfo().isOperationExpand(ISD::FPOW, VT) &&
+         DAG.getTargetLoweringInfo().isOperationExpand(ISD::FCBRT, VT)))
+      return SDValue();
+
+    return DAG.getNode(ISD::FCBRT, SDLoc(N), VT, N->getOperand(0), Flags);
+  }
+
   // Try to convert x ** (1/4) into square roots.
   // x ** (1/2) is canonicalized to sqrt, so we do not bother with that case.
   // TODO: This could be extended (using a target hook) to handle smaller
@@ -11587,7 +11615,6 @@ SDValue DAGCombiner::visitFPOW(SDNode *N) {
       return SDValue();
 
     // Don't double the number of libcalls. We are trying to inline fast code.
-    EVT VT = N->getValueType(0);
     if (!DAG.getTargetLoweringInfo().isOperationLegalOrCustom(ISD::FSQRT, VT))
       return SDValue();
 
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
index 13d412cd7de..b6bd8541ac7 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
@@ -4047,6 +4047,11 @@ void SelectionDAGLegalize::ConvertNodeToLibcall(SDNode *Node) {
                                       RTLIB::SQRT_F80, RTLIB::SQRT_F128,
                                       RTLIB::SQRT_PPCF128));
     break;
+  case ISD::FCBRT:
+    Results.push_back(ExpandFPLibCall(Node, RTLIB::CBRT_F32, RTLIB::CBRT_F64,
+                                      RTLIB::CBRT_F80, RTLIB::CBRT_F128,
+                                      RTLIB::CBRT_PPCF128));
+    break;
   case ISD::FSIN:
   case ISD::STRICT_FSIN:
     Results.push_back(ExpandFPLibCall(Node, RTLIB::SIN_F32, RTLIB::SIN_F64,
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
index 7289579502d..594a587e412 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
@@ -181,6 +181,7 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const {
   case ISD::FNEG:                       return "fneg";
   case ISD::FSQRT:                      return "fsqrt";
   case ISD::STRICT_FSQRT:               return "strict_fsqrt";
+  case ISD::FCBRT:                      return "fcbrt";
   case ISD::FSIN:                       return "fsin";
   case ISD::STRICT_FSIN:                return "strict_fsin";
   case ISD::FCOS:                       return "fcos";
diff --git a/llvm/lib/CodeGen/TargetLoweringBase.cpp b/llvm/lib/CodeGen/TargetLoweringBase.cpp
index aeb321f4a42..b9cdbeabdb3 100644
--- a/llvm/lib/CodeGen/TargetLoweringBase.cpp
+++ b/llvm/lib/CodeGen/TargetLoweringBase.cpp
@@ -666,6 +666,7 @@ void TargetLoweringBase::initActions() {
 
   // These library functions default to expand.
   for (MVT VT : {MVT::f32, MVT::f64, MVT::f128}) {
+    setOperationAction(ISD::FCBRT,      VT, Expand);
     setOperationAction(ISD::FLOG ,      VT, Expand);
     setOperationAction(ISD::FLOG2,      VT, Expand);
     setOperationAction(ISD::FLOG10,     VT, Expand);
diff --git a/llvm/test/CodeGen/X86/pow.ll b/llvm/test/CodeGen/X86/pow.ll
index db8ac76e09c..639f7dd9f18 100644
--- a/llvm/test/CodeGen/X86/pow.ll
+++ b/llvm/test/CodeGen/X86/pow.ll
@@ -7,6 +7,8 @@ declare <4 x float> @llvm.pow.v4f32(<4 x float>, <4 x float>)
 declare double @llvm.pow.f64(double, double)
 declare <2 x double> @llvm.pow.v2f64(<2 x double>, <2 x double>)
 
+declare x86_fp80 @llvm.pow.f80(x86_fp80, x86_fp80)
+
 define float @pow_f32_one_fourth_fmf(float %x) nounwind {
 ; CHECK-LABEL: pow_f32_one_fourth_fmf:
 ; CHECK:       # %bb.0:
@@ -165,8 +167,7 @@ define <2 x double> @pow_v2f64_one_fourth_not_enough_fmf(<2 x double> %x) nounwi
 define float @pow_f32_one_third_fmf(float %x) nounwind {
 ; CHECK-LABEL: pow_f32_one_third_fmf:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
-; CHECK-NEXT:    jmp powf # TAILCALL
+; CHECK-NEXT:    jmp cbrtf # TAILCALL
   %one = uitofp i32 1 to float
   %three = uitofp i32 3 to float
   %exp = fdiv float %one, %three
@@ -177,8 +178,7 @@ define float @pow_f32_one_third_fmf(float %x) nounwind {
 define double @pow_f64_one_third_fmf(double %x) nounwind {
 ; CHECK-LABEL: pow_f64_one_third_fmf:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    movsd {{.*#+}} xmm1 = mem[0],zero
-; CHECK-NEXT:    jmp pow # TAILCALL
+; CHECK-NEXT:    jmp cbrt # TAILCALL
   %one = uitofp i32 1 to double
   %three = uitofp i32 3 to double
   %exp = fdiv double %one, %three
@@ -186,3 +186,45 @@ define double @pow_f64_one_third_fmf(double %x) nounwind {
   ret double %r
 }
 
+; TODO: We could turn this into cbrtl, but currently we only handle float/double types.
+
+define x86_fp80 @pow_f80_one_third_fmf(x86_fp80 %x) nounwind {
+; CHECK-LABEL: pow_f80_one_third_fmf:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    subq $40, %rsp
+; CHECK-NEXT:    fldt {{[0-9]+}}(%rsp)
+; CHECK-NEXT:    fldt {{.*}}(%rip)
+; CHECK-NEXT:    fstpt {{[0-9]+}}(%rsp)
+; CHECK-NEXT:    fstpt (%rsp)
+; CHECK-NEXT:    callq powl
+; CHECK-NEXT:    addq $40, %rsp
+; CHECK-NEXT:    retq
+  %one = uitofp i32 1 to x86_fp80
+  %three = uitofp i32 3 to x86_fp80
+  %exp = fdiv x86_fp80 %one, %three
+  %r = call nsz nnan ninf afn x86_fp80 @llvm.pow.f80(x86_fp80 %x, x86_fp80 %exp)
+  ret x86_fp80 %r
+}
+
+; We might want to allow this. The exact hex value for 1/3 as a double is 0x3fd5555555555555.
+
+define double @pow_f64_not_exactly_one_third_fmf(double %x) nounwind {
+; CHECK-LABEL: pow_f64_not_exactly_one_third_fmf:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    movsd {{.*#+}} xmm1 = mem[0],zero
+; CHECK-NEXT:    jmp pow # TAILCALL
+  %r = call nsz nnan ninf afn double @llvm.pow.f64(double %x, double 0x3fd5555555555556)
+  ret double %r
+}
+
+; We require all 4 of nsz, ninf, nnan, afn.
+
+define double @pow_f64_not_enough_fmf(double %x) nounwind {
+; CHECK-LABEL: pow_f64_not_enough_fmf:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    movsd {{.*#+}} xmm1 = mem[0],zero
+; CHECK-NEXT:    jmp pow # TAILCALL
+  %r = call nsz ninf afn double @llvm.pow.f64(double %x, double 0x3fd5555555555555)
+  ret double %r
+}
+