summary refs log tree commit diff stats
diff options
context:
space:
mode:
-rw-r--r-- llvm/include/llvm/CodeGen/ISDOpcodes.h 7
-rw-r--r-- llvm/include/llvm/IR/RuntimeLibcalls.def 5
-rw-r--r-- llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp 29
-rw-r--r-- llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp 5
-rw-r--r-- llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp 1
-rw-r--r-- llvm/lib/CodeGen/TargetLoweringBase.cpp 1
-rw-r--r-- llvm/test/CodeGen/X86/pow.ll 50
7 files changed, 88 insertions, 10 deletions
diff --git a/llvm/include/llvm/CodeGen/ISDOpcodes.h b/llvm/include/llvm/CodeGen/ISDOpcodes.h
index 842f27f19e1..ec9c46140d7 100644
--- a/llvm/include/llvm/CodeGen/ISDOpcodes.h
+++ b/llvm/include/llvm/CodeGen/ISDOpcodes.h
@@ -550,11 +550,8 @@ namespace ISD {
/// is often a storage-only type but has native conversions.
FP16_TO_FP, FP_TO_FP16,
- /// FNEG, FABS, FSQRT, FSIN, FCOS, FPOWI, FPOW,
- /// FLOG, FLOG2, FLOG10, FEXP, FEXP2,
- /// FCEIL, FTRUNC, FRINT, FNEARBYINT, FROUND, FFLOOR - Perform various unary
- /// floating point operations. These are inspired by libm.
- FNEG, FABS, FSQRT, FSIN, FCOS, FPOWI, FPOW,
+ /// Perform various unary floating-point operations inspired by libm.
+ FNEG, FABS, FSQRT, FCBRT, FSIN, FCOS, FPOWI, FPOW,
FLOG, FLOG2, FLOG10, FEXP, FEXP2,
FCEIL, FTRUNC, FRINT, FNEARBYINT, FROUND, FFLOOR,
/// FMINNUM/FMAXNUM - Perform floating-point minimum or maximum on two
diff --git a/llvm/include/llvm/IR/RuntimeLibcalls.def b/llvm/include/llvm/IR/RuntimeLibcalls.def
index 840a00257a3..89005120cdc 100644
--- a/llvm/include/llvm/IR/RuntimeLibcalls.def
+++ b/llvm/include/llvm/IR/RuntimeLibcalls.def
@@ -128,6 +128,11 @@ HANDLE_LIBCALL(SQRT_F64, "sqrt")
HANDLE_LIBCALL(SQRT_F80, "sqrtl")
HANDLE_LIBCALL(SQRT_F128, "sqrtl")
HANDLE_LIBCALL(SQRT_PPCF128, "sqrtl")
+HANDLE_LIBCALL(CBRT_F32, "cbrtf")
+HANDLE_LIBCALL(CBRT_F64, "cbrt")
+HANDLE_LIBCALL(CBRT_F80, "cbrtl")
+HANDLE_LIBCALL(CBRT_F128, "cbrtl")
+HANDLE_LIBCALL(CBRT_PPCF128, "cbrtl")
HANDLE_LIBCALL(LOG_F32, "logf")
HANDLE_LIBCALL(LOG_F64, "log")
HANDLE_LIBCALL(LOG_F80, "logl")
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 20e749ba569..4ef16cb7186 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -11571,6 +11571,34 @@ SDValue DAGCombiner::visitFPOW(SDNode *N) {
if (!ExponentC)
return SDValue();
+ // Try to convert x ** (1/3) into cube root.
+ // TODO: Handle the various flavors of long double.
+ // TODO: Since we're approximating, we don't need an exact 1/3 exponent.
+ // Some range near 1/3 should be fine.
+ EVT VT = N->getValueType(0);
+ if ((VT == MVT::f32 && ExponentC->getValueAPF().isExactlyValue(1.0f/3.0f)) ||
+ (VT == MVT::f64 && ExponentC->getValueAPF().isExactlyValue(1.0/3.0))) {
+ // pow(-0.0, 1/3) = +0.0; cbrt(-0.0) = -0.0.
+ // pow(-inf, 1/3) = +inf; cbrt(-inf) = -inf.
+ // pow(-val, 1/3) = nan; cbrt(-val) = -num.
+ // For regular numbers, rounding may cause the results to differ.
+ // Therefore, we require { nsz ninf nnan afn } for this transform.
+ // TODO: We could select out the special cases if we don't have nsz/ninf.
+ SDNodeFlags Flags = N->getFlags();
+ if (!Flags.hasNoSignedZeros() || !Flags.hasNoInfs() || !Flags.hasNoNaNs() ||
+ !Flags.hasApproximateFuncs())
+ return SDValue();
+
+ // Do not create a cbrt() libcall if the target does not have it, and do not
+ // turn a pow that has lowering support into a cbrt() libcall.
+ if (!DAG.getLibInfo().has(LibFunc_cbrt) ||
+ (!DAG.getTargetLoweringInfo().isOperationExpand(ISD::FPOW, VT) &&
+ DAG.getTargetLoweringInfo().isOperationExpand(ISD::FCBRT, VT)))
+ return SDValue();
+
+ return DAG.getNode(ISD::FCBRT, SDLoc(N), VT, N->getOperand(0), Flags);
+ }
+
// Try to convert x ** (1/4) into square roots.
// x ** (1/2) is canonicalized to sqrt, so we do not bother with that case.
// TODO: This could be extended (using a target hook) to handle smaller
@@ -11587,7 +11615,6 @@ SDValue DAGCombiner::visitFPOW(SDNode *N) {
return SDValue();
// Don't double the number of libcalls. We are trying to inline fast code.
- EVT VT = N->getValueType(0);
if (!DAG.getTargetLoweringInfo().isOperationLegalOrCustom(ISD::FSQRT, VT))
return SDValue();
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
index 13d412cd7de..b6bd8541ac7 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
@@ -4047,6 +4047,11 @@ void SelectionDAGLegalize::ConvertNodeToLibcall(SDNode *Node) {
RTLIB::SQRT_F80, RTLIB::SQRT_F128,
RTLIB::SQRT_PPCF128));
break;
+ case ISD::FCBRT:
+ Results.push_back(ExpandFPLibCall(Node, RTLIB::CBRT_F32, RTLIB::CBRT_F64,
+ RTLIB::CBRT_F80, RTLIB::CBRT_F128,
+ RTLIB::CBRT_PPCF128));
+ break;
case ISD::FSIN:
case ISD::STRICT_FSIN:
Results.push_back(ExpandFPLibCall(Node, RTLIB::SIN_F32, RTLIB::SIN_F64,
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
index 7289579502d..594a587e412 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
@@ -181,6 +181,7 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const {
case ISD::FNEG: return "fneg";
case ISD::FSQRT: return "fsqrt";
case ISD::STRICT_FSQRT: return "strict_fsqrt";
+ case ISD::FCBRT: return "fcbrt";
case ISD::FSIN: return "fsin";
case ISD::STRICT_FSIN: return "strict_fsin";
case ISD::FCOS: return "fcos";
diff --git a/llvm/lib/CodeGen/TargetLoweringBase.cpp b/llvm/lib/CodeGen/TargetLoweringBase.cpp
index aeb321f4a42..b9cdbeabdb3 100644
--- a/llvm/lib/CodeGen/TargetLoweringBase.cpp
+++ b/llvm/lib/CodeGen/TargetLoweringBase.cpp
@@ -666,6 +666,7 @@ void TargetLoweringBase::initActions() {
// These library functions default to expand.
for (MVT VT : {MVT::f32, MVT::f64, MVT::f128}) {
+ setOperationAction(ISD::FCBRT, VT, Expand);
setOperationAction(ISD::FLOG , VT, Expand);
setOperationAction(ISD::FLOG2, VT, Expand);
setOperationAction(ISD::FLOG10, VT, Expand);
diff --git a/llvm/test/CodeGen/X86/pow.ll b/llvm/test/CodeGen/X86/pow.ll
index db8ac76e09c..639f7dd9f18 100644
--- a/llvm/test/CodeGen/X86/pow.ll
+++ b/llvm/test/CodeGen/X86/pow.ll
@@ -7,6 +7,8 @@ declare <4 x float> @llvm.pow.v4f32(<4 x float>, <4 x float>)
declare double @llvm.pow.f64(double, double)
declare <2 x double> @llvm.pow.v2f64(<2 x double>, <2 x double>)
+declare x86_fp80 @llvm.pow.f80(x86_fp80, x86_fp80)
+
define float @pow_f32_one_fourth_fmf(float %x) nounwind {
; CHECK-LABEL: pow_f32_one_fourth_fmf:
; CHECK: # %bb.0:
@@ -165,8 +167,7 @@ define <2 x double> @pow_v2f64_one_fourth_not_enough_fmf(<2 x double> %x) nounwi
define float @pow_f32_one_third_fmf(float %x) nounwind {
; CHECK-LABEL: pow_f32_one_third_fmf:
; CHECK: # %bb.0:
-; CHECK-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
-; CHECK-NEXT: jmp powf # TAILCALL
+; CHECK-NEXT: jmp cbrtf # TAILCALL
%one = uitofp i32 1 to float
%three = uitofp i32 3 to float
%exp = fdiv float %one, %three
@@ -177,8 +178,7 @@ define float @pow_f32_one_third_fmf(float %x) nounwind {
define double @pow_f64_one_third_fmf(double %x) nounwind {
; CHECK-LABEL: pow_f64_one_third_fmf:
; CHECK: # %bb.0:
-; CHECK-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero
-; CHECK-NEXT: jmp pow # TAILCALL
+; CHECK-NEXT: jmp cbrt # TAILCALL
%one = uitofp i32 1 to double
%three = uitofp i32 3 to double
%exp = fdiv double %one, %three
@@ -186,3 +186,45 @@ define double @pow_f64_one_third_fmf(double %x) nounwind {
ret double %r
}
+; TODO: We could turn this into cbrtl, but currently we only handle float/double types.
+
+define x86_fp80 @pow_f80_one_third_fmf(x86_fp80 %x) nounwind {
+; CHECK-LABEL: pow_f80_one_third_fmf:
+; CHECK: # %bb.0:
+; CHECK-NEXT: subq $40, %rsp
+; CHECK-NEXT: fldt {{[0-9]+}}(%rsp)
+; CHECK-NEXT: fldt {{.*}}(%rip)
+; CHECK-NEXT: fstpt {{[0-9]+}}(%rsp)
+; CHECK-NEXT: fstpt (%rsp)
+; CHECK-NEXT: callq powl
+; CHECK-NEXT: addq $40, %rsp
+; CHECK-NEXT: retq
+ %one = uitofp i32 1 to x86_fp80
+ %three = uitofp i32 3 to x86_fp80
+ %exp = fdiv x86_fp80 %one, %three
+ %r = call nsz nnan ninf afn x86_fp80 @llvm.pow.f80(x86_fp80 %x, x86_fp80 %exp)
+ ret x86_fp80 %r
+}
+
+; We might want to allow this. The exact hex value for 1/3 as a double is 0x3fd5555555555555.
+
+define double @pow_f64_not_exactly_one_third_fmf(double %x) nounwind {
+; CHECK-LABEL: pow_f64_not_exactly_one_third_fmf:
+; CHECK: # %bb.0:
+; CHECK-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero
+; CHECK-NEXT: jmp pow # TAILCALL
+ %r = call nsz nnan ninf afn double @llvm.pow.f64(double %x, double 0x3fd5555555555556)
+ ret double %r
+}
+
+; We require all 4 of nsz, ninf, nnan, afn.
+
+define double @pow_f64_not_enough_fmf(double %x) nounwind {
+; CHECK-LABEL: pow_f64_not_enough_fmf:
+; CHECK: # %bb.0:
+; CHECK-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero
+; CHECK-NEXT: jmp pow # TAILCALL
+ %r = call nsz ninf afn double @llvm.pow.f64(double %x, double 0x3fd5555555555555)
+ ret double %r
+}
+
OpenPOWER on IntegriCloud