diff options
Diffstat (limited to 'llvm/lib/Target/AMDGPU/SIISelLowering.cpp')
-rw-r--r-- | llvm/lib/Target/AMDGPU/SIISelLowering.cpp | 67 |
1 files changed, 41 insertions, 26 deletions
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp index 1a02037fcd4..c4712198693 100644 --- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp @@ -100,6 +100,16 @@ static cl::opt<bool> DisableLoopAlignment( cl::desc("Do not align and prefetch loops"), cl::init(false)); +static bool hasFP32Denormals(const MachineFunction &MF) { + const SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>(); + return Info->getMode().FP32Denormals; +} + +static bool hasFP64FP16Denormals(const MachineFunction &MF) { + const SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>(); + return Info->getMode().FP64FP16Denormals; +} + static unsigned findFirstFreeSGPR(CCState &CCInfo) { unsigned NumSGPRs = AMDGPU::SGPR_32RegClass.getNumRegs(); for (unsigned Reg = 0; Reg < NumSGPRs; ++Reg) { @@ -370,9 +380,10 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM, setOperationAction(ISD::FLOG10, MVT::f16, Custom); } - // v_mad_f32 does not support denormals according to some sources. - if (!Subtarget->hasFP32Denormals()) - setOperationAction(ISD::FMAD, MVT::f32, Legal); + // v_mad_f32 does not support denormals. We report it as unconditionally + // legal, and the context where it is formed will disallow it when fp32 + // denormals are enabled. + setOperationAction(ISD::FMAD, MVT::f32, Legal); if (!Subtarget->hasBFI()) { // fcopysign can be done in a single instruction with BFI. @@ -510,7 +521,7 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM, // F16 - VOP3 Actions. setOperationAction(ISD::FMA, MVT::f16, Legal); - if (!Subtarget->hasFP16Denormals() && STI.hasMadF16()) + if (STI.hasMadF16()) setOperationAction(ISD::FMAD, MVT::f16, Legal); for (MVT VT : {MVT::v2i16, MVT::v2f16, MVT::v4i16, MVT::v4f16}) { @@ -772,8 +783,9 @@ bool SITargetLowering::isFPExtFoldable(const SelectionDAG &DAG, unsigned Opcode, EVT DestVT, EVT SrcVT) const { return ((Opcode == ISD::FMAD && Subtarget->hasMadMixInsts()) || (Opcode == ISD::FMA && Subtarget->hasFmaMixInsts())) && - DestVT.getScalarType() == MVT::f32 && !Subtarget->hasFP32Denormals() && - SrcVT.getScalarType() == MVT::f16; + DestVT.getScalarType() == MVT::f32 && + SrcVT.getScalarType() == MVT::f16 && + !hasFP32Denormals(DAG.getMachineFunction()); } bool SITargetLowering::isShuffleMaskLegal(ArrayRef<int>, EVT) const { @@ -3930,7 +3942,7 @@ bool SITargetLowering::isFMAFasterThanFMulAndFAdd(const MachineFunction &MF, // mad available which returns the same result as the separate operations // which we should prefer over fma. We can't use this if we want to support // denormals, so only report this in these cases. - if (Subtarget->hasFP32Denormals()) + if (hasFP32Denormals(MF)) return Subtarget->hasFastFMAF32() || Subtarget->hasDLInsts(); // If the subtarget has v_fmac_f32, that's just as good as v_mac_f32. @@ -3939,7 +3951,7 @@ bool SITargetLowering::isFMAFasterThanFMulAndFAdd(const MachineFunction &MF, case MVT::f64: return true; case MVT::f16: - return Subtarget->has16BitInsts() && Subtarget->hasFP16Denormals(); + return Subtarget->has16BitInsts() && hasFP64FP16Denormals(MF); default: break; } @@ -3953,9 +3965,11 @@ bool SITargetLowering::isFMADLegalForFAddFSub(const SelectionDAG &DAG, // v_mad_f32/v_mac_f32 do not support denormals. EVT VT = N->getValueType(0); if (VT == MVT::f32) - return !Subtarget->hasFP32Denormals(); - if (VT == MVT::f16) - return !Subtarget->hasFP16Denormals() && Subtarget->hasMadF16(); + return !hasFP32Denormals(DAG.getMachineFunction()); + if (VT == MVT::f16) { + return Subtarget->hasMadF16() && + !hasFP64FP16Denormals(DAG.getMachineFunction()); + } return false; } @@ -7564,7 +7578,7 @@ SDValue SITargetLowering::lowerFastUnsafeFDIV(SDValue Op, const SDNodeFlags Flags = Op->getFlags(); bool Unsafe = DAG.getTarget().Options.UnsafeFPMath || Flags.hasAllowReciprocal(); - if (!Unsafe && VT == MVT::f32 && Subtarget->hasFP32Denormals()) + if (!Unsafe && VT == MVT::f32 && hasFP32Denormals(DAG.getMachineFunction())) return SDValue(); if (const ConstantFPSDNode *CLHS = dyn_cast<ConstantFPSDNode>(LHS)) { @@ -7707,7 +7721,7 @@ SDValue SITargetLowering::lowerFDIV_FAST(SDValue Op, SelectionDAG &DAG) const { static const SDValue getSPDenormModeValue(int SPDenormMode, SelectionDAG &DAG, const SDLoc &SL, const GCNSubtarget *ST) { assert(ST->hasDenormModeInst() && "Requires S_DENORM_MODE"); - int DPDenormModeDefault = ST->hasFP64Denormals() + int DPDenormModeDefault = hasFP64FP16Denormals(DAG.getMachineFunction()) ? FP_DENORM_FLUSH_NONE : FP_DENORM_FLUSH_IN_FLUSH_OUT; @@ -7743,7 +7757,9 @@ SDValue SITargetLowering::LowerFDIV32(SDValue Op, SelectionDAG &DAG) const { (1 << AMDGPU::Hwreg::WIDTH_M1_SHIFT_); const SDValue BitField = DAG.getTargetConstant(Denorm32Reg, SL, MVT::i16); - if (!Subtarget->hasFP32Denormals()) { + const bool HasFP32Denormals = hasFP32Denormals(DAG.getMachineFunction()); + + if (!HasFP32Denormals) { SDVTList BindParamVTs = DAG.getVTList(MVT::Other, MVT::Glue); SDValue EnableDenorm; @@ -7787,8 +7803,7 @@ SDValue SITargetLowering::LowerFDIV32(SDValue Op, SelectionDAG &DAG) const { SDValue Fma4 = getFPTernOp(DAG, ISD::FMA, SL, MVT::f32, NegDivScale0, Fma3, NumeratorScaled, Fma3); - if (!Subtarget->hasFP32Denormals()) { - + if (!HasFP32Denormals) { SDValue DisableDenorm; if (Subtarget->hasDenormModeInst()) { const SDValue DisableDenormValue = @@ -8762,7 +8777,7 @@ bool SITargetLowering::isCanonicalized(SelectionDAG &DAG, SDValue Op, auto F = CFP->getValueAPF(); if (F.isNaN() && F.isSignaling()) return false; - return !F.isDenormal() || denormalsEnabledForType(Op.getValueType()); + return !F.isDenormal() || denormalsEnabledForType(DAG, Op.getValueType()); } // If source is a result of another standard FP operation it is already in @@ -8831,7 +8846,7 @@ bool SITargetLowering::isCanonicalized(SelectionDAG &DAG, SDValue Op, // snans will be quieted, so we only need to worry about denormals. if (Subtarget->supportsMinMaxDenormModes() || - denormalsEnabledForType(Op.getValueType())) + denormalsEnabledForType(DAG, Op.getValueType())) return true; // Flushing may be required. @@ -8903,7 +8918,7 @@ bool SITargetLowering::isCanonicalized(SelectionDAG &DAG, SDValue Op, LLVM_FALLTHROUGH; } default: - return denormalsEnabledForType(Op.getValueType()) && + return denormalsEnabledForType(DAG, Op.getValueType()) && DAG.isKnownNeverSNaN(Op); } @@ -8914,7 +8929,7 @@ bool SITargetLowering::isCanonicalized(SelectionDAG &DAG, SDValue Op, SDValue SITargetLowering::getCanonicalConstantFP( SelectionDAG &DAG, const SDLoc &SL, EVT VT, const APFloat &C) const { // Flush denormals to 0 if not enabled. - if (C.isDenormal() && !denormalsEnabledForType(VT)) + if (C.isDenormal() && !denormalsEnabledForType(DAG, VT)) return DAG.getConstantFP(0.0, SL, VT); if (C.isNaN()) { @@ -9452,8 +9467,8 @@ unsigned SITargetLowering::getFusedOpcode(const SelectionDAG &DAG, // Only do this if we are not trying to support denormals. v_mad_f32 does not // support denormals ever. - if (((VT == MVT::f32 && !Subtarget->hasFP32Denormals()) || - (VT == MVT::f16 && !Subtarget->hasFP16Denormals() && + if (((VT == MVT::f32 && !hasFP32Denormals(DAG.getMachineFunction())) || + (VT == MVT::f16 && !hasFP64FP16Denormals(DAG.getMachineFunction()) && getSubtarget()->hasMadF16())) && isOperationLegal(ISD::FMAD, VT)) return ISD::FMAD; @@ -10964,14 +10979,14 @@ bool SITargetLowering::isSDNodeSourceOfDivergence(const SDNode * N, return false; } -bool SITargetLowering::denormalsEnabledForType(EVT VT) const { +bool SITargetLowering::denormalsEnabledForType(const SelectionDAG &DAG, + EVT VT) const { switch (VT.getScalarType().getSimpleVT().SimpleTy) { case MVT::f32: - return Subtarget->hasFP32Denormals(); + return hasFP32Denormals(DAG.getMachineFunction()); case MVT::f64: - return Subtarget->hasFP64Denormals(); case MVT::f16: - return Subtarget->hasFP16Denormals(); + return hasFP64FP16Denormals(DAG.getMachineFunction()); default: return false; } |