summaryrefslogtreecommitdiffstats
path: root/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/lib/Target/AMDGPU/SIISelLowering.cpp')
-rw-r--r--llvm/lib/Target/AMDGPU/SIISelLowering.cpp67
1 files changed, 41 insertions, 26 deletions
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index 1a02037fcd4..c4712198693 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -100,6 +100,16 @@ static cl::opt<bool> DisableLoopAlignment(
cl::desc("Do not align and prefetch loops"),
cl::init(false));
+static bool hasFP32Denormals(const MachineFunction &MF) {
+ const SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
+ return Info->getMode().FP32Denormals;
+}
+
+static bool hasFP64FP16Denormals(const MachineFunction &MF) {
+ const SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
+ return Info->getMode().FP64FP16Denormals;
+}
+
static unsigned findFirstFreeSGPR(CCState &CCInfo) {
unsigned NumSGPRs = AMDGPU::SGPR_32RegClass.getNumRegs();
for (unsigned Reg = 0; Reg < NumSGPRs; ++Reg) {
@@ -370,9 +380,10 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM,
setOperationAction(ISD::FLOG10, MVT::f16, Custom);
}
- // v_mad_f32 does not support denormals according to some sources.
- if (!Subtarget->hasFP32Denormals())
- setOperationAction(ISD::FMAD, MVT::f32, Legal);
+ // v_mad_f32 does not support denormals. We report it as unconditionally
+ // legal, and the context where it is formed will disallow it when fp32
+ // denormals are enabled.
+ setOperationAction(ISD::FMAD, MVT::f32, Legal);
if (!Subtarget->hasBFI()) {
// fcopysign can be done in a single instruction with BFI.
@@ -510,7 +521,7 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM,
// F16 - VOP3 Actions.
setOperationAction(ISD::FMA, MVT::f16, Legal);
- if (!Subtarget->hasFP16Denormals() && STI.hasMadF16())
+ if (STI.hasMadF16())
setOperationAction(ISD::FMAD, MVT::f16, Legal);
for (MVT VT : {MVT::v2i16, MVT::v2f16, MVT::v4i16, MVT::v4f16}) {
@@ -772,8 +783,9 @@ bool SITargetLowering::isFPExtFoldable(const SelectionDAG &DAG, unsigned Opcode,
EVT DestVT, EVT SrcVT) const {
return ((Opcode == ISD::FMAD && Subtarget->hasMadMixInsts()) ||
(Opcode == ISD::FMA && Subtarget->hasFmaMixInsts())) &&
- DestVT.getScalarType() == MVT::f32 && !Subtarget->hasFP32Denormals() &&
- SrcVT.getScalarType() == MVT::f16;
+ DestVT.getScalarType() == MVT::f32 &&
+ SrcVT.getScalarType() == MVT::f16 &&
+ !hasFP32Denormals(DAG.getMachineFunction());
}
bool SITargetLowering::isShuffleMaskLegal(ArrayRef<int>, EVT) const {
@@ -3930,7 +3942,7 @@ bool SITargetLowering::isFMAFasterThanFMulAndFAdd(const MachineFunction &MF,
// mad available which returns the same result as the separate operations
// which we should prefer over fma. We can't use this if we want to support
// denormals, so only report this in these cases.
- if (Subtarget->hasFP32Denormals())
+ if (hasFP32Denormals(MF))
return Subtarget->hasFastFMAF32() || Subtarget->hasDLInsts();
// If the subtarget has v_fmac_f32, that's just as good as v_mac_f32.
@@ -3939,7 +3951,7 @@ bool SITargetLowering::isFMAFasterThanFMulAndFAdd(const MachineFunction &MF,
case MVT::f64:
return true;
case MVT::f16:
- return Subtarget->has16BitInsts() && Subtarget->hasFP16Denormals();
+ return Subtarget->has16BitInsts() && hasFP64FP16Denormals(MF);
default:
break;
}
@@ -3953,9 +3965,11 @@ bool SITargetLowering::isFMADLegalForFAddFSub(const SelectionDAG &DAG,
// v_mad_f32/v_mac_f32 do not support denormals.
EVT VT = N->getValueType(0);
if (VT == MVT::f32)
- return !Subtarget->hasFP32Denormals();
- if (VT == MVT::f16)
- return !Subtarget->hasFP16Denormals() && Subtarget->hasMadF16();
+ return !hasFP32Denormals(DAG.getMachineFunction());
+ if (VT == MVT::f16) {
+ return Subtarget->hasMadF16() &&
+ !hasFP64FP16Denormals(DAG.getMachineFunction());
+ }
return false;
}
@@ -7564,7 +7578,7 @@ SDValue SITargetLowering::lowerFastUnsafeFDIV(SDValue Op,
const SDNodeFlags Flags = Op->getFlags();
bool Unsafe = DAG.getTarget().Options.UnsafeFPMath || Flags.hasAllowReciprocal();
- if (!Unsafe && VT == MVT::f32 && Subtarget->hasFP32Denormals())
+ if (!Unsafe && VT == MVT::f32 && hasFP32Denormals(DAG.getMachineFunction()))
return SDValue();
if (const ConstantFPSDNode *CLHS = dyn_cast<ConstantFPSDNode>(LHS)) {
@@ -7707,7 +7721,7 @@ SDValue SITargetLowering::lowerFDIV_FAST(SDValue Op, SelectionDAG &DAG) const {
static const SDValue getSPDenormModeValue(int SPDenormMode, SelectionDAG &DAG,
const SDLoc &SL, const GCNSubtarget *ST) {
assert(ST->hasDenormModeInst() && "Requires S_DENORM_MODE");
- int DPDenormModeDefault = ST->hasFP64Denormals()
+ int DPDenormModeDefault = hasFP64FP16Denormals(DAG.getMachineFunction())
? FP_DENORM_FLUSH_NONE
: FP_DENORM_FLUSH_IN_FLUSH_OUT;
@@ -7743,7 +7757,9 @@ SDValue SITargetLowering::LowerFDIV32(SDValue Op, SelectionDAG &DAG) const {
(1 << AMDGPU::Hwreg::WIDTH_M1_SHIFT_);
const SDValue BitField = DAG.getTargetConstant(Denorm32Reg, SL, MVT::i16);
- if (!Subtarget->hasFP32Denormals()) {
+ const bool HasFP32Denormals = hasFP32Denormals(DAG.getMachineFunction());
+
+ if (!HasFP32Denormals) {
SDVTList BindParamVTs = DAG.getVTList(MVT::Other, MVT::Glue);
SDValue EnableDenorm;
@@ -7787,8 +7803,7 @@ SDValue SITargetLowering::LowerFDIV32(SDValue Op, SelectionDAG &DAG) const {
SDValue Fma4 = getFPTernOp(DAG, ISD::FMA, SL, MVT::f32, NegDivScale0, Fma3,
NumeratorScaled, Fma3);
- if (!Subtarget->hasFP32Denormals()) {
-
+ if (!HasFP32Denormals) {
SDValue DisableDenorm;
if (Subtarget->hasDenormModeInst()) {
const SDValue DisableDenormValue =
@@ -8762,7 +8777,7 @@ bool SITargetLowering::isCanonicalized(SelectionDAG &DAG, SDValue Op,
auto F = CFP->getValueAPF();
if (F.isNaN() && F.isSignaling())
return false;
- return !F.isDenormal() || denormalsEnabledForType(Op.getValueType());
+ return !F.isDenormal() || denormalsEnabledForType(DAG, Op.getValueType());
}
// If source is a result of another standard FP operation it is already in
@@ -8831,7 +8846,7 @@ bool SITargetLowering::isCanonicalized(SelectionDAG &DAG, SDValue Op,
// snans will be quieted, so we only need to worry about denormals.
if (Subtarget->supportsMinMaxDenormModes() ||
- denormalsEnabledForType(Op.getValueType()))
+ denormalsEnabledForType(DAG, Op.getValueType()))
return true;
// Flushing may be required.
@@ -8903,7 +8918,7 @@ bool SITargetLowering::isCanonicalized(SelectionDAG &DAG, SDValue Op,
LLVM_FALLTHROUGH;
}
default:
- return denormalsEnabledForType(Op.getValueType()) &&
+ return denormalsEnabledForType(DAG, Op.getValueType()) &&
DAG.isKnownNeverSNaN(Op);
}
@@ -8914,7 +8929,7 @@ bool SITargetLowering::isCanonicalized(SelectionDAG &DAG, SDValue Op,
SDValue SITargetLowering::getCanonicalConstantFP(
SelectionDAG &DAG, const SDLoc &SL, EVT VT, const APFloat &C) const {
// Flush denormals to 0 if not enabled.
- if (C.isDenormal() && !denormalsEnabledForType(VT))
+ if (C.isDenormal() && !denormalsEnabledForType(DAG, VT))
return DAG.getConstantFP(0.0, SL, VT);
if (C.isNaN()) {
@@ -9452,8 +9467,8 @@ unsigned SITargetLowering::getFusedOpcode(const SelectionDAG &DAG,
// Only do this if we are not trying to support denormals. v_mad_f32 does not
// support denormals ever.
- if (((VT == MVT::f32 && !Subtarget->hasFP32Denormals()) ||
- (VT == MVT::f16 && !Subtarget->hasFP16Denormals() &&
+ if (((VT == MVT::f32 && !hasFP32Denormals(DAG.getMachineFunction())) ||
+ (VT == MVT::f16 && !hasFP64FP16Denormals(DAG.getMachineFunction()) &&
getSubtarget()->hasMadF16())) &&
isOperationLegal(ISD::FMAD, VT))
return ISD::FMAD;
@@ -10964,14 +10979,14 @@ bool SITargetLowering::isSDNodeSourceOfDivergence(const SDNode * N,
return false;
}
-bool SITargetLowering::denormalsEnabledForType(EVT VT) const {
+bool SITargetLowering::denormalsEnabledForType(const SelectionDAG &DAG,
+ EVT VT) const {
switch (VT.getScalarType().getSimpleVT().SimpleTy) {
case MVT::f32:
- return Subtarget->hasFP32Denormals();
+ return hasFP32Denormals(DAG.getMachineFunction());
case MVT::f64:
- return Subtarget->hasFP64Denormals();
case MVT::f16:
- return Subtarget->hasFP16Denormals();
+ return hasFP64FP16Denormals(DAG.getMachineFunction());
default:
return false;
}
OpenPOWER on IntegriCloud