diff options
Diffstat (limited to 'llvm/lib')
-rw-r--r-- | llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp | 46 | ||||
-rw-r--r-- | llvm/lib/CodeGen/GlobalISel/Utils.cpp | 25 | ||||
-rw-r--r-- | llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp | 56 | ||||
-rw-r--r-- | llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h | 2 |
4 files changed, 128 insertions, 1 deletions
diff --git a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp index ddcdd9debdd..d5a4e891ce6 100644 --- a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp +++ b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp @@ -1384,6 +1384,12 @@ LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) { case TargetOpcode::G_FNEG: case TargetOpcode::G_FABS: case TargetOpcode::G_FCANONICALIZE: + case TargetOpcode::G_FMINNUM: + case TargetOpcode::G_FMAXNUM: + case TargetOpcode::G_FMINNUM_IEEE: + case TargetOpcode::G_FMAXNUM_IEEE: + case TargetOpcode::G_FMINIMUM: + case TargetOpcode::G_FMAXIMUM: case TargetOpcode::G_FDIV: case TargetOpcode::G_FREM: case TargetOpcode::G_FCEIL: @@ -1696,6 +1702,9 @@ LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT Ty) { return lowerMinMax(MI, TypeIdx, Ty); case G_FCOPYSIGN: return lowerFCopySign(MI, TypeIdx, Ty); + case G_FMINNUM: + case G_FMAXNUM: + return lowerFMinNumMaxNum(MI); } } @@ -2307,6 +2316,12 @@ LegalizerHelper::fewerElementsVector(MachineInstr &MI, unsigned TypeIdx, case G_SMAX: case G_UMIN: case G_UMAX: + case G_FMINNUM: + case G_FMAXNUM: + case G_FMINNUM_IEEE: + case G_FMAXNUM_IEEE: + case G_FMINIMUM: + case G_FMAXIMUM: return fewerElementsVectorBasic(MI, TypeIdx, NarrowTy); case G_SHL: case G_LSHR: @@ -3273,3 +3288,34 @@ LegalizerHelper::lowerFCopySign(MachineInstr &MI, unsigned TypeIdx, LLT Ty) { MI.eraseFromParent(); return Legalized; } + +LegalizerHelper::LegalizeResult +LegalizerHelper::lowerFMinNumMaxNum(MachineInstr &MI) { + unsigned NewOp = MI.getOpcode() == TargetOpcode::G_FMINNUM ? + TargetOpcode::G_FMINNUM_IEEE : TargetOpcode::G_FMAXNUM_IEEE; + + Register Dst = MI.getOperand(0).getReg(); + Register Src0 = MI.getOperand(1).getReg(); + Register Src1 = MI.getOperand(2).getReg(); + LLT Ty = MRI.getType(Dst); + + if (!MI.getFlag(MachineInstr::FmNoNans)) { + // Insert canonicalizes if it's possible we need to quiet to get correct + // sNaN behavior. + + // Note this must be done here, and not as an optimization combine in the + // absence of a dedicate quiet-snan instruction as we're using an + // omni-purpose G_FCANONICALIZE. + if (!isKnownNeverSNaN(Src0, MRI)) + Src0 = MIRBuilder.buildFCanonicalize(Ty, Src0, MI.getFlags()).getReg(0); + + if (!isKnownNeverSNaN(Src1, MRI)) + Src1 = MIRBuilder.buildFCanonicalize(Ty, Src1, MI.getFlags()).getReg(0); + } + + // If there are no nans, it's safe to simply replace this with the non-IEEE + // version. + MIRBuilder.buildInstr(NewOp, {Dst}, {Src0, Src1}, MI.getFlags()); + MI.eraseFromParent(); + return Legalized; +} diff --git a/llvm/lib/CodeGen/GlobalISel/Utils.cpp b/llvm/lib/CodeGen/GlobalISel/Utils.cpp index ed168a6188b..deccdc6c750 100644 --- a/llvm/lib/CodeGen/GlobalISel/Utils.cpp +++ b/llvm/lib/CodeGen/GlobalISel/Utils.cpp @@ -367,6 +367,31 @@ Optional<APInt> llvm::ConstantFoldBinOp(unsigned Opcode, const unsigned Op1, return None; } +bool llvm::isKnownNeverNaN(Register Val, const MachineRegisterInfo &MRI, + bool SNaN) { + const MachineInstr *DefMI = MRI.getVRegDef(Val); + if (!DefMI) + return false; + + if (DefMI->getFlag(MachineInstr::FmNoNans)) + return true; + + if (SNaN) { + // FP operations quiet. For now, just handle the ones inserted during + // legalization. + switch (DefMI->getOpcode()) { + case TargetOpcode::G_FPEXT: + case TargetOpcode::G_FPTRUNC: + case TargetOpcode::G_FCANONICALIZE: + return true; + default: + return false; + } + } + + return false; +} + void llvm::getSelectionDAGFallbackAnalysisUsage(AnalysisUsage &AU) { AU.addPreserved<StackProtector>(); } diff --git a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp index 38e15171128..df58e7dbef0 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp @@ -15,7 +15,7 @@ #include "AMDGPULegalizerInfo.h" #include "AMDGPUTargetMachine.h" #include "SIMachineFunctionInfo.h" - +#include "llvm/CodeGen/GlobalISel/LegalizerHelper.h" #include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h" #include "llvm/CodeGen/TargetOpcodes.h" #include "llvm/CodeGen/ValueTypes.h" @@ -173,6 +173,10 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_, S32, S64, S16 }; + const std::initializer_list<LLT> FPTypesPK16 = { + S32, S64, S16, V2S16 + }; + setAction({G_BRCOND, S1}, Legal); // TODO: All multiples of 32, vectors of pointers, all v2s16 pairs, more @@ -271,6 +275,27 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_, FPOpActions.legalFor({S16}); } + auto &MinNumMaxNum = getActionDefinitionsBuilder({ + G_FMINNUM, G_FMAXNUM, G_FMINNUM_IEEE, G_FMAXNUM_IEEE}); + + if (ST.hasVOP3PInsts()) { + MinNumMaxNum.customFor(FPTypesPK16) + .clampMaxNumElements(0, S16, 2) + .clampScalar(0, S16, S64) + .scalarize(0); + } else if (ST.has16BitInsts()) { + MinNumMaxNum.customFor(FPTypes16) + .clampScalar(0, S16, S64) + .scalarize(0); + } else { + MinNumMaxNum.customFor(FPTypesBase) + .clampScalar(0, S32, S64) + .scalarize(0); + } + + // TODO: Implement + getActionDefinitionsBuilder({G_FMINIMUM, G_FMAXIMUM}).lower(); + if (ST.hasVOP3PInsts()) FPOpActions.clampMaxNumElements(0, S16, 2); FPOpActions @@ -757,6 +782,11 @@ bool AMDGPULegalizerInfo::legalizeCustom(MachineInstr &MI, return legalizeITOFP(MI, MRI, MIRBuilder, true); case TargetOpcode::G_UITOFP: return legalizeITOFP(MI, MRI, MIRBuilder, false); + case TargetOpcode::G_FMINNUM: + case TargetOpcode::G_FMAXNUM: + case TargetOpcode::G_FMINNUM_IEEE: + case TargetOpcode::G_FMAXNUM_IEEE: + return legalizeMinNumMaxNum(MI, MRI, MIRBuilder); default: return false; } @@ -1064,6 +1094,30 @@ bool AMDGPULegalizerInfo::legalizeITOFP( return true; } +bool AMDGPULegalizerInfo::legalizeMinNumMaxNum( + MachineInstr &MI, MachineRegisterInfo &MRI, + MachineIRBuilder &B) const { + MachineFunction &MF = B.getMF(); + const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>(); + + const bool IsIEEEOp = MI.getOpcode() == AMDGPU::G_FMINNUM_IEEE || + MI.getOpcode() == AMDGPU::G_FMAXNUM_IEEE; + + // With ieee_mode disabled, the instructions have the correct behavior + // already for G_FMINNUM/G_FMAXNUM + if (!MFI->getMode().IEEE) + return !IsIEEEOp; + + if (IsIEEEOp) + return true; + + MachineIRBuilder HelperBuilder(MI); + GISelObserverWrapper DummyObserver; + LegalizerHelper Helper(MF, DummyObserver, HelperBuilder); + HelperBuilder.setMBB(*MI.getParent()); + return Helper.lowerFMinNumMaxNum(MI) == LegalizerHelper::Legalized; +} + // Return the use branch instruction, otherwise null if the usage is invalid. static MachineInstr *verifyCFIntrinsic(MachineInstr &MI, MachineRegisterInfo &MRI) { diff --git a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h index b0131084887..f5a6ff7d96c 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h +++ b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h @@ -49,6 +49,8 @@ public: MachineIRBuilder &MIRBuilder) const; bool legalizeITOFP(MachineInstr &MI, MachineRegisterInfo &MRI, MachineIRBuilder &MIRBuilder, bool Signed) const; + bool legalizeMinNumMaxNum(MachineInstr &MI, MachineRegisterInfo &MRI, + MachineIRBuilder &MIRBuilder) const; Register getLiveInRegister(MachineRegisterInfo &MRI, Register Reg, LLT Ty) const; |