summaryrefslogtreecommitdiffstats
path: root/llvm/lib
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/lib')
-rw-r--r--llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp46
-rw-r--r--llvm/lib/CodeGen/GlobalISel/Utils.cpp25
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp56
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h2
4 files changed, 128 insertions, 1 deletions
diff --git a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
index ddcdd9debdd..d5a4e891ce6 100644
--- a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
@@ -1384,6 +1384,12 @@ LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) {
case TargetOpcode::G_FNEG:
case TargetOpcode::G_FABS:
case TargetOpcode::G_FCANONICALIZE:
+ case TargetOpcode::G_FMINNUM:
+ case TargetOpcode::G_FMAXNUM:
+ case TargetOpcode::G_FMINNUM_IEEE:
+ case TargetOpcode::G_FMAXNUM_IEEE:
+ case TargetOpcode::G_FMINIMUM:
+ case TargetOpcode::G_FMAXIMUM:
case TargetOpcode::G_FDIV:
case TargetOpcode::G_FREM:
case TargetOpcode::G_FCEIL:
@@ -1696,6 +1702,9 @@ LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT Ty) {
return lowerMinMax(MI, TypeIdx, Ty);
case G_FCOPYSIGN:
return lowerFCopySign(MI, TypeIdx, Ty);
+ case G_FMINNUM:
+ case G_FMAXNUM:
+ return lowerFMinNumMaxNum(MI);
}
}
@@ -2307,6 +2316,12 @@ LegalizerHelper::fewerElementsVector(MachineInstr &MI, unsigned TypeIdx,
case G_SMAX:
case G_UMIN:
case G_UMAX:
+ case G_FMINNUM:
+ case G_FMAXNUM:
+ case G_FMINNUM_IEEE:
+ case G_FMAXNUM_IEEE:
+ case G_FMINIMUM:
+ case G_FMAXIMUM:
return fewerElementsVectorBasic(MI, TypeIdx, NarrowTy);
case G_SHL:
case G_LSHR:
@@ -3273,3 +3288,34 @@ LegalizerHelper::lowerFCopySign(MachineInstr &MI, unsigned TypeIdx, LLT Ty) {
MI.eraseFromParent();
return Legalized;
}
+
+LegalizerHelper::LegalizeResult
+LegalizerHelper::lowerFMinNumMaxNum(MachineInstr &MI) {
+ unsigned NewOp = MI.getOpcode() == TargetOpcode::G_FMINNUM ?
+ TargetOpcode::G_FMINNUM_IEEE : TargetOpcode::G_FMAXNUM_IEEE;
+
+ Register Dst = MI.getOperand(0).getReg();
+ Register Src0 = MI.getOperand(1).getReg();
+ Register Src1 = MI.getOperand(2).getReg();
+ LLT Ty = MRI.getType(Dst);
+
+ if (!MI.getFlag(MachineInstr::FmNoNans)) {
+ // Insert canonicalizes if it's possible we need to quiet to get correct
+ // sNaN behavior.
+
+ // Note this must be done here, and not as an optimization combine in the
+ // absence of a dedicate quiet-snan instruction as we're using an
+ // omni-purpose G_FCANONICALIZE.
+ if (!isKnownNeverSNaN(Src0, MRI))
+ Src0 = MIRBuilder.buildFCanonicalize(Ty, Src0, MI.getFlags()).getReg(0);
+
+ if (!isKnownNeverSNaN(Src1, MRI))
+ Src1 = MIRBuilder.buildFCanonicalize(Ty, Src1, MI.getFlags()).getReg(0);
+ }
+
+ // If there are no nans, it's safe to simply replace this with the non-IEEE
+ // version.
+ MIRBuilder.buildInstr(NewOp, {Dst}, {Src0, Src1}, MI.getFlags());
+ MI.eraseFromParent();
+ return Legalized;
+}
diff --git a/llvm/lib/CodeGen/GlobalISel/Utils.cpp b/llvm/lib/CodeGen/GlobalISel/Utils.cpp
index ed168a6188b..deccdc6c750 100644
--- a/llvm/lib/CodeGen/GlobalISel/Utils.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/Utils.cpp
@@ -367,6 +367,31 @@ Optional<APInt> llvm::ConstantFoldBinOp(unsigned Opcode, const unsigned Op1,
return None;
}
+bool llvm::isKnownNeverNaN(Register Val, const MachineRegisterInfo &MRI,
+ bool SNaN) {
+ const MachineInstr *DefMI = MRI.getVRegDef(Val);
+ if (!DefMI)
+ return false;
+
+ if (DefMI->getFlag(MachineInstr::FmNoNans))
+ return true;
+
+ if (SNaN) {
+ // FP operations quiet. For now, just handle the ones inserted during
+ // legalization.
+ switch (DefMI->getOpcode()) {
+ case TargetOpcode::G_FPEXT:
+ case TargetOpcode::G_FPTRUNC:
+ case TargetOpcode::G_FCANONICALIZE:
+ return true;
+ default:
+ return false;
+ }
+ }
+
+ return false;
+}
+
void llvm::getSelectionDAGFallbackAnalysisUsage(AnalysisUsage &AU) {
AU.addPreserved<StackProtector>();
}
diff --git a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
index 38e15171128..df58e7dbef0 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
@@ -15,7 +15,7 @@
#include "AMDGPULegalizerInfo.h"
#include "AMDGPUTargetMachine.h"
#include "SIMachineFunctionInfo.h"
-
+#include "llvm/CodeGen/GlobalISel/LegalizerHelper.h"
#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
#include "llvm/CodeGen/TargetOpcodes.h"
#include "llvm/CodeGen/ValueTypes.h"
@@ -173,6 +173,10 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_,
S32, S64, S16
};
+ const std::initializer_list<LLT> FPTypesPK16 = {
+ S32, S64, S16, V2S16
+ };
+
setAction({G_BRCOND, S1}, Legal);
// TODO: All multiples of 32, vectors of pointers, all v2s16 pairs, more
@@ -271,6 +275,27 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_,
FPOpActions.legalFor({S16});
}
+ auto &MinNumMaxNum = getActionDefinitionsBuilder({
+ G_FMINNUM, G_FMAXNUM, G_FMINNUM_IEEE, G_FMAXNUM_IEEE});
+
+ if (ST.hasVOP3PInsts()) {
+ MinNumMaxNum.customFor(FPTypesPK16)
+ .clampMaxNumElements(0, S16, 2)
+ .clampScalar(0, S16, S64)
+ .scalarize(0);
+ } else if (ST.has16BitInsts()) {
+ MinNumMaxNum.customFor(FPTypes16)
+ .clampScalar(0, S16, S64)
+ .scalarize(0);
+ } else {
+ MinNumMaxNum.customFor(FPTypesBase)
+ .clampScalar(0, S32, S64)
+ .scalarize(0);
+ }
+
+ // TODO: Implement
+ getActionDefinitionsBuilder({G_FMINIMUM, G_FMAXIMUM}).lower();
+
if (ST.hasVOP3PInsts())
FPOpActions.clampMaxNumElements(0, S16, 2);
FPOpActions
@@ -757,6 +782,11 @@ bool AMDGPULegalizerInfo::legalizeCustom(MachineInstr &MI,
return legalizeITOFP(MI, MRI, MIRBuilder, true);
case TargetOpcode::G_UITOFP:
return legalizeITOFP(MI, MRI, MIRBuilder, false);
+ case TargetOpcode::G_FMINNUM:
+ case TargetOpcode::G_FMAXNUM:
+ case TargetOpcode::G_FMINNUM_IEEE:
+ case TargetOpcode::G_FMAXNUM_IEEE:
+ return legalizeMinNumMaxNum(MI, MRI, MIRBuilder);
default:
return false;
}
@@ -1064,6 +1094,30 @@ bool AMDGPULegalizerInfo::legalizeITOFP(
return true;
}
+bool AMDGPULegalizerInfo::legalizeMinNumMaxNum(
+ MachineInstr &MI, MachineRegisterInfo &MRI,
+ MachineIRBuilder &B) const {
+ MachineFunction &MF = B.getMF();
+ const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
+
+ const bool IsIEEEOp = MI.getOpcode() == AMDGPU::G_FMINNUM_IEEE ||
+ MI.getOpcode() == AMDGPU::G_FMAXNUM_IEEE;
+
+ // With ieee_mode disabled, the instructions have the correct behavior
+ // already for G_FMINNUM/G_FMAXNUM
+ if (!MFI->getMode().IEEE)
+ return !IsIEEEOp;
+
+ if (IsIEEEOp)
+ return true;
+
+ MachineIRBuilder HelperBuilder(MI);
+ GISelObserverWrapper DummyObserver;
+ LegalizerHelper Helper(MF, DummyObserver, HelperBuilder);
+ HelperBuilder.setMBB(*MI.getParent());
+ return Helper.lowerFMinNumMaxNum(MI) == LegalizerHelper::Legalized;
+}
+
// Return the use branch instruction, otherwise null if the usage is invalid.
static MachineInstr *verifyCFIntrinsic(MachineInstr &MI,
MachineRegisterInfo &MRI) {
diff --git a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h
index b0131084887..f5a6ff7d96c 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h
@@ -49,6 +49,8 @@ public:
MachineIRBuilder &MIRBuilder) const;
bool legalizeITOFP(MachineInstr &MI, MachineRegisterInfo &MRI,
MachineIRBuilder &MIRBuilder, bool Signed) const;
+ bool legalizeMinNumMaxNum(MachineInstr &MI, MachineRegisterInfo &MRI,
+ MachineIRBuilder &MIRBuilder) const;
Register getLiveInRegister(MachineRegisterInfo &MRI,
Register Reg, LLT Ty) const;
OpenPOWER on IntegriCloud