diff options
| author | Matt Arsenault <Matthew.Arsenault@amd.com> | 2019-09-13 00:44:35 +0000 |
|---|---|---|
| committer | Matt Arsenault <Matthew.Arsenault@amd.com> | 2019-09-13 00:44:35 +0000 |
| commit | 4d3391803462433b05a3344e6c37435f725637c4 (patch) | |
| tree | d4c536b7baf26b1676377613d61ac7ea0af9c208 /llvm/lib | |
| parent | 38f6b3fd8dd7dc3a3b4eae63738d5d3a741b2227 (diff) | |
| download | bcm5719-llvm-4d3391803462433b05a3344e6c37435f725637c4.tar.gz bcm5719-llvm-4d3391803462433b05a3344e6c37435f725637c4.zip | |
AMDGPU/GlobalISel: Legalize G_FMAD
Unlike SelectionDAG, treat this as a normally legalizable operation.
In SelectionDAG this is supposed to only ever formed if it's legal,
but I've found that to be restricting. For AMDGPU this is contextually
legal depending on whether denormal flushing is allowed in the use
function.
Technically we currently treat the denormal mode as a subtarget
feature, so custom lowering could be avoided. However I consider this
to be a defect, and this should be contextually dependent on the
controllable rounding mode of the parent function.
llvm-svn: 371800
Diffstat (limited to 'llvm/lib')
| -rw-r--r-- | llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp | 15 | ||||
| -rw-r--r-- | llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp | 32 | ||||
| -rw-r--r-- | llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h | 3 | ||||
| -rw-r--r-- | llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp | 1 |
4 files changed, 51 insertions, 0 deletions
diff --git a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp index 93271dc9609..fbb817e127a 100644 --- a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp +++ b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp @@ -1949,6 +1949,8 @@ LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT Ty) { MI.eraseFromParent(); return Legalized; } + case TargetOpcode::G_FMAD: + return lowerFMad(MI); case TargetOpcode::G_ATOMIC_CMPXCHG_WITH_SUCCESS: { Register OldValRes = MI.getOperand(0).getReg(); Register SuccessRes = MI.getOperand(1).getReg(); @@ -3914,6 +3916,19 @@ LegalizerHelper::lowerFMinNumMaxNum(MachineInstr &MI) { return Legalized; } +LegalizerHelper::LegalizeResult LegalizerHelper::lowerFMad(MachineInstr &MI) { + // Expand G_FMAD a, b, c -> G_FADD (G_FMUL a, b), c + Register DstReg = MI.getOperand(0).getReg(); + LLT Ty = MRI.getType(DstReg); + unsigned Flags = MI.getFlags(); + + auto Mul = MIRBuilder.buildFMul(Ty, MI.getOperand(1), MI.getOperand(2), + Flags); + MIRBuilder.buildFAdd(DstReg, Mul, MI.getOperand(3), Flags); + MI.eraseFromParent(); + return Legalized; +} + LegalizerHelper::LegalizeResult LegalizerHelper::lowerUnmergeValues(MachineInstr &MI) { const unsigned NumDst = MI.getNumOperands() - 1; diff --git a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp index 9ea4a81f480..17db2fba592 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp @@ -397,6 +397,15 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_, .scalarize(0) .clampScalar(0, S32, S64); + // Whether this is legal depends on the floating point mode for the function. + auto &FMad = getActionDefinitionsBuilder(G_FMAD); + if (ST.hasMadF16()) + FMad.customFor({S32, S16}); + else + FMad.customFor({S32}); + FMad.scalarize(0) + .lower(); + getActionDefinitionsBuilder({G_SEXT, G_ZEXT, G_ANYEXT}) .legalFor({{S64, S32}, {S32, S16}, {S64, S16}, {S32, S1}, {S64, S1}, {S16, S1}, @@ -1050,6 +1059,8 @@ bool AMDGPULegalizerInfo::legalizeCustom(MachineInstr &MI, return legalizeGlobalValue(MI, MRI, B); case TargetOpcode::G_LOAD: return legalizeLoad(MI, MRI, B, Observer); + case TargetOpcode::G_FMAD: + return legalizeFMad(MI, MRI, B); default: return false; } @@ -1546,6 +1557,27 @@ bool AMDGPULegalizerInfo::legalizeLoad( return true; } +bool AMDGPULegalizerInfo::legalizeFMad( + MachineInstr &MI, MachineRegisterInfo &MRI, + MachineIRBuilder &B) const { + LLT Ty = MRI.getType(MI.getOperand(0).getReg()); + assert(Ty.isScalar()); + + // TODO: Always legal with future ftz flag. + if (Ty == LLT::scalar(32) && !ST.hasFP32Denormals()) + return true; + if (Ty == LLT::scalar(16) && !ST.hasFP16Denormals()) + return true; + + MachineFunction &MF = B.getMF(); + + MachineIRBuilder HelperBuilder(MI); + GISelObserverWrapper DummyObserver; + LegalizerHelper Helper(MF, DummyObserver, HelperBuilder); + HelperBuilder.setMBB(*MI.getParent()); + return Helper.lowerFMad(MI) == LegalizerHelper::Legalized; +} + // Return the use branch instruction, otherwise null if the usage is invalid. static MachineInstr *verifyCFIntrinsic(MachineInstr &MI, MachineRegisterInfo &MRI) { diff --git a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h index 24777d49b26..99564a04dbb 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h +++ b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h @@ -64,6 +64,9 @@ public: MachineIRBuilder &B, GISelChangeObserver &Observer) const; + bool legalizeFMad(MachineInstr &MI, MachineRegisterInfo &MRI, + MachineIRBuilder &B) const; + Register getLiveInRegister(MachineRegisterInfo &MRI, Register Reg, LLT Ty) const; diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp index 60f0d37982f..c898f281442 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp @@ -1780,6 +1780,7 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const { case AMDGPU::G_FPTOUI: case AMDGPU::G_FMUL: case AMDGPU::G_FMA: + case AMDGPU::G_FMAD: case AMDGPU::G_FSQRT: case AMDGPU::G_SITOFP: case AMDGPU::G_UITOFP: |

