diff options
| author | Austin Kerbow <Austin.Kerbow@amd.com> | 2019-10-21 22:18:26 +0000 |
|---|---|---|
| committer | Austin Kerbow <Austin.Kerbow@amd.com> | 2019-10-21 22:18:26 +0000 |
| commit | 97263fa2ddd21661b90085845fed61b9b5dec367 (patch) | |
| tree | 2d02dfdf3bc88dec74741516ce6116d937a030d6 /llvm/lib/Target | |
| parent | 3434472ed74141848634b5eb3cd625d651e22562 (diff) | |
| download | bcm5719-llvm-97263fa2ddd21661b90085845fed61b9b5dec367.tar.gz bcm5719-llvm-97263fa2ddd21661b90085845fed61b9b5dec367.zip | |
AMDGPU/GlobalISel: Legalize fast unsafe FDIV
Reviewers: arsenm
Reviewed By: arsenm
Subscribers: kzhuravl, jvesely, wdng, nhaehnle, yaxunl, rovka, dstuttard, tpr, t-tye, hiraditya, Petar.Avramovic, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D69231
llvm-svn: 375460
Diffstat (limited to 'llvm/lib/Target')
| -rw-r--r-- | llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp | 88 | ||||
| -rw-r--r-- | llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h | 8 |
2 files changed, 90 insertions, 6 deletions
diff --git a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp index b226f6c2d2e..5aba35a19ce 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp @@ -336,6 +336,8 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_, .legalFor({S32, S64}); auto &TrigActions = getActionDefinitionsBuilder({G_FSIN, G_FCOS}) .customFor({S32, S64}); + auto &FDIVActions = getActionDefinitionsBuilder(G_FDIV) + .customFor({S32, S64}); if (ST.has16BitInsts()) { if (ST.hasVOP3PInsts()) @@ -344,6 +346,7 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_, FPOpActions.legalFor({S16}); TrigActions.customFor({S16}); + FDIVActions.customFor({S16}); } auto &MinNumMaxNum = getActionDefinitionsBuilder({ @@ -375,6 +378,10 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_, .scalarize(0) .clampScalar(0, ST.has16BitInsts() ? S16 : S32, S64); + FDIVActions + .scalarize(0) + .clampScalar(0, ST.has16BitInsts() ? S16 : S32, S64); + getActionDefinitionsBuilder({G_FNEG, G_FABS}) .legalFor(FPTypesPK16) .clampMaxNumElements(0, S16, 2) @@ -1107,6 +1114,8 @@ bool AMDGPULegalizerInfo::legalizeCustom(MachineInstr &MI, return legalizeLoad(MI, MRI, B, Observer); case TargetOpcode::G_FMAD: return legalizeFMad(MI, MRI, B); + case TargetOpcode::G_FDIV: + return legalizeFDIV(MI, MRI, B); default: return false; } @@ -1810,9 +1819,80 @@ bool AMDGPULegalizerInfo::legalizePreloadedArgIntrin( return false; } -bool AMDGPULegalizerInfo::legalizeFDIVFast(MachineInstr &MI, - MachineRegisterInfo &MRI, - MachineIRBuilder &B) const { +bool AMDGPULegalizerInfo::legalizeFDIV(MachineInstr &MI, + MachineRegisterInfo &MRI, + MachineIRBuilder &B) const { + B.setInstr(MI); + + if (legalizeFastUnsafeFDIV(MI, MRI, B)) + return true; + + return false; +} + +bool AMDGPULegalizerInfo::legalizeFastUnsafeFDIV(MachineInstr &MI, + MachineRegisterInfo &MRI, + MachineIRBuilder &B) const { + Register Res = MI.getOperand(0).getReg(); + Register LHS = MI.getOperand(1).getReg(); + Register RHS = MI.getOperand(2).getReg(); + + uint16_t Flags = MI.getFlags(); + + LLT ResTy = MRI.getType(Res); + LLT S32 = LLT::scalar(32); + LLT S64 = LLT::scalar(64); + + const MachineFunction &MF = B.getMF(); + bool Unsafe = + MF.getTarget().Options.UnsafeFPMath || MI.getFlag(MachineInstr::FmArcp); + + if (!MF.getTarget().Options.UnsafeFPMath && ResTy == S64) + return false; + + if (!Unsafe && ResTy == S32 && ST.hasFP32Denormals()) + return false; + + if (auto CLHS = getConstantFPVRegVal(LHS, MRI)) { + // 1 / x -> RCP(x) + if (CLHS->isExactlyValue(1.0)) { + B.buildIntrinsic(Intrinsic::amdgcn_rcp, Res, false) + .addUse(RHS) + .setMIFlags(Flags); + + MI.eraseFromParent(); + return true; + } + + // -1 / x -> RCP( FNEG(x) ) + if (CLHS->isExactlyValue(-1.0)) { + auto FNeg = B.buildFNeg(ResTy, RHS, Flags); + B.buildIntrinsic(Intrinsic::amdgcn_rcp, Res, false) + .addUse(FNeg.getReg(0)) + .setMIFlags(Flags); + + MI.eraseFromParent(); + return true; + } + } + + // x / y -> x * (1.0 / y) + if (Unsafe) { + auto RCP = B.buildIntrinsic(Intrinsic::amdgcn_rcp, {ResTy}, false) + .addUse(RHS) + .setMIFlags(Flags); + B.buildFMul(Res, LHS, RCP, Flags); + + MI.eraseFromParent(); + return true; + } + + return false; +} + +bool AMDGPULegalizerInfo::legalizeFDIVFastIntrin(MachineInstr &MI, + MachineRegisterInfo &MRI, + MachineIRBuilder &B) const { B.setInstr(MI); Register Res = MI.getOperand(0).getReg(); Register LHS = MI.getOperand(2).getReg(); @@ -2029,7 +2109,7 @@ bool AMDGPULegalizerInfo::legalizeIntrinsic(MachineInstr &MI, return legalizePreloadedArgIntrin(MI, MRI, B, AMDGPUFunctionArgInfo::DISPATCH_ID); case Intrinsic::amdgcn_fdiv_fast: - return legalizeFDIVFast(MI, MRI, B); + return legalizeFDIVFastIntrin(MI, MRI, B); case Intrinsic::amdgcn_is_shared: return legalizeIsAddrSpace(MI, MRI, B, AMDGPUAS::LOCAL_ADDRESS); case Intrinsic::amdgcn_is_private: diff --git a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h index e2edadc9697..d0fba23a868 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h +++ b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h @@ -81,8 +81,12 @@ public: MachineInstr &MI, MachineRegisterInfo &MRI, MachineIRBuilder &B, AMDGPUFunctionArgInfo::PreloadedValue ArgType) const; - bool legalizeFDIVFast(MachineInstr &MI, MachineRegisterInfo &MRI, - MachineIRBuilder &B) const; + bool legalizeFDIV(MachineInstr &MI, MachineRegisterInfo &MRI, + MachineIRBuilder &B) const; + bool legalizeFastUnsafeFDIV(MachineInstr &MI, MachineRegisterInfo &MRI, + MachineIRBuilder &B) const; + bool legalizeFDIVFastIntrin(MachineInstr &MI, MachineRegisterInfo &MRI, + MachineIRBuilder &B) const; bool legalizeImplicitArgPtr(MachineInstr &MI, MachineRegisterInfo &MRI, MachineIRBuilder &B) const; |

