diff options
Diffstat (limited to 'llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp')
-rw-r--r-- | llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp | 38 |
1 files changed, 38 insertions, 0 deletions
diff --git a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp index aff85b118f5..139992c5161 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp @@ -1274,6 +1274,42 @@ bool AMDGPULegalizerInfo::legalizePreloadedArgIntrin( return false; } +bool AMDGPULegalizerInfo::legalizeFDIVFast(MachineInstr &MI, + MachineRegisterInfo &MRI, + MachineIRBuilder &B) const { + B.setInstr(MI); + Register Res = MI.getOperand(0).getReg(); + Register LHS = MI.getOperand(2).getReg(); + Register RHS = MI.getOperand(3).getReg(); + uint16_t Flags = MI.getFlags(); + + LLT S32 = LLT::scalar(32); + LLT S1 = LLT::scalar(1); + + auto Abs = B.buildFAbs(S32, RHS, Flags); + const APFloat C0Val(1.0f); + + auto C0 = B.buildConstant(S32, 0x6f800000); + auto C1 = B.buildConstant(S32, 0x2f800000); + auto C2 = B.buildConstant(S32, FloatToBits(1.0f)); + + auto CmpRes = B.buildFCmp(CmpInst::FCMP_OGT, S1, Abs, C0, Flags); + auto Sel = B.buildSelect(S32, CmpRes, C1, C2, Flags); + + auto Mul0 = B.buildFMul(S32, RHS, Sel, Flags); + + auto RCP = B.buildIntrinsic(Intrinsic::amdgcn_rcp, {S32}, false) + .addUse(Mul0.getReg(0)) + .setMIFlags(Flags); + + auto Mul1 = B.buildFMul(S32, LHS, RCP, Flags); + + B.buildFMul(Res, Sel, Mul1, Flags); + + MI.eraseFromParent(); + return true; +} + bool AMDGPULegalizerInfo::legalizeImplicitArgPtr(MachineInstr &MI, MachineRegisterInfo &MRI, MachineIRBuilder &B) const { @@ -1388,6 +1424,8 @@ bool AMDGPULegalizerInfo::legalizeIntrinsic(MachineInstr &MI, case Intrinsic::amdgcn_dispatch_id: return legalizePreloadedArgIntrin(MI, MRI, B, AMDGPUFunctionArgInfo::DISPATCH_ID); + case Intrinsic::amdgcn_fdiv_fast: + return legalizeFDIVFast(MI, MRI, B); default: return true; } |