diff options
author | Austin Kerbow <Austin.Kerbow@amd.com> | 2019-11-17 16:43:59 -0800 |
---|---|---|
committer | Austin Kerbow <Austin.Kerbow@amd.com> | 2019-11-19 21:02:27 -0800 |
commit | f3225f2abe78d8a25ee5deea4265b447e7b7d5ee (patch) | |
tree | f3f11e1628dc5abb866202ce14d86fe87b56763a /llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp | |
parent | 47feae5dd61d891d4c1382b9784738111b4f9396 (diff) | |
download | bcm5719-llvm-f3225f2abe78d8a25ee5deea4265b447e7b7d5ee.tar.gz bcm5719-llvm-f3225f2abe78d8a25ee5deea4265b447e7b7d5ee.zip |
AMDGPU/GlobalISel: Legalize FDIV64
Reviewers: arsenm
Reviewed By: arsenm
Subscribers: kzhuravl, jvesely, wdng, nhaehnle, yaxunl, rovka, dstuttard, tpr, t-tye, hiraditya, Petar.Avramovic, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D70403
Diffstat (limited to 'llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp')
-rw-r--r-- | llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp | 85 |
1 files changed, 85 insertions, 0 deletions
diff --git a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp index b4538097a21..c21102fd611 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp @@ -1867,6 +1867,7 @@ bool AMDGPULegalizerInfo::legalizeFDIV(MachineInstr &MI, LLT DstTy = MRI.getType(Dst); LLT S16 = LLT::scalar(16); LLT S32 = LLT::scalar(32); + LLT S64 = LLT::scalar(64); if (legalizeFastUnsafeFDIV(MI, MRI, B)) return true; @@ -1875,6 +1876,8 @@ bool AMDGPULegalizerInfo::legalizeFDIV(MachineInstr &MI, return legalizeFDIV16(MI, MRI, B); if (DstTy == S32) return legalizeFDIV32(MI, MRI, B); + if (DstTy == S64) + return legalizeFDIV64(MI, MRI, B); return false; } @@ -2072,6 +2075,88 @@ bool AMDGPULegalizerInfo::legalizeFDIV32(MachineInstr &MI, return true; } +bool AMDGPULegalizerInfo::legalizeFDIV64(MachineInstr &MI, + MachineRegisterInfo &MRI, + MachineIRBuilder &B) const { + B.setInstr(MI); + Register Res = MI.getOperand(0).getReg(); + Register LHS = MI.getOperand(1).getReg(); + Register RHS = MI.getOperand(2).getReg(); + + uint16_t Flags = MI.getFlags(); + + LLT S64 = LLT::scalar(64); + LLT S1 = LLT::scalar(1); + + auto One = B.buildFConstant(S64, 1.0); + + auto DivScale0 = B.buildIntrinsic(Intrinsic::amdgcn_div_scale, {S64, S1}, false) + .addUse(RHS) + .addUse(RHS) + .addUse(LHS) + .setMIFlags(Flags); + + auto NegDivScale0 = B.buildFNeg(S64, DivScale0.getReg(0), Flags); + + auto Rcp = B.buildIntrinsic(Intrinsic::amdgcn_rcp, {S64}, false) + .addUse(DivScale0.getReg(0)) + .setMIFlags(Flags); + + auto Fma0 = B.buildFMA(S64, NegDivScale0, Rcp, One, Flags); + auto Fma1 = B.buildFMA(S64, Rcp, Fma0, Rcp, Flags); + auto Fma2 = B.buildFMA(S64, NegDivScale0, Fma1, One, Flags); + + auto DivScale1 = B.buildIntrinsic(Intrinsic::amdgcn_div_scale, {S64, S1}, false) + .addUse(LHS) + .addUse(RHS) + .addUse(LHS) + .setMIFlags(Flags); + + auto Fma3 = B.buildFMA(S64, Fma1, Fma2, Fma1, Flags); + auto Mul = B.buildMul(S64, DivScale1.getReg(0), Fma3, Flags); + auto Fma4 = B.buildFMA(S64, NegDivScale0, Mul, DivScale1.getReg(0), Flags); + + Register Scale; + if (!ST.hasUsableDivScaleConditionOutput()) { + // Workaround a hardware bug on SI where the condition output from div_scale + // is not usable. + + Scale = MRI.createGenericVirtualRegister(S1); + + LLT S32 = LLT::scalar(32); + + auto NumUnmerge = B.buildUnmerge(S32, LHS); + auto DenUnmerge = B.buildUnmerge(S32, RHS); + auto Scale0Unmerge = B.buildUnmerge(S32, DivScale0); + auto Scale1Unmerge = B.buildUnmerge(S32, DivScale1); + + auto CmpNum = B.buildICmp(ICmpInst::ICMP_EQ, S1, NumUnmerge.getReg(1), + Scale1Unmerge.getReg(1)); + auto CmpDen = B.buildICmp(ICmpInst::ICMP_EQ, S1, DenUnmerge.getReg(1), + Scale0Unmerge.getReg(1)); + B.buildXor(Scale, CmpNum, CmpDen); + } else { + Scale = DivScale1.getReg(1); + } + + auto Fmas = B.buildIntrinsic(Intrinsic::amdgcn_div_fmas, {S64}, false) + .addUse(Fma4.getReg(0)) + .addUse(Fma3.getReg(0)) + .addUse(Mul.getReg(0)) + .addUse(Scale) + .setMIFlags(Flags); + + B.buildIntrinsic(Intrinsic::amdgcn_div_fixup, {S64}, false) + .addDef(Res) + .addUse(Fmas.getReg(0)) + .addUse(RHS) + .addUse(LHS) + .setMIFlags(Flags); + + MI.eraseFromParent(); + return true; +} + bool AMDGPULegalizerInfo::legalizeFDIVFastIntrin(MachineInstr &MI, MachineRegisterInfo &MRI, MachineIRBuilder &B) const { |