diff options
author | Austin Kerbow <Austin.Kerbow@amd.com> | 2019-10-22 17:39:26 -0700 |
---|---|---|
committer | Austin Kerbow <Austin.Kerbow@amd.com> | 2019-10-25 11:07:17 -0700 |
commit | c35b358b741b942aa89acb1fe0d22d4126287493 (patch) | |
tree | 8e7510270d705295a9863dcc849609778a2b8d84 /llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp | |
parent | 118ceea5c364bd69c52b2a24acd543c28fb35fcb (diff) | |
download | bcm5719-llvm-c35b358b741b942aa89acb1fe0d22d4126287493.tar.gz bcm5719-llvm-c35b358b741b942aa89acb1fe0d22d4126287493.zip |
AMDGPU/GlobalISel: Legalize FDIV16
Reviewers: arsenm
Reviewed By: arsenm
Subscribers: kzhuravl, jvesely, wdng, nhaehnle, yaxunl, rovka, dstuttard, tpr, t-tye, hiraditya, volkan, Petar.Avramovic, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D69347
Diffstat (limited to 'llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp')
-rw-r--r-- | llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp | 39 |
1 file changed, 39 insertions, 0 deletions
```diff
diff --git a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
index 5aba35a19ce..200946f2c7d 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
@@ -1823,10 +1823,16 @@ bool AMDGPULegalizerInfo::legalizeFDIV(MachineInstr &MI,
                                        MachineRegisterInfo &MRI,
                                        MachineIRBuilder &B) const {
   B.setInstr(MI);
+  Register Dst = MI.getOperand(0).getReg();
+  LLT DstTy = MRI.getType(Dst);
+  LLT S16 = LLT::scalar(16);
 
   if (legalizeFastUnsafeFDIV(MI, MRI, B))
     return true;
 
+  if (DstTy == S16)
+    return legalizeFDIV16(MI, MRI, B);
+
   return false;
 }
 
@@ -1890,6 +1896,39 @@ bool AMDGPULegalizerInfo::legalizeFastUnsafeFDIV(MachineInstr &MI,
   return false;
 }
 
+bool AMDGPULegalizerInfo::legalizeFDIV16(MachineInstr &MI,
+                                         MachineRegisterInfo &MRI,
+                                         MachineIRBuilder &B) const {
+  B.setInstr(MI);
+  Register Res = MI.getOperand(0).getReg();
+  Register LHS = MI.getOperand(1).getReg();
+  Register RHS = MI.getOperand(2).getReg();
+
+  uint16_t Flags = MI.getFlags();
+
+  LLT S16 = LLT::scalar(16);
+  LLT S32 = LLT::scalar(32);
+
+  auto LHSExt = B.buildFPExt(S32, LHS, Flags);
+  auto RHSExt = B.buildFPExt(S32, RHS, Flags);
+
+  auto RCP = B.buildIntrinsic(Intrinsic::amdgcn_rcp, {S32}, false)
+    .addUse(RHSExt.getReg(0))
+    .setMIFlags(Flags);
+
+  auto QUOT = B.buildFMul(S32, LHSExt, RCP, Flags);
+  auto RDst = B.buildFPTrunc(S16, QUOT, Flags);
+
+  B.buildIntrinsic(Intrinsic::amdgcn_div_fixup, Res, false)
+    .addUse(RDst.getReg(0))
+    .addUse(RHS)
+    .addUse(LHS)
+    .setMIFlags(Flags);
+
+  MI.eraseFromParent();
+  return true;
+}
+
 bool AMDGPULegalizerInfo::legalizeFDIVFastIntrin(MachineInstr &MI,
                                                  MachineRegisterInfo &MRI,
                                                  MachineIRBuilder &B) const {
```