summaryrefslogtreecommitdiffstats
path: root/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
diff options
context:
space:
mode:
author: Austin Kerbow <Austin.Kerbow@amd.com> 2019-10-22 17:39:26 -0700
committer: Austin Kerbow <Austin.Kerbow@amd.com> 2019-10-25 11:07:17 -0700
commit: c35b358b741b942aa89acb1fe0d22d4126287493 (patch)
tree: 8e7510270d705295a9863dcc849609778a2b8d84 /llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
parent: 118ceea5c364bd69c52b2a24acd543c28fb35fcb (diff)
download: bcm5719-llvm-c35b358b741b942aa89acb1fe0d22d4126287493.tar.gz
download: bcm5719-llvm-c35b358b741b942aa89acb1fe0d22d4126287493.zip
AMDGPU/GlobalISel: Legalize FDIV16
Reviewers: arsenm
Reviewed By: arsenm
Subscribers: kzhuravl, jvesely, wdng, nhaehnle, yaxunl, rovka, dstuttard, tpr, t-tye, hiraditya, volkan, Petar.Avramovic, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D69347
Diffstat (limited to 'llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp')
-rw-r--r-- llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp 39
1 files changed, 39 insertions, 0 deletions
diff --git a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
index 5aba35a19ce..200946f2c7d 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
@@ -1823,10 +1823,16 @@ bool AMDGPULegalizerInfo::legalizeFDIV(MachineInstr &MI,
MachineRegisterInfo &MRI,
MachineIRBuilder &B) const {
B.setInstr(MI);
+ Register Dst = MI.getOperand(0).getReg();
+ LLT DstTy = MRI.getType(Dst);
+ LLT S16 = LLT::scalar(16);
if (legalizeFastUnsafeFDIV(MI, MRI, B))
return true;
+ if (DstTy == S16)
+ return legalizeFDIV16(MI, MRI, B);
+
return false;
}
@@ -1890,6 +1896,39 @@ bool AMDGPULegalizerInfo::legalizeFastUnsafeFDIV(MachineInstr &MI,
return false;
}
+/// Replace the instruction \p MI with a sequence built at 32-bit width.
+///
+/// Operand 0 of \p MI is the destination; operands 1 and 2 are the numerator
+/// and denominator. The emitted sequence, in order, is:
+///   1. G_FPEXT of the numerator and of the denominator to s32,
+///   2. the amdgcn_rcp intrinsic on the extended denominator,
+///   3. G_FMUL of the extended numerator by that result,
+///   4. G_FPTRUNC of the product to s16,
+///   5. the amdgcn_div_fixup intrinsic, defining the original destination and
+///      taking the truncated product, the original denominator and the
+///      original numerator as operands.
+/// Every emitted instruction carries the flags read from \p MI, and \p MI is
+/// erased afterwards.
+///
+/// \p MRI is not used by this function.
+///
+/// \returns true unconditionally.
+bool AMDGPULegalizerInfo::legalizeFDIV16(MachineInstr &MI,
+                                         MachineRegisterInfo &MRI,
+                                         MachineIRBuilder &B) const {
+  const LLT HalfTy = LLT::scalar(16);
+  const LLT FloatTy = LLT::scalar(32);
+
+  // New instructions are inserted at MI's position.
+  B.setInstr(MI);
+
+  const uint16_t MIFlags = MI.getFlags();
+  Register Quotient = MI.getOperand(0).getReg();
+  Register Numer = MI.getOperand(1).getReg();
+  Register Denom = MI.getOperand(2).getReg();
+
+  // Widen both inputs so the reciprocal and multiply are done on s32 values.
+  auto NumerWide = B.buildFPExt(FloatTy, Numer, MIFlags);
+  auto DenomWide = B.buildFPExt(FloatTy, Denom, MIFlags);
+
+  auto Recip = B.buildIntrinsic(Intrinsic::amdgcn_rcp, {FloatTy}, false);
+  Recip.addUse(DenomWide.getReg(0));
+  Recip.setMIFlags(MIFlags);
+
+  auto Product = B.buildFMul(FloatTy, NumerWide, Recip, MIFlags);
+  auto Narrowed = B.buildFPTrunc(HalfTy, Product, MIFlags);
+
+  // div_fixup receives the original (unextended) s16 denominator and
+  // numerator, in that order, after the approximate quotient.
+  auto Fixup = B.buildIntrinsic(Intrinsic::amdgcn_div_fixup, Quotient, false);
+  Fixup.addUse(Narrowed.getReg(0));
+  Fixup.addUse(Denom);
+  Fixup.addUse(Numer);
+  Fixup.setMIFlags(MIFlags);
+
+  MI.eraseFromParent();
+  return true;
+}
+
bool AMDGPULegalizerInfo::legalizeFDIVFastIntrin(MachineInstr &MI,
MachineRegisterInfo &MRI,
MachineIRBuilder &B) const {
OpenPOWER on IntegriCloud