summary refs log tree commit diff stats
path: root/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
diff options
context:
space:
mode:
author Austin Kerbow <Austin.Kerbow@amd.com> 2019-11-17 16:43:59 -0800
committer Austin Kerbow <Austin.Kerbow@amd.com> 2019-11-19 21:02:27 -0800
commit f3225f2abe78d8a25ee5deea4265b447e7b7d5ee (patch)
tree f3f11e1628dc5abb866202ce14d86fe87b56763a /llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
parent 47feae5dd61d891d4c1382b9784738111b4f9396 (diff)
download bcm5719-llvm-f3225f2abe78d8a25ee5deea4265b447e7b7d5ee.tar.gz
bcm5719-llvm-f3225f2abe78d8a25ee5deea4265b447e7b7d5ee.zip
AMDGPU/GlobalISel: Legalize FDIV64
Reviewers: arsenm
Reviewed By: arsenm
Subscribers: kzhuravl, jvesely, wdng, nhaehnle, yaxunl, rovka, dstuttard, tpr, t-tye, hiraditya, Petar.Avramovic, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D70403
Diffstat (limited to 'llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp')
-rw-r--r-- llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp 85
1 file changed, 85 insertions, 0 deletions
diff --git a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
index b4538097a21..c21102fd611 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
@@ -1867,6 +1867,7 @@ bool AMDGPULegalizerInfo::legalizeFDIV(MachineInstr &MI,
LLT DstTy = MRI.getType(Dst);
LLT S16 = LLT::scalar(16);
LLT S32 = LLT::scalar(32);
+ LLT S64 = LLT::scalar(64);
if (legalizeFastUnsafeFDIV(MI, MRI, B))
return true;
@@ -1875,6 +1876,8 @@ bool AMDGPULegalizerInfo::legalizeFDIV(MachineInstr &MI,
return legalizeFDIV16(MI, MRI, B);
if (DstTy == S32)
return legalizeFDIV32(MI, MRI, B);
+ if (DstTy == S64)
+ return legalizeFDIV64(MI, MRI, B);
return false;
}
@@ -2072,6 +2075,88 @@ bool AMDGPULegalizerInfo::legalizeFDIV32(MachineInstr &MI,
return true;
}
+// Expand a 64-bit G_FDIV into the sequence
+//   div_scale -> rcp -> FMA refinement -> div_fmas -> div_fixup.
+//
+// \p MI is the division being legalized: operand 0 is the result, operand 1
+// the numerator and operand 2 the denominator. The replacement instructions
+// are built with \p B immediately before \p MI, each carrying MI's flags, and
+// \p MI is erased afterwards. Always returns true.
+bool AMDGPULegalizerInfo::legalizeFDIV64(MachineInstr &MI,
+                                         MachineRegisterInfo &MRI,
+                                         MachineIRBuilder &B) const {
+  B.setInstr(MI);
+  Register Res = MI.getOperand(0).getReg();
+  Register LHS = MI.getOperand(1).getReg();
+  Register RHS = MI.getOperand(2).getReg();
+
+  uint16_t Flags = MI.getFlags();
+
+  LLT S64 = LLT::scalar(64);
+  LLT S1 = LLT::scalar(1);
+
+  auto One = B.buildFConstant(S64, 1.0);
+
+  // amdgcn.div.scale takes (numerator, denominator, select), where the last
+  // operand is an immediate choosing which value is scaled: 0 = denominator,
+  // 1 = numerator. It must be an immediate, not a register use.
+  auto DivScale0 =
+      B.buildIntrinsic(Intrinsic::amdgcn_div_scale, {S64, S1}, false)
+          .addUse(LHS)
+          .addUse(RHS)
+          .addImm(0)
+          .setMIFlags(Flags);
+
+  auto NegDivScale0 = B.buildFNeg(S64, DivScale0.getReg(0), Flags);
+
+  // Initial estimate r ~= 1 / d for the scaled denominator d.
+  auto Rcp = B.buildIntrinsic(Intrinsic::amdgcn_rcp, {S64}, false)
+                 .addUse(DivScale0.getReg(0))
+                 .setMIFlags(Flags);
+
+  // Two Newton-Raphson steps on the reciprocal: e = 1 - d * r; r += r * e.
+  auto Fma0 = B.buildFMA(S64, NegDivScale0, Rcp, One, Flags);
+  auto Fma1 = B.buildFMA(S64, Rcp, Fma0, Rcp, Flags);
+  auto Fma2 = B.buildFMA(S64, NegDivScale0, Fma1, One, Flags);
+
+  auto DivScale1 =
+      B.buildIntrinsic(Intrinsic::amdgcn_div_scale, {S64, S1}, false)
+          .addUse(LHS)
+          .addUse(RHS)
+          .addImm(1)
+          .setMIFlags(Flags);
+
+  auto Fma3 = B.buildFMA(S64, Fma1, Fma2, Fma1, Flags);
+
+  // Quotient estimate q = n * r. The operands are floating point, so this must
+  // be an FP multiply (G_FMUL) rather than the integer G_MUL.
+  auto Mul = B.buildFMul(S64, DivScale1.getReg(0), Fma3, Flags);
+
+  // Residual n - d * q of the quotient estimate.
+  auto Fma4 = B.buildFMA(S64, NegDivScale0, Mul, DivScale1.getReg(0), Flags);
+
+  Register Scale;
+  if (!ST.hasUsableDivScaleConditionOutput()) {
+    // Workaround a hardware bug on SI where the condition output from div_scale
+    // is not usable.
+    //
+    // Recompute the flag instead: compare the high 32 bits of each original
+    // input with the high 32 bits of its div_scale result and XOR the two
+    // comparisons.
+    LLT S32 = LLT::scalar(32);
+
+    auto NumUnmerge = B.buildUnmerge(S32, LHS);
+    auto DenUnmerge = B.buildUnmerge(S32, RHS);
+    auto Scale0Unmerge = B.buildUnmerge(S32, DivScale0.getReg(0));
+    auto Scale1Unmerge = B.buildUnmerge(S32, DivScale1.getReg(0));
+
+    auto CmpNum = B.buildICmp(ICmpInst::ICMP_EQ, S1, NumUnmerge.getReg(1),
+                              Scale1Unmerge.getReg(1));
+    auto CmpDen = B.buildICmp(ICmpInst::ICMP_EQ, S1, DenUnmerge.getReg(1),
+                              Scale0Unmerge.getReg(1));
+    Scale = B.buildXor(S1, CmpNum, CmpDen).getReg(0);
+  } else {
+    Scale = DivScale1.getReg(1);
+  }
+
+  auto Fmas = B.buildIntrinsic(Intrinsic::amdgcn_div_fmas, {S64}, false)
+                  .addUse(Fma4.getReg(0))
+                  .addUse(Fma3.getReg(0))
+                  .addUse(Mul.getReg(0))
+                  .addUse(Scale)
+                  .setMIFlags(Flags);
+
+  // Define Res directly as the intrinsic's result. Adding it with addDef()
+  // after the intrinsic ID would put the def in the wrong operand position and
+  // leave an unused extra result register.
+  B.buildIntrinsic(Intrinsic::amdgcn_div_fixup, ArrayRef<Register>(Res), false)
+      .addUse(Fmas.getReg(0))
+      .addUse(RHS)
+      .addUse(LHS)
+      .setMIFlags(Flags);
+
+  MI.eraseFromParent();
+  return true;
+}
+
bool AMDGPULegalizerInfo::legalizeFDIVFastIntrin(MachineInstr &MI,
MachineRegisterInfo &MRI,
MachineIRBuilder &B) const {
OpenPOWER on IntegriCloud