summary | refs | log | tree | commit | diff | stats
path: root/llvm/lib
diff options
context:
space:
mode:
author: Austin Kerbow <Austin.Kerbow@amd.com> 2019-07-30 18:49:16 +0000
committer: Austin Kerbow <Austin.Kerbow@amd.com> 2019-07-30 18:49:16 +0000
commit: c99f62e3136c620fc87c9ec8cb25f7a22305b4d6 (patch)
tree: 1de05fc0cc0c655c8246bea8bf6ae300160c7007 /llvm/lib
parent: 52b87ac32f5726c83a108b6129621813d1e1fd2a (diff)
download: bcm5719-llvm-c99f62e3136c620fc87c9ec8cb25f7a22305b4d6.tar.gz
bcm5719-llvm-c99f62e3136c620fc87c9ec8cb25f7a22305b4d6.zip
[AMDGPU/GlobalISel] Add llvm.amdgcn.fdiv.fast legalization.
Reviewers: arsenm
Reviewed By: arsenm
Subscribers: volkan, kzhuravl, jvesely, wdng, nhaehnle, yaxunl, rovka, dstuttard, tpr, t-tye, hiraditya, Petar.Avramovic, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D64966
llvm-svn: 367344
Diffstat (limited to 'llvm/lib')
-rw-r--r-- llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp | 10
-rw-r--r-- llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp | 38
-rw-r--r-- llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h | 3
-rw-r--r-- llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp | 1
4 files changed, 47 insertions, 5 deletions
diff --git a/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp b/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp
index b7a73326b85..34355f58399 100644
--- a/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp
@@ -697,17 +697,19 @@ MachineInstrBuilder MachineIRBuilder::buildICmp(CmpInst::Predicate Pred,
MachineInstrBuilder MachineIRBuilder::buildFCmp(CmpInst::Predicate Pred,
const DstOp &Res,
const SrcOp &Op0,
- const SrcOp &Op1) {
+ const SrcOp &Op1,
+ Optional<unsigned> Flags) {
- return buildInstr(TargetOpcode::G_FCMP, Res, {Pred, Op0, Op1});
+ return buildInstr(TargetOpcode::G_FCMP, Res, {Pred, Op0, Op1}, Flags);
}
MachineInstrBuilder MachineIRBuilder::buildSelect(const DstOp &Res,
const SrcOp &Tst,
const SrcOp &Op0,
- const SrcOp &Op1) {
+ const SrcOp &Op1,
+ Optional<unsigned> Flags) {
- return buildInstr(TargetOpcode::G_SELECT, {Res}, {Tst, Op0, Op1});
+ return buildInstr(TargetOpcode::G_SELECT, {Res}, {Tst, Op0, Op1}, Flags);
}
MachineInstrBuilder
diff --git a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
index aff85b118f5..139992c5161 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
@@ -1274,6 +1274,42 @@ bool AMDGPULegalizerInfo::legalizePreloadedArgIntrin(
return false;
}
+bool AMDGPULegalizerInfo::legalizeFDIVFast(MachineInstr &MI, // Expands MI into: Res = Sel * (LHS * rcp(RHS * Sel)),
+ MachineRegisterInfo &MRI, // where Sel = (|RHS| > 2^96) ? 2^-32 : 1.0.
+ MachineIRBuilder &B) const { // Erases MI and always returns true. MRI is not referenced in the body.
+ B.setInstr(MI); // NOTE(review): presumably makes MI the builder's insertion point -- confirm
+ Register Res = MI.getOperand(0).getReg(); // destination of the final multiply
+ Register LHS = MI.getOperand(2).getReg(); // numerator; operand 1 is skipped (presumably the intrinsic ID -- confirm)
+ Register RHS = MI.getOperand(3).getReg(); // denominator
+ uint16_t Flags = MI.getFlags(); // MI's flags are forwarded to the fabs/fcmp/select/fmul/rcp built below
+ // Scalar types for the values built below: 32-bit data, 1-bit compare result.
+ LLT S32 = LLT::scalar(32);
+ LLT S1 = LLT::scalar(1);
+ // Everything below is built as S32 only; no other width is handled in this function.
+ auto Abs = B.buildFAbs(S32, RHS, Flags); // |RHS|
+ const APFloat C0Val(1.0f); // NOTE(review): C0Val is not referenced anywhere else in this function
+ // S32 constants carrying float bit patterns; they feed the float compare/select below.
+ auto C0 = B.buildConstant(S32, 0x6f800000); // bit pattern of 2^96 as IEEE-754 binary32
+ auto C1 = B.buildConstant(S32, 0x2f800000); // bit pattern of 2^-32 as IEEE-754 binary32
+ auto C2 = B.buildConstant(S32, FloatToBits(1.0f)); // bit pattern of 1.0f
+ // Sel = (|RHS| > C0) ? C1 : C2, using an ordered greater-than compare.
+ auto CmpRes = B.buildFCmp(CmpInst::FCMP_OGT, S1, Abs, C0, Flags);
+ auto Sel = B.buildSelect(S32, CmpRes, C1, C2, Flags);
+ // Pre-scale the denominator by Sel (presumably to keep rcp in range for very large |RHS| -- TODO confirm).
+ auto Mul0 = B.buildFMul(S32, RHS, Sel, Flags);
+ // amdgcn_rcp is applied to the scaled denominator and carries the same flags.
+ auto RCP = B.buildIntrinsic(Intrinsic::amdgcn_rcp, {S32}, false)
+ .addUse(Mul0.getReg(0))
+ .setMIFlags(Flags);
+ // Mul1 = LHS * RCP.
+ auto Mul1 = B.buildFMul(S32, LHS, RCP, Flags);
+ // Res = Sel * Mul1; multiplying by Sel again cancels the pre-scaling applied to RHS.
+ B.buildFMul(Res, Sel, Mul1, Flags);
+ // MI is removed only after its replacement sequence has been built.
+ MI.eraseFromParent();
+ return true;
+}
+
bool AMDGPULegalizerInfo::legalizeImplicitArgPtr(MachineInstr &MI,
MachineRegisterInfo &MRI,
MachineIRBuilder &B) const {
@@ -1388,6 +1424,8 @@ bool AMDGPULegalizerInfo::legalizeIntrinsic(MachineInstr &MI,
case Intrinsic::amdgcn_dispatch_id:
return legalizePreloadedArgIntrin(MI, MRI, B,
AMDGPUFunctionArgInfo::DISPATCH_ID);
+ case Intrinsic::amdgcn_fdiv_fast:
+ return legalizeFDIVFast(MI, MRI, B);
default:
return true;
}
diff --git a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h
index 3f1cc1d265d..018ae31fa3d 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h
@@ -65,6 +65,9 @@ public:
MachineInstr &MI, MachineRegisterInfo &MRI, MachineIRBuilder &B,
AMDGPUFunctionArgInfo::PreloadedValue ArgType) const;
+ bool legalizeFDIVFast(MachineInstr &MI, MachineRegisterInfo &MRI,
+ MachineIRBuilder &B) const;
+
bool legalizeImplicitArgPtr(MachineInstr &MI, MachineRegisterInfo &MRI,
MachineIRBuilder &B) const;
bool legalizeIntrinsic(MachineInstr &MI, MachineRegisterInfo &MRI,
diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
index 9b454665030..7803b68bf33 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
@@ -1958,7 +1958,6 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
case Intrinsic::amdgcn_udot4:
case Intrinsic::amdgcn_sdot8:
case Intrinsic::amdgcn_udot8:
- case Intrinsic::amdgcn_fdiv_fast:
case Intrinsic::amdgcn_wwm:
case Intrinsic::amdgcn_wqm:
return getDefaultMappingVOP(MI);
OpenPOWER on IntegriCloud