diff options
author | Austin Kerbow <Austin.Kerbow@amd.com> | 2019-07-30 18:49:16 +0000 |
---|---|---|
committer | Austin Kerbow <Austin.Kerbow@amd.com> | 2019-07-30 18:49:16 +0000 |
commit | c99f62e3136c620fc87c9ec8cb25f7a22305b4d6 (patch) | |
tree | 1de05fc0cc0c655c8246bea8bf6ae300160c7007 /llvm/lib | |
parent | 52b87ac32f5726c83a108b6129621813d1e1fd2a (diff) | |
download | bcm5719-llvm-c99f62e3136c620fc87c9ec8cb25f7a22305b4d6.tar.gz bcm5719-llvm-c99f62e3136c620fc87c9ec8cb25f7a22305b4d6.zip |
[AMDGPU/GlobalISel] Add llvm.amdgcn.fdiv.fast legalization.
Reviewers: arsenm
Reviewed By: arsenm
Subscribers: volkan, kzhuravl, jvesely, wdng, nhaehnle, yaxunl, rovka, dstuttard, tpr, t-tye, hiraditya, Petar.Avramovic, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D64966
llvm-svn: 367344
Diffstat (limited to 'llvm/lib')
-rw-r--r-- | llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp | 10 | ||||
-rw-r--r-- | llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp | 38 | ||||
-rw-r--r-- | llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h | 3 | ||||
-rw-r--r-- | llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp | 1 |
4 files changed, 47 insertions, 5 deletions
diff --git a/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp b/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp index b7a73326b85..34355f58399 100644 --- a/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp +++ b/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp @@ -697,17 +697,19 @@ MachineInstrBuilder MachineIRBuilder::buildICmp(CmpInst::Predicate Pred, MachineInstrBuilder MachineIRBuilder::buildFCmp(CmpInst::Predicate Pred, const DstOp &Res, const SrcOp &Op0, - const SrcOp &Op1) { + const SrcOp &Op1, + Optional<unsigned> Flags) { - return buildInstr(TargetOpcode::G_FCMP, Res, {Pred, Op0, Op1}); + return buildInstr(TargetOpcode::G_FCMP, Res, {Pred, Op0, Op1}, Flags); } MachineInstrBuilder MachineIRBuilder::buildSelect(const DstOp &Res, const SrcOp &Tst, const SrcOp &Op0, - const SrcOp &Op1) { + const SrcOp &Op1, + Optional<unsigned> Flags) { - return buildInstr(TargetOpcode::G_SELECT, {Res}, {Tst, Op0, Op1}); + return buildInstr(TargetOpcode::G_SELECT, {Res}, {Tst, Op0, Op1}, Flags); } MachineInstrBuilder diff --git a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp index aff85b118f5..139992c5161 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp @@ -1274,6 +1274,42 @@ bool AMDGPULegalizerInfo::legalizePreloadedArgIntrin( return false; } +bool AMDGPULegalizerInfo::legalizeFDIVFast(MachineInstr &MI, + MachineRegisterInfo &MRI, + MachineIRBuilder &B) const { + B.setInstr(MI); + Register Res = MI.getOperand(0).getReg(); + Register LHS = MI.getOperand(2).getReg(); + Register RHS = MI.getOperand(3).getReg(); + uint16_t Flags = MI.getFlags(); + + LLT S32 = LLT::scalar(32); + LLT S1 = LLT::scalar(1); + + auto Abs = B.buildFAbs(S32, RHS, Flags); + const APFloat C0Val(1.0f); + + auto C0 = B.buildConstant(S32, 0x6f800000); + auto C1 = B.buildConstant(S32, 0x2f800000); + auto C2 = B.buildConstant(S32, FloatToBits(1.0f)); + + auto CmpRes = B.buildFCmp(CmpInst::FCMP_OGT, S1, Abs, C0, Flags); + auto Sel = B.buildSelect(S32, CmpRes, C1, C2, Flags); + + auto Mul0 = B.buildFMul(S32, RHS, Sel, Flags); + + auto RCP = B.buildIntrinsic(Intrinsic::amdgcn_rcp, {S32}, false) + .addUse(Mul0.getReg(0)) + .setMIFlags(Flags); + + auto Mul1 = B.buildFMul(S32, LHS, RCP, Flags); + + B.buildFMul(Res, Sel, Mul1, Flags); + + MI.eraseFromParent(); + return true; +} + bool AMDGPULegalizerInfo::legalizeImplicitArgPtr(MachineInstr &MI, MachineRegisterInfo &MRI, MachineIRBuilder &B) const { @@ -1388,6 +1424,8 @@ bool AMDGPULegalizerInfo::legalizeIntrinsic(MachineInstr &MI, case Intrinsic::amdgcn_dispatch_id: return legalizePreloadedArgIntrin(MI, MRI, B, AMDGPUFunctionArgInfo::DISPATCH_ID); + case Intrinsic::amdgcn_fdiv_fast: + return legalizeFDIVFast(MI, MRI, B); default: return true; } diff --git a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h index 3f1cc1d265d..018ae31fa3d 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h +++ b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h @@ -65,6 +65,9 @@ public: MachineInstr &MI, MachineRegisterInfo &MRI, MachineIRBuilder &B, AMDGPUFunctionArgInfo::PreloadedValue ArgType) const; + bool legalizeFDIVFast(MachineInstr &MI, MachineRegisterInfo &MRI, + MachineIRBuilder &B) const; + bool legalizeImplicitArgPtr(MachineInstr &MI, MachineRegisterInfo &MRI, MachineIRBuilder &B) const; bool legalizeIntrinsic(MachineInstr &MI, MachineRegisterInfo &MRI, diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp index 9b454665030..7803b68bf33 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp @@ -1958,7 +1958,6 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const { case Intrinsic::amdgcn_udot4: case Intrinsic::amdgcn_sdot8: case Intrinsic::amdgcn_udot8: - case Intrinsic::amdgcn_fdiv_fast: case Intrinsic::amdgcn_wwm: case Intrinsic::amdgcn_wqm: return getDefaultMappingVOP(MI); |