diff options
Diffstat (limited to 'llvm/lib')
-rw-r--r-- | llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp | 5 | ||||
-rw-r--r-- | llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp | 39 | ||||
-rw-r--r-- | llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h | 2 |
3 files changed, 44 insertions, 2 deletions
diff --git a/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp b/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp index df770f6664c..1c5b6fc3ed8 100644 --- a/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp +++ b/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp @@ -698,8 +698,9 @@ MachineInstrBuilder MachineIRBuilder::buildTrunc(const DstOp &Res, } MachineInstrBuilder MachineIRBuilder::buildFPTrunc(const DstOp &Res, - const SrcOp &Op) { - return buildInstr(TargetOpcode::G_FPTRUNC, Res, Op); + const SrcOp &Op, + Optional<unsigned> Flags) { + return buildInstr(TargetOpcode::G_FPTRUNC, Res, Op, Flags); } MachineInstrBuilder MachineIRBuilder::buildICmp(CmpInst::Predicate Pred, diff --git a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp index 5aba35a19ce..200946f2c7d 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp @@ -1823,10 +1823,16 @@ bool AMDGPULegalizerInfo::legalizeFDIV(MachineInstr &MI, MachineRegisterInfo &MRI, MachineIRBuilder &B) const { B.setInstr(MI); + Register Dst = MI.getOperand(0).getReg(); + LLT DstTy = MRI.getType(Dst); + LLT S16 = LLT::scalar(16); if (legalizeFastUnsafeFDIV(MI, MRI, B)) return true; + if (DstTy == S16) + return legalizeFDIV16(MI, MRI, B); + return false; } @@ -1890,6 +1896,39 @@ bool AMDGPULegalizerInfo::legalizeFastUnsafeFDIV(MachineInstr &MI, return false; } +bool AMDGPULegalizerInfo::legalizeFDIV16(MachineInstr &MI, + MachineRegisterInfo &MRI, + MachineIRBuilder &B) const { + B.setInstr(MI); + Register Res = MI.getOperand(0).getReg(); + Register LHS = MI.getOperand(1).getReg(); + Register RHS = MI.getOperand(2).getReg(); + + uint16_t Flags = MI.getFlags(); + + LLT S16 = LLT::scalar(16); + LLT S32 = LLT::scalar(32); + + auto LHSExt = B.buildFPExt(S32, LHS, Flags); + auto RHSExt = B.buildFPExt(S32, RHS, Flags); + + auto RCP = B.buildIntrinsic(Intrinsic::amdgcn_rcp, {S32}, false) + .addUse(RHSExt.getReg(0)) + .setMIFlags(Flags); + + auto QUOT = B.buildFMul(S32, LHSExt, RCP, Flags); + auto RDst = B.buildFPTrunc(S16, QUOT, Flags); + + B.buildIntrinsic(Intrinsic::amdgcn_div_fixup, Res, false) + .addUse(RDst.getReg(0)) + .addUse(RHS) + .addUse(LHS) + .setMIFlags(Flags); + + MI.eraseFromParent(); + return true; +} + bool AMDGPULegalizerInfo::legalizeFDIVFastIntrin(MachineInstr &MI, MachineRegisterInfo &MRI, MachineIRBuilder &B) const { diff --git a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h index d0fba23a868..357142d9f3d 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h +++ b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h @@ -83,6 +83,8 @@ public: bool legalizeFDIV(MachineInstr &MI, MachineRegisterInfo &MRI, MachineIRBuilder &B) const; + bool legalizeFDIV16(MachineInstr &MI, MachineRegisterInfo &MRI, + MachineIRBuilder &B) const; bool legalizeFastUnsafeFDIV(MachineInstr &MI, MachineRegisterInfo &MRI, MachineIRBuilder &B) const; bool legalizeFDIVFastIntrin(MachineInstr &MI, MachineRegisterInfo &MRI, |