diff options
Diffstat (limited to 'llvm/lib')
| -rw-r--r-- | llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp | 37 |
| -rw-r--r-- | llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h | 2 |
2 files changed, 39 insertions, 0 deletions
diff --git a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
index 997f28a5c3c..c4ca42e6af3 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
@@ -284,6 +284,7 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST,
   getActionDefinitionsBuilder({G_SITOFP, G_UITOFP})
     .legalFor({{S32, S32}, {S64, S32}})
     .lowerFor({{S32, S64}})
+    .customFor({{S64, S64}})
     .scalarize(0);
 
   getActionDefinitionsBuilder({G_FPTOSI, G_FPTOUI})
@@ -694,6 +695,10 @@ bool AMDGPULegalizerInfo::legalizeCustom(MachineInstr &MI,
     return legalizeFceil(MI, MRI, MIRBuilder);
   case TargetOpcode::G_INTRINSIC_TRUNC:
     return legalizeIntrinsicTrunc(MI, MRI, MIRBuilder);
+  case TargetOpcode::G_SITOFP:
+    return legalizeITOFP(MI, MRI, MIRBuilder, true);
+  case TargetOpcode::G_UITOFP:
+    return legalizeITOFP(MI, MRI, MIRBuilder, false);
   default:
     return false;
   }
@@ -968,3 +973,35 @@ bool AMDGPULegalizerInfo::legalizeIntrinsicTrunc(
   B.buildSelect(MI.getOperand(0).getReg(), ExpGt51, Src, Tmp1);
   return true;
 }
+
+bool AMDGPULegalizerInfo::legalizeITOFP(
+  MachineInstr &MI, MachineRegisterInfo &MRI,
+  MachineIRBuilder &B, bool Signed) const {
+  B.setInstr(MI);
+
+  unsigned Dst = MI.getOperand(0).getReg();
+  unsigned Src = MI.getOperand(1).getReg();
+
+  const LLT S64 = LLT::scalar(64);
+  const LLT S32 = LLT::scalar(32);
+
+  assert(MRI.getType(Src) == S64 && MRI.getType(Dst) == S64);
+
+  auto Unmerge = B.buildUnmerge({S32, S32}, Src);
+
+  auto CvtHi = Signed ?
+    B.buildSITOFP(S64, Unmerge.getReg(1)) :
+    B.buildUITOFP(S64, Unmerge.getReg(1));
+
+  auto CvtLo = B.buildUITOFP(S64, Unmerge.getReg(0));
+
+  auto ThirtyTwo = B.buildConstant(S32, 32);
+  auto LdExp = B.buildIntrinsic(Intrinsic::amdgcn_ldexp, {S64}, false)
+    .addUse(CvtHi.getReg(0))
+    .addUse(ThirtyTwo.getReg(0));
+
+  // TODO: Should this propagate fast-math-flags?
+  B.buildFAdd(Dst, LdExp, CvtLo);
+  MI.eraseFromParent();
+  return true;
+}
diff --git a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h
index 7dac7a2f010..306a5e5011c 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h
@@ -44,6 +44,8 @@ public:
                      MachineIRBuilder &MIRBuilder) const;
   bool legalizeIntrinsicTrunc(MachineInstr &MI, MachineRegisterInfo &MRI,
                               MachineIRBuilder &MIRBuilder) const;
+  bool legalizeITOFP(MachineInstr &MI, MachineRegisterInfo &MRI,
+                     MachineIRBuilder &MIRBuilder, bool Signed) const;
 };
 } // End llvm namespace.
 #endif

