diff options
| author | Matt Arsenault <Matthew.Arsenault@amd.com> | 2019-05-17 12:20:01 +0000 |
|---|---|---|
| committer | Matt Arsenault <Matthew.Arsenault@amd.com> | 2019-05-17 12:20:01 +0000 |
| commit | 6aebcd5499886d6e368e7c0623f5684add898f1d (patch) | |
| tree | 3a640f3b65b0a61a469a41e85e5e42fc1bfd008a /llvm/lib | |
| parent | 6aafc5e19d30a517f39a8ff7d7705d3b0b39c3ec (diff) | |
| download | bcm5719-llvm-6aebcd5499886d6e368e7c0623f5684add898f1d.tar.gz bcm5719-llvm-6aebcd5499886d6e368e7c0623f5684add898f1d.zip | |
AMDGPU/GlobalISel: Legalize G_INTRINSIC_TRUNC
llvm-svn: 361027
Diffstat (limited to 'llvm/lib')
| -rw-r--r-- | llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp | 87 | ||||
| -rw-r--r-- | llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h | 3 |
2 files changed, 86 insertions, 4 deletions
diff --git a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
index 64ae29ec998..b0c01255c1f 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
@@ -289,17 +289,17 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST,
     .legalFor({{S32, S32}, {S32, S64}})
     .scalarize(0);
 
-  getActionDefinitionsBuilder({G_INTRINSIC_TRUNC, G_INTRINSIC_ROUND})
+  getActionDefinitionsBuilder(G_INTRINSIC_ROUND)
     .legalFor({S32, S64})
     .scalarize(0);
 
   if (ST.getGeneration() >= AMDGPUSubtarget::SEA_ISLANDS) {
-    getActionDefinitionsBuilder(G_FRINT)
+    getActionDefinitionsBuilder({G_INTRINSIC_TRUNC, G_FRINT})
       .legalFor({S32, S64})
       .clampScalar(0, S32, S64)
       .scalarize(0);
   } else {
-    getActionDefinitionsBuilder(G_FRINT)
+    getActionDefinitionsBuilder({G_INTRINSIC_TRUNC, G_FRINT})
       .legalFor({S32})
       .customFor({S64})
       .clampScalar(0, S32, S64)
@@ -689,6 +689,8 @@ bool AMDGPULegalizerInfo::legalizeCustom(MachineInstr &MI,
     return legalizeAddrSpaceCast(MI, MRI, MIRBuilder);
   case TargetOpcode::G_FRINT:
     return legalizeFrint(MI, MRI, MIRBuilder);
+  case TargetOpcode::G_INTRINSIC_TRUNC:
+    return legalizeIntrinsicTrunc(MI, MRI, MIRBuilder);
   default:
     return false;
   }
@@ -872,3 +874,82 @@ bool AMDGPULegalizerInfo::legalizeFrint(
   MIRBuilder.buildSelect(MI.getOperand(0).getReg(), Cond, Src, Tmp2);
   return true;
 }
+
+/// Build the unbiased exponent of an f64 from \p Hi, the register holding the
+/// upper 32 bits of the value.
+///
+/// Emits an amdgcn.ubfe on \p Hi with offset FractBits - 32 and width ExpBits,
+/// then subtracts the constant 1023 from the extracted field.
+static MachineInstrBuilder extractF64Exponent(unsigned Hi,
+                                              MachineIRBuilder &B) {
+  const unsigned FractBits = 52;
+  const unsigned ExpBits = 11;
+  LLT S32 = LLT::scalar(32);
+
+  auto Const0 = B.buildConstant(S32, FractBits - 32);
+  auto Const1 = B.buildConstant(S32, ExpBits);
+
+  auto ExpPart = B.buildIntrinsic(Intrinsic::amdgcn_ubfe, {S32}, false)
+    .addUse(Hi)
+    .addUse(Const0.getReg(0))
+    .addUse(Const1.getReg(0));
+
+  return B.buildSub(S32, ExpPart, B.buildConstant(S32, 1023));
+}
+
+/// Custom-legalize a 64-bit scalar G_INTRINSIC_TRUNC by clearing the fraction
+/// bits of the source that the unbiased exponent places below the point.
+///
+/// The result is chosen by two selects on the exponent:
+///   Exp > 51 -> the source, unchanged;
+///   Exp < 0  -> only the sign bit, i.e. a signed zero;
+///   else     -> Src & ~(FractMask >> Exp).
+/// Always returns true.
+/// NOTE(review): \p MI is not erased here, same as legalizeFrint above;
+/// confirm the caller removes the original instruction.
+bool AMDGPULegalizerInfo::legalizeIntrinsicTrunc(
+  MachineInstr &MI, MachineRegisterInfo &MRI,
+  MachineIRBuilder &B) const {
+  B.setInstr(MI);
+
+  unsigned Src = MI.getOperand(1).getReg();
+  LLT Ty = MRI.getType(Src);
+  assert(Ty.isScalar() && Ty.getSizeInBits() == 64);
+
+  LLT S1 = LLT::scalar(1);
+  LLT S32 = LLT::scalar(32);
+  LLT S64 = LLT::scalar(64);
+
+  // TODO: Should this use extract since the low half is unused?
+  auto Unmerge = B.buildUnmerge({S32, S32}, Src);
+  unsigned Hi = Unmerge.getReg(1);
+
+  // Extract the upper half, since this is where we will find the sign and
+  // exponent.
+  auto Exp = extractF64Exponent(Hi, B);
+
+  const unsigned FractBits = 52;
+
+  // Extract the sign bit.
+  const auto SignBitMask = B.buildConstant(S32, UINT32_C(1) << 31);
+  auto SignBit = B.buildAnd(S32, Hi, SignBitMask);
+
+  const auto FractMask = B.buildConstant(S64, (UINT64_C(1) << FractBits) - 1);
+
+  const auto Zero32 = B.buildConstant(S32, 0);
+
+  // Extend back to 64-bits.
+  auto SignBit64 = B.buildMerge(S64, {Zero32.getReg(0), SignBit.getReg(0)});
+
+  auto Shr = B.buildAShr(S64, FractMask, Exp);
+  auto Not = B.buildNot(S64, Shr);
+  auto Tmp0 = B.buildAnd(S64, Src, Not);
+  auto FiftyOne = B.buildConstant(S32, FractBits - 1);
+
+  auto ExpLt0 = B.buildICmp(CmpInst::ICMP_SLT, S1, Exp, Zero32);
+  auto ExpGt51 = B.buildICmp(CmpInst::ICMP_SGT, S1, Exp, FiftyOne);
+
+  auto Tmp1 = B.buildSelect(S64, ExpLt0, SignBit64, Tmp0);
+  B.buildSelect(MI.getOperand(0).getReg(), ExpGt51, Src, Tmp1);
+  return true;
+}
diff --git a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h
index 13ac2408a2e..3591f4f35de 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h
@@ -40,7 +40,8 @@ public:
                              MachineIRBuilder &MIRBuilder) const;
   bool legalizeFrint(MachineInstr &MI, MachineRegisterInfo &MRI,
                      MachineIRBuilder &MIRBuilder) const;
-
+  bool legalizeIntrinsicTrunc(MachineInstr &MI, MachineRegisterInfo &MRI,
+                              MachineIRBuilder &MIRBuilder) const;
 };
 } // End llvm namespace.
 #endif

