diff options
author | Matt Arsenault <Matthew.Arsenault@amd.com> | 2017-02-22 23:27:53 +0000 |
---|---|---|
committer | Matt Arsenault <Matthew.Arsenault@amd.com> | 2017-02-22 23:27:53 +0000 |
commit | d5c6515b6864f9434cc0e75632903cb4bffa6d4a (patch) | |
tree | 05e40ac70520d55e7a02175caf4339d2a0fcc898 /llvm/lib/Target | |
parent | f2cce02eb2431752eb9ffc0d0c8c7484bdadd75f (diff) | |
download | bcm5719-llvm-d5c6515b6864f9434cc0e75632903cb4bffa6d4a.tar.gz bcm5719-llvm-d5c6515b6864f9434cc0e75632903cb4bffa6d4a.zip |
AMDGPU: Fold FP clamp as modifier bit
The manual is unclear on the details of this. It's not
clear to me if denormals are not allowed with clamp,
or if that is only omod. Not allowing denorms for
fp16 or fp64 isn't useful so I also question if that
is really a restriction. Same with whether this is valid
without IEEE mode enabled.
llvm-svn: 295905
Diffstat (limited to 'llvm/lib/Target')
-rw-r--r-- | llvm/lib/Target/AMDGPU/SIDefines.h | 4 | ||||
-rw-r--r-- | llvm/lib/Target/AMDGPU/SIFoldOperands.cpp | 76 | ||||
-rw-r--r-- | llvm/lib/Target/AMDGPU/SIInstrFormats.td | 5 | ||||
-rw-r--r-- | llvm/lib/Target/AMDGPU/SIInstrInfo.h | 8 | ||||
-rw-r--r-- | llvm/lib/Target/AMDGPU/SIInstrInfo.td | 1 | ||||
-rw-r--r-- | llvm/lib/Target/AMDGPU/VOPInstructions.td | 1 |
6 files changed, 89 insertions, 6 deletions
diff --git a/llvm/lib/Target/AMDGPU/SIDefines.h b/llvm/lib/Target/AMDGPU/SIDefines.h index 759a043d7e9..3c8a790ffb4 100644 --- a/llvm/lib/Target/AMDGPU/SIDefines.h +++ b/llvm/lib/Target/AMDGPU/SIDefines.h @@ -65,8 +65,8 @@ enum : uint64_t { SOPK_ZEXT = UINT64_C(1) << 38, SCALAR_STORE = UINT64_C(1) << 39, FIXED_SIZE = UINT64_C(1) << 40, - VOPAsmPrefer32Bit = UINT64_C(1) << 41 - + VOPAsmPrefer32Bit = UINT64_C(1) << 41, + HasFPClamp = UINT64_C(1) << 42 }; // v_cmp_class_* etc. use a 10-bit mask for what operation is checked. diff --git a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp index a5c0d4923d6..f223f57bfa3 100644 --- a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp +++ b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp @@ -66,6 +66,7 @@ public: MachineRegisterInfo *MRI; const SIInstrInfo *TII; const SIRegisterInfo *TRI; + const SISubtarget *ST; void foldOperand(MachineOperand &OpToFold, MachineInstr *UseMI, @@ -75,6 +76,9 @@ public: void foldInstOperand(MachineInstr &MI, MachineOperand &OpToFold) const; + const MachineOperand *isClamp(const MachineInstr &MI) const; + bool tryFoldClamp(MachineInstr &MI); + public: SIFoldOperands() : MachineFunctionPass(ID) { initializeSIFoldOperandsPass(*PassRegistry::getPassRegistry()); @@ -686,14 +690,75 @@ void SIFoldOperands::foldInstOperand(MachineInstr &MI, } } +const MachineOperand *SIFoldOperands::isClamp(const MachineInstr &MI) const { + unsigned Op = MI.getOpcode(); + switch (Op) { + case AMDGPU::V_MAX_F32_e64: + case AMDGPU::V_MAX_F16_e64: { + if (!TII->getNamedOperand(MI, AMDGPU::OpName::clamp)->getImm()) + return nullptr; + + // Make sure sources are identical. + const MachineOperand *Src0 = TII->getNamedOperand(MI, AMDGPU::OpName::src0); + const MachineOperand *Src1 = TII->getNamedOperand(MI, AMDGPU::OpName::src1); + if (!Src0->isReg() || Src0->getSubReg() != Src1->getSubReg() || + Src0->getSubReg() != AMDGPU::NoSubRegister) + return nullptr; + + // Can't fold up if we have modifiers. + if (TII->hasModifiersSet(MI, AMDGPU::OpName::src0_modifiers) || + TII->hasModifiersSet(MI, AMDGPU::OpName::src1_modifiers) || + TII->hasModifiersSet(MI, AMDGPU::OpName::omod)) + return nullptr; + return Src0; + } + default: + return nullptr; + } +} + +// We obviously have multiple uses in a clamp since the register is used twice +// in the same instruction. +static bool hasOneNonDBGUseInst(const MachineRegisterInfo &MRI, unsigned Reg) { + int Count = 0; + for (auto I = MRI.use_instr_nodbg_begin(Reg), E = MRI.use_instr_nodbg_end(); + I != E; ++I) { + if (++Count > 1) + return false; + } + + return true; +} + +// FIXME: Does this need to check IEEE bit on function? +bool SIFoldOperands::tryFoldClamp(MachineInstr &MI) { + const MachineOperand *ClampSrc = isClamp(MI); + if (!ClampSrc || !hasOneNonDBGUseInst(*MRI, ClampSrc->getReg())) + return false; + + MachineInstr *Def = MRI->getVRegDef(ClampSrc->getReg()); + if (!TII->hasFPClamp(*Def)) + return false; + MachineOperand *DefClamp = TII->getNamedOperand(*Def, AMDGPU::OpName::clamp); + if (!DefClamp) + return false; + + DEBUG(dbgs() << "Folding clamp " << *DefClamp << " into " << *Def << '\n'); + + // Clamp is applied after omod, so it is OK if omod is set. + DefClamp->setImm(1); + MRI->replaceRegWith(MI.getOperand(0).getReg(), Def->getOperand(0).getReg()); + MI.eraseFromParent(); + return true; +} + bool SIFoldOperands::runOnMachineFunction(MachineFunction &MF) { if (skipFunction(*MF.getFunction())) return false; - const SISubtarget &ST = MF.getSubtarget<SISubtarget>(); - MRI = &MF.getRegInfo(); - TII = ST.getInstrInfo(); + ST = &MF.getSubtarget<SISubtarget>(); + TII = ST->getInstrInfo(); TRI = &TII->getRegisterInfo(); for (MachineFunction::iterator BI = MF.begin(), BE = MF.end(); @@ -705,8 +770,11 @@ bool SIFoldOperands::runOnMachineFunction(MachineFunction &MF) { Next = std::next(I); MachineInstr &MI = *I; - if (!isSafeToFold(MI)) + if (!isSafeToFold(MI)) { + // TODO: Try omod also. + tryFoldClamp(MI); continue; + } MachineOperand &OpToFold = MI.getOperand(1); bool FoldingImm = OpToFold.isImm() || OpToFold.isFI(); diff --git a/llvm/lib/Target/AMDGPU/SIInstrFormats.td b/llvm/lib/Target/AMDGPU/SIInstrFormats.td index 5523ec142ba..6da32785b23 100644 --- a/llvm/lib/Target/AMDGPU/SIInstrFormats.td +++ b/llvm/lib/Target/AMDGPU/SIInstrFormats.td @@ -78,6 +78,10 @@ class InstSI <dag outs, dag ins, string asm = "", // is unable to infer the encoding from the operands. field bit VOPAsmPrefer32Bit = 0; + // This bit indicates that this has a floating point result type, so + // the clamp modifier has floating point semantics. + field bit FPClamp = 0; + // These need to be kept in sync with the enum in SIInstrFlags. let TSFlags{0} = SALU; let TSFlags{1} = VALU; @@ -120,6 +124,7 @@ class InstSI <dag outs, dag ins, string asm = "", let TSFlags{39} = ScalarStore; let TSFlags{40} = FixedSize; let TSFlags{41} = VOPAsmPrefer32Bit; + let TSFlags{42} = FPClamp; let SchedRW = [Write32Bit]; diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.h b/llvm/lib/Target/AMDGPU/SIInstrInfo.h index 5f53fd18917..ba59623a8e4 100644 --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.h +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.h @@ -474,6 +474,14 @@ public: return get(Opcode).TSFlags & SIInstrFlags::FIXED_SIZE; } + static bool hasFPClamp(const MachineInstr &MI) { + return MI.getDesc().TSFlags & SIInstrFlags::HasFPClamp; + } + + bool hasFPClamp(uint16_t Opcode) const { + return get(Opcode).TSFlags & SIInstrFlags::HasFPClamp; + } + bool isVGPRCopy(const MachineInstr &MI) const { assert(MI.isCopy()); unsigned Dest = MI.getOperand(0).getReg(); diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.td b/llvm/lib/Target/AMDGPU/SIInstrInfo.td index e832a0658d1..1c138feab43 100644 --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.td +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.td @@ -1086,6 +1086,7 @@ class VOPProfile <list<ValueType> _ArgVT> { field bit HasOMod = HasModifiers; field bit HasClamp = HasModifiers; field bit HasSDWAClamp = HasSrc0; + field bit HasFPClamp = BitAnd<isFloatType<DstVT>.ret, HasClamp>.ret; field bit HasExt = getHasExt<NumSrcArgs, DstVT, Src0VT, Src1VT>.ret; diff --git a/llvm/lib/Target/AMDGPU/VOPInstructions.td b/llvm/lib/Target/AMDGPU/VOPInstructions.td index ebc01ffc3af..6ba3e12c29c 100644 --- a/llvm/lib/Target/AMDGPU/VOPInstructions.td +++ b/llvm/lib/Target/AMDGPU/VOPInstructions.td @@ -100,6 +100,7 @@ class VOP3_Pseudo <string opName, VOPProfile P, list<dag> pattern=[], bit VOP3On let VOP3 = 1; let VALU = 1; + let FPClamp = P.HasFPClamp; let Uses = [EXEC]; let AsmVariantName = AMDGPUAsmVariants.VOP3; |