diff options
Diffstat (limited to 'llvm/lib/Target/AMDGPU/SIFoldOperands.cpp')
| -rw-r--r-- | llvm/lib/Target/AMDGPU/SIFoldOperands.cpp | 23 |
1 files changed, 18 insertions, 5 deletions
diff --git a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp index 0aad8f0843d..1a30a163e6d 100644 --- a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp +++ b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp @@ -728,7 +728,8 @@ const MachineOperand *SIFoldOperands::isClamp(const MachineInstr &MI) const { switch (Op) { case AMDGPU::V_MAX_F32_e64: case AMDGPU::V_MAX_F16_e64: - case AMDGPU::V_MAX_F64: { + case AMDGPU::V_MAX_F64: + case AMDGPU::V_PK_MAX_F16: { if (!TII->getNamedOperand(MI, AMDGPU::OpName::clamp)->getImm()) return nullptr; @@ -741,9 +742,18 @@ const MachineOperand *SIFoldOperands::isClamp(const MachineInstr &MI) const { return nullptr; // Can't fold up if we have modifiers. - if (TII->hasModifiersSet(MI, AMDGPU::OpName::src0_modifiers) || - TII->hasModifiersSet(MI, AMDGPU::OpName::src1_modifiers) || - TII->hasModifiersSet(MI, AMDGPU::OpName::omod)) + if (TII->hasModifiersSet(MI, AMDGPU::OpName::omod)) + return nullptr; + + unsigned Src0Mods + = TII->getNamedOperand(MI, AMDGPU::OpName::src0_modifiers)->getImm(); + unsigned Src1Mods + = TII->getNamedOperand(MI, AMDGPU::OpName::src1_modifiers)->getImm(); + + // Having a 0 op_sel_hi would require swizzling the output in the source + // instruction, which we can't do. + unsigned UnsetMods = (Op == AMDGPU::V_PK_MAX_F16) ? SISrcMods::OP_SEL_1 : 0; + if (Src0Mods != UnsetMods && Src1Mods != UnsetMods) return nullptr; return Src0; } @@ -771,8 +781,11 @@ bool SIFoldOperands::tryFoldClamp(MachineInstr &MI) { return false; MachineInstr *Def = MRI->getVRegDef(ClampSrc->getReg()); - if (!TII->hasFPClamp(*Def)) + + // The type of clamp must be compatible. + if (TII->getClampMask(*Def) != TII->getClampMask(MI)) return false; + MachineOperand *DefClamp = TII->getNamedOperand(*Def, AMDGPU::OpName::clamp); if (!DefClamp) return false; |

