summary refs log tree commit diff stats
path: root/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/lib/Target/AMDGPU/SIFoldOperands.cpp')
-rw-r--r-- llvm/lib/Target/AMDGPU/SIFoldOperands.cpp 23
1 files changed, 18 insertions, 5 deletions
diff --git a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
index 0aad8f0843d..1a30a163e6d 100644
--- a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
+++ b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
@@ -728,7 +728,8 @@ const MachineOperand *SIFoldOperands::isClamp(const MachineInstr &MI) const {
switch (Op) {
case AMDGPU::V_MAX_F32_e64:
case AMDGPU::V_MAX_F16_e64:
- case AMDGPU::V_MAX_F64: {
+ case AMDGPU::V_MAX_F64:
+ case AMDGPU::V_PK_MAX_F16: {
if (!TII->getNamedOperand(MI, AMDGPU::OpName::clamp)->getImm())
return nullptr;
@@ -741,9 +742,18 @@ const MachineOperand *SIFoldOperands::isClamp(const MachineInstr &MI) const {
return nullptr;
// Can't fold up if we have modifiers.
- if (TII->hasModifiersSet(MI, AMDGPU::OpName::src0_modifiers) ||
- TII->hasModifiersSet(MI, AMDGPU::OpName::src1_modifiers) ||
- TII->hasModifiersSet(MI, AMDGPU::OpName::omod))
+ if (TII->hasModifiersSet(MI, AMDGPU::OpName::omod))
+ return nullptr;
+
+ unsigned Src0Mods
+ = TII->getNamedOperand(MI, AMDGPU::OpName::src0_modifiers)->getImm();
+ unsigned Src1Mods
+ = TII->getNamedOperand(MI, AMDGPU::OpName::src1_modifiers)->getImm();
+
+ // Having a 0 op_sel_hi would require swizzling the output in the source
+ // instruction, which we can't do.
+ unsigned UnsetMods = (Op == AMDGPU::V_PK_MAX_F16) ? SISrcMods::OP_SEL_1 : 0;
+ if (Src0Mods != UnsetMods && Src1Mods != UnsetMods)
return nullptr;
return Src0;
}
@@ -771,8 +781,11 @@ bool SIFoldOperands::tryFoldClamp(MachineInstr &MI) {
return false;
MachineInstr *Def = MRI->getVRegDef(ClampSrc->getReg());
- if (!TII->hasFPClamp(*Def))
+
+ // The type of clamp must be compatible.
+ if (TII->getClampMask(*Def) != TII->getClampMask(MI))
return false;
+
MachineOperand *DefClamp = TII->getNamedOperand(*Def, AMDGPU::OpName::clamp);
if (!DefClamp)
return false;
OpenPOWER on IntegriCloud