diff options
Diffstat (limited to 'llvm/lib/Target')
| -rw-r--r-- | llvm/lib/Target/AMDGPU/SIInstrInfo.cpp | 22 | ||||
| -rw-r--r-- | llvm/lib/Target/AMDGPU/SIInstructions.td | 14 | ||||
| -rw-r--r-- | llvm/lib/Target/AMDGPU/SIWholeQuadMode.cpp | 31 | 
3 files changed, 66 insertions, 1 deletions
| diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp index 0b0d0388031..e9492c4cf9c 100644 --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp @@ -1099,6 +1099,28 @@ bool SIInstrInfo::expandPostRAPseudo(MachineInstr &MI) const {      MI.eraseFromParent();      break;    } +  case AMDGPU::V_SET_INACTIVE_B32: { +    BuildMI(MBB, MI, DL, get(AMDGPU::S_NOT_B64), AMDGPU::EXEC) +      .addReg(AMDGPU::EXEC); +    BuildMI(MBB, MI, DL, get(AMDGPU::V_MOV_B32_e32), MI.getOperand(0).getReg()) +      .add(MI.getOperand(2)); +    BuildMI(MBB, MI, DL, get(AMDGPU::S_NOT_B64), AMDGPU::EXEC) +      .addReg(AMDGPU::EXEC); +    MI.eraseFromParent(); +    break; +  } +  case AMDGPU::V_SET_INACTIVE_B64: { +    BuildMI(MBB, MI, DL, get(AMDGPU::S_NOT_B64), AMDGPU::EXEC) +      .addReg(AMDGPU::EXEC); +    MachineInstr *Copy = BuildMI(MBB, MI, DL, get(AMDGPU::V_MOV_B64_PSEUDO), +                                 MI.getOperand(0).getReg()) +      .add(MI.getOperand(2)); +    expandPostRAPseudo(*Copy); +    BuildMI(MBB, MI, DL, get(AMDGPU::S_NOT_B64), AMDGPU::EXEC) +      .addReg(AMDGPU::EXEC); +    MI.eraseFromParent(); +    break; +  }    case AMDGPU::V_MOVRELD_B32_V1:    case AMDGPU::V_MOVRELD_B32_V2:    case AMDGPU::V_MOVRELD_B32_V4: diff --git a/llvm/lib/Target/AMDGPU/SIInstructions.td b/llvm/lib/Target/AMDGPU/SIInstructions.td index a13c8f32fe6..70ad847fc5e 100644 --- a/llvm/lib/Target/AMDGPU/SIInstructions.td +++ b/llvm/lib/Target/AMDGPU/SIInstructions.td @@ -137,6 +137,20 @@ def EXIT_WWM : SPseudoInstSI <(outs SReg_64:$sdst), (ins SReg_64:$src0)> {    let mayStore = 0;  } +// Invert the exec mask and overwrite the inactive lanes of dst with inactive, +// restoring it after we're done. 
+def V_SET_INACTIVE_B32 : VPseudoInstSI <(outs VGPR_32:$vdst), +  (ins VGPR_32: $src, VSrc_b32:$inactive), +  [(set i32:$vdst, (int_amdgcn_set_inactive i32:$src, i32:$inactive))]> { +  let Constraints = "$src = $vdst"; +} + +def V_SET_INACTIVE_B64 : VPseudoInstSI <(outs VReg_64:$vdst), +  (ins VReg_64: $src, VSrc_b64:$inactive), +  [(set i64:$vdst, (int_amdgcn_set_inactive i64:$src, i64:$inactive))]> { +  let Constraints = "$src = $vdst"; +} +  let usesCustomInserter = 1, SALU = 1 in {  def GET_GROUPSTATICSIZE : PseudoInstSI <(outs SReg_32:$sdst), (ins),    [(set SReg_32:$sdst, (int_amdgcn_groupstaticsize))]>; diff --git a/llvm/lib/Target/AMDGPU/SIWholeQuadMode.cpp b/llvm/lib/Target/AMDGPU/SIWholeQuadMode.cpp index 1a0f0f9aca9..8aa57ba7293 100644 --- a/llvm/lib/Target/AMDGPU/SIWholeQuadMode.cpp +++ b/llvm/lib/Target/AMDGPU/SIWholeQuadMode.cpp @@ -303,6 +303,7 @@ char SIWholeQuadMode::scanInstructions(MachineFunction &MF,                                         std::vector<WorkItem> &Worklist) {    char GlobalFlags = 0;    bool WQMOutputs = MF.getFunction()->hasFnAttribute("amdgpu-ps-wqm-outputs"); +  SmallVector<MachineInstr *, 4> SetInactiveInstrs;    // We need to visit the basic blocks in reverse post-order so that we visit    // defs before uses, in particular so that we don't accidentally mark an @@ -341,6 +342,23 @@ char SIWholeQuadMode::scanInstructions(MachineFunction &MF,          GlobalFlags |= StateWWM;          LowerToCopyInstrs.push_back(&MI);          continue; +      } else if (Opcode == AMDGPU::V_SET_INACTIVE_B32 || +                 Opcode == AMDGPU::V_SET_INACTIVE_B64) { +        III.Disabled = StateWWM; +        MachineOperand &Inactive = MI.getOperand(2); +        if (Inactive.isReg()) { +          if (Inactive.isUndef()) { +            LowerToCopyInstrs.push_back(&MI); +          } else { +            unsigned Reg = Inactive.getReg(); +            if (TargetRegisterInfo::isVirtualRegister(Reg)) { +              for (MachineInstr &DefMI : 
MRI->def_instructions(Reg)) +                markInstruction(DefMI, StateWWM, Worklist); +            } +          } +        } +        SetInactiveInstrs.push_back(&MI); +        continue;        } else if (TII->isDisableWQM(MI)) {          BBI.Needs |= StateExact;          if (!(BBI.InNeeds & StateExact)) { @@ -380,6 +398,14 @@ char SIWholeQuadMode::scanInstructions(MachineFunction &MF,      }    } +  // Make sure that any SET_INACTIVE instructions are computed in WQM if WQM is +  // ever used anywhere in the function. This implements the corresponding +  // semantics of @llvm.amdgcn.set.inactive. +  if (GlobalFlags & StateWQM) { +    for (MachineInstr *MI : SetInactiveInstrs) +      markInstruction(*MI, StateWQM, Worklist); +  } +    return GlobalFlags;  } @@ -799,8 +825,11 @@ void SIWholeQuadMode::lowerLiveMaskQueries(unsigned LiveMaskReg) {  }  void SIWholeQuadMode::lowerCopyInstrs() { -  for (MachineInstr *MI : LowerToCopyInstrs) +  for (MachineInstr *MI : LowerToCopyInstrs) { +    for (unsigned i = MI->getNumExplicitOperands() - 1; i > 1; i--) +      MI->RemoveOperand(i);      MI->setDesc(TII->get(AMDGPU::COPY)); +  }  }  bool SIWholeQuadMode::runOnMachineFunction(MachineFunction &MF) { | 

