Diffstat (limited to 'llvm/lib')
-rw-r--r--   llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp    6
-rw-r--r--   llvm/lib/Target/AMDGPU/SIISelLowering.cpp     5
-rw-r--r--   llvm/lib/Target/AMDGPU/SIInstrInfo.cpp        2
-rw-r--r--   llvm/lib/Target/AMDGPU/SIInstructions.td      5
-rw-r--r--   llvm/lib/Target/AMDGPU/SIWholeQuadMode.cpp   15
5 files changed, 32 insertions, 1 deletion
diff --git a/llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp b/llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp
index 32e83cb385f..ee2b415099b 100644
--- a/llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp
+++ b/llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp
@@ -338,6 +338,9 @@ static bool isSafeToFoldImmIntoCopy(const MachineInstr *Copy,
                                     unsigned &SMovOp,
                                     int64_t &Imm) {
+  if (Copy->getOpcode() != AMDGPU::COPY)
+    return false;
+
   if (!MoveImm->isMoveImmediate())
     return false;
 
@@ -564,7 +567,8 @@ bool SIFixSGPRCopies::runOnMachineFunction(MachineFunction &MF) {
       switch (MI.getOpcode()) {
       default:
         continue;
-      case AMDGPU::COPY: {
+      case AMDGPU::COPY:
+      case AMDGPU::WQM: {
         // If the destination register is a physical register there isn't really
         // much we can do to fix this.
         if (!TargetRegisterInfo::isVirtualRegister(MI.getOperand(0).getReg()))
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index 7abb4636d72..fecad1e1646 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -3942,6 +3942,11 @@ SDValue SITargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
                                Op.getOperand(1), Op.getOperand(2));
     return DAG.getNode(ISD::BITCAST, DL, VT, Node);
   }
+  case Intrinsic::amdgcn_wqm: {
+    SDValue Src = Op.getOperand(1);
+    return SDValue(DAG.getMachineNode(AMDGPU::WQM, DL, Src.getValueType(), Src),
+                   0);
+  }
   default:
     return Op;
   }
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
index 1d884524bcd..dc4b9998786 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -2666,6 +2666,7 @@ unsigned SIInstrInfo::getVALUOp(const MachineInstr &MI) {
   case AMDGPU::COPY: return AMDGPU::COPY;
   case AMDGPU::PHI: return AMDGPU::PHI;
   case AMDGPU::INSERT_SUBREG: return AMDGPU::INSERT_SUBREG;
+  case AMDGPU::WQM: return AMDGPU::WQM;
   case AMDGPU::S_MOV_B32:
     return MI.getOperand(1).isReg() ?
            AMDGPU::COPY : AMDGPU::V_MOV_B32_e32;
@@ -3970,6 +3971,7 @@ const TargetRegisterClass *SIInstrInfo::getDestEquivalentVGPRClass(
   case AMDGPU::PHI:
   case AMDGPU::REG_SEQUENCE:
   case AMDGPU::INSERT_SUBREG:
+  case AMDGPU::WQM:
     if (RI.hasVGPRs(NewDstRC))
       return nullptr;
diff --git a/llvm/lib/Target/AMDGPU/SIInstructions.td b/llvm/lib/Target/AMDGPU/SIInstructions.td
index 50e806188a9..f20ce4d4b28 100644
--- a/llvm/lib/Target/AMDGPU/SIInstructions.td
+++ b/llvm/lib/Target/AMDGPU/SIInstructions.td
@@ -116,6 +116,11 @@ def V_CNDMASK_B64_PSEUDO : VOP3Common <(outs VReg_64:$vdst),
 // pass to enable folding of inline immediates.
 def V_MOV_B64_PSEUDO : VPseudoInstSI <(outs VReg_64:$vdst),
                                       (ins VSrc_b64:$src0)>;
+
+// Pseudoinstruction for @llvm.amdgcn.wqm. It is turned into a copy
+// after the WQM pass processes it.
+def WQM : PseudoInstSI <(outs unknown:$vdst), (ins unknown:$src0)>;
+
 } // End let hasSideEffects = 0, mayLoad = 0, mayStore = 0, Uses = [EXEC]
 
 let usesCustomInserter = 1, SALU = 1 in {
diff --git a/llvm/lib/Target/AMDGPU/SIWholeQuadMode.cpp b/llvm/lib/Target/AMDGPU/SIWholeQuadMode.cpp
index a613a220e29..62e1b7e84c4 100644
--- a/llvm/lib/Target/AMDGPU/SIWholeQuadMode.cpp
+++ b/llvm/lib/Target/AMDGPU/SIWholeQuadMode.cpp
@@ -136,6 +136,7 @@ private:
   DenseMap<const MachineInstr *, InstrInfo> Instructions;
   DenseMap<MachineBasicBlock *, BlockInfo> Blocks;
   SmallVector<MachineInstr *, 1> LiveMaskQueries;
+  SmallVector<MachineInstr *, 4> LowerToCopyInstrs;
 
   void printInfo();
 
@@ -162,6 +163,7 @@ private:
   void processBlock(MachineBasicBlock &MBB, unsigned LiveMaskReg, bool isEntry);
 
   void lowerLiveMaskQueries(unsigned LiveMaskReg);
+  void lowerCopyInstrs();
 
 public:
   static char ID;
@@ -294,6 +296,11 @@ char SIWholeQuadMode::scanInstructions(MachineFunction &MF,
         markUsesWQM(MI, Worklist);
         GlobalFlags |= StateWQM;
         continue;
+      } else if (Opcode == AMDGPU::WQM) {
+        // The WQM intrinsic requires its output to have all the helper lanes
+        // correct, so we need it to be in WQM.
+        Flags = StateWQM;
+        LowerToCopyInstrs.push_back(&MI);
       } else if (TII->isDisableWQM(MI)) {
         Flags = StateExact;
       } else {
@@ -666,6 +673,11 @@ void SIWholeQuadMode::lowerLiveMaskQueries(unsigned LiveMaskReg) {
   }
 }
 
+void SIWholeQuadMode::lowerCopyInstrs() {
+  for (MachineInstr *MI : LowerToCopyInstrs)
+    MI->setDesc(TII->get(AMDGPU::COPY));
+}
+
 bool SIWholeQuadMode::runOnMachineFunction(MachineFunction &MF) {
   if (MF.getFunction()->getCallingConv() != CallingConv::AMDGPU_PS)
     return false;
@@ -673,6 +685,7 @@ bool SIWholeQuadMode::runOnMachineFunction(MachineFunction &MF) {
   Instructions.clear();
   Blocks.clear();
   LiveMaskQueries.clear();
+  LowerToCopyInstrs.clear();
 
   const SISubtarget &ST = MF.getSubtarget<SISubtarget>();
 
@@ -708,6 +721,7 @@ bool SIWholeQuadMode::runOnMachineFunction(MachineFunction &MF) {
           .addReg(AMDGPU::EXEC);
 
       lowerLiveMaskQueries(LiveMaskReg);
+      lowerCopyInstrs();
       // EntryMI may become invalid here
       return true;
     }
@@ -716,6 +730,7 @@
   DEBUG(printInfo());
 
   lowerLiveMaskQueries(LiveMaskReg);
+  lowerCopyInstrs();
 
   // Handle the general case
   for (auto BII : Blocks)
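
For context, a minimal usage sketch (not part of this commit): the case added to LowerINTRINSIC_WO_CHAIN above means the intrinsic can be exercised from LLVM IR roughly as follows. The function and value names here are hypothetical; the intrinsic is overloaded, so its name is mangled with the value type (here .f32), and per the check in runOnMachineFunction the SIWholeQuadMode pass only runs for the AMDGPU_PS calling convention.

; Hypothetical example: force %v to be computed in whole quad mode so that
; helper lanes carry correct values. ISel turns this call into the WQM
; pseudo, and SIWholeQuadMode::lowerCopyInstrs() later rewrites the pseudo
; into a plain COPY once the exact/WQM regions have been computed.
declare float @llvm.amdgcn.wqm.f32(float)

define amdgpu_ps float @wqm_example(float %v) {
  %w = call float @llvm.amdgcn.wqm.f32(float %v)
  ret float %w
}

The design keeps the WQM requirement explicit in machine code only as long as the pass needs it; afterwards the pseudo degenerates to a COPY, which is why SIFixSGPRCopies and SIInstrInfo are taught to treat WQM exactly like COPY.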

