Diffstat (limited to 'llvm/lib/Target/AMDGPU')
-rw-r--r--  llvm/lib/Target/AMDGPU/R600OptimizeVectorRegisters.cpp | 12
-rw-r--r--  llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp              | 34
-rw-r--r--  llvm/lib/Target/AMDGPU/SIFoldOperands.cpp               |  8
-rw-r--r--  llvm/lib/Target/AMDGPU/SIPeepholeSDWA.cpp               | 10
-rw-r--r--  llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp               | 10
5 files changed, 37 insertions, 37 deletions
diff --git a/llvm/lib/Target/AMDGPU/R600OptimizeVectorRegisters.cpp b/llvm/lib/Target/AMDGPU/R600OptimizeVectorRegisters.cpp
index 972e61d376d..1bfa837bfb2 100644
--- a/llvm/lib/Target/AMDGPU/R600OptimizeVectorRegisters.cpp
+++ b/llvm/lib/Target/AMDGPU/R600OptimizeVectorRegisters.cpp
@@ -12,16 +12,16 @@
 /// common data and/or have enough undef subreg using swizzle abilities.
 ///
 /// For instance let's consider the following pseudo code :
-/// vreg5<def> = REG_SEQ vreg1, sub0, vreg2, sub1, vreg3, sub2, undef, sub3
+/// %5<def> = REG_SEQ %1, sub0, %2, sub1, %3, sub2, undef, sub3
 /// ...
-/// vreg7<def> = REG_SEQ vreg1, sub0, vreg3, sub1, undef, sub2, vreg4, sub3
-/// (swizzable Inst) vreg7, SwizzleMask : sub0, sub1, sub2, sub3
+/// %7<def> = REG_SEQ %1, sub0, %3, sub1, undef, sub2, %4, sub3
+/// (swizzable Inst) %7, SwizzleMask : sub0, sub1, sub2, sub3
 ///
 /// is turned into :
-/// vreg5<def> = REG_SEQ vreg1, sub0, vreg2, sub1, vreg3, sub2, undef, sub3
+/// %5<def> = REG_SEQ %1, sub0, %2, sub1, %3, sub2, undef, sub3
 /// ...
-/// vreg7<def> = INSERT_SUBREG vreg4, sub3
-/// (swizzable Inst) vreg7, SwizzleMask : sub0, sub2, sub1, sub3
+/// %7<def> = INSERT_SUBREG %4, sub3
+/// (swizzable Inst) %7, SwizzleMask : sub0, sub2, sub1, sub3
 ///
 /// This allow regalloc to reduce register pressure for vector registers and
 /// to reduce MOV count.
diff --git a/llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp b/llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp
index 34b1f758f7b..e9b381ce89b 100644
--- a/llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp
+++ b/llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp
@@ -14,46 +14,46 @@
 /// Register Class <vsrc> is the union of <vgpr> and <sgpr>
 ///
 /// BB0:
-/// %vreg0 <sgpr> = SCALAR_INST
-/// %vreg1 <vsrc> = COPY %vreg0 <sgpr>
+/// %0 <sgpr> = SCALAR_INST
+/// %1 <vsrc> = COPY %0 <sgpr>
 /// ...
 /// BRANCH %cond BB1, BB2
 /// BB1:
-/// %vreg2 <vgpr> = VECTOR_INST
-/// %vreg3 <vsrc> = COPY %vreg2 <vgpr>
+/// %2 <vgpr> = VECTOR_INST
+/// %3 <vsrc> = COPY %2 <vgpr>
 /// BB2:
-/// %vreg4 <vsrc> = PHI %vreg1 <vsrc>, <BB#0>, %vreg3 <vrsc>, <BB#1>
-/// %vreg5 <vgpr> = VECTOR_INST %vreg4 <vsrc>
+/// %4 <vsrc> = PHI %1 <vsrc>, <BB#0>, %3 <vrsc>, <BB#1>
+/// %5 <vgpr> = VECTOR_INST %4 <vsrc>
 ///
 ///
 /// The coalescer will begin at BB0 and eliminate its copy, then the resulting
 /// code will look like this:
 ///
 /// BB0:
-/// %vreg0 <sgpr> = SCALAR_INST
+/// %0 <sgpr> = SCALAR_INST
 /// ...
 /// BRANCH %cond BB1, BB2
 /// BB1:
-/// %vreg2 <vgpr> = VECTOR_INST
-/// %vreg3 <vsrc> = COPY %vreg2 <vgpr>
+/// %2 <vgpr> = VECTOR_INST
+/// %3 <vsrc> = COPY %2 <vgpr>
 /// BB2:
-/// %vreg4 <sgpr> = PHI %vreg0 <sgpr>, <BB#0>, %vreg3 <vsrc>, <BB#1>
-/// %vreg5 <vgpr> = VECTOR_INST %vreg4 <sgpr>
+/// %4 <sgpr> = PHI %0 <sgpr>, <BB#0>, %3 <vsrc>, <BB#1>
+/// %5 <vgpr> = VECTOR_INST %4 <sgpr>
 ///
 /// Now that the result of the PHI instruction is an SGPR, the register
-/// allocator is now forced to constrain the register class of %vreg3 to
+/// allocator is now forced to constrain the register class of %3 to
 /// <sgpr> so we end up with final code like this:
 ///
 /// BB0:
-/// %vreg0 <sgpr> = SCALAR_INST
+/// %0 <sgpr> = SCALAR_INST
 /// ...
 /// BRANCH %cond BB1, BB2
 /// BB1:
-/// %vreg2 <vgpr> = VECTOR_INST
-/// %vreg3 <sgpr> = COPY %vreg2 <vgpr>
+/// %2 <vgpr> = VECTOR_INST
+/// %3 <sgpr> = COPY %2 <vgpr>
 /// BB2:
-/// %vreg4 <sgpr> = PHI %vreg0 <sgpr>, <BB#0>, %vreg3 <sgpr>, <BB#1>
-/// %vreg5 <vgpr> = VECTOR_INST %vreg4 <sgpr>
+/// %4 <sgpr> = PHI %0 <sgpr>, <BB#0>, %3 <sgpr>, <BB#1>
+/// %5 <vgpr> = VECTOR_INST %4 <sgpr>
 ///
 /// Now this code contains an illegal copy from a VGPR to an SGPR.
 ///
diff --git a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
index 2c52e16892c..52157408b36 100644
--- a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
+++ b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
@@ -290,11 +290,11 @@ void SIFoldOperands::foldOperand(
     // copy since a subregister use tied to a full register def doesn't really
     // make sense. e.g. don't fold:
     //
-    // %vreg1 = COPY %vreg0:sub1
-    // %vreg2<tied3> = V_MAC_{F16, F32} %vreg3, %vreg4, %vreg1<tied0>
+    // %1 = COPY %0:sub1
+    // %2<tied3> = V_MAC_{F16, F32} %3, %4, %1<tied0>
     //
     // into
-    // %vreg2<tied3> = V_MAC_{F16, F32} %vreg3, %vreg4, %vreg0:sub1<tied0>
+    // %2<tied3> = V_MAC_{F16, F32} %3, %4, %0:sub1<tied0>
     if (UseOp.isTied() && OpToFold.getSubReg() != AMDGPU::NoSubRegister)
       return;
   }
@@ -971,7 +971,7 @@ bool SIFoldOperands::runOnMachineFunction(MachineFunction &MF) {
       // Prevent folding operands backwards in the function. For example,
       // the COPY opcode must not be replaced by 1 in this example:
       //
-      // %vreg3<def> = COPY %vgpr0; VGPR_32:%vreg3
+      // %3<def> = COPY %vgpr0; VGPR_32:%3
      // ...
      // %vgpr0<def> = V_MOV_B32_e32 1, %exec<imp-use>
      MachineOperand &Dst = MI.getOperand(0);
diff --git a/llvm/lib/Target/AMDGPU/SIPeepholeSDWA.cpp b/llvm/lib/Target/AMDGPU/SIPeepholeSDWA.cpp
index 5738077f989..bb8fa2c89fb 100644
--- a/llvm/lib/Target/AMDGPU/SIPeepholeSDWA.cpp
+++ b/llvm/lib/Target/AMDGPU/SIPeepholeSDWA.cpp
@@ -10,12 +10,12 @@
 /// \file This pass tries to apply several peephole SDWA patterns.
 ///
 /// E.g. original:
-/// V_LSHRREV_B32_e32 %vreg0, 16, %vreg1
-/// V_ADD_I32_e32 %vreg2, %vreg0, %vreg3
-/// V_LSHLREV_B32_e32 %vreg4, 16, %vreg2
+/// V_LSHRREV_B32_e32 %0, 16, %1
+/// V_ADD_I32_e32 %2, %0, %3
+/// V_LSHLREV_B32_e32 %4, 16, %2
 ///
 /// Replace:
-/// V_ADD_I32_sdwa %vreg4, %vreg1, %vreg3
+/// V_ADD_I32_sdwa %4, %1, %3
 ///   dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
 ///
 //===----------------------------------------------------------------------===//
@@ -410,7 +410,7 @@ Optional<int64_t> SIPeepholeSDWA::foldToImm(const MachineOperand &Op) const {
   }
 
   // If this is not immediate then it can be copy of immediate value, e.g.:
-  // %vreg1<def> = S_MOV_B32 255;
+  // %1<def> = S_MOV_B32 255;
   if (Op.isReg()) {
     for (const MachineOperand &Def : MRI->def_operands(Op.getReg())) {
       if (!isSameReg(Op, Def))
diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
index 152b24599e9..4407a9d0f37 100644
--- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
@@ -1347,13 +1347,13 @@ bool SIRegisterInfo::shouldRewriteCopySrc(
   // class.
   //
   // e.g. if we have something like
-  // vreg0 = ...
-  // vreg1 = ...
-  // vreg2 = REG_SEQUENCE vreg0, sub0, vreg1, sub1, vreg2, sub2
-  // vreg3 = COPY vreg2, sub0
+  // %0 = ...
+  // %1 = ...
+  // %2 = REG_SEQUENCE %0, sub0, %1, sub1, %2, sub2
+  // %3 = COPY %2, sub0
   //
   // We want to look through the COPY to find:
-  // => vreg3 = COPY vreg0
+  // => %3 = COPY %0
 
   // Plain copy.
   return getCommonSubClass(DefRC, SrcRC) != nullptr;