diff options
| author | Nicolai Haehnle <nhaehnle@gmail.com> | 2018-08-29 07:46:09 +0000 | 
|---|---|---|
| committer | Nicolai Haehnle <nhaehnle@gmail.com> | 2018-08-29 07:46:09 +0000 | 
| commit | 283b995097f91f340f69b229d2554e95a87fbe10 (patch) | |
| tree | 940f6c73dd2086d3538614e46ca9e8bd20a027fe | |
| parent | 025bb56a86cd9d681171e7033d003b4b76370b5d (diff) | |
| download | bcm5719-llvm-283b995097f91f340f69b229d2554e95a87fbe10.tar.gz bcm5719-llvm-283b995097f91f340f69b229d2554e95a87fbe10.zip  | |
AMDGPU: Fix getInstSizeInBytes
Summary:
Add some optional code to validate getInstSizeInBytes for emitted
instructions. This flushed out some issues which are fixed by this
patch:
- Streamline getInstSizeInBytes
- Properly define the VI readlane/writelane instruction as VOP3
- Fix the inline constant determination. Specifically, this change
  fixes an issue where a 32-bit value of 0xffffffff was recorded
  as unsigned. This is equal to -1 when restricting to a 32-bit
  comparison, and an inline constant can be used.
Reviewers: arsenm, rampitec
Subscribers: kzhuravl, wdng, yaxunl, dstuttard, tpr, t-tye, llvm-commits
Differential Revision: https://reviews.llvm.org/D50629
Change-Id: Id87c3b7975839da0de8156a124b0ce98c5fb47f2
llvm-svn: 340903
| -rw-r--r-- | llvm/lib/Target/AMDGPU/AMDGPUMCInstLower.cpp | 20 | ||||
| -rw-r--r-- | llvm/lib/Target/AMDGPU/SIInstrInfo.cpp | 31 | ||||
| -rw-r--r-- | llvm/lib/Target/AMDGPU/VOP2Instructions.td | 9 | ||||
| -rw-r--r-- | llvm/lib/Target/AMDGPU/VOP3Instructions.td | 19 | ||||
| -rw-r--r-- | llvm/test/CodeGen/AMDGPU/llvm.amdgcn.writelane.ll | 2 | 
5 files changed, 46 insertions, 35 deletions
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUMCInstLower.cpp b/llvm/lib/Target/AMDGPU/AMDGPUMCInstLower.cpp index 1876dc3f712..f6bdbf5e9be 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUMCInstLower.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUMCInstLower.cpp @@ -301,6 +301,26 @@ void AMDGPUAsmPrinter::EmitInstruction(const MachineInstr *MI) {      MCInstLowering.lower(MI, TmpInst);      EmitToStreamer(*OutStreamer, TmpInst); +#ifdef EXPENSIVE_CHECKS +    // Sanity-check getInstSizeInBytes on explicitly specified CPUs (it cannot +    // work correctly for the generic CPU). +    // +    // The isPseudo check really shouldn't be here, but unfortunately there are +    // some negative lit tests that depend on being able to continue through +    // here even when pseudo instructions haven't been lowered. +    if (!MI->isPseudo() && STI.isCPUStringValid(STI.getCPU())) { +      SmallVector<MCFixup, 4> Fixups; +      SmallVector<char, 16> CodeBytes; +      raw_svector_ostream CodeStream(CodeBytes); + +      std::unique_ptr<MCCodeEmitter> InstEmitter(createSIMCCodeEmitter( +          *STI.getInstrInfo(), *OutContext.getRegisterInfo(), OutContext)); +      InstEmitter->encodeInstruction(TmpInst, CodeStream, Fixups, STI); + +      assert(CodeBytes.size() == STI.getInstrInfo()->getInstSizeInBytes(*MI)); +    } +#endif +      if (STI.dumpCode()) {        // Disassemble instruction/operands to text.        DisasmLines.resize(DisasmLines.size() + 1); diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp index bd6c5ac9d83..59dfb8a7a23 100644 --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp @@ -2398,8 +2398,7 @@ bool SIInstrInfo::isInlineConstant(const MachineOperand &MO,    case AMDGPU::OPERAND_REG_INLINE_C_INT32:    case AMDGPU::OPERAND_REG_INLINE_C_FP32: {      int32_t Trunc = static_cast<int32_t>(Imm); -    return Trunc == Imm && -           AMDGPU::isInlinableLiteral32(Trunc, ST.hasInv2PiInlineImm()); +    return AMDGPU::isInlinableLiteral32(Trunc, ST.hasInv2PiInlineImm());    }    case AMDGPU::OPERAND_REG_IMM_INT64:    case AMDGPU::OPERAND_REG_IMM_FP64: @@ -4975,12 +4974,6 @@ unsigned SIInstrInfo::getInstSizeInBytes(const MachineInstr &MI) const {    // If we have a definitive size, we can use it. Otherwise we need to inspect    // the operands to know the size. -  // -  // FIXME: Instructions that have a base 32-bit encoding report their size as -  // 4, even though they are really 8 bytes if they have a literal operand. -  if (DescSize != 0 && DescSize != 4) -    return DescSize; -    if (isFixedSize(MI))      return DescSize; @@ -4989,23 +4982,27 @@ unsigned SIInstrInfo::getInstSizeInBytes(const MachineInstr &MI) const {    if (isVALU(MI) || isSALU(MI)) {      int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);      if (Src0Idx == -1) -      return 4; // No operands. +      return DescSize; // No operands.      if (isLiteralConstantLike(MI.getOperand(Src0Idx), Desc.OpInfo[Src0Idx])) -      return 8; +      return DescSize + 4;      int Src1Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src1);      if (Src1Idx == -1) -      return 4; +      return DescSize;      if (isLiteralConstantLike(MI.getOperand(Src1Idx), Desc.OpInfo[Src1Idx])) -      return 8; +      return DescSize + 4; -    return 4; -  } +    int Src2Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2); +    if (Src2Idx == -1) +      return DescSize; -  if (DescSize == 4) -    return 4; +    if (isLiteralConstantLike(MI.getOperand(Src2Idx), Desc.OpInfo[Src2Idx])) +      return DescSize + 4; + +    return DescSize; +  }    switch (Opc) {    case TargetOpcode::IMPLICIT_DEF: @@ -5021,7 +5018,7 @@ unsigned SIInstrInfo::getInstSizeInBytes(const MachineInstr &MI) const {      return getInlineAsmLength(AsmStr, *MF->getTarget().getMCAsmInfo());    }    default: -    llvm_unreachable("unable to find instruction size"); +    return DescSize;    }  } diff --git a/llvm/lib/Target/AMDGPU/VOP2Instructions.td b/llvm/lib/Target/AMDGPU/VOP2Instructions.td index 3a8dcab15f1..089e78b9803 100644 --- a/llvm/lib/Target/AMDGPU/VOP2Instructions.td +++ b/llvm/lib/Target/AMDGPU/VOP2Instructions.td @@ -716,12 +716,6 @@ class VOP2_DPP <bits<6> op, VOP2_Pseudo ps, string OpName = ps.OpName, VOPProfil  let AssemblerPredicates = [isVI], DecoderNamespace = "VI" in { -multiclass VOP32_Real_vi <bits<10> op> { -  def _vi : -    VOP2_Real<!cast<VOP2_Pseudo>(NAME), SIEncodingFamily.VI>, -    VOP3e_vi<op, !cast<VOP2_Pseudo>(NAME).Pfl>; -} -  multiclass VOP2_Real_MADK_vi <bits<6> op> {    def _vi : VOP2_Real<!cast<VOP2_Pseudo>(NAME), SIEncodingFamily.VI>,              VOP2_MADKe<op{5-0}, !cast<VOP2_Pseudo>(NAME).Pfl>; @@ -899,9 +893,6 @@ defm V_ADD_U32            : VOP2_Real_e32e64_gfx9 <0x34>;  defm V_SUB_U32            : VOP2_Real_e32e64_gfx9 <0x35>;  defm V_SUBREV_U32         : VOP2_Real_e32e64_gfx9 <0x36>; -defm V_READLANE_B32       : VOP32_Real_vi <0x289>; -defm V_WRITELANE_B32      : VOP32_Real_vi <0x28a>; -  defm V_BFM_B32            : VOP2_Real_e64only_vi <0x293>;  defm V_BCNT_U32_B32       : VOP2_Real_e64only_vi <0x28b>;  defm V_MBCNT_LO_U32_B32   : VOP2_Real_e64only_vi <0x28c>; diff --git a/llvm/lib/Target/AMDGPU/VOP3Instructions.td b/llvm/lib/Target/AMDGPU/VOP3Instructions.td index 26bc5260e17..2d558a34be3 100644 --- a/llvm/lib/Target/AMDGPU/VOP3Instructions.td +++ b/llvm/lib/Target/AMDGPU/VOP3Instructions.td @@ -651,23 +651,23 @@ defm V_MAD_I64_I32      : VOP3be_Real_ci <0x177>;  let AssemblerPredicates = [isVI], DecoderNamespace = "VI" in {  multiclass VOP3_Real_vi<bits<10> op> { -  def _vi : VOP3_Real<!cast<VOP3_Pseudo>(NAME), SIEncodingFamily.VI>, -            VOP3e_vi <op, !cast<VOP3_Pseudo>(NAME).Pfl>; +  def _vi : VOP3_Real<!cast<VOP_Pseudo>(NAME), SIEncodingFamily.VI>, +            VOP3e_vi <op, !cast<VOP_Pseudo>(NAME).Pfl>;  }  multiclass VOP3be_Real_vi<bits<10> op> { -  def _vi : VOP3_Real<!cast<VOP3_Pseudo>(NAME), SIEncodingFamily.VI>, -            VOP3be_vi <op, !cast<VOP3_Pseudo>(NAME).Pfl>; +  def _vi : VOP3_Real<!cast<VOP_Pseudo>(NAME), SIEncodingFamily.VI>, +            VOP3be_vi <op, !cast<VOP_Pseudo>(NAME).Pfl>;  }  multiclass VOP3OpSel_Real_gfx9<bits<10> op> { -  def _vi : VOP3_Real<!cast<VOP3_Pseudo>(NAME), SIEncodingFamily.VI>, -            VOP3OpSel_gfx9 <op, !cast<VOP3_Pseudo>(NAME).Pfl>; +  def _vi : VOP3_Real<!cast<VOP_Pseudo>(NAME), SIEncodingFamily.VI>, +            VOP3OpSel_gfx9 <op, !cast<VOP_Pseudo>(NAME).Pfl>;  }  multiclass VOP3Interp_Real_vi<bits<10> op> { -  def _vi : VOP3_Real<!cast<VOP3_Pseudo>(NAME), SIEncodingFamily.VI>, -            VOP3Interp_vi <op, !cast<VOP3_Pseudo>(NAME).Pfl>; +  def _vi : VOP3_Real<!cast<VOP_Pseudo>(NAME), SIEncodingFamily.VI>, +            VOP3Interp_vi <op, !cast<VOP_Pseudo>(NAME).Pfl>;  }  } // End AssemblerPredicates = [isVI], DecoderNamespace = "VI" @@ -813,6 +813,9 @@ defm V_MUL_LO_I32       : VOP3_Real_vi <0x285>;  defm V_MUL_HI_U32       : VOP3_Real_vi <0x286>;  defm V_MUL_HI_I32       : VOP3_Real_vi <0x287>; +defm V_READLANE_B32     : VOP3_Real_vi <0x289>; +defm V_WRITELANE_B32    : VOP3_Real_vi <0x28a>; +  defm V_LSHLREV_B64      : VOP3_Real_vi <0x28f>;  defm V_LSHRREV_B64      : VOP3_Real_vi <0x290>;  defm V_ASHRREV_I64      : VOP3_Real_vi <0x291>; diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.writelane.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.writelane.ll index 361756a013b..1b9a762f25b 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.writelane.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.writelane.ll @@ -1,4 +1,4 @@ -; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=tahiti -verify-machineinstrs < %s | FileCheck %s +; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=gfx700 -verify-machineinstrs < %s | FileCheck %s  ; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=gfx802 -verify-machineinstrs < %s | FileCheck %s  declare i32 @llvm.amdgcn.writelane(i32, i32, i32) #0  | 

