diff options
author | Ryan Taylor <rtayl@amd.com> | 2019-06-26 17:34:57 +0000 |
---|---|---|
committer | Ryan Taylor <rtayl@amd.com> | 2019-06-26 17:34:57 +0000 |
commit | 9ab812d4752b2a1442426db2ccc17dc95d12eb04 (patch) | |
tree | 1b9439a3630c4fcf053184b1e3d54c61bf7acd66 /llvm/lib/Target | |
parent | 4c86dd903265be9fd72a5ebf7c568a15f3cad0a6 (diff) | |
download | bcm5719-llvm-9ab812d4752b2a1442426db2ccc17dc95d12eb04.tar.gz bcm5719-llvm-9ab812d4752b2a1442426db2ccc17dc95d12eb04.zip |
[AMDGPU] Fix for branch offset hardware workaround
Summary:
This fixes a hardware bug that makes a branch offset of 0x3f unsafe.
This replaces a 32 bit branch that has offset 0x3f with a 64 bit
instruction that includes the same 32 bit branch and the encoding
for an s_nop 0 to follow. The relaxer then modifies the offsets
accordingly.
Change-Id: I10b7aed99d651f8159401b01bb421f105fa6288e
Subscribers: arsenm, kzhuravl, jvesely, wdng, nhaehnle, yaxunl, dstuttard, tpr, t-tye, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D63494
llvm-svn: 364451
Diffstat (limited to 'llvm/lib/Target')
-rw-r--r-- | llvm/lib/Target/AMDGPU/AMDGPU.td | 10 | ||||
-rw-r--r-- | llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp | 1 | ||||
-rw-r--r-- | llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h | 5 | ||||
-rw-r--r-- | llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUAsmBackend.cpp | 44 | ||||
-rw-r--r-- | llvm/lib/Target/AMDGPU/SIInstrInfo.td | 9 | ||||
-rw-r--r-- | llvm/lib/Target/AMDGPU/SOPInstructions.td | 63 | ||||
-rw-r--r-- | llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h | 3 |
7 files changed, 111 insertions, 24 deletions
diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.td b/llvm/lib/Target/AMDGPU/AMDGPU.td index 225268d646d..63c3f776d4d 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPU.td +++ b/llvm/lib/Target/AMDGPU/AMDGPU.td @@ -193,6 +193,12 @@ def FeatureFlatSegmentOffsetBug : SubtargetFeature<"flat-segment-offset-bug", "GFX10 bug, inst_offset ignored in flat segment" >; +def FeatureOffset3fBug : SubtargetFeature<"offset-3f-bug", + "HasOffset3fBug", + "true", + "Branch offset of 3f hardware bug" +>; + class SubtargetFeatureLDSBankCount <int Value> : SubtargetFeature < "ldsbankcount"#Value, "LDSBankCount", @@ -767,6 +773,7 @@ def FeatureGroup { FeatureVcmpxExecWARHazard, FeatureLdsBranchVmemWARHazard, FeatureNSAtoVMEMBug, + FeatureOffset3fBug, FeatureFlatSegmentOffsetBug ]; } @@ -1068,6 +1075,9 @@ def HasDot5Insts : Predicate<"Subtarget->hasDot5Insts()">, def HasDot6Insts : Predicate<"Subtarget->hasDot6Insts()">, AssemblerPredicate<"FeatureDot6Insts">; +def HasOffset3fBug : Predicate<"!Subtarget->hasOffset3fBug()">, + AssemblerPredicate<"FeatureOffset3fBug">; + def EnableLateCFGStructurize : Predicate< "EnableLateStructurizeCFG">; diff --git a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp index 17222a77a0b..ea641017c80 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp @@ -261,6 +261,7 @@ GCNSubtarget::GCNSubtarget(const Triple &TT, StringRef GPU, StringRef FS, HasVcmpxExecWARHazard(false), HasLdsBranchVmemWARHazard(false), HasNSAtoVMEMBug(false), + HasOffset3fBug(false), HasFlatSegmentOffsetBug(false), FeatureDisable(false), diff --git a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h index 992b92d854d..f4bf315a8f1 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h +++ b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h @@ -368,6 +368,7 @@ protected: bool HasVcmpxExecWARHazard; bool HasLdsBranchVmemWARHazard; bool HasNSAtoVMEMBug; + bool HasOffset3fBug; bool 
HasFlatSegmentOffsetBug; // Dummy feature to use for assembler in tablegen. @@ -926,6 +927,10 @@ public: return HasR128A16; } + bool hasOffset3fBug() const { + return HasOffset3fBug; + } + bool hasNSAEncoding() const { return HasNSAEncoding; } diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUAsmBackend.cpp b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUAsmBackend.cpp index 9add5ebffba..57c0ba26cc3 100644 --- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUAsmBackend.cpp +++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUAsmBackend.cpp @@ -37,17 +37,13 @@ public: const MCSubtargetInfo *STI) const override; bool fixupNeedsRelaxation(const MCFixup &Fixup, uint64_t Value, const MCRelaxableFragment *DF, - const MCAsmLayout &Layout) const override { - return false; - } + const MCAsmLayout &Layout) const override; + void relaxInstruction(const MCInst &Inst, const MCSubtargetInfo &STI, - MCInst &Res) const override { - llvm_unreachable("Not implemented"); - } + MCInst &Res) const override; + bool mayNeedRelaxation(const MCInst &Inst, - const MCSubtargetInfo &STI) const override { - return false; - } + const MCSubtargetInfo &STI) const override; unsigned getMinimumNopSize() const override; bool writeNopData(raw_ostream &OS, uint64_t Count) const override; @@ -57,6 +53,36 @@ public: } //End anonymous namespace +void AMDGPUAsmBackend::relaxInstruction(const MCInst &Inst, + const MCSubtargetInfo &STI, + MCInst &Res) const { + unsigned RelaxedOpcode = AMDGPU::getSOPPWithRelaxation(Inst.getOpcode()); + Res.setOpcode(RelaxedOpcode); + Res.addOperand(Inst.getOperand(0)); + return; +} + +bool AMDGPUAsmBackend::fixupNeedsRelaxation(const MCFixup &Fixup, + uint64_t Value, + const MCRelaxableFragment *DF, + const MCAsmLayout &Layout) const { + // if the branch target has an offset of x3f this needs to be relaxed to + // add a s_nop 0 immediately after branch to effectively increment offset + // for hardware workaround in gfx1010 + return (((int64_t(Value)/4)-1) == 0x3f); +} + +bool 
AMDGPUAsmBackend::mayNeedRelaxation(const MCInst &Inst, + const MCSubtargetInfo &STI) const { + if (!STI.getFeatureBits()[AMDGPU::FeatureOffset3fBug]) + return false; + + if (AMDGPU::getSOPPWithRelaxation(Inst.getOpcode()) >= 0) + return true; + + return false; +} + static unsigned getFixupKindNumBytes(unsigned Kind) { switch (Kind) { case AMDGPU::fixup_si_sopp_br: diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.td b/llvm/lib/Target/AMDGPU/SIInstrInfo.td index cd1c7fdae92..a988a504ccf 100644 --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.td +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.td @@ -2309,6 +2309,15 @@ def getVCMPXNoSDstOp : InstrMapping { let ValueCols = [["0"]]; } +// Maps a SOPP to a SOPP with S_NOP +def getSOPPWithRelaxation : InstrMapping { + let FilterClass = "Base_SOPP"; + let RowFields = ["AsmString"]; + let ColFields = ["Size"]; + let KeyCol = ["4"]; + let ValueCols = [["8"]]; +} + include "SIInstructions.td" include "DSInstructions.td" diff --git a/llvm/lib/Target/AMDGPU/SOPInstructions.td b/llvm/lib/Target/AMDGPU/SOPInstructions.td index d7bb869377a..9d780573cb7 100644 --- a/llvm/lib/Target/AMDGPU/SOPInstructions.td +++ b/llvm/lib/Target/AMDGPU/SOPInstructions.td @@ -922,6 +922,10 @@ def S_SET_GPR_IDX_ON : SOPC <0x11, // SOPP Instructions //===----------------------------------------------------------------------===// +class Base_SOPP <string asm> { + string AsmString = asm; +} + class SOPPe <bits<7> op> : Enc32 { bits <16> simm16; @@ -931,7 +935,7 @@ class SOPPe <bits<7> op> : Enc32 { } class SOPP <bits<7> op, dag ins, string asm, list<dag> pattern = []> : - InstSI <(outs), ins, asm, pattern >, SOPPe <op> { + InstSI <(outs), ins, asm, pattern >, SOPPe <op>, Base_SOPP <asm> { let mayLoad = 0; let mayStore = 0; @@ -944,9 +948,38 @@ class SOPP <bits<7> op, dag ins, string asm, list<dag> pattern = []> : let UseNamedOperandTable = 1; } - def S_NOP : SOPP <0x00000000, (ins i16imm:$simm16), "s_nop $simm16">; +class SOPP_w_nop_e <bits<7> op> : Enc64 { + bits <16> 
simm16; + + let Inst{15-0} = simm16; + let Inst{22-16} = op; + let Inst{31-23} = 0x17f; // encoding + let Inst{47-32} = 0x0; + let Inst{54-48} = S_NOP.Inst{22-16}; // opcode + let Inst{63-55} = S_NOP.Inst{31-23}; // encoding +} + +class SOPP_w_nop <bits<7> op, dag ins, string asm, list<dag> pattern = []> : + InstSI <(outs), ins, asm, pattern >, SOPP_w_nop_e <op>, Base_SOPP <asm> { + + let mayLoad = 0; + let mayStore = 0; + let hasSideEffects = 0; + let SALU = 1; + let SOPP = 1; + let Size = 8; + let SchedRW = [WriteSALU]; + + let UseNamedOperandTable = 1; +} + +multiclass SOPP_With_Relaxation <bits<7> op, dag ins, string asm, list<dag> pattern = []> { + def "" : SOPP <op, ins, asm, pattern>; + def _pad_s_nop : SOPP_w_nop <op, ins, asm, pattern>; +} + let isTerminator = 1 in { def S_ENDPGM : SOPP <0x00000001, (ins EndpgmImm:$simm16), "s_endpgm$simm16"> { @@ -976,61 +1009,61 @@ let SubtargetPredicate = isGFX10Plus in { } // End SubtargetPredicate = isGFX10Plus let isBranch = 1, SchedRW = [WriteBranch] in { -def S_BRANCH : SOPP < +let isBarrier = 1 in { +defm S_BRANCH : SOPP_With_Relaxation < 0x00000002, (ins sopp_brtarget:$simm16), "s_branch $simm16", - [(br bb:$simm16)]> { - let isBarrier = 1; + [(br bb:$simm16)]>; } let Uses = [SCC] in { -def S_CBRANCH_SCC0 : SOPP < +defm S_CBRANCH_SCC0 : SOPP_With_Relaxation < 0x00000004, (ins sopp_brtarget:$simm16), "s_cbranch_scc0 $simm16" >; -def S_CBRANCH_SCC1 : SOPP < +defm S_CBRANCH_SCC1 : SOPP_With_Relaxation < 0x00000005, (ins sopp_brtarget:$simm16), "s_cbranch_scc1 $simm16" >; } // End Uses = [SCC] let Uses = [VCC] in { -def S_CBRANCH_VCCZ : SOPP < +defm S_CBRANCH_VCCZ : SOPP_With_Relaxation < 0x00000006, (ins sopp_brtarget:$simm16), "s_cbranch_vccz $simm16" >; -def S_CBRANCH_VCCNZ : SOPP < +defm S_CBRANCH_VCCNZ : SOPP_With_Relaxation < 0x00000007, (ins sopp_brtarget:$simm16), "s_cbranch_vccnz $simm16" >; } // End Uses = [VCC] let Uses = [EXEC] in { -def S_CBRANCH_EXECZ : SOPP < +defm S_CBRANCH_EXECZ : 
SOPP_With_Relaxation < 0x00000008, (ins sopp_brtarget:$simm16), "s_cbranch_execz $simm16" >; -def S_CBRANCH_EXECNZ : SOPP < +defm S_CBRANCH_EXECNZ : SOPP_With_Relaxation < 0x00000009, (ins sopp_brtarget:$simm16), "s_cbranch_execnz $simm16" >; } // End Uses = [EXEC] -def S_CBRANCH_CDBGSYS : SOPP < +defm S_CBRANCH_CDBGSYS : SOPP_With_Relaxation < 0x00000017, (ins sopp_brtarget:$simm16), "s_cbranch_cdbgsys $simm16" >; -def S_CBRANCH_CDBGSYS_AND_USER : SOPP < +defm S_CBRANCH_CDBGSYS_AND_USER : SOPP_With_Relaxation < 0x0000001A, (ins sopp_brtarget:$simm16), "s_cbranch_cdbgsys_and_user $simm16" >; -def S_CBRANCH_CDBGSYS_OR_USER : SOPP < +defm S_CBRANCH_CDBGSYS_OR_USER : SOPP_With_Relaxation < 0x00000019, (ins sopp_brtarget:$simm16), "s_cbranch_cdbgsys_or_user $simm16" >; -def S_CBRANCH_CDBGUSER : SOPP < +defm S_CBRANCH_CDBGUSER : SOPP_With_Relaxation < 0x00000018, (ins sopp_brtarget:$simm16), "s_cbranch_cdbguser $simm16" >; diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h index b56dad808f4..5497fe195e2 100644 --- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h +++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h @@ -190,6 +190,9 @@ unsigned getNumVGPRBlocks(const MCSubtargetInfo *STI, unsigned NumSGPRs, LLVM_READONLY int16_t getNamedOperandIdx(uint16_t Opcode, uint16_t NamedIdx); +LLVM_READONLY +int getSOPPWithRelaxation(uint16_t Opcode); + struct MIMGBaseOpcodeInfo { MIMGBaseOpcode BaseOpcode; bool Store; |