diff options
| author | Tom Stellard <thomas.stellard@amd.com> | 2014-01-22 21:55:46 +0000 | 
|---|---|---|
| committer | Tom Stellard <thomas.stellard@amd.com> | 2014-01-22 21:55:46 +0000 | 
| commit | e89373e0622724f99194276d3a97ad6052db41a8 (patch) | |
| tree | 46631c02c31871551acf7ed1a631ad6f0e850ef8 /llvm/lib/Target | |
| parent | 59ed4794c4fff1456c7afc970a36266ba505ae48 (diff) | |
| download | bcm5719-llvm-e89373e0622724f99194276d3a97ad6052db41a8.tar.gz bcm5719-llvm-e89373e0622724f99194276d3a97ad6052db41a8.zip | |
R600: Add work-around for the CF stack entry HW bug
The CF stack can be corrupted if you use CF_ALU_PUSH_BEFORE,
CF_ALU_ELSE_AFTER, CF_ALU_BREAK, or CF_ALU_CONTINUE when the number of
sub-entries on the stack is greater than or equal to the stack entry
size and sub-entries modulo 4 is either 0 or 3 (on cedar the bug is
present when the number of sub-entries modulo 8 is either 7 or 0).
We choose to be conservative and always apply the work-around when the
number of sub-entries is greater than or equal to the stack entry size,
so that we can safely over-allocate the stack when we are unsure of the
stack allocation rules.
reviewed-by: Vincent Lejeune <vljn at ovi.com>
llvm-svn: 199842
Diffstat (limited to 'llvm/lib/Target')
| -rw-r--r-- | llvm/lib/Target/R600/AMDGPU.td | 5 | ||||
| -rw-r--r-- | llvm/lib/Target/R600/AMDGPUSubtarget.cpp | 6 | ||||
| -rw-r--r-- | llvm/lib/Target/R600/AMDGPUSubtarget.h | 2 | ||||
| -rw-r--r-- | llvm/lib/Target/R600/Processors.td | 14 | ||||
| -rw-r--r-- | llvm/lib/Target/R600/R600ControlFlowFinalizer.cpp | 43 | 
5 files changed, 63 insertions, 7 deletions
| diff --git a/llvm/lib/Target/R600/AMDGPU.td b/llvm/lib/Target/R600/AMDGPU.td index c4e5efc8d6e..d1e2cf5319c 100644 --- a/llvm/lib/Target/R600/AMDGPU.td +++ b/llvm/lib/Target/R600/AMDGPU.td @@ -63,6 +63,11 @@ def FeatureCaymanISA : SubtargetFeature<"caymanISA",          "true",          "Use Cayman ISA">; +def FeatureCFALUBug : SubtargetFeature<"cfalubug", +        "CFALUBug", +        "true", +        "GPU has CF_ALU bug">; +  class SubtargetFeatureFetchLimit <string Value> :                            SubtargetFeature <"fetch"#Value,          "TexVTXClauseSize", diff --git a/llvm/lib/Target/R600/AMDGPUSubtarget.cpp b/llvm/lib/Target/R600/AMDGPUSubtarget.cpp index f36aa2071c7..e77ab5e6d14 100644 --- a/llvm/lib/Target/R600/AMDGPUSubtarget.cpp +++ b/llvm/lib/Target/R600/AMDGPUSubtarget.cpp @@ -39,6 +39,7 @@ AMDGPUSubtarget::AMDGPUSubtarget(StringRef TT, StringRef CPU, StringRef FS) :    EnableIRStructurizer = true;    EnableIfCvt = true;    WavefrontSize = 0; +  CFALUBug = false;    ParseSubtargetFeatures(GPU, FS);    DevName = GPU;  } @@ -97,6 +98,11 @@ AMDGPUSubtarget::getStackEntrySize() const {    }  }  bool +AMDGPUSubtarget::hasCFAluBug() const { +  assert(getGeneration() <= NORTHERN_ISLANDS); +  return CFALUBug; +} +bool  AMDGPUSubtarget::isTargetELF() const {    return false;  } diff --git a/llvm/lib/Target/R600/AMDGPUSubtarget.h b/llvm/lib/Target/R600/AMDGPUSubtarget.h index 68d853218ba..7e7f4d0c004 100644 --- a/llvm/lib/Target/R600/AMDGPUSubtarget.h +++ b/llvm/lib/Target/R600/AMDGPUSubtarget.h @@ -52,6 +52,7 @@ private:    bool EnableIRStructurizer;    bool EnableIfCvt;    unsigned WavefrontSize; +  bool CFALUBug;    InstrItineraryData InstrItins; @@ -71,6 +72,7 @@ public:    bool isIfCvtEnabled() const;    unsigned getWavefrontSize() const;    unsigned getStackEntrySize() const; +  bool hasCFAluBug() const;    virtual bool enableMachineScheduler() const {      return getGeneration() <= NORTHERN_ISLANDS; diff --git a/llvm/lib/Target/R600/Processors.td 
b/llvm/lib/Target/R600/Processors.td index e601f353163..fde44814970 100644 --- a/llvm/lib/Target/R600/Processors.td +++ b/llvm/lib/Target/R600/Processors.td @@ -46,13 +46,15 @@ def : Proc<"rv770",      R600_VLIW5_Itin,  //===----------------------------------------------------------------------===//  def : Proc<"cedar",      R600_VLIW5_Itin, -    [FeatureEvergreen, FeatureVertexCache, FeatureWavefrontSize32]>; +    [FeatureEvergreen, FeatureVertexCache, FeatureWavefrontSize32, +     FeatureCFALUBug]>;  def : Proc<"redwood",    R600_VLIW5_Itin, -    [FeatureEvergreen, FeatureVertexCache, FeatureWavefrontSize64]>; +    [FeatureEvergreen, FeatureVertexCache, FeatureWavefrontSize64, +     FeatureCFALUBug]>;  def : Proc<"sumo",       R600_VLIW5_Itin, -    [FeatureEvergreen, FeatureWavefrontSize64]>; +    [FeatureEvergreen, FeatureWavefrontSize64, FeatureCFALUBug]>;  def : Proc<"juniper",    R600_VLIW5_Itin,      [FeatureEvergreen, FeatureVertexCache, FeatureWavefrontSize64]>; @@ -66,13 +68,13 @@ def : Proc<"cypress",    R600_VLIW5_Itin,  //===----------------------------------------------------------------------===//  def : Proc<"barts",      R600_VLIW5_Itin, -    [FeatureNorthernIslands, FeatureVertexCache]>; +    [FeatureNorthernIslands, FeatureVertexCache, FeatureCFALUBug]>;  def : Proc<"turks",      R600_VLIW5_Itin, -    [FeatureNorthernIslands, FeatureVertexCache]>; +    [FeatureNorthernIslands, FeatureVertexCache, FeatureCFALUBug]>;  def : Proc<"caicos",     R600_VLIW5_Itin, -    [FeatureNorthernIslands]>; +    [FeatureNorthernIslands, FeatureCFALUBug]>;  def : Proc<"cayman",     R600_VLIW4_Itin,      [FeatureNorthernIslands, FeatureFP64, FeatureCaymanISA]>; diff --git a/llvm/lib/Target/R600/R600ControlFlowFinalizer.cpp b/llvm/lib/Target/R600/R600ControlFlowFinalizer.cpp index 6b42a7a9faf..470ff2e1079 100644 --- a/llvm/lib/Target/R600/R600ControlFlowFinalizer.cpp +++ b/llvm/lib/Target/R600/R600ControlFlowFinalizer.cpp @@ -73,6 +73,44 @@ bool 
CFStack::branchStackContains(CFStack::StackItem Item) {    return false;  } +bool CFStack::requiresWorkAroundForInst(unsigned Opcode) { +  if (Opcode == AMDGPU::CF_ALU_PUSH_BEFORE && ST.hasCaymanISA() && +      getLoopDepth() > 1) +    return true; + +  if (!ST.hasCFAluBug()) +    return false; + +  switch(Opcode) { +  default: return false; +  case AMDGPU::CF_ALU_PUSH_BEFORE: +  case AMDGPU::CF_ALU_ELSE_AFTER: +  case AMDGPU::CF_ALU_BREAK: +  case AMDGPU::CF_ALU_CONTINUE: +    if (CurrentSubEntries == 0) +      return false; +    if (ST.getWavefrontSize() == 64) { +      // We are being conservative here.  We only require this work-around if +      // CurrentSubEntries > 3 && +      // (CurrentSubEntries % 4 == 3 || CurrentSubEntries % 4 == 0) +      // +      // We have to be conservative, because we don't know for certain that +      // our stack allocation algorithm for Evergreen/NI is correct.  Applying this +      // work-around when CurrentSubEntries > 3 allows us to over-allocate stack +      // resources without any problems. +      return CurrentSubEntries > 3; +    } else { +      assert(ST.getWavefrontSize() == 32); +      // We are being conservative here.  We only require the work-around if +      // CurrentSubEntries > 7 && +      // (CurrentSubEntries % 8 == 7 || CurrentSubEntries % 8 == 0) +      // See the comment on the wavefront size == 64 case for why we are +      // being conservative. 
+      return CurrentSubEntries > 7; +    } +  } +} +  unsigned CFStack::getSubEntrySize(CFStack::StackItem Item) {    switch(Item) {    default: @@ -472,9 +510,12 @@ public:          if (MI->getOpcode() == AMDGPU::CF_ALU)            LastAlu.back() = MI;          I++; +        bool RequiresWorkAround = +            CFStack.requiresWorkAroundForInst(MI->getOpcode());          switch (MI->getOpcode()) {          case AMDGPU::CF_ALU_PUSH_BEFORE: -          if (ST.hasCaymanISA() && CFStack.getLoopDepth() > 1) { +          if (RequiresWorkAround) { +            DEBUG(dbgs() << "Applying bug work-around for ALU_PUSH_BEFORE\n");              BuildMI(MBB, MI, MBB.findDebugLoc(MI), TII->get(AMDGPU::CF_PUSH_EG))                  .addImm(CfCount + 1)                  .addImm(1); | 

