diff options
| author | Roman Lebedev <lebedev.ri@gmail.com> | 2019-02-04 09:12:21 +0000 | 
|---|---|---|
| committer | Roman Lebedev <lebedev.ri@gmail.com> | 2019-02-04 09:12:21 +0000 | 
| commit | 5b94fe96234455ee285caa0774025be9016a1a91 (patch) | |
| tree | 4b82ae3fcdf1b03d6e911a189ecc0765be23d5b9 /llvm/tools | |
| parent | 1a0d595f15602b6fe222f0ee7dfd0285e433ab7c (diff) | |
| download | bcm5719-llvm-5b94fe96234455ee285caa0774025be9016a1a91.tar.gz bcm5719-llvm-5b94fe96234455ee285caa0774025be9016a1a91.zip  | |
[llvm-exegesis] Cut run time of analysis mode by -84% (*sic*) (YamlContext::getInstrOpcode())
Summary:
```
$ perf stat -r 9 ./bin/llvm-exegesis -mode=analysis -analysis-epsilon=1.0 -benchmarks-file=/tmp/benchmarks-inverse_throughput-onefull.yaml -analysis-clusters-output-file="" -analysis-inconsistencies-output-file=/tmp/clusters-old.html
no exegesis target for x86_64-unknown-linux-gnu, using default
Parsed 14656 benchmark points
Printing sched class consistency analysis results to file '/tmp/clusters-old.html'
...
no exegesis target for x86_64-unknown-linux-gnu, using default
Parsed 14656 benchmark points
Printing sched class consistency analysis results to file '/tmp/clusters-old.html'
 Performance counter stats for './bin/llvm-exegesis -mode=analysis -analysis-epsilon=1.0 -benchmarks-file=/tmp/benchmarks-inverse_throughput-onefull.yaml -analysis-clusters-output-file= -analysis-inconsistencies-output-file=/tmp/clusters-old.html' (9 runs):
           9465.46 msec task-clock                #    1.000 CPUs utilized            ( +-  0.05% )
                60      context-switches          #    6.363 M/sec                    ( +- 79.45% )
                 0      cpu-migrations            #    0.000 K/sec
             11364      page-faults               # 1200.697 M/sec                    ( +-  0.60% )
       37935623543      cycles                    # 4008083.912 GHz                   ( +-  0.05% )  (83.32%)
        2371625356      stalled-cycles-frontend   #    6.25% frontend cycles idle     ( +-  0.37% )  (83.32%)
        8476077875      stalled-cycles-backend    #   22.34% backend cycles idle      ( +-  0.18% )  (33.36%)
       41822439158      instructions              #    1.10  insn per cycle
                                                  #    0.20  stalled cycles per insn  ( +-  0.02% )  (50.03%)
       11607658944      branches                  # 1226405861.486 M/sec              ( +-  0.01% )  (66.69%)
         210864633      branch-misses             #    1.82% of all branches          ( +-  0.06% )  (83.34%)
           9.46636 +- 0.00441 seconds time elapsed  ( +-  0.05% )
```
```
$ perf stat -r 9 ./bin/llvm-exegesis -mode=analysis -analysis-epsilon=1.0 -benchmarks-file=/tmp/benchmarks-inverse_throughput-onefull.yaml -analysis-clusters-output-file="" -analysis-inconsistencies-output-file=/tmp/clusters-bew.html
no exegesis target for x86_64-unknown-linux-gnu, using default
Parsed 14656 benchmark points
Printing sched class consistency analysis results to file '/tmp/clusters-bew.html'
...
no exegesis target for x86_64-unknown-linux-gnu, using default
Parsed 14656 benchmark points
Printing sched class consistency analysis results to file '/tmp/clusters-bew.html'
 Performance counter stats for './bin/llvm-exegesis -mode=analysis -analysis-epsilon=1.0 -benchmarks-file=/tmp/benchmarks-inverse_throughput-onefull.yaml -analysis-clusters-output-file= -analysis-inconsistencies-output-file=/tmp/clusters-bew.html' (9 runs):
           1480.66 msec task-clock                #    1.000 CPUs utilized            ( +-  0.19% )
                13      context-switches          #    8.483 M/sec                    ( +- 83.10% )
                 0      cpu-migrations            #    0.075 M/sec                    ( +-100.00% )
             11596      page-faults               # 7834.247 M/sec                    ( +-  0.59% )
        5933732194      cycles                    # 4008977.535 GHz                   ( +-  0.19% )  (83.22%)
         438111928      stalled-cycles-frontend   #    7.38% frontend cycles idle     ( +-  0.37% )  (83.25%)
        1454969705      stalled-cycles-backend    #   24.52% backend cycles idle      ( +-  0.94% )  (33.53%)
        7724218604      instructions              #    1.30  insn per cycle
                                                  #    0.19  stalled cycles per insn  ( +-  0.07% )  (50.14%)
        1979796413      branches                  # 1337599858.945 M/sec              ( +-  0.06% )  (66.74%)
          32641638      branch-misses             #    1.65% of all branches          ( +-  0.18% )  (83.31%)
           1.48128 +- 0.00284 seconds time elapsed  ( +-  0.19% )
$ sha512sum /tmp/clusters-*
db4bbd904fe8840853b589b032c5041bc060b91bcd9c27b914b56581fbc473550eea74b852238c79963b5adf2419f379e9f5db76784048b48e3937f9f3e732bf  /tmp/clusters-bew.html
db4bbd904fe8840853b589b032c5041bc060b91bcd9c27b914b56581fbc473550eea74b852238c79963b5adf2419f379e9f5db76784048b48e3937f9f3e732bf  /tmp/clusters-old.html
```
Reviewers: courbet, gchatelet
Reviewed By: courbet
Subscribers: tschuett, llvm-commits, RKSimon
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D57657
llvm-svn: 353024
Diffstat (limited to 'llvm/tools')
| -rw-r--r-- | llvm/tools/llvm-exegesis/lib/BenchmarkResult.cpp | 24 | 
1 files changed, 18 insertions, 6 deletions
```diff
diff --git a/llvm/tools/llvm-exegesis/lib/BenchmarkResult.cpp b/llvm/tools/llvm-exegesis/lib/BenchmarkResult.cpp
index fae75125591..1dd413a2912 100644
--- a/llvm/tools/llvm-exegesis/lib/BenchmarkResult.cpp
+++ b/llvm/tools/llvm-exegesis/lib/BenchmarkResult.cpp
@@ -9,8 +9,9 @@
 #include "BenchmarkResult.h"
 #include "BenchmarkRunner.h"
 #include "llvm/ADT/STLExtras.h"
-#include "llvm/ADT/bit.h"
+#include "llvm/ADT/StringMap.h"
 #include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/bit.h"
 #include "llvm/ObjectYAML/YAML.h"
 #include "llvm/Support/FileOutputBuffer.h"
 #include "llvm/Support/FileSystem.h"
@@ -29,7 +30,18 @@ namespace {
 // serialization process to encode/decode registers and instructions.
 struct YamlContext {
   YamlContext(const exegesis::LLVMState &State)
-      : State(&State), ErrorStream(LastError) {}
+      : State(&State), ErrorStream(LastError),
+        OpcodeNameToOpcodeIdx(
+            generateOpcodeNameToOpcodeIdxMapping(State.getInstrInfo())) {}
+
+  static llvm::StringMap<unsigned>
+  generateOpcodeNameToOpcodeIdxMapping(const llvm::MCInstrInfo &InstrInfo) {
+    llvm::StringMap<unsigned> Map(InstrInfo.getNumOpcodes());
+    for (unsigned I = 0, E = InstrInfo.getNumOpcodes(); I < E; ++I)
+      Map[InstrInfo.getName(I)] = I;
+    assert(Map.size() == InstrInfo.getNumOpcodes() && "Size prediction failed");
+    return Map;
+  };
 
   void serializeMCInst(const llvm::MCInst &MCInst, llvm::raw_ostream &OS) {
     OS << getInstrName(MCInst.getOpcode());
@@ -136,10 +148,9 @@ private:
   }
 
   unsigned getInstrOpcode(llvm::StringRef InstrName) {
-    const llvm::MCInstrInfo &InstrInfo = State->getInstrInfo();
-    for (unsigned E = InstrInfo.getNumOpcodes(), I = 0; I < E; ++I)
-      if (InstrInfo.getName(I) == InstrName)
-        return I;
+    auto Iter = OpcodeNameToOpcodeIdx.find(InstrName);
+    if (Iter != OpcodeNameToOpcodeIdx.end())
+      return Iter->second;
     ErrorStream << "No opcode with name " << InstrName;
     return 0;
   }
@@ -147,6 +158,7 @@ private:
   const llvm::exegesis::LLVMState *State;
   std::string LastError;
   llvm::raw_string_ostream ErrorStream;
+  const llvm::StringMap<unsigned> OpcodeNameToOpcodeIdx;
 };
 } // namespace
 
```

