diff options
author | Alexander Potapenko <glider@google.com> | 2012-09-26 13:16:42 +0000 |
---|---|---|
committer | Alexander Potapenko <glider@google.com> | 2012-09-26 13:16:42 +0000 |
commit | 3f6a5c1b2dff14f4b6e3a0df8badd0714d46481e (patch) | |
tree | c1aa1c2cd0fc2a9c1b6df07051bd1f4f60aae3ef /compiler-rt/lib/asan/scripts/asan_symbolize.py | |
parent | 59cc8776c40de66c2478ca52a721a79a75abb613 (diff) | |
download | bcm5719-llvm-3f6a5c1b2dff14f4b6e3a0df8badd0714d46481e.tar.gz bcm5719-llvm-3f6a5c1b2dff14f4b6e3a0df8badd0714d46481e.zip |
Fixed a number of gpylint warnings, added binary names filtering (useful for Chrome), minor fixes.
llvm-svn: 164695
Diffstat (limited to 'compiler-rt/lib/asan/scripts/asan_symbolize.py')
-rwxr-xr-x | compiler-rt/lib/asan/scripts/asan_symbolize.py | 163 |
1 files changed, 97 insertions, 66 deletions
diff --git a/compiler-rt/lib/asan/scripts/asan_symbolize.py b/compiler-rt/lib/asan/scripts/asan_symbolize.py index 454e6924bf4..a4f91c8a01d 100755 --- a/compiler-rt/lib/asan/scripts/asan_symbolize.py +++ b/compiler-rt/lib/asan/scripts/asan_symbolize.py @@ -10,8 +10,8 @@ import bisect import os import re -import sys import subprocess +import sys llvm_symbolizer = None symbolizers = {} @@ -23,23 +23,25 @@ DEBUG = False # FIXME: merge the code that calls fix_filename(). def fix_filename(file_name): for path_to_cut in sys.argv[1:]: - file_name = re.sub(".*" + path_to_cut, "", file_name) - file_name = re.sub(".*asan_[a-z_]*.cc:[0-9]*", "_asan_rtl_", file_name) - file_name = re.sub(".*crtstuff.c:0", "???:0", file_name) + file_name = re.sub('.*' + path_to_cut, '', file_name) + file_name = re.sub('.*asan_[a-z_]*.cc:[0-9]*', '_asan_rtl_', file_name) + file_name = re.sub('.*crtstuff.c:0', '???:0', file_name) return file_name class Symbolizer(object): def __init__(self): pass + def symbolize(self, addr, binary, offset): - """ - Overrided in subclasses. - Args: + """Symbolize the given address (pair of binary and offset). + + Overriden in subclasses. + Args: addr: virtual address of an instruction. binary: path to executable/shared object containing this instruction. offset: instruction offset in the @binary. - Returns: + Returns: list of strings (one string for each inlined frame) describing the code locations for this instruction (that is, function name, file name, line and column numbers). @@ -52,51 +54,54 @@ class LLVMSymbolizer(Symbolizer): super(LLVMSymbolizer, self).__init__() self.symbolizer_path = symbolizer_path self.pipe = self.open_llvm_symbolizer() + def open_llvm_symbolizer(self): if not os.path.exists(self.symbolizer_path): return None cmd = [self.symbolizer_path, - "--use-symbol-table=true", - "--demangle=false", - "--functions=true", - "--inlining=true"] + '--use-symbol-table=true', + '--demangle=false', + '--functions=true', + '--inlining=true'] if DEBUG: print ' '.join(cmd) return subprocess.Popen(cmd, stdin=subprocess.PIPE, stdout=subprocess.PIPE) def symbolize(self, addr, binary, offset): - """Overrides Symbolizer.symbolize""" + """Overrides Symbolizer.symbolize.""" if not self.pipe: return None result = [] try: - symbolizer_input = "%s %s" % (binary, offset) + symbolizer_input = '%s %s' % (binary, offset) + if DEBUG: + print symbolizer_input print >> self.pipe.stdin, symbolizer_input while True: function_name = self.pipe.stdout.readline().rstrip() - if (function_name == ""): + if not function_name: break file_name = self.pipe.stdout.readline().rstrip() file_name = fix_filename(file_name) - if (not function_name.startswith("??") and - not file_name.startswith("??")): + if (not function_name.startswith('??') and + not file_name.startswith('??')): # Append only valid frames. - result.append("%s in %s %s" % (addr, function_name, + result.append('%s in %s %s' % (addr, function_name, file_name)) except Exception: result = [] - if len(result) == 0: + if not result: result = None return result def LLVMSymbolizerFactory(system): - if system == "Linux": - symbolizer_path = os.getenv("LLVM_SYMBOLIZER_PATH") + if system == 'Linux': + symbolizer_path = os.getenv('LLVM_SYMBOLIZER_PATH') if not symbolizer_path: # Assume llvm-symbolizer is in PATH. - symbolizer_path = "llvm-symbolizer" + symbolizer_path = 'llvm-symbolizer' return LLVMSymbolizer(symbolizer_path) return None @@ -106,42 +111,45 @@ class Addr2LineSymbolizer(Symbolizer): super(Addr2LineSymbolizer, self).__init__() self.binary = binary self.pipe = self.open_addr2line() + def open_addr2line(self): - cmd = ["addr2line", "-f", "-e", self.binary] + cmd = ['addr2line', '-f', '-e', self.binary] if DEBUG: print ' '.join(cmd) return subprocess.Popen(cmd, stdin=subprocess.PIPE, stdout=subprocess.PIPE) + def symbolize(self, addr, binary, offset): - """Overrides Symbolizer.symbolize""" + """Overrides Symbolizer.symbolize.""" if self.binary != binary: return None try: print >> self.pipe.stdin, offset function_name = self.pipe.stdout.readline().rstrip() - file_name = self.pipe.stdout.readline().rstrip() + file_name = self.pipe.stdout.readline().rstrip() except Exception: - function_name = "" - file_name = "" + function_name = '' + file_name = '' file_name = fix_filename(file_name) - return ["%s in %s %s" % (addr, function_name, file_name)] + return ['%s in %s %s' % (addr, function_name, file_name)] class DarwinSymbolizer(Symbolizer): def __init__(self, addr, binary): super(DarwinSymbolizer, self).__init__() self.binary = binary - # Guess which arch we're running. 10 = len("0x") + 8 hex digits. + # Guess which arch we're running. 10 = len('0x') + 8 hex digits. if len(addr) > 10: - self.arch = "x86_64" + self.arch = 'x86_64' else: - self.arch = "i386" + self.arch = 'i386' self.vmaddr = None self.pipe = None + def get_binary_vmaddr(self): - """ - Get the slide value to be added to the address. - We're ooking for the following piece in otool -l output: + """Get the slide value to be added to the address. + + We're looking for the following piece in otool -l output: Load command 0 cmd LC_SEGMENT cmdsize 736 @@ -150,13 +158,13 @@ class DarwinSymbolizer(Symbolizer): """ if self.vmaddr: return self.vmaddr - cmdline = ["otool", "-l", self.binary] + cmdline = ['otool', '-l', self.binary] pipe = subprocess.Popen(cmdline, stdin=subprocess.PIPE, stdout=subprocess.PIPE) is_text = False vmaddr = 0 - for line in pipe.stdout.readlines(): + for line in pipe.stdout: line = line.strip() if line.startswith('segname'): is_text = (line == 'segname __TEXT') @@ -167,19 +175,22 @@ class DarwinSymbolizer(Symbolizer): break self.vmaddr = vmaddr return self.vmaddr + def write_addr_to_pipe(self, offset): slide = self.get_binary_vmaddr() - print >> self.pipe.stdin, "0x%x" % (int(offset, 16) + slide) + print >> self.pipe.stdin, '0x%x' % (int(offset, 16) + slide) + def open_atos(self): if DEBUG: - print "atos -o %s -arch %s" % (self.binary, self.arch) - cmdline = ["atos", "-o", self.binary, "-arch", self.arch] + print 'atos -o %s -arch %s' % (self.binary, self.arch) + cmdline = ['atos', '-o', self.binary, '-arch', self.arch] self.pipe = subprocess.Popen(cmdline, stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.PIPE) + def symbolize(self, addr, binary, offset): - """Overrides Symbolizer.symbolize""" + """Overrides Symbolizer.symbolize.""" if self.binary != binary: return None self.open_atos() @@ -190,14 +201,14 @@ class DarwinSymbolizer(Symbolizer): # foo(type1, type2) (in object.name) (filename.cc:80) match = re.match('^(.*) \(in (.*)\) \((.*:\d*)\)$', atos_line) if DEBUG: - print "atos_line: ", atos_line + print 'atos_line: ', atos_line if match: function_name = match.group(1) - function_name = re.sub("\(.*?\)", "", function_name) + function_name = re.sub('\(.*?\)', '', function_name) file_name = fix_filename(match.group(3)) - return ["%s in %s %s" % (addr, function_name, file_name)] + return ['%s in %s %s' % (addr, function_name, file_name)] else: - return ["%s in %s" % (addr, atos_line)] + return ['%s in %s' % (addr, atos_line)] # Chain several symbolizers so that if one symbolizer fails, we fall back @@ -206,20 +217,22 @@ class ChainSymbolizer(Symbolizer): def __init__(self, symbolizer_list): super(ChainSymbolizer, self).__init__() self.symbolizer_list = symbolizer_list + def symbolize(self, addr, binary, offset): - """Overrides Symbolizer.symbolize""" + """Overrides Symbolizer.symbolize.""" for symbolizer in self.symbolizer_list: if symbolizer: result = symbolizer.symbolize(addr, binary, offset) if result: return result return None + def append_symbolizer(self, symbolizer): self.symbolizer_list.append(symbolizer) -def BreakpadSymbolizerFactory(addr, binary): - suffix = os.getenv("BREAKPAD_SUFFIX") +def BreakpadSymbolizerFactory(binary): + suffix = os.getenv('BREAKPAD_SUFFIX') if suffix: filename = binary + suffix if os.access(filename, os.F_OK): @@ -249,6 +262,7 @@ class BreakpadSymbolizer(Symbolizer): self.debug_id = fragments[3] self.binary = ' '.join(fragments[4:]) self.parse_lines(lines[1:]) + def parse_lines(self, lines): cur_function_addr = '' for line in lines: @@ -274,6 +288,7 @@ class BreakpadSymbolizer(Symbolizer): int(fragments[2]), int(fragments[3])) self.address_list.sort() + def get_sym_file_line(self, addr): key = None if addr in self.addresses.keys(): @@ -291,13 +306,14 @@ class BreakpadSymbolizer(Symbolizer): return symbol, filename, line_no else: return None + def symbolize(self, addr, binary, offset): if self.binary != binary: return None res = self.get_sym_file_line(int(offset, 16)) if res: function_name, file_name, line_no = res - result = ["%s in %s %s:%d" % ( + result = ['%s in %s %s:%d' % ( addr, function_name, file_name, line_no)] print result return result @@ -306,19 +322,23 @@ class BreakpadSymbolizer(Symbolizer): class SymbolizationLoop(object): - def __init__(self): + def __init__(self, binary_name_filter=None): + # Used by clients who may want to supply a different binary name. + # E.g. in Chrome several binaries may share a single .dSYM. + self.binary_name_filter = binary_name_filter self.system = os.uname()[0] if self.system in ['Linux', 'Darwin']: self.llvm_symbolizer = LLVMSymbolizerFactory(self.system) else: - raise Exception("Unknown system") + raise Exception('Unknown system') + def symbolize_address(self, addr, binary, offset): # Use the chain of symbolizers: # Breakpad symbolizer -> LLVM symbolizer -> addr2line/atos # (fall back to next symbolizer if the previous one fails). - if not symbolizers.has_key(binary): + if not binary in symbolizers: symbolizers[binary] = ChainSymbolizer( - [BreakpadSymbolizerFactory(addr, binary), llvm_symbolizer]) + [BreakpadSymbolizerFactory(binary), self.llvm_symbolizer]) result = symbolizers[binary].symbolize(addr, binary, offset) if result is None: # Initialize system symbolizer only if other symbolizers failed. @@ -326,33 +346,44 @@ class SymbolizationLoop(object): SystemSymbolizerFactory(self.system, addr, binary)) result = symbolizers[binary].symbolize(addr, binary, offset) # The system symbolizer must produce some result. - assert(result) + assert result return result - def loop(self): - frame_no = 0 + + def print_symbolized_lines(self, symbolized_lines): + if not symbolized_lines: + print self.current_line + else: + for symbolized_frame in symbolized_lines: + print ' #' + str(self.frame_no) + ' ' + symbolized_frame.rstrip() + self.frame_no += 1 + + def process_stdin(self): + self.frame_no = 0 for line in sys.stdin: + self.current_line = line.rstrip() #0 0x7f6e35cf2e45 (/blah/foo.so+0x11fe45) stack_trace_line_format = ( - "^( *#([0-9]+) *)(0x[0-9a-f]+) *\((.*)\+(0x[0-9a-f]+)\)") + '^( *#([0-9]+) *)(0x[0-9a-f]+) *\((.*)\+(0x[0-9a-f]+)\)') match = re.match(stack_trace_line_format, line) if not match: - print line.rstrip() + print self.current_line continue if DEBUG: print line - prefix, frameno_str, addr, binary, offset = match.groups() - if (frameno_str == "0"): + _, frameno_str, addr, binary, offset = match.groups() + if frameno_str == '0': # Assume that frame #0 is the first frame of new stack trace. - frame_no = 0 + self.frame_no = 0 + original_binary = binary + if self.binary_name_filter: + binary = self.binary_name_filter(binary) symbolized_line = self.symbolize_address(addr, binary, offset) if not symbolized_line: - print line.rstrip() - else: - for symbolized_frame in symbolized_line: - print " #" + str(frame_no) + " " + symbolized_frame.rstrip() - frame_no += 1 + if original_binary != binary: + symbolized_line = self.symbolize_address(addr, binary, offset) + self.print_symbolized_lines(symbolized_line) if __name__ == '__main__': loop = SymbolizationLoop() - loop.loop() + loop.process_stdin() |