Package lldb :: Package utils :: Module symbolication
[hide private]
[frames] | no frames]

Source Code for Module lldb.utils.symbolication

  1  #!/usr/bin/python 
  2   
  3  #---------------------------------------------------------------------- 
  4  # Be sure to add the python path that points to the LLDB shared library. 
  5  # 
  6  # To use this in the embedded python interpreter using "lldb": 
  7  # 
  8  #   cd /path/containing/crashlog.py 
  9  #   lldb 
 10  #   (lldb) script import crashlog 
 11  #   "crashlog" command installed, type "crashlog --help" for detailed help 
 12  #   (lldb) crashlog ~/Library/Logs/DiagnosticReports/a.crash 
 13  # 
 14  # The benefit of running the crashlog command inside lldb in the 
 15  # embedded python interpreter is when the command completes, there 
 16  # will be a target with all of the files loaded at the locations 
 17  # described in the crash log. Only the files that have stack frames 
 18  # in the backtrace will be loaded unless the "--load-all" option 
 19  # has been specified. This allows users to explore the program in the 
 20  # state it was in right at crash time. 
 21  # 
 22  # On MacOSX csh, tcsh: 
 23  #   ( setenv PYTHONPATH /path/to/LLDB.framework/Resources/Python ; ./crashlog.py ~/Library/Logs/DiagnosticReports/a.crash ) 
 24  # 
 25  # On MacOSX sh, bash: 
 26  #   PYTHONPATH=/path/to/LLDB.framework/Resources/Python ./crashlog.py ~/Library/Logs/DiagnosticReports/a.crash 
 27  #---------------------------------------------------------------------- 
 28   
 29  import lldb 
 30  import commands 
 31  import optparse 
 32  import os 
 33  import plistlib 
 34  import re 
 35  import shlex 
 36  import sys 
 37  import time 
 38  import uuid 
class Address:
    """Class that represents an address that will be symbolicated.

    The section/offset address, the symbol context and the symbolicated
    string are all computed on first use and cached on the instance.
    """

    def __init__(self, target, load_addr):
        """Remember the target and the load address to symbolicate."""
        self.target = target
        # The load address that this object represents
        self.load_addr = load_addr
        # The resolved lldb.SBAddress (if any), named so_addr for
        # section/offset address
        self.so_addr = None
        # The cached symbol context for this address
        self.sym_ctx = None
        # Any original textual description of this address to be used as a
        # backup in case symbolication fails
        self.description = None
        # The cached symbolicated string that describes this address
        self.symbolication = None
        self.inlined = False

    def __str__(self):
        """Return the load address followed by the best available text.

        Preference order: symbolication, description, resolved address.
        """
        text = "%#16.16x" % (self.load_addr)
        if self.symbolication:
            text += " %s" % (self.symbolication)
        elif self.description:
            text += " %s" % (self.description)
        elif self.so_addr:
            text += " %s" % (self.so_addr)
        return text

    def resolve_addr(self):
        """Resolve the load address through the target once and cache it."""
        if self.so_addr is None:
            self.so_addr = self.target.ResolveLoadAddress(self.load_addr)
        return self.so_addr

    def is_inlined(self):
        """Return the inlined flag set by the last symbolicate() run."""
        return self.inlined

    def get_symbol_context(self):
        """Return the cached symbol context, computing it if needed.

        When the address cannot be resolved an empty lldb.SBSymbolContext
        is cached instead.
        """
        if self.sym_ctx is None:
            sb_addr = self.resolve_addr()
            if sb_addr:
                self.sym_ctx = self.target.ResolveSymbolContextForAddress(
                    sb_addr, lldb.eSymbolContextEverything)
            else:
                self.sym_ctx = lldb.SBSymbolContext()
        return self.sym_ctx

    def get_instructions(self):
        """Return the instructions of the containing function or symbol.

        The function is preferred; the symbol is used when the symbol
        context has no function. Returns None when the symbol context is
        not valid.
        """
        sym_ctx = self.get_symbol_context()
        if not sym_ctx:
            return None
        function = sym_ctx.GetFunction()
        if function:
            return function.GetInstructions(self.target)
        return sym_ctx.GetSymbol().GetInstructions(self.target)

    def symbolicate(self, verbose=False):
        """Build and cache the symbolicated description of this address.

        verbose selects full paths instead of base names for the module
        and the source file.

        Returns True when a symbolication string is available and False
        otherwise. A repeated call reports the cached outcome instead of
        always reporting failure.
        """
        if self.symbolication is not None:
            # Already attempted: an empty string records a failed attempt.
            return bool(self.symbolication)

        self.symbolication = ''
        self.inlined = False
        sym_ctx = self.get_symbol_context()
        if not sym_ctx:
            return False
        module = sym_ctx.GetModule()
        if not module:
            return False

        # Print full source file path in verbose mode
        if verbose:
            self.symbolication += str(module.GetFileSpec()) + '`'
        else:
            self.symbolication += module.GetFileSpec().GetFilename() + '`'

        function_start_load_addr = -1
        function = sym_ctx.GetFunction()
        block = sym_ctx.GetBlock()
        line_entry = sym_ctx.GetLineEntry()
        symbol = sym_ctx.GetSymbol()
        inlined_block = block.GetContainingInlinedBlock()
        if function:
            self.symbolication += function.GetName()

            if inlined_block:
                self.inlined = True
                self.symbolication += ' [inlined] ' + \
                    inlined_block.GetInlinedName()
                block_range_idx = inlined_block.GetRangeIndexForBlockAddress(
                    self.so_addr)
                if block_range_idx < lldb.UINT32_MAX:
                    block_range_start_addr = inlined_block.GetRangeStartAddress(
                        block_range_idx)
                    function_start_load_addr = block_range_start_addr.GetLoadAddress(
                        self.target)
            # No usable inlined range: measure from the function start.
            if function_start_load_addr == -1:
                function_start_load_addr = function.GetStartAddress().GetLoadAddress(
                    self.target)
        elif symbol:
            self.symbolication += symbol.GetName()
            function_start_load_addr = symbol.GetStartAddress().GetLoadAddress(
                self.target)
        else:
            # Neither a function nor a symbol: discard the module prefix.
            self.symbolication = ''
            return False

        # Dump the offset from the current function or symbol if it
        # is non zero
        function_offset = self.load_addr - function_start_load_addr
        if function_offset > 0:
            self.symbolication += " + %u" % (function_offset)
        elif function_offset < 0:
            self.symbolication += " %i (invalid negative offset, file a bug) " % function_offset

        # Print out any line information if any is available
        if line_entry.GetFileSpec():
            # Print full source file path in verbose mode
            if verbose:
                self.symbolication += ' at %s' % line_entry.GetFileSpec()
            else:
                self.symbolication += ' at %s' % line_entry.GetFileSpec().GetFilename()
            self.symbolication += ':%u' % line_entry.GetLine()
            column = line_entry.GetColumn()
            if column > 0:
                self.symbolication += ':%u' % column
        return True
157
class Section:
    """Class that represents a named load address range.

    A section has a start address and, optionally, an end address; the
    range is half open: [start_addr, end_addr).
    """
    # Raw strings keep the regex escapes (\s) out of Python's own string
    # escape processing.
    sect_info_regex = re.compile(r'(?P<name>[^=]+)=(?P<range>.*)')
    addr_regex = re.compile(r'^\s*(?P<start>0x[0-9A-Fa-f]+)\s*$')
    range_regex = re.compile(
        r'^\s*(?P<start>0x[0-9A-Fa-f]+)\s*(?P<op>[-+])\s*(?P<end>0x[0-9A-Fa-f]+)\s*$')

    def __init__(self, start_addr=None, end_addr=None, name=None):
        """Store the range bounds and the section name as given."""
        self.start_addr = start_addr
        self.end_addr = end_addr
        self.name = name

    @classmethod
    def InitWithSBTargetAndSBSection(cls, target, section):
        """Create a Section from an lldb section loaded in a target.

        Returns None when the section has no valid load address in
        "target".
        """
        sect_load_addr = section.GetLoadAddress(target)
        if sect_load_addr == lldb.LLDB_INVALID_ADDRESS:
            return None
        return cls(
            sect_load_addr,
            sect_load_addr + section.size,
            section.name)

    def contains(self, addr):
        """Return True when addr lies in [start_addr, end_addr).

        A section without both bounds (for example one created from a
        base address only) contains no address.
        """
        if self.start_addr is None or self.end_addr is None:
            return False
        return self.start_addr <= addr < self.end_addr

    def set_from_string(self, s):
        """Initialize this section from a "<name>=<range>" string.

        Accepted range forms are <base>, <base>-<end> and <base>+<size>,
        with all numbers in 0x-prefixed hexadecimal. Returns True on
        success; on failure prints the accepted formats and returns False.
        """
        match = self.sect_info_regex.match(s)
        if match:
            self.name = match.group('name')
            range_str = match.group('range')
            addr_match = self.addr_regex.match(range_str)
            if addr_match:
                self.start_addr = int(addr_match.group('start'), 16)
                self.end_addr = None
                return True

            range_match = self.range_regex.match(range_str)
            if range_match:
                self.start_addr = int(range_match.group('start'), 16)
                self.end_addr = int(range_match.group('end'), 16)
                # With '+' the second number is a size, not an end address.
                if range_match.group('op') == '+':
                    self.end_addr += self.start_addr
                return True
        print('error: invalid section info string "%s"' % s)
        print('Valid section info formats are:')
        print('Format Example Description')
        print('--------------------- -----------------------------------------------')
        print('<name>=<base> __TEXT=0x123000 Section from base address only')
        print('<name>=<base>-<end> __TEXT=0x123000-0x124000 Section from base address and end address')
        print('<name>=<base>+<size> __TEXT=0x123000+0x1000 Section from base address and size')
        return False

    def __str__(self):
        """Describe the section as name plus whatever bounds are known."""
        if not self.name:
            return "<invalid>"
        if self.start_addr is None:
            return self.name
        if self.end_addr is None:
            return "%s=0x%16.16x" % (self.name, self.start_addr)
        return "%s=[0x%16.16x - 0x%16.16x)" % (
            self.name, self.start_addr, self.end_addr)
226
class Image:
    """A class that represents an executable image and any associated data"""

    def __init__(self, path, uuid=None):
        """Record the image path and optional UUID; all else starts empty."""
        self.path = path
        self.resolved_path = None
        self.resolved = False
        self.unavailable = False
        self.uuid = uuid
        self.section_infos = list()
        self.identifier = None
        self.version = None
        self.arch = None
        self.module = None
        self.symfile = None
        self.slide = None

    @classmethod
    def InitWithSBTargetAndSBModule(cls, target, module):
        '''Initialize this Image object with a module from a target.'''
        obj = cls(module.file.fullpath, module.uuid)
        obj.resolved_path = module.platform_file.fullpath
        obj.resolved = True
        # Only sections with a valid load address in "target" are kept.
        for section in module.sections:
            symb_section = Section.InitWithSBTargetAndSBSection(
                target, section)
            if symb_section:
                obj.section_infos.append(symb_section)
        obj.arch = module.triple
        obj.module = module
        obj.symfile = None
        obj.slide = None
        return obj

    def dump(self, prefix):
        """Print the string form of this image preceded by prefix."""
        print("%s%s" % (prefix, self))

    def debug_dump(self):
        """Print every attribute of this image, one per line."""
        print('path = "%s"' % (self.path))
        print('resolved_path = "%s"' % (self.resolved_path))
        print('resolved = %i' % (self.resolved))
        print('unavailable = %i' % (self.unavailable))
        print('uuid = %s' % (self.uuid))
        print('section_infos = %s' % (self.section_infos))
        print('identifier = "%s"' % (self.identifier))
        print('version = %s' % (self.version))
        print('arch = %s' % (self.arch))
        print('module = %s' % (self.module))
        print('symfile = "%s"' % (self.symfile))
        # slide defaults to None, which the integer formats cannot render.
        if self.slide is None:
            print('slide = None')
        else:
            print('slide = %i (0x%x)' % (self.slide, self.slide))

    def __str__(self):
        """Summarize uuid, arch, version, path, sections and slide."""
        s = ''
        if self.uuid:
            s += "%s " % (self.get_uuid())
        if self.arch:
            s += "%s " % (self.arch)
        if self.version:
            s += "%s " % (self.version)
        resolved_path = self.get_resolved_path()
        if resolved_path:
            s += "%s " % (resolved_path)
        for section_info in self.section_infos:
            s += ", %s" % (section_info)
        if self.slide is not None:
            s += ', slide = 0x%16.16x' % self.slide
        return s

    def add_section(self, section):
        """Append a section to this image's section load information."""
        self.section_infos.append(section)

    def get_section_containing_load_addr(self, load_addr):
        """Return the first section containing load_addr, or None."""
        for section_info in self.section_infos:
            if section_info.contains(load_addr):
                return section_info
        return None

    def get_resolved_path(self):
        """Return resolved_path if set, else path, else None."""
        if self.resolved_path:
            return self.resolved_path
        elif self.path:
            return self.path
        return None

    # NOTE(review): the "def" line of this method is missing from the
    # extracted listing; the name below is reconstructed and should be
    # confirmed against the original module.
    def get_resolved_path_basename(self):
        """Return the base name of the resolved path, or None."""
        path = self.get_resolved_path()
        if path:
            return os.path.basename(path)
        return None

    def symfile_basename(self):
        """Return the base name of the symbol file, or None."""
        if self.symfile:
            return os.path.basename(self.symfile)
        return None

    def has_section_load_info(self):
        """Return a truthy value when sections or a slide are available."""
        return self.section_infos or self.slide is not None

    def load_module(self, target):
        """Set this image's load addresses in "target".

        Uses the section infos when there are any, otherwise slides the
        whole module by self.slide. Returns None on success (or when the
        image is marked unavailable) and an error string on failure.
        """
        if self.unavailable:
            # We already warned that we couldn't find this module, so don't
            # return an error string
            return None
        if not self.has_section_load_info():
            return 'error: no section infos'
        if not target:
            return 'error: invalid target'
        if not self.module:
            return 'error: invalid module'

        if self.section_infos:
            # Load each named section at its recorded start address.
            num_sections_loaded = 0
            for section_info in self.section_infos:
                if not section_info.name:
                    return 'error: unable to find "%s" section in "%s"' % (
                        section_info.name, self.get_resolved_path())
                section = self.module.FindSection(section_info.name)
                if not section:
                    return 'error: unable to find the section named "%s"' % section_info.name
                error = target.SetSectionLoadAddress(
                    section, section_info.start_addr)
                if not error.Success():
                    return 'error: %s' % error.GetCString()
                num_sections_loaded += 1
            if num_sections_loaded == 0:
                return 'error: no sections were successfully loaded'
        else:
            err = target.SetModuleLoadAddress(self.module, self.slide)
            if err.Fail():
                return err.GetCString()
        return None

    def add_module(self, target):
        '''Add the Image described in this object to "target" and load its sections when section load info is available.

        Returns None on success (or when the image is unavailable) and an
        error string on failure.'''
        if not target:
            return 'error: invalid target'
        # Try and find using UUID only first so that paths need not match
        # up
        uuid_str = self.get_normalized_uuid_string()
        if uuid_str:
            self.module = target.AddModule(None, None, uuid_str)
        if not self.module:
            self.locate_module_and_debug_symbols()
            if self.unavailable:
                return None
            resolved_path = self.get_resolved_path()
            self.module = target.AddModule(
                resolved_path, self.arch, uuid_str, self.symfile)
        if not self.module:
            return 'error: unable to get module for (%s) "%s"' % (
                self.arch, self.get_resolved_path())
        if self.has_section_load_info():
            return self.load_module(target)
        # No sections, the module was added to the target, so success
        return None

    # NOTE(review): the "def" line of this method is missing from the
    # extracted listing; the name is taken from the calls made in
    # add_module() and create_target().
    def locate_module_and_debug_symbols(self):
        """Mark the image as resolved and return True.

        By default, just use the paths that were supplied in:
            self.path
            self.resolved_path
            self.module
            self.symfile
        Subclasses can inherit from this class and override this function.
        """
        self.resolved = True
        return True

    def get_uuid(self):
        """Return the UUID, reading it from the module when not yet set."""
        if not self.uuid and self.module:
            self.uuid = uuid.UUID(self.module.GetUUIDString())
        return self.uuid

    # NOTE(review): the "def" line of this method is missing from the
    # extracted listing; the name is taken from the call made in
    # add_module().
    def get_normalized_uuid_string(self):
        """Return the UUID as an upper-case string, or None if unset."""
        if self.uuid:
            return str(self.uuid).upper()
        return None

    def create_target(self):
        '''Create a target using the information in this Image object.

        Returns the new target, or None when the image is unavailable,
        cannot be located, or no valid target could be created.'''
        if self.unavailable:
            return None

        if self.locate_module_and_debug_symbols():
            resolved_path = self.get_resolved_path()
            path_spec = lldb.SBFileSpec(resolved_path)
            error = lldb.SBError()
            target = lldb.debugger.CreateTarget(
                resolved_path, self.arch, None, False, error)
            if target:
                self.module = target.FindModule(path_spec)
                if self.has_section_load_info():
                    err = self.load_module(target)
                    if err:
                        # A load failure is reported but the target is
                        # still returned.
                        print('ERROR:  %s' % err)
                return target
            else:
                print('error: unable to create a valid target for (%s) "%s"' % (self.arch, self.path))
        else:
            print('error: unable to locate main executable (%s) "%s"' % (self.arch, self.path))
        return None
438
class Symbolicator:
    """A class that represents the information needed to symbolicate addresses in a program"""

    def __init__(self):
        """Start with no target, no images and a full 64-bit address mask."""
        self.target = None
        # a list of images to be used when symbolicating
        self.images = list()
        self.addr_mask = 0xffffffffffffffff

    @classmethod
    def InitWithSBTarget(cls, target):
        """Create a Symbolicator with one Image per module of "target"."""
        obj = cls()
        obj.target = target
        obj.images = list()
        triple = target.triple
        if triple:
            arch = triple.split('-')[0]
            # For arm architectures the lowest address bit is masked off.
            if "arm" in arch:
                obj.addr_mask = 0xfffffffffffffffe

        for module in target.modules:
            image = Image.InitWithSBTargetAndSBModule(target, module)
            obj.images.append(image)
        return obj

    def __str__(self):
        """Describe the target, its modules and the known images."""
        s = "Symbolicator:\n"
        if self.target:
            s += "Target = '%s'\n" % (self.target)
            s += "Target modules:\n"
            for m in self.target.modules:
                s += str(m) + "\n"
        s += "Images:\n"
        for image in self.images:
            s += ' %s\n' % (image)
        return s

    def find_images_with_identifier(self, identifier):
        """Return the images whose identifier matches "identifier".

        Exact matches win. When there are none, images whose identifier
        ends with ".<identifier>" are returned instead. Images that have
        no identifier set are never matched.
        """
        images = [image for image in self.images
                  if image.identifier == identifier]
        if not images:
            regex = re.compile(r'^.*\.%s$' % (re.escape(identifier)))
            for image in self.images:
                # identifier defaults to None, which regex.match rejects.
                if image.identifier is None:
                    continue
                if regex.match(image.identifier):
                    images.append(image)
        return images

    def find_image_containing_load_addr(self, load_addr):
        """Return the first image with a section containing load_addr."""
        for image in self.images:
            if image.get_section_containing_load_addr(load_addr):
                return image
        return None

    def create_target(self):
        """Return the existing target or create one from the images.

        Every image is asked to create a target in turn and the first
        success is kept. Returns None when no image yields a target.
        """
        if self.target:
            return self.target

        for image in self.images:
            self.target = image.create_target()
            if not self.target:
                continue
            if self.target.GetAddressByteSize() == 4:
                triple = self.target.triple
                if triple:
                    arch = triple.split('-')[0]
                    if "arm" in arch:
                        self.addr_mask = 0xfffffffffffffffe
            return self.target
        return None

    def symbolicate(self, load_addr, verbose=False):
        """Symbolicate load_addr, expanding any inlined frames.

        Returns a list of Address objects: the address itself followed by
        one entry for each enclosing inlined scope. Returns None when
        there is no target or the address could not be symbolicated.
        """
        if not self.target:
            self.create_target()
        if not self.target:
            print('error: no target in Symbolicator')
            return None

        live_process = False
        process = self.target.process
        if process:
            state = process.state
            if state > lldb.eStateUnloaded and state < lldb.eStateDetached:
                live_process = True
        # If we don't have a live process, we can attempt to find the image
        # that a load address belongs to and lazily load its module in the
        # target, but we shouldn't do any of this if we have a live process
        if not live_process:
            image = self.find_image_containing_load_addr(load_addr)
            if image:
                image.add_module(self.target)

        symbolicated_address = Address(self.target, load_addr)
        if not symbolicated_address.symbolicate(verbose):
            return None
        if not symbolicated_address.so_addr:
            return None

        symbolicated_addresses = [symbolicated_address]
        # See if we were able to reconstruct anything?
        while True:
            inlined_parent_so_addr = lldb.SBAddress()
            inlined_parent_sym_ctx = symbolicated_address.sym_ctx.GetParentOfInlinedScope(
                symbolicated_address.so_addr, inlined_parent_so_addr)
            if not inlined_parent_sym_ctx:
                break
            if not inlined_parent_so_addr:
                break

            symbolicated_address = Address(
                self.target,
                inlined_parent_so_addr.GetLoadAddress(self.target))
            symbolicated_address.sym_ctx = inlined_parent_sym_ctx
            symbolicated_address.so_addr = inlined_parent_so_addr
            symbolicated_address.symbolicate(verbose)

            # push the new frame onto the new frame stack
            symbolicated_addresses.append(symbolicated_address)
        return symbolicated_addresses
559
def disassemble_instructions(
        target,
        instructions,
        pc,
        insts_before_pc,
        insts_after_pc,
        non_zeroeth_frame):
    """Print a window of disassembly around the instruction at pc.

    target is passed to each instruction to obtain its load address,
    mnemonic, operands and comment.
    insts_before_pc / insts_after_pc give the number of instructions to
    show on each side of the marked instruction; -1 means "all the way to
    the start" and "all the way to the end" respectively.
    non_zeroeth_frame, when true, moves the marker back one instruction.

    Nothing is printed when no instruction has a load address equal to pc.
    """
    lines = list()
    pc_index = -1
    comment_column = 50
    for inst_idx, inst in enumerate(instructions):
        inst_pc = inst.GetAddress().GetLoadAddress(target)
        if pc == inst_pc:
            pc_index = inst_idx
        mnemonic = inst.GetMnemonic(target)
        operands = inst.GetOperands(target)
        comment = inst.GetComment(target)
        line = "%#16.16x: %8s %s" % (inst_pc, mnemonic, operands)
        if comment:
            # Pad short lines so comments start in the same column.
            line = line.ljust(comment_column) + "; %s" % comment
        lines.append(line)

    if pc_index < 0:
        return

    last_idx = len(lines) - 1
    # If we are disassembling the non-zeroeth frame, we need to backup the
    # PC by 1
    if non_zeroeth_frame and pc_index > 0:
        pc_index = pc_index - 1

    if insts_before_pc == -1:
        start_idx = 0
    else:
        start_idx = max(pc_index - insts_before_pc, 0)

    # The end of the window is governed by insts_after_pc.
    if insts_after_pc == -1:
        end_idx = last_idx
    else:
        end_idx = min(pc_index + insts_after_pc, last_idx)

    for i in range(start_idx, end_idx + 1):
        # Both prefixes have the same width so the columns line up.
        if i == pc_index:
            marker = ' -> '
        else:
            marker = '    '
        print('%s %s' % (marker, lines[i]))
608 617 626 631 636
def Symbolicate(command_args):
    """Parse command_args and print the symbolication of each address.

    command_args is a list of option and address strings (without the
    program name). Addresses are parsed with int(addr, 0), so decimal and
    0x-prefixed hexadecimal are both accepted. When option parsing ends
    the run (for example --help or an unknown option) the function simply
    returns. An invalid --section string terminates the process with
    exit status 1.
    """
    usage = "usage: %prog [options] <addr1> [addr2 ...]"
    description = '''Symbolicate one or more addresses using LLDB's python scripting API..'''
    parser = optparse.OptionParser(
        description=description,
        prog='crashlog.py',
        usage=usage)
    parser.add_option(
        '-v',
        '--verbose',
        action='store_true',
        dest='verbose',
        help='display verbose debug info',
        default=False)
    parser.add_option(
        '-p',
        '--platform',
        type='string',
        metavar='platform',
        dest='platform',
        help='Specify the platform to use when creating the debug target. Valid values include "localhost", "darwin-kernel", "ios-simulator", "remote-freebsd", "remote-macosx", "remote-ios", "remote-linux".')
    parser.add_option(
        '-f',
        '--file',
        type='string',
        metavar='file',
        dest='file',
        help='Specify a file to use when symbolicating')
    parser.add_option(
        '-a',
        '--arch',
        type='string',
        metavar='arch',
        dest='arch',
        help='Specify a architecture to use when symbolicating')
    parser.add_option(
        '-s',
        '--slide',
        type='int',
        metavar='slide',
        dest='slide',
        help='Specify the slide to use on the file specified with the --file option',
        default=None)
    parser.add_option(
        '--section',
        type='string',
        action='append',
        dest='section_strings',
        help='specify <sect-name>=<start-addr> or <sect-name>=<start-addr>-<end-addr>')
    try:
        (options, args) = parser.parse_args(command_args)
    except SystemExit:
        # optparse exits on --help and on bad options; return instead so
        # the calling interpreter keeps running.
        return

    symbolicator = Symbolicator()
    if options.file:
        image = Image(options.file)
        image.arch = options.arch
        # Add any sections that were specified with one or more --section
        # options
        if options.section_strings:
            for section_str in options.section_strings:
                section = Section()
                if section.set_from_string(section_str):
                    image.add_section(section)
                else:
                    sys.exit(1)
        if options.slide is not None:
            image.slide = options.slide
        symbolicator.images.append(image)

    target = symbolicator.create_target()
    if options.verbose:
        print(symbolicator)
    if not target:
        print('error: no target for %s' % (symbolicator))
        return

    for addr_str in args:
        addr = int(addr_str, 0)
        symbolicated_addrs = symbolicator.symbolicate(addr, options.verbose)
        # symbolicate() returns None when the address cannot be
        # symbolicated; treat that as "nothing to print".
        for symbolicated_addr in symbolicated_addrs or ():
            print(symbolicated_addr)
        print('')
if __name__ == '__main__':
    # Running as a script: install a fresh debugger instance on the lldb
    # module, then symbolicate using the command line arguments that
    # follow the program name.
    cli_args = sys.argv[1:]
    lldb.debugger = lldb.SBDebugger.Create()
    Symbolicate(cli_args)