# synthetic children and summary provider for CFString # (and related NSString class) import lldb def CFString_SummaryProvider (valobj,dict): provider = CFStringSynthProvider(valobj,dict); if provider.invalid == False: return '@'+provider.get_child_at_index(provider.get_child_index("content")).GetSummary(); return '' class CFStringSynthProvider: def __init__(self,valobj,dict): self.valobj = valobj; self.update() # children other than "content" are for debugging only and must not be used in production code def num_children(self): if self.invalid: return 0; return 6; def read_unicode(self, pointer): process = self.valobj.GetTarget().GetProcess() error = lldb.SBError() pystr = u'' # cannot do the read at once because the length value has # a weird encoding. better play it safe here while True: content = process.ReadMemory(pointer, 2, error) new_bytes = bytearray(content) b0 = new_bytes[0] b1 = new_bytes[1] pointer = pointer + 2 if b0 == 0 and b1 == 0: break # rearrange bytes depending on endianness # (do we really need this or is Cocoa going to # use Windows-compatible little-endian even # if the target is big endian?) if self.is_little: value = b1 * 256 + b0 else: value = b0 * 256 + b1 pystr = pystr + unichr(value) return pystr # handle the special case strings # only use the custom code for the tested LP64 case def handle_special(self): if self.lp64 == False: # for 32bit targets, use safe ObjC code return self.handle_unicode_string_safe() offset = 12 pointer = self.valobj.GetValueAsUnsigned(0) + offset pystr = self.read_unicode(pointer) return self.valobj.CreateValueFromExpression("content", "(char*)\"" + pystr.encode('utf-8') + "\"") # last resort call, use ObjC code to read; the final aim is to # be able to strip this call away entirely and only do the read # ourselves def handle_unicode_string_safe(self): return self.valobj.CreateValueFromExpression("content", "(char*)\"" + self.valobj.GetObjectDescription() + "\""); def handle_unicode_string(self): # step 1: find offset if self.inline: pointer = self.valobj.GetValueAsUnsigned(0) + self.size_of_cfruntime_base(); if self.explicit == False: # untested, use the safe code path return self.handle_unicode_string_safe(); else: # not sure why 8 bytes are skipped here # (lldb) mem read -c 50 0x00000001001154f0 # 0x1001154f0: 98 1a 85 71 ff 7f 00 00 90 07 00 00 01 00 00 00 ...q?........... # 0x100115500: 03 00 00 00 00 00 00 00 *c3 03 78 00 78 00 00 00 ........?.x.x... # 0x100115510: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ # 0x100115520: 00 00 .. # content begins at * (i.e. 8 bytes into variants, skipping void* buffer in # __notInlineImmutable1 entirely, while the length byte is correctly located # for an inline string) pointer = pointer + 8; else: pointer = self.valobj.GetValueAsUnsigned(0) + self.size_of_cfruntime_base(); # read 8 bytes here and make an address out of them vopointer = self.valobj.CreateChildAtOffset("dummy", pointer,self.valobj.GetType().GetBasicType(lldb.eBasicTypeChar).GetPointerType()); pointer = vopointer.GetValueAsUnsigned(0) # step 2: read Unicode data at pointer pystr = self.read_unicode(pointer) # step 3: return it return self.valobj.CreateValueFromExpression("content", "(char*)\"" + pystr.encode('utf-8') + "\"") # we read at "the right place" into the __CFString object instead of running code # we are replicating the functionality of __CFStrContents in CFString.c here def handle_UTF8_inline(self): offset = int(self.valobj.GetValue(), 0) + self.size_of_cfruntime_base(); if self.explicit == False: offset = offset + 1; return self.valobj.CreateValueFromAddress("content", offset, self.valobj.GetType().GetBasicType(lldb.eBasicTypeChar)).AddressOf(); def handle_UTF8_not_inline(self): offset = self.size_of_cfruntime_base(); return self.valobj.CreateChildAtOffset("content", offset,self.valobj.GetType().GetBasicType(lldb.eBasicTypeChar).GetPointerType()); def get_child_at_index(self,index): if index == 0: return self.valobj.CreateValueFromExpression("mutable", str(int(self.mutable))); if index == 1: return self.valobj.CreateValueFromExpression("inline", str(int(self.inline))); if index == 2: return self.valobj.CreateValueFromExpression("explicit", str(int(self.explicit))); if index == 3: return self.valobj.CreateValueFromExpression("unicode", str(int(self.unicode))); if index == 4: return self.valobj.CreateValueFromExpression("special", str(int(self.special))); if index == 5: if self.unicode == True: return self.handle_unicode_string(); elif self.special == True: return self.handle_special(); elif self.inline == True: return self.handle_UTF8_inline(); else: return self.handle_UTF8_not_inline(); def get_child_index(self,name): if name == "content": return self.num_children() - 1; if name == "mutable": return 0; if name == "inline": return 1; if name == "explicit": return 2; if name == "unicode": return 3; if name == "special": return 4; def is_64bit(self): return self.valobj.GetTarget().GetProcess().GetAddressByteSize() == 8 def is_little_endian(self): return self.valobj.GetTarget().GetProcess().GetByteOrder() == lldb.eByteOrderLittle # CFRuntimeBase is defined as having an additional # 4 bytes (padding?) on LP64 architectures # to get its size we add up sizeof(pointer)+4 # and then add 4 more bytes if we are on a 64bit system def size_of_cfruntime_base(self): if self.lp64 == True: return 8+4+4; else: return 4+4; # the info bits are part of the CFRuntimeBase structure # to get at them we have to skip a uintptr_t and then get # at the least-significant byte of a 4 byte array. If we are # on big-endian this means going to byte 3, if we are on # little endian (OSX & iOS), this means reading byte 0 def offset_of_info_bits(self): if self.lp64 == True: offset = 8; else: offset = 4; if self.is_little == False: offset = offset + 3; return offset; def read_info_bits(self): cfinfo = self.valobj.CreateChildAtOffset("cfinfo", self.offset_of_info_bits(), self.valobj.GetType().GetBasicType(lldb.eBasicTypeChar)); cfinfo.SetFormat(11) info = cfinfo.GetValue(); if info != None: self.invalid = False; return int(info,0); else: self.invalid = True; return None; # calculating internal flag bits of the CFString object # this stuff is defined and discussed in CFString.c def is_mutable(self): return (self.info_bits & 1) == 1; def is_inline(self): return (self.info_bits & 0x60) == 0; # this flag's name is ambiguous, it turns out # we must skip a length byte to get at the data # when this flag is False def has_explicit_length(self): return (self.info_bits & (1 | 4)) != 4; # probably a subclass of NSString. obtained this from [str pathExtension] # here info_bits = 0 and Unicode data at the start of the padding word # in the long run using the isa value might be safer as a way to identify this # instead of reading the info_bits def is_special_case(self): return self.info_bits == 0; def is_unicode(self): return (self.info_bits & 0x10) == 0x10; # preparing ourselves to read into memory # by adjusting architecture-specific info def adjust_for_architecture(self): self.lp64 = self.is_64bit(); self.is_little = self.is_little_endian(); # reading info bits out of the CFString and computing # useful values to get at the real data def compute_flags(self): self.info_bits = self.read_info_bits(); if self.info_bits == None: return; self.mutable = self.is_mutable(); self.inline = self.is_inline(); self.explicit = self.has_explicit_length(); self.unicode = self.is_unicode(); self.special = self.is_special_case(); def update(self): self.adjust_for_architecture(); self.compute_flags();