//===-- MinidumpParser.cpp ---------------------------------------*- C++ -*-===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// #include "MinidumpParser.h" #include "NtStructures.h" #include "RegisterContextMinidump_x86_32.h" #include "Plugins/Process/Utility/LinuxProcMaps.h" #include "lldb/Utility/LLDBAssert.h" #include "lldb/Utility/Log.h" // C includes // C++ includes #include #include #include #include using namespace lldb_private; using namespace minidump; llvm::Expected MinidumpParser::Create(const lldb::DataBufferSP &data_sp) { auto ExpectedFile = llvm::object::MinidumpFile::create( llvm::MemoryBufferRef(toStringRef(data_sp->GetData()), "minidump")); if (!ExpectedFile) return ExpectedFile.takeError(); return MinidumpParser(data_sp, std::move(*ExpectedFile)); } MinidumpParser::MinidumpParser(lldb::DataBufferSP data_sp, std::unique_ptr file) : m_data_sp(std::move(data_sp)), m_file(std::move(file)) {} llvm::ArrayRef MinidumpParser::GetData() { return llvm::ArrayRef(m_data_sp->GetBytes(), m_data_sp->GetByteSize()); } llvm::ArrayRef MinidumpParser::GetStream(StreamType stream_type) { return m_file->getRawStream(stream_type) .getValueOr(llvm::ArrayRef()); } UUID MinidumpParser::GetModuleUUID(const minidump::Module *module) { auto cv_record = GetData().slice(module->CvRecord.RVA, module->CvRecord.DataSize); // Read the CV record signature const llvm::support::ulittle32_t *signature = nullptr; Status error = consumeObject(cv_record, signature); if (error.Fail()) return UUID(); const CvSignature cv_signature = static_cast(static_cast(*signature)); if (cv_signature == CvSignature::Pdb70) { const CvRecordPdb70 *pdb70_uuid = nullptr; Status error = consumeObject(cv_record, pdb70_uuid); if (error.Fail()) return UUID(); CvRecordPdb70 swapped; if (!GetArchitecture().GetTriple().isOSBinFormatELF()) { // LLDB's UUID class treats the data as a sequence of bytes, but breakpad // interprets it as a sequence of little-endian fields, which it converts // to big-endian when converting to text. Swap the bytes to big endian so // that the string representation comes out right. swapped = *pdb70_uuid; llvm::sys::swapByteOrder(swapped.Uuid.Data1); llvm::sys::swapByteOrder(swapped.Uuid.Data2); llvm::sys::swapByteOrder(swapped.Uuid.Data3); llvm::sys::swapByteOrder(swapped.Age); pdb70_uuid = &swapped; } if (pdb70_uuid->Age != 0) return UUID::fromOptionalData(pdb70_uuid, sizeof(*pdb70_uuid)); return UUID::fromOptionalData(&pdb70_uuid->Uuid, sizeof(pdb70_uuid->Uuid)); } else if (cv_signature == CvSignature::ElfBuildId) return UUID::fromOptionalData(cv_record); return UUID(); } llvm::ArrayRef MinidumpParser::GetThreads() { auto ExpectedThreads = GetMinidumpFile().getThreadList(); if (ExpectedThreads) return *ExpectedThreads; LLDB_LOG_ERROR(GetLogIfAnyCategoriesSet(LIBLLDB_LOG_THREAD), ExpectedThreads.takeError(), "Failed to read thread list: {0}"); return {}; } llvm::ArrayRef MinidumpParser::GetThreadContext(const LocationDescriptor &location) { if (location.RVA + location.DataSize > GetData().size()) return {}; return GetData().slice(location.RVA, location.DataSize); } llvm::ArrayRef MinidumpParser::GetThreadContext(const minidump::Thread &td) { return GetThreadContext(td.Context); } llvm::ArrayRef MinidumpParser::GetThreadContextWow64(const minidump::Thread &td) { // On Windows, a 32-bit process can run on a 64-bit machine under WOW64. If // the minidump was captured with a 64-bit debugger, then the CONTEXT we just // grabbed from the mini_dump_thread is the one for the 64-bit "native" // process rather than the 32-bit "guest" process we care about. In this // case, we can get the 32-bit CONTEXT from the TEB (Thread Environment // Block) of the 64-bit process. auto teb_mem = GetMemory(td.EnvironmentBlock, sizeof(TEB64)); if (teb_mem.empty()) return {}; const TEB64 *wow64teb; Status error = consumeObject(teb_mem, wow64teb); if (error.Fail()) return {}; // Slot 1 of the thread-local storage in the 64-bit TEB points to a structure // that includes the 32-bit CONTEXT (after a ULONG). See: // https://msdn.microsoft.com/en-us/library/ms681670.aspx auto context = GetMemory(wow64teb->tls_slots[1] + 4, sizeof(MinidumpContext_x86_32)); if (context.size() < sizeof(MinidumpContext_x86_32)) return {}; return context; // NOTE: We don't currently use the TEB for anything else. If we // need it in the future, the 32-bit TEB is located according to the address // stored in the first slot of the 64-bit TEB (wow64teb.Reserved1[0]). } ArchSpec MinidumpParser::GetArchitecture() { if (m_arch.IsValid()) return m_arch; // Set the architecture in m_arch llvm::Expected system_info = m_file->getSystemInfo(); if (!system_info) { LLDB_LOG_ERROR(GetLogIfAnyCategoriesSet(LIBLLDB_LOG_PROCESS), system_info.takeError(), "Failed to read SystemInfo stream: {0}"); return m_arch; } // TODO what to do about big endiand flavors of arm ? // TODO set the arm subarch stuff if the minidump has info about it llvm::Triple triple; triple.setVendor(llvm::Triple::VendorType::UnknownVendor); switch (system_info->ProcessorArch) { case ProcessorArchitecture::X86: triple.setArch(llvm::Triple::ArchType::x86); break; case ProcessorArchitecture::AMD64: triple.setArch(llvm::Triple::ArchType::x86_64); break; case ProcessorArchitecture::ARM: triple.setArch(llvm::Triple::ArchType::arm); break; case ProcessorArchitecture::ARM64: case ProcessorArchitecture::BP_ARM64: triple.setArch(llvm::Triple::ArchType::aarch64); break; default: triple.setArch(llvm::Triple::ArchType::UnknownArch); break; } // TODO add all of the OSes that Minidump/breakpad distinguishes? switch (system_info->PlatformId) { case OSPlatform::Win32S: case OSPlatform::Win32Windows: case OSPlatform::Win32NT: case OSPlatform::Win32CE: triple.setOS(llvm::Triple::OSType::Win32); triple.setVendor(llvm::Triple::VendorType::PC); break; case OSPlatform::Linux: triple.setOS(llvm::Triple::OSType::Linux); break; case OSPlatform::MacOSX: triple.setOS(llvm::Triple::OSType::MacOSX); triple.setVendor(llvm::Triple::Apple); break; case OSPlatform::IOS: triple.setOS(llvm::Triple::OSType::IOS); triple.setVendor(llvm::Triple::Apple); break; case OSPlatform::Android: triple.setOS(llvm::Triple::OSType::Linux); triple.setEnvironment(llvm::Triple::EnvironmentType::Android); break; default: { triple.setOS(llvm::Triple::OSType::UnknownOS); auto ExpectedCSD = m_file->getString(system_info->CSDVersionRVA); if (!ExpectedCSD) { LLDB_LOG_ERROR(GetLogIfAnyCategoriesSet(LIBLLDB_LOG_PROCESS), ExpectedCSD.takeError(), "Failed to CSD Version string: {0}"); } else { if (ExpectedCSD->find("Linux") != std::string::npos) triple.setOS(llvm::Triple::OSType::Linux); } break; } } m_arch.SetTriple(triple); return m_arch; } const MinidumpMiscInfo *MinidumpParser::GetMiscInfo() { llvm::ArrayRef data = GetStream(StreamType::MiscInfo); if (data.size() == 0) return nullptr; return MinidumpMiscInfo::Parse(data); } llvm::Optional MinidumpParser::GetLinuxProcStatus() { llvm::ArrayRef data = GetStream(StreamType::LinuxProcStatus); if (data.size() == 0) return llvm::None; return LinuxProcStatus::Parse(data); } llvm::Optional MinidumpParser::GetPid() { const MinidumpMiscInfo *misc_info = GetMiscInfo(); if (misc_info != nullptr) { return misc_info->GetPid(); } llvm::Optional proc_status = GetLinuxProcStatus(); if (proc_status.hasValue()) { return proc_status->GetPid(); } return llvm::None; } llvm::ArrayRef MinidumpParser::GetModuleList() { auto ExpectedModules = GetMinidumpFile().getModuleList(); if (ExpectedModules) return *ExpectedModules; LLDB_LOG_ERROR(GetLogIfAnyCategoriesSet(LIBLLDB_LOG_MODULES), ExpectedModules.takeError(), "Failed to read module list: {0}"); return {}; } std::vector MinidumpParser::GetFilteredModuleList() { Log *log = GetLogIfAnyCategoriesSet(LIBLLDB_LOG_MODULES); auto ExpectedModules = GetMinidumpFile().getModuleList(); if (!ExpectedModules) { LLDB_LOG_ERROR(log, ExpectedModules.takeError(), "Failed to read module list: {0}"); return {}; } // map module_name -> filtered_modules index typedef llvm::StringMap MapType; MapType module_name_to_filtered_index; std::vector filtered_modules; for (const auto &module : *ExpectedModules) { auto ExpectedName = m_file->getString(module.ModuleNameRVA); if (!ExpectedName) { LLDB_LOG_ERROR(log, ExpectedName.takeError(), "Failed to get module name: {0}"); continue; } MapType::iterator iter; bool inserted; // See if we have inserted this module aready into filtered_modules. If we // haven't insert an entry into module_name_to_filtered_index with the // index where we will insert it if it isn't in the vector already. std::tie(iter, inserted) = module_name_to_filtered_index.try_emplace( *ExpectedName, filtered_modules.size()); if (inserted) { // This module has not been seen yet, insert it into filtered_modules at // the index that was inserted into module_name_to_filtered_index using // "filtered_modules.size()" above. filtered_modules.push_back(&module); } else { // This module has been seen. Modules are sometimes mentioned multiple // times when they are mapped discontiguously, so find the module with // the lowest "base_of_image" and use that as the filtered module. auto dup_module = filtered_modules[iter->second]; if (module.BaseOfImage < dup_module->BaseOfImage) filtered_modules[iter->second] = &module; } } return filtered_modules; } const minidump::ExceptionStream *MinidumpParser::GetExceptionStream() { auto ExpectedStream = GetMinidumpFile().getExceptionStream(); if (ExpectedStream) return &*ExpectedStream; LLDB_LOG_ERROR(GetLogIfAnyCategoriesSet(LIBLLDB_LOG_PROCESS), ExpectedStream.takeError(), "Failed to read minidump exception stream: {0}"); return nullptr; } llvm::Optional MinidumpParser::FindMemoryRange(lldb::addr_t addr) { llvm::ArrayRef data64 = GetStream(StreamType::Memory64List); Log *log = GetLogIfAnyCategoriesSet(LIBLLDB_LOG_MODULES); auto ExpectedMemory = GetMinidumpFile().getMemoryList(); if (!ExpectedMemory) { LLDB_LOG_ERROR(log, ExpectedMemory.takeError(), "Failed to read memory list: {0}"); } else { for (const auto &memory_desc : *ExpectedMemory) { const LocationDescriptor &loc_desc = memory_desc.Memory; const lldb::addr_t range_start = memory_desc.StartOfMemoryRange; const size_t range_size = loc_desc.DataSize; if (loc_desc.RVA + loc_desc.DataSize > GetData().size()) return llvm::None; if (range_start <= addr && addr < range_start + range_size) { auto ExpectedSlice = GetMinidumpFile().getRawData(loc_desc); if (!ExpectedSlice) { LLDB_LOG_ERROR(log, ExpectedSlice.takeError(), "Failed to get memory slice: {0}"); return llvm::None; } return minidump::Range(range_start, *ExpectedSlice); } } } // Some Minidumps have a Memory64ListStream that captures all the heap memory // (full-memory Minidumps). We can't exactly use the same loop as above, // because the Minidump uses slightly different data structures to describe // those if (!data64.empty()) { llvm::ArrayRef memory64_list; uint64_t base_rva; std::tie(memory64_list, base_rva) = MinidumpMemoryDescriptor64::ParseMemory64List(data64); if (memory64_list.empty()) return llvm::None; for (const auto &memory_desc64 : memory64_list) { const lldb::addr_t range_start = memory_desc64.start_of_memory_range; const size_t range_size = memory_desc64.data_size; if (base_rva + range_size > GetData().size()) return llvm::None; if (range_start <= addr && addr < range_start + range_size) { return minidump::Range(range_start, GetData().slice(base_rva, range_size)); } base_rva += range_size; } } return llvm::None; } llvm::ArrayRef MinidumpParser::GetMemory(lldb::addr_t addr, size_t size) { // I don't have a sense of how frequently this is called or how many memory // ranges a Minidump typically has, so I'm not sure if searching for the // appropriate range linearly each time is stupid. Perhaps we should build // an index for faster lookups. llvm::Optional range = FindMemoryRange(addr); if (!range) return {}; // There's at least some overlap between the beginning of the desired range // (addr) and the current range. Figure out where the overlap begins and how // much overlap there is. const size_t offset = addr - range->start; if (addr < range->start || offset >= range->range_ref.size()) return {}; const size_t overlap = std::min(size, range->range_ref.size() - offset); return range->range_ref.slice(offset, overlap); } static bool CreateRegionsCacheFromLinuxMaps(MinidumpParser &parser, std::vector ®ions) { auto data = parser.GetStream(StreamType::LinuxMaps); if (data.empty()) return false; ParseLinuxMapRegions(llvm::toStringRef(data), [&](const lldb_private::MemoryRegionInfo ®ion, const lldb_private::Status &status) -> bool { if (status.Success()) regions.push_back(region); return true; }); return !regions.empty(); } static bool CreateRegionsCacheFromMemoryInfoList(MinidumpParser &parser, std::vector ®ions) { Log *log = GetLogIfAnyCategoriesSet(LIBLLDB_LOG_MODULES); auto ExpectedInfo = parser.GetMinidumpFile().getMemoryInfoList(); if (!ExpectedInfo) { LLDB_LOG_ERROR(log, ExpectedInfo.takeError(), "Failed to read memory info list: {0}"); return false; } constexpr auto yes = MemoryRegionInfo::eYes; constexpr auto no = MemoryRegionInfo::eNo; for (const MemoryInfo &entry : *ExpectedInfo) { MemoryRegionInfo region; region.GetRange().SetRangeBase(entry.BaseAddress); region.GetRange().SetByteSize(entry.RegionSize); MemoryProtection prot = entry.Protect; region.SetReadable(bool(prot & MemoryProtection::NoAccess) ? no : yes); region.SetWritable( bool(prot & (MemoryProtection::ReadWrite | MemoryProtection::WriteCopy | MemoryProtection::ExecuteReadWrite | MemoryProtection::ExeciteWriteCopy)) ? yes : no); region.SetExecutable( bool(prot & (MemoryProtection::Execute | MemoryProtection::ExecuteRead | MemoryProtection::ExecuteReadWrite | MemoryProtection::ExeciteWriteCopy)) ? yes : no); region.SetMapped(entry.State != MemoryState::Free ? yes : no); regions.push_back(region); } return !regions.empty(); } static bool CreateRegionsCacheFromMemoryList(MinidumpParser &parser, std::vector ®ions) { Log *log = GetLogIfAnyCategoriesSet(LIBLLDB_LOG_MODULES); auto ExpectedMemory = parser.GetMinidumpFile().getMemoryList(); if (!ExpectedMemory) { LLDB_LOG_ERROR(log, ExpectedMemory.takeError(), "Failed to read memory list: {0}"); return false; } regions.reserve(ExpectedMemory->size()); for (const MemoryDescriptor &memory_desc : *ExpectedMemory) { if (memory_desc.Memory.DataSize == 0) continue; MemoryRegionInfo region; region.GetRange().SetRangeBase(memory_desc.StartOfMemoryRange); region.GetRange().SetByteSize(memory_desc.Memory.DataSize); region.SetReadable(MemoryRegionInfo::eYes); region.SetMapped(MemoryRegionInfo::eYes); regions.push_back(region); } regions.shrink_to_fit(); return !regions.empty(); } static bool CreateRegionsCacheFromMemory64List(MinidumpParser &parser, std::vector ®ions) { llvm::ArrayRef data = parser.GetStream(StreamType::Memory64List); if (data.empty()) return false; llvm::ArrayRef memory64_list; uint64_t base_rva; std::tie(memory64_list, base_rva) = MinidumpMemoryDescriptor64::ParseMemory64List(data); if (memory64_list.empty()) return false; regions.reserve(memory64_list.size()); for (const auto &memory_desc : memory64_list) { if (memory_desc.data_size == 0) continue; MemoryRegionInfo region; region.GetRange().SetRangeBase(memory_desc.start_of_memory_range); region.GetRange().SetByteSize(memory_desc.data_size); region.SetReadable(MemoryRegionInfo::eYes); region.SetMapped(MemoryRegionInfo::eYes); regions.push_back(region); } regions.shrink_to_fit(); return !regions.empty(); } std::pair MinidumpParser::BuildMemoryRegions() { // We create the region cache using the best source. We start with // the linux maps since they are the most complete and have names for the // regions. Next we try the MemoryInfoList since it has // read/write/execute/map data, and then fall back to the MemoryList and // Memory64List to just get a list of the memory that is mapped in this // core file MemoryRegionInfos result; const auto &return_sorted = [&](bool is_complete) { llvm::sort(result); return std::make_pair(std::move(result), is_complete); }; if (CreateRegionsCacheFromLinuxMaps(*this, result)) return return_sorted(true); if (CreateRegionsCacheFromMemoryInfoList(*this, result)) return return_sorted(true); if (CreateRegionsCacheFromMemoryList(*this, result)) return return_sorted(false); CreateRegionsCacheFromMemory64List(*this, result); return return_sorted(false); } #define ENUM_TO_CSTR(ST) \ case StreamType::ST: \ return #ST llvm::StringRef MinidumpParser::GetStreamTypeAsString(StreamType stream_type) { switch (stream_type) { ENUM_TO_CSTR(Unused); ENUM_TO_CSTR(ThreadList); ENUM_TO_CSTR(ModuleList); ENUM_TO_CSTR(MemoryList); ENUM_TO_CSTR(Exception); ENUM_TO_CSTR(SystemInfo); ENUM_TO_CSTR(ThreadExList); ENUM_TO_CSTR(Memory64List); ENUM_TO_CSTR(CommentA); ENUM_TO_CSTR(CommentW); ENUM_TO_CSTR(HandleData); ENUM_TO_CSTR(FunctionTable); ENUM_TO_CSTR(UnloadedModuleList); ENUM_TO_CSTR(MiscInfo); ENUM_TO_CSTR(MemoryInfoList); ENUM_TO_CSTR(ThreadInfoList); ENUM_TO_CSTR(HandleOperationList); ENUM_TO_CSTR(Token); ENUM_TO_CSTR(JavascriptData); ENUM_TO_CSTR(SystemMemoryInfo); ENUM_TO_CSTR(ProcessVMCounters); ENUM_TO_CSTR(LastReserved); ENUM_TO_CSTR(BreakpadInfo); ENUM_TO_CSTR(AssertionInfo); ENUM_TO_CSTR(LinuxCPUInfo); ENUM_TO_CSTR(LinuxProcStatus); ENUM_TO_CSTR(LinuxLSBRelease); ENUM_TO_CSTR(LinuxCMDLine); ENUM_TO_CSTR(LinuxEnviron); ENUM_TO_CSTR(LinuxAuxv); ENUM_TO_CSTR(LinuxMaps); ENUM_TO_CSTR(LinuxDSODebug); ENUM_TO_CSTR(LinuxProcStat); ENUM_TO_CSTR(LinuxProcUptime); ENUM_TO_CSTR(LinuxProcFD); ENUM_TO_CSTR(FacebookAppCustomData); ENUM_TO_CSTR(FacebookBuildID); ENUM_TO_CSTR(FacebookAppVersionName); ENUM_TO_CSTR(FacebookJavaStack); ENUM_TO_CSTR(FacebookDalvikInfo); ENUM_TO_CSTR(FacebookUnwindSymbols); ENUM_TO_CSTR(FacebookDumpErrorLog); ENUM_TO_CSTR(FacebookAppStateLog); ENUM_TO_CSTR(FacebookAbortReason); ENUM_TO_CSTR(FacebookThreadName); ENUM_TO_CSTR(FacebookLogcat); } return "unknown stream type"; }