From 10afee3916b0c812511e8f285a9cd6f38a197bd4 Mon Sep 17 00:00:00 2001 From: Zequan Wu Date: Tue, 4 Jan 2022 16:03:22 -0800 Subject: [PATCH] Add INLINE and INLINE_ORIGIN records on Windows dump_syms This adds INLINE and INLINE_ORIGIN records on Windows dump_syms. It also adds more LINE records that represent the innermost callsite line info inside a function. Bug: chromium:1190878 Change-Id: I15c2044709f8ca831b03a453910d036f749452c6 Reviewed-on: https://chromium-review.googlesource.com/c/breakpad/breakpad/+/3133606 Reviewed-by: Lei Zhang Reviewed-by: Joshua Peraza Reviewed-by: Ivan Penkov --- src/common/windows/pdb_source_line_writer.cc | 347 ++++++++++++++++--- src/common/windows/pdb_source_line_writer.h | 140 +++++++- src/tools/windows/dump_syms/dump_syms.cc | 55 ++- 3 files changed, 482 insertions(+), 60 deletions(-) diff --git a/src/common/windows/pdb_source_line_writer.cc b/src/common/windows/pdb_source_line_writer.cc index 08d52635..ca0c6a39 100644 --- a/src/common/windows/pdb_source_line_writer.cc +++ b/src/common/windows/pdb_source_line_writer.cc @@ -40,6 +40,7 @@ #include #include #include +#include #include #include @@ -58,6 +59,8 @@ namespace google_breakpad { namespace { +using std::set; +using std::unique_ptr; using std::vector; // The symbol (among possibly many) selected to represent an rva. @@ -208,9 +211,160 @@ void StripLlvmSuffixAndUndecorate(BSTR* name) { } // namespace -PDBSourceLineWriter::PDBSourceLineWriter() : output_(NULL) { +PDBSourceLineWriter::Inline::Inline(int inline_nest_level) + : inline_nest_level_(inline_nest_level) {} + +void PDBSourceLineWriter::Inline::SetOriginId(int origin_id) { + origin_id_ = origin_id; } +void PDBSourceLineWriter::Inline::ExtendRanges(const Line& line) { + if (ranges_.empty()) { + ranges_[line.rva] = line.length; + return; + } + auto iter = ranges_.lower_bound(line.rva); + // There is no overlap if this function is called with inlinee lines from + // the same callsite. 
+ if (iter == ranges_.begin()) { + return; + } + if (line.rva + line.length == iter->first) { + // If they are connected, merge their ranges into one. + DWORD length = line.length + iter->second; + ranges_.erase(iter); + ranges_[line.rva] = length; + } else { + --iter; + if (iter->first + iter->second == line.rva) { + ranges_[iter->first] = iter->second + line.length; + } else { + ranges_[line.rva] = line.length; + } + } +} + +void PDBSourceLineWriter::Inline::SetCallSiteLine(DWORD call_site_line) { + call_site_line_ = call_site_line; +} + +void PDBSourceLineWriter::Inline::SetCallSiteFileId(DWORD call_site_file_id) { + call_site_file_id_ = call_site_file_id; +} + +void PDBSourceLineWriter::Inline::SetChildInlines( + vector> child_inlines) { + child_inlines_ = std::move(child_inlines); +} + +void PDBSourceLineWriter::Inline::Print(FILE* output) const { + // Ignore INLINE record that doesn't have any range. + if (ranges_.empty()) + return; + fprintf(output, "INLINE %d %lu %lu %d", inline_nest_level_, call_site_line_, + call_site_file_id_, origin_id_); + for (const auto& r : ranges_) { + fprintf(output, " %lx %lx", r.first, r.second); + } + fprintf(output, "\n"); + for (const unique_ptr& in : child_inlines_) { + in->Print(output); + } +} + +const PDBSourceLineWriter::Line* PDBSourceLineWriter::Lines::GetLine( + DWORD rva) const { + auto iter = line_map_.find(rva); + if (iter == line_map_.end()) { + // If not found exact rva, check if it's within any range. + iter = line_map_.lower_bound(rva); + if (iter == line_map_.begin()) + return nullptr; + --iter; + auto l = iter->second; + // This happens when there is no top level lines cover this rva (e.g. empty + // lines found for the function). Then we don't know the call site line + // number for this inlined function. + if (rva >= l.rva + l.length) + return nullptr; + } + return &iter->second; +} + +DWORD PDBSourceLineWriter::Lines::GetLineNum(DWORD rva) const { + const Line* line = GetLine(rva); + return line ? 
line->line_num : 0; +} + +DWORD PDBSourceLineWriter::Lines::GetFileId(DWORD rva) const { + const Line* line = GetLine(rva); + return line ? line->file_id : 0; +} + +void PDBSourceLineWriter::Lines::AddLine(const Line& line) { + if (line_map_.empty()) { + line_map_[line.rva] = line; + return; + } + + // Given an existing line in line_map_, remove it from line_map_ if it + // overlaps with the line and add a new line for the non-overlap range. Return + // true if there is an overlap. + auto intercept = [&](Line old_line) { + DWORD end = old_line.rva + old_line.length; + // No overlap. + if (old_line.rva >= line.rva + line.length || line.rva >= end) + return false; + // old_line is within the line. + if (old_line.rva >= line.rva && end <= line.rva + line.length) { + line_map_.erase(old_line.rva); + return true; + } + // Then there is a partial overlap. + if (old_line.rva < line.rva) { + old_line.length -= end - line.rva; + if (end > line.rva + line.length) { + Line new_line = old_line; + new_line.rva = line.rva + line.length; + new_line.length = end - new_line.rva; + line_map_[new_line.rva] = new_line; + } + } else { + line_map_.erase(old_line.rva); + old_line.length -= line.rva + line.length - old_line.rva; + old_line.rva = line.rva + line.length; + } + line_map_[old_line.rva] = old_line; + return true; + }; + + bool is_intercept; + // Use a loop in case there are multiple lines within the given line. + do { + auto iter = line_map_.lower_bound(line.rva); + if (iter == line_map_.end()) { + --iter; + intercept(iter->second); + break; + } + is_intercept = false; + if (iter != line_map_.begin()) { + // Check if the given line overlaps a line with a smaller rva in the map. + auto prev = line_map_.lower_bound(line.rva); + --prev; + is_intercept = intercept(prev->second); + } + // Check if the given line overlaps a line with greater or equal rva in the + // map. 
Using operator |= here since it's possible that there are multiple + // lines with greater rva in the map overlap with the given line. + is_intercept |= intercept(iter->second); + } while (is_intercept); + line_map_[line.rva] = line; +} + +PDBSourceLineWriter::PDBSourceLineWriter(bool handle_inline) + : output_(NULL), handle_inline_(handle_inline) {} + PDBSourceLineWriter::~PDBSourceLineWriter() { Close(); } @@ -280,50 +434,63 @@ bool PDBSourceLineWriter::Open(const wstring& file, FileFormat format) { return true; } -bool PDBSourceLineWriter::PrintLines(IDiaEnumLineNumbers* lines) { - // The line number format is: - // +bool PDBSourceLineWriter::GetLine(IDiaLineNumber* dia_line, Line* line) const { + if (FAILED(dia_line->get_relativeVirtualAddress(&line->rva))) { + fprintf(stderr, "failed to get line rva\n"); + return false; + } + + if (FAILED(dia_line->get_length(&line->length))) { + fprintf(stderr, "failed to get line code length\n"); + return false; + } + + DWORD dia_source_id; + if (FAILED(dia_line->get_sourceFileId(&dia_source_id))) { + fprintf(stderr, "failed to get line source file id\n"); + return false; + } + // duplicate file names are coalesced to share one ID + line->file_id = GetRealFileID(dia_source_id); + + if (FAILED(dia_line->get_lineNumber(&line->line_num))) { + fprintf(stderr, "failed to get line number\n"); + return false; + } + return true; +} + +bool PDBSourceLineWriter::GetLines(IDiaEnumLineNumbers* lines, + Lines* line_list) const { CComPtr line; ULONG count; while (SUCCEEDED(lines->Next(1, &line, &count)) && count == 1) { - DWORD rva; - if (FAILED(line->get_relativeVirtualAddress(&rva))) { - fprintf(stderr, "failed to get line rva\n"); + Line l; + if (!GetLine(line, &l)) return false; - } - - DWORD length; - if (FAILED(line->get_length(&length))) { - fprintf(stderr, "failed to get line code length\n"); - return false; - } - - DWORD dia_source_id; - if (FAILED(line->get_sourceFileId(&dia_source_id))) { - fprintf(stderr, "failed to get line 
source file id\n"); - return false; - } - // duplicate file names are coalesced to share one ID - DWORD source_id = GetRealFileID(dia_source_id); - - DWORD line_num; - if (FAILED(line->get_lineNumber(&line_num))) { - fprintf(stderr, "failed to get line number\n"); - return false; - } - - AddressRangeVector ranges; - MapAddressRange(image_map_, AddressRange(rva, length), &ranges); - for (size_t i = 0; i < ranges.size(); ++i) { - fprintf(output_, "%lx %lx %lu %lu\n", ranges[i].rva, ranges[i].length, - line_num, source_id); - } + // Silently ignore zero-length lines. + if (l.length != 0) + line_list->AddLine(l); line.Release(); } return true; } +void PDBSourceLineWriter::PrintLines(const Lines& lines) const { + // The line number format is: + // + for (const auto& kv : lines.GetLineMap()) { + const Line& l = kv.second; + AddressRangeVector ranges; + MapAddressRange(image_map_, AddressRange(l.rva, l.length), &ranges); + for (auto& range : ranges) { + fprintf(output_, "%lx %lx %lu %lu\n", range.rva, range.length, l.line_num, + l.file_id); + } + } +} + bool PDBSourceLineWriter::PrintFunction(IDiaSymbol* function, IDiaSymbol* block, bool has_multiple_symbols) { @@ -372,9 +539,20 @@ bool PDBSourceLineWriter::PrintFunction(IDiaSymbol* function, return false; } - if (!PrintLines(lines)) { + // Get top level lines first, which later may be split into multiple smaller + // lines if any inline exists in their ranges if we want to handle inline. 
+ Lines line_list; + if (!GetLines(lines, &line_list)) { return false; } + if (handle_inline_) { + vector> inlines; + if (!GetInlines(block, &line_list, 0, &inlines)) { + return false; + } + PrintInlines(inlines); + } + PrintLines(line_list); return true; } @@ -555,6 +733,97 @@ bool PDBSourceLineWriter::PrintFunctions() { return true; } +void PDBSourceLineWriter::PrintInlineOrigins() const { + struct OriginCompare { + bool operator()(const InlineOrigin lhs, const InlineOrigin rhs) const { + return lhs.id < rhs.id; + } + }; + set origins; + // Sort by origin id. + for (auto const& origin : inline_origins_) + origins.insert(origin.second); + for (auto o : origins) { + fprintf(output_, "INLINE_ORIGIN %d %ls\n", o.id, o.name.c_str()); + } +} + +bool PDBSourceLineWriter::GetInlines(IDiaSymbol* block, + Lines* line_list, + int inline_nest_level, + vector>* inlines) { + CComPtr inline_callsites; + if (FAILED(block->findChildrenEx(SymTagInlineSite, nullptr, nsNone, + &inline_callsites))) { + return false; + } + ULONG count; + CComPtr callsite; + while (SUCCEEDED(inline_callsites->Next(1, &callsite, &count)) && + count == 1) { + unique_ptr new_inline(new Inline(inline_nest_level)); + CComPtr lines; + // All inlinee lines have the same file id. + DWORD file_id = 0; + DWORD call_site_line = 0; + if (FAILED(session_->findInlineeLines(callsite, &lines))) { + return false; + } + CComPtr dia_line; + while (SUCCEEDED(lines->Next(1, &dia_line, &count)) && count == 1) { + Line line; + if (!GetLine(dia_line, &line)) { + return false; + } + // Silently ignore zero-length lines. + if (line.length != 0) { + // Use the first line num and file id at rva as this inline's call site + // line number, because after adding lines it may be changed to inner + // line number and inner file id. 
+ if (call_site_line == 0) + call_site_line = line_list->GetLineNum(line.rva); + if (file_id == 0) + file_id = line_list->GetFileId(line.rva); + line_list->AddLine(line); + new_inline->ExtendRanges(line); + } + dia_line.Release(); + } + BSTR name; + callsite->get_name(&name); + if (SysStringLen(name) == 0) { + name = SysAllocString(L""); + } + auto iter = inline_origins_.find(name); + if (iter == inline_origins_.end()) { + InlineOrigin origin; + origin.id = inline_origins_.size(); + origin.name = name; + inline_origins_[name] = origin; + } + new_inline->SetOriginId(inline_origins_[name].id); + new_inline->SetCallSiteLine(call_site_line); + new_inline->SetCallSiteFileId(file_id); + // Go to next level. + vector> child_inlines; + if (!GetInlines(callsite, line_list, inline_nest_level + 1, + &child_inlines)) { + return false; + } + new_inline->SetChildInlines(std::move(child_inlines)); + inlines->push_back(std::move(new_inline)); + callsite.Release(); + } + return true; +} + +void PDBSourceLineWriter::PrintInlines( + const vector>& inlines) const { + for (const unique_ptr& in : inlines) { + in->Print(output_); + } +} + #undef max bool PDBSourceLineWriter::PrintFrameDataUsingPDB() { @@ -1105,10 +1374,8 @@ bool PDBSourceLineWriter::WriteSymbols(FILE* symbol_file) { bool ret = PrintPDBInfo(); // This is not a critical piece of the symbol file. 
PrintPEInfo(); - ret = ret && - PrintSourceFiles() && - PrintFunctions() && - PrintFrameData(); + ret = ret && PrintSourceFiles() && PrintFunctions() && PrintFrameData(); + PrintInlineOrigins(); output_ = NULL; return ret; diff --git a/src/common/windows/pdb_source_line_writer.h b/src/common/windows/pdb_source_line_writer.h index 00f6e592..b1aa85e1 100644 --- a/src/common/windows/pdb_source_line_writer.h +++ b/src/common/windows/pdb_source_line_writer.h @@ -35,8 +35,10 @@ #include +#include #include #include +#include #include "common/windows/module_info.h" #include "common/windows/omap.h" @@ -47,6 +49,8 @@ struct IDiaSymbol; namespace google_breakpad { +using std::map; +using std::vector; using std::wstring; using std::unordered_map; @@ -58,7 +62,7 @@ class PDBSourceLineWriter { ANY_FILE // try PDB_FILE and then EXE_FILE }; - explicit PDBSourceLineWriter(); + explicit PDBSourceLineWriter(bool handle_inline); ~PDBSourceLineWriter(); // Opens the given file. For executable files, the corresponding pdb @@ -99,9 +103,110 @@ class PDBSourceLineWriter { bool UsesGUID(bool *uses_guid); private: - // Outputs the line/address pairs for each line in the enumerator. + // InlineOrigin represents an INLINE_ORIGIN record in a symbol file. It's an + // inlined function. + struct InlineOrigin { + // The unique id for an InlineOrigin. + int id; + // The name of the inlined function. + wstring name; + }; + + // Line represents a LINE record in a symbol file. It represents a source code + // line. + struct Line { + // The relative address of a line. + DWORD rva; + // The number of bytes this line has. + DWORD length; + // The source line number. + DWORD line_num; + // The source file id where the source line is located. + DWORD file_id; + }; + + // Inline represents an INLINE record in a symbol file. + class Inline { + public: + explicit Inline(int inline_nest_level); + + void SetOriginId(int origin_id); + + // Adding inlinee line's range into ranges. 
If line is adjacent with any + // existing lines, extend the range. Otherwise, add line as a new range. + void ExtendRanges(const Line& line); + + void SetCallSiteLine(DWORD call_site_line); + + void SetCallSiteFileId(DWORD call_site_file_id); + + void SetChildInlines(std::vector> child_inlines); + + void Print(FILE* output) const; + + private: + // The nest level of this inline record. + int inline_nest_level_; + // The source line number where this inlined function is called. + DWORD call_site_line_ = 0; + // The call site file id where this inlined function is called. + DWORD call_site_file_id_ = 0; + // The id used for referring to an InlineOrigin. + int origin_id_ = 0; + // A map from rva to length. These are the address ranges covered by this + // Inline. + map ranges_; + // The list of direct Inlines inlined inside this Inline. + vector> child_inlines_; + }; + + // Lines represents a map of lines inside a function with rva as the key. + // AddLine function adds a line into the map and ensures that there is no + // overlap between any two lines in the map. + class Lines { + public: + const map& GetLineMap() const { return line_map_; } + + // Finds the line from line_map_ that contains the given rva and returns its + // line_num. If not found, return 0. + DWORD GetLineNum(DWORD rva) const; + + // Finds the line from line_map_ that contains the given rva and returns its + // file_id. If not found, return 0. + DWORD GetFileId(DWORD rva) const; + + // Add the `line` into line_map_. If the `line` overlaps with existing + // lines, truncate the existing lines and add the given line. It ensures + // that all lines in line_map_ do not overlap with each other. For example, + // suppose there is a line A in the map and we call AddLine with Line B. 
+ // Line A: rva: 100, length: 20, line_num: 10, file_id: 1 + // Line B: rva: 105, length: 10, line_num: 4, file_id: 2 + // After calling AddLine with Line B, we will have the following lines: + // Line 1: rva: 100, length: 5, line_num: 10, file_id: 1 + // Line 2: rva: 105, length: 10, line_num: 4, file_id: 2 + // Line 3: rva: 115, length: 5, line_num: 10, file_id: 1 + void AddLine(const Line& line); + + private: + // Finds the line from line_map_ that contains the given rva. If not found, + // return nullptr. + const Line* GetLine(DWORD rva) const; + // The key is rva. AddLine function ensures that any two lines in the map do + // not overlap. + map line_map_; + }; + + // Construct Line from IDiaLineNumber. The output Line is stored at line. + // Return true on success. + bool GetLine(IDiaLineNumber* dia_line, Line* line) const; + + // Construct Lines from IDiaEnumLineNumbers. The list of Lines is stored in + // line_list. // Returns true on success. - bool PrintLines(IDiaEnumLineNumbers *lines); + bool GetLines(IDiaEnumLineNumbers* lines, Lines* line_list) const; + + // Outputs the line/address pairs for each line in `lines`. + void PrintLines(const Lines& lines) const; // Outputs a function address and name, followed by its source line list. // block can be the same object as function, or it can be a reference to a @@ -118,6 +223,25 @@ class PDBSourceLineWriter { // Returns true on success. bool PrintSourceFiles(); + // Output all inline origins. + void PrintInlineOrigins() const; + + // Retrieve inlines inside the given block. It also adds inlinee lines to + // `line_list` since inner lines are more precise source location. If the + // block has children with SymTagInlineSite Tag, it will recursively (DFS) call + // itself with each child as first argument. Returns true on success. + // `block`: the IDiaSymbol that may have inline sites. + // `line_list`: the list of lines inside current function. 
+ // `inline_nest_level`: the nest level of block's Inlines. + // `inlines`: the vector to store the list of inlines for the block. + bool GetInlines(IDiaSymbol* block, + Lines* line_list, + int inline_nest_level, + vector>* inlines); + + // Outputs all inlines. + void PrintInlines(const vector>& inlines) const; + // Outputs all of the frame information necessary to construct stack // backtraces in the absence of frame pointers. For x86 data stored in // .pdb files. Returns true on success. @@ -172,8 +296,8 @@ class PDBSourceLineWriter { // reference it. There may be multiple files with identical filenames // but different unique IDs. The cache attempts to coalesce these into // one ID per unique filename. - DWORD GetRealFileID(DWORD id) { - unordered_map::iterator iter = file_ids_.find(id); + DWORD GetRealFileID(DWORD id) const { + unordered_map::const_iterator iter = file_ids_.find(id); if (iter == file_ids_.end()) return id; return iter->second; @@ -213,9 +337,15 @@ class PDBSourceLineWriter { // This maps unique filenames to file IDs. unordered_map unique_files_; + // The INLINE_ORIGINS records. The key is the function name. + std::map inline_origins_; + // This is used for calculating post-transform symbol addresses and lengths. 
ImageMap image_map_; + // If we should output INLINE/INLINE_ORIGIN records + bool handle_inline_; + // Disallow copy ctor and operator= PDBSourceLineWriter(const PDBSourceLineWriter&); void operator=(const PDBSourceLineWriter&); diff --git a/src/tools/windows/dump_syms/dump_syms.cc b/src/tools/windows/dump_syms/dump_syms.cc index 1b1797dc..fa4980fc 100644 --- a/src/tools/windows/dump_syms/dump_syms.cc +++ b/src/tools/windows/dump_syms/dump_syms.cc @@ -38,30 +38,55 @@ #include "common/windows/pdb_source_line_writer.h" #include "common/windows/pe_source_line_writer.h" -using std::wstring; using google_breakpad::PDBSourceLineWriter; using google_breakpad::PESourceLineWriter; using std::unique_ptr; +using std::wstring; + +int usage(const wchar_t* self) { + fprintf(stderr, "Usage: %ws [--pe] [--i] \n", self); + fprintf(stderr, "Options:\n"); + fprintf(stderr, + "--pe:\tRead debugging information from PE file and do " + "not attempt to locate matching PDB file.\n" + "\tThis is only supported for PE32+ (64 bit) PE files.\n"); + fprintf(stderr, + "--i:\tOutput INLINE/INLINE_ORIGIN record\n" + "\tThis cannot be used with [--pe].\n"); + return 1; +} int wmain(int argc, wchar_t** argv) { - bool success; - if (argc == 2) { - PDBSourceLineWriter pdb_writer; - if (!pdb_writer.Open(wstring(argv[1]), PDBSourceLineWriter::ANY_FILE)) { + bool success = false; + bool pe = false; + bool handle_inline = false; + int arg_index = 1; + while (arg_index < argc && wcslen(argv[arg_index]) > 0 && + wcsncmp(L"--", argv[arg_index], 2) == 0) { + if (wcscmp(L"--pe", argv[arg_index]) == 0) { + pe = true; + } else if (wcscmp(L"--i", argv[arg_index]) == 0) { + handle_inline = true; + } + ++arg_index; + } + + if ((pe && handle_inline) || arg_index == argc) { + usage(argv[0]); + return 1; + } + + wchar_t* file_path = argv[arg_index]; + if (pe) { + PESourceLineWriter pe_writer(file_path); + success = pe_writer.WriteSymbols(stdout); + } else { + PDBSourceLineWriter pdb_writer(handle_inline); + if 
(!pdb_writer.Open(wstring(file_path), PDBSourceLineWriter::ANY_FILE)) { fprintf(stderr, "Open failed.\n"); return 1; } success = pdb_writer.WriteSymbols(stdout); - } else if (argc == 3 && wcscmp(argv[1], L"--pe") == 0) { - PESourceLineWriter pe_writer(argv[2]); - success = pe_writer.WriteSymbols(stdout); - } else { - fprintf(stderr, "Usage: %ws [--pe] \n", argv[0]); - fprintf(stderr, "Options:\n"); - fprintf(stderr, "--pe:\tRead debugging information from PE file and do " - "not attempt to locate matching PDB file.\n" - "\tThis is only supported for PE32+ (64 bit) PE files.\n"); - return 1; } if (!success) {