From 4f5b814790301e7b8252535e0732bee11f4bb801 Mon Sep 17 00:00:00 2001 From: Zequan Wu <zequanwu@google.com> Date: Tue, 3 Aug 2021 14:26:38 -0700 Subject: [PATCH] Add INLINE and INLINE_ORIGIN records to symbol file. The size of symbol file for chrome binary increased from 577 MB to 1205 MB. There are 7,453,748 INLINE records and 1,268,493 INLINE_ORIGIN records. Bug: 1190878 Change-Id: I802ec1b4574c14f74ff80d0f69daf3c81085778a Reviewed-on: https://chromium-review.googlesource.com/c/breakpad/breakpad/+/2915828 Reviewed-by: Joshua Peraza <jperaza@chromium.org> --- src/common/dwarf_cu_to_module.cc | 339 +++++++++++++++++--- src/common/dwarf_cu_to_module.h | 9 +- src/common/dwarf_cu_to_module_unittest.cc | 16 +- src/common/dwarf_line_to_module.cc | 4 +- src/common/dwarf_line_to_module.h | 13 +- src/common/dwarf_line_to_module_unittest.cc | 57 ++-- src/common/linux/dump_symbols.cc | 9 +- src/common/mac/dump_syms.cc | 13 +- src/common/module.cc | 66 +++- src/common/module.h | 63 ++++ src/tools/linux/dump_syms/dump_syms.cc | 8 +- 11 files changed, 511 insertions(+), 86 deletions(-) diff --git a/src/common/dwarf_cu_to_module.cc b/src/common/dwarf_cu_to_module.cc index d129fba4..e3bd9414 100644 --- a/src/common/dwarf_cu_to_module.cc +++ b/src/common/dwarf_cu_to_module.cc @@ -44,6 +44,7 @@ #include <stdio.h> #include <algorithm> +#include <memory> #include <numeric> #include <utility> @@ -58,6 +59,7 @@ using std::map; using std::pair; using std::sort; using std::vector; +using std::unique_ptr; // Data provided by a DWARF specification DIE. 
// @@ -98,6 +100,71 @@ struct AbstractOrigin { typedef map<uint64_t, AbstractOrigin> AbstractOriginByOffset; +using InlineOriginByOffset = map<uint64_t, Module::InlineOrigin*>; + +class InlineOriginMap { + public: + Module::InlineOrigin* GetOrCreateInlineOrigin(uint64_t offset, + const string& name) { + uint64_t specification_offset = references_[offset]; + if (inline_origins_.find(specification_offset) != inline_origins_.end()) { + if (inline_origins_[specification_offset]->name == "<name omitted>") { + inline_origins_[specification_offset]->name = name; + } + return inline_origins_[specification_offset]; + } + inline_origins_[specification_offset] = new Module::InlineOrigin(name); + return inline_origins_[specification_offset]; + } + + // offset is the offset of a DW_TAG_subprogram. specification_offset is the + // value of its DW_AT_specification or equal to offset if DW_AT_specification + // doesn't exist in that DIE. + void SetReference(uint64_t offset, uint64_t specification_offset) { + // If offset doesn't exist in the references_ map yet, always add it. + if (references_.find(offset) == references_.end()) { + references_[offset] = specification_offset; + return; + } + // If offset equals specification_offset and offset exists in references_, + // there is no need to update the references_ map. This early return is + // necessary because the call to erase in following if will remove the entry + // of specification_offset in inline_origins_. + // If specification_offset equals references_[offset], it might be + // duplicate debug info. + if (offset == specification_offset || + specification_offset == references_[offset]) + return; + + // Fix up mapping in inline_origins_. 
+ auto remove = inline_origins_.find(references_[offset]); + if (remove != inline_origins_.end()) { + inline_origins_[specification_offset] = remove->second; + inline_origins_.erase(remove); + } + references_[offset] = specification_offset; + } + + void AssignFilesToInlineOrigins(vector<uint64_t>& inline_origin_offsets, + Module::File* file) { + for (uint64_t offset : inline_origin_offsets) + if (references_.find(offset) != references_.end()) { + auto origin = inline_origins_.find(references_[offset]); + if (origin != inline_origins_.end()) + origin->second->file = file; + } + } + + private: + // A map from a DW_TAG_subprogram's offset to the DW_TAG_subprogram. + InlineOriginByOffset inline_origins_; + + // A map from a DW_TAG_subprogram's offset to the offset of its specification + // or abstract origin subprogram. The set of values in this map should always + // be the same set of keys in inline_origins_. + map<uint64_t, uint64_t> references_; +}; + // Data global to the DWARF-bearing file that is private to the // DWARF-to-Module process. struct DwarfCUToModule::FilePrivate { @@ -130,6 +197,8 @@ struct DwarfCUToModule::FilePrivate { // Keep a list of forward references from DW_AT_abstract_origin and // DW_AT_specification attributes so names can be fixed up. std::map<uint64_t, Module::Function*> forward_ref_die_to_func; + + InlineOriginMap inline_origin_map; }; DwarfCUToModule::FileContext::FileContext(const string& filename, @@ -272,6 +341,9 @@ struct DwarfCUToModule::CUContext { // A map of function pointers to the its forward specification DIE's offset. map<Module::Function*, uint64_t> spec_function_offsets; + + // From file index to vector of subprogram's offset in this CU. + map<uint64_t, vector<uint64_t>> inline_origins; }; // Information about the context of a particular DIE. 
This is for @@ -304,7 +376,8 @@ class DwarfCUToModule::GenericDIEHandler: public DIEHandler { offset_(offset), declaration_(false), specification_(NULL), - forward_ref_die_offset_(0) { } + abstract_origin_(NULL), + forward_ref_die_offset_(0), specification_offset_(0) { } // Derived classes' ProcessAttributeUnsigned can defer to this to // handle DW_AT_declaration, or simply not override it. @@ -356,11 +429,19 @@ class DwarfCUToModule::GenericDIEHandler: public DIEHandler { // Otherwise, this is NULL. Specification* specification_; + // If this DIE has a DW_AT_abstract_origin attribute, this is the + // AbstractOrigin structure for the DIE the attribute refers to. + // Otherwise, this is NULL. + const AbstractOrigin* abstract_origin_; + // If this DIE has a DW_AT_specification or DW_AT_abstract_origin and it is a // forward reference, no Specification will be available. Track the reference // to be fixed up when the DIE is parsed. uint64_t forward_ref_die_offset_; + // The root offset of Specification or abstract origin. + uint64_t specification_offset_; + // The value of the DW_AT_name attribute, or the empty string if the // DIE has no such attribute. 
string name_attribute_; @@ -412,6 +493,21 @@ void DwarfCUToModule::GenericDIEHandler::ProcessAttributeReference( } else { cu_context_->reporter->UnknownSpecification(offset_, data); } + specification_offset_ = data; + break; + } + case DW_AT_abstract_origin: { + const AbstractOriginByOffset& origins = + cu_context_->file_context->file_private_->origins; + AbstractOriginByOffset::const_iterator origin = origins.find(data); + if (origin != origins.end()) { + abstract_origin_ = &(origin->second); + } else if (data > offset_) { + forward_ref_die_offset_ = data; + } else { + cu_context_->reporter->UnknownAbstractOrigin(offset_, data); + } + specification_offset_ = data; break; } default: break; @@ -519,6 +615,163 @@ string DwarfCUToModule::GenericDIEHandler::ComputeQualifiedName() { return return_value; } +static bool IsEmptyRange(const vector<Module::Range>& ranges) { + uint64_t size = accumulate(ranges.cbegin(), ranges.cend(), 0, + [](uint64_t total, Module::Range entry) { + return total + entry.size; + } + ); + + return size == 0; +} + + +// A handler for DW_TAG_inlined_subroutine DIEs. +class DwarfCUToModule::InlineHandler : public GenericDIEHandler { + public: + InlineHandler(CUContext* cu_context, + DIEContext* parent_context, + uint64_t offset, + int inline_nest_level, + vector<unique_ptr<Module::Inline>>& inlines) + : GenericDIEHandler(cu_context, parent_context, offset), + low_pc_(0), + high_pc_(0), + high_pc_form_(DW_FORM_addr), + ranges_form_(DW_FORM_sec_offset), + ranges_data_(0), + call_site_line_(0), + inline_nest_level_(inline_nest_level), + inlines_(inlines) {} + + void ProcessAttributeUnsigned(enum DwarfAttribute attr, + enum DwarfForm form, + uint64_t data); + DIEHandler* FindChildHandler(uint64_t offset, enum DwarfTag tag); + bool EndAttributes(); + void Finish(); + + private: + // The fully-qualified name, as derived from name_attribute_, + // specification_, parent_context_. Computed in EndAttributes. 
+ string name_; + uint64_t low_pc_; // DW_AT_low_pc + uint64_t high_pc_; // DW_AT_high_pc + DwarfForm high_pc_form_; // DW_AT_high_pc can be length or address. + DwarfForm ranges_form_; // DW_FORM_sec_offset or DW_FORM_rnglistx + uint64_t ranges_data_; // DW_AT_ranges + int call_site_line_; + int inline_nest_level_; + // A vector of inlines in the same nest level. It's owned by its parent + // function/inline. At Finish(), add this inline into the vector. + vector<unique_ptr<Module::Inline>>& inlines_; + // A vector of child inlines. + vector<unique_ptr<Module::Inline>> child_inlines_; +}; + +void DwarfCUToModule::InlineHandler::ProcessAttributeUnsigned( + enum DwarfAttribute attr, + enum DwarfForm form, + uint64_t data) { + switch (attr) { + case DW_AT_low_pc: + low_pc_ = data; + break; + case DW_AT_high_pc: + high_pc_form_ = form; + high_pc_ = data; + break; + case DW_AT_ranges: + ranges_data_ = data; + ranges_form_ = form; + break; + case DW_AT_call_line: + call_site_line_ = data; + break; + default: + GenericDIEHandler::ProcessAttributeUnsigned(attr, form, data); + break; + } +} + +DIEHandler* DwarfCUToModule::InlineHandler::FindChildHandler( + uint64_t offset, + enum DwarfTag tag) { + switch (tag) { + case DW_TAG_inlined_subroutine: + return new InlineHandler(cu_context_, new DIEContext(), offset, + inline_nest_level_ + 1, child_inlines_); + default: + return NULL; + } +} + +bool DwarfCUToModule::InlineHandler::EndAttributes() { + if (abstract_origin_) + name_ = abstract_origin_->name; + if (name_.empty()) { + // We haven't seen the abstract origin yet, which might appear later and we + // will fix the name after calling + // InlineOriginMap::GetOrCreateInlineOrigin with the right name. 
+ name_ = "<name omitted>"; + } + return true; +} + +void DwarfCUToModule::InlineHandler::Finish() { + vector<Module::Range> ranges; + + if (low_pc_ && high_pc_) { + if (high_pc_form_ != DW_FORM_addr && + high_pc_form_ != DW_FORM_GNU_addr_index && + high_pc_form_ != DW_FORM_addrx && + high_pc_form_ != DW_FORM_addrx1 && + high_pc_form_ != DW_FORM_addrx2 && + high_pc_form_ != DW_FORM_addrx3 && + high_pc_form_ != DW_FORM_addrx4) { + high_pc_ += low_pc_; + } + + Module::Range range(low_pc_, high_pc_ - low_pc_); + ranges.push_back(range); + } else { + RangesHandler* ranges_handler = cu_context_->ranges_handler; + if (ranges_handler) { + RangeListReader::CURangesInfo cu_info; + if (cu_context_->AssembleRangeListInfo(&cu_info)) { + if (!ranges_handler->ReadRanges(ranges_form_, ranges_data_, + &cu_info, &ranges)) { + ranges.clear(); + cu_context_->reporter->MalformedRangeList(ranges_data_); + } + } else { + cu_context_->reporter->MissingRanges(); + } + } + } + + // Malformed DWARF may omit the name, but all Module::Functions must + // have names. + // If we have a forward reference to a DW_AT_specification or + // DW_AT_abstract_origin, then don't warn, the name will be fixed up + // later + if (name_.empty() && forward_ref_die_offset_ == 0) + cu_context_->reporter->UnnamedFunction(offset_); + + // Every DW_TAG_inlined_subroutine should have a DW_AT_abstract_origin. + assert(specification_offset_ != 0); + + cu_context_->file_context->file_private_->inline_origin_map.SetReference( + specification_offset_, specification_offset_); + Module::InlineOrigin* origin = + cu_context_->file_context->file_private_->inline_origin_map + .GetOrCreateInlineOrigin(specification_offset_, name_); + unique_ptr<Module::Inline> in = std::make_unique<Module::Inline>( + origin, ranges, call_site_line_, inline_nest_level_, + std::move(child_inlines_)); + inlines_.push_back(std::move(in)); +} + // A handler class for DW_TAG_subprogram DIEs. 
class DwarfCUToModule::FuncHandler: public GenericDIEHandler { public: @@ -527,7 +780,7 @@ class DwarfCUToModule::FuncHandler: public GenericDIEHandler { : GenericDIEHandler(cu_context, parent_context, offset), low_pc_(0), high_pc_(0), high_pc_form_(DW_FORM_addr), ranges_form_(DW_FORM_sec_offset), ranges_data_(0), - abstract_origin_(NULL), inline_(false) { } + decl_file_data_(UINT64_MAX), inline_(false) { } void ProcessAttributeUnsigned(enum DwarfAttribute attr, enum DwarfForm form, @@ -535,10 +788,7 @@ class DwarfCUToModule::FuncHandler: public GenericDIEHandler { void ProcessAttributeSigned(enum DwarfAttribute attr, enum DwarfForm form, int64_t data); - void ProcessAttributeReference(enum DwarfAttribute attr, - enum DwarfForm form, - uint64_t data); - + DIEHandler* FindChildHandler(uint64_t offset, enum DwarfTag tag); bool EndAttributes(); void Finish(); @@ -550,8 +800,10 @@ class DwarfCUToModule::FuncHandler: public GenericDIEHandler { DwarfForm high_pc_form_; // DW_AT_high_pc can be length or address. DwarfForm ranges_form_; // DW_FORM_sec_offset or DW_FORM_rnglistx uint64_t ranges_data_; // DW_AT_ranges - const AbstractOrigin* abstract_origin_; + // DW_AT_decl_file, value of UINT64_MAX means undefined. 
+ uint64_t decl_file_data_; bool inline_; + vector<unique_ptr<Module::Inline>> child_inlines_; }; void DwarfCUToModule::FuncHandler::ProcessAttributeUnsigned( @@ -573,7 +825,9 @@ void DwarfCUToModule::FuncHandler::ProcessAttributeUnsigned( ranges_data_ = data; ranges_form_ = form; break; - + case DW_AT_decl_file: + decl_file_data_ = data; + break; default: GenericDIEHandler::ProcessAttributeUnsigned(attr, form, data); break; @@ -595,27 +849,15 @@ void DwarfCUToModule::FuncHandler::ProcessAttributeSigned( } } -void DwarfCUToModule::FuncHandler::ProcessAttributeReference( - enum DwarfAttribute attr, - enum DwarfForm form, - uint64_t data) { - switch (attr) { - case DW_AT_abstract_origin: { - const AbstractOriginByOffset& origins = - cu_context_->file_context->file_private_->origins; - AbstractOriginByOffset::const_iterator origin = origins.find(data); - if (origin != origins.end()) { - abstract_origin_ = &(origin->second); - } else if (data > offset_) { - forward_ref_die_offset_ = data; - } else { - cu_context_->reporter->UnknownAbstractOrigin(offset_, data); - } - break; - } +DIEHandler* DwarfCUToModule::FuncHandler::FindChildHandler( + uint64_t offset, + enum DwarfTag tag) { + switch (tag) { + case DW_TAG_inlined_subroutine: + return new InlineHandler(cu_context_, new DIEContext(), offset, 0, + child_inlines_); default: - GenericDIEHandler::ProcessAttributeReference(attr, form, data); - break; + return NULL; } } @@ -628,16 +870,6 @@ bool DwarfCUToModule::FuncHandler::EndAttributes() { return true; } -static bool IsEmptyRange(const vector<Module::Range>& ranges) { - uint64_t size = accumulate(ranges.cbegin(), ranges.cend(), 0, - [](uint64_t total, Module::Range entry) { - return total + entry.size; - } - ); - - return size == 0; -} - void DwarfCUToModule::FuncHandler::Finish() { vector<Module::Range> ranges; @@ -683,11 +915,12 @@ void DwarfCUToModule::FuncHandler::Finish() { } } + bool empty_range = IsEmptyRange(ranges); // Did we collect the information we need? 
Not all DWARF function // entries are non-empty (for example, inlined functions that were never // used), but all the ones we're interested in cover a non-empty range of // bytes. - if (!IsEmptyRange(ranges)) { + if (!empty_range) { low_pc_ = ranges.front().address; // Malformed DWARF may omit the name, but all Module::Functions must @@ -721,11 +954,27 @@ void DwarfCUToModule::FuncHandler::Finish() { cu_context_->spec_function_offsets[cu_context_->functions.back()] = forward_ref_die_offset_; } + + cu_context_->functions.back()->inlines.swap(child_inlines_); } } else if (inline_) { AbstractOrigin origin(name_); cu_context_->file_context->file_private_->origins[offset_] = origin; } + + // Only keep track of DW_TAG_subprogram which have the attributes we are + // interested in. + if (!empty_range || inline_ || decl_file_data_ != UINT64_MAX) { + uint64_t offset = + specification_offset_ != 0 ? specification_offset_ : offset_; + cu_context_->file_context->file_private_->inline_origin_map.SetReference( + offset_, offset); + cu_context_->file_context->file_private_->inline_origin_map + .GetOrCreateInlineOrigin(offset_, + name_.empty() ? 
"<name omitted>" : name_); + if (decl_file_data_ != UINT64_MAX) + cu_context_->inline_origins[decl_file_data_].push_back(offset_); + } } // A handler for DIEs that contain functions and contribute a @@ -1041,7 +1290,7 @@ void DwarfCUToModule::ReadSourceLines(uint64_t offset) { line_section_start, line_section_length, string_section_start, string_section_length, line_string_section_start, line_string_section_length, - cu_context_->file_context->module_, &lines_); + cu_context_->file_context->module_, &lines_, &files_); } namespace { @@ -1300,6 +1549,14 @@ void DwarfCUToModule::AssignLinesToFunctions() { } } +void DwarfCUToModule::AssignFilesToInlines() { + for (auto iter : files_) { + cu_context_->file_context->file_private_->inline_origin_map + .AssignFilesToInlineOrigins(cu_context_->inline_origins[iter.first], + iter.second); + } +} + void DwarfCUToModule::Finish() { // Assembly language files have no function data, and that gives us // no place to store our line numbers (even though the GNU toolchain @@ -1318,6 +1575,8 @@ void DwarfCUToModule::Finish() { // Dole out lines to the appropriate functions. AssignLinesToFunctions(); + AssignFilesToInlines(); + // Add our functions, which now have source lines assigned to them, // to module_, and remove duplicate functions. for (Module::Function* func : *functions) diff --git a/src/common/dwarf_cu_to_module.h b/src/common/dwarf_cu_to_module.h index 1320ccc2..99dcd879 100644 --- a/src/common/dwarf_cu_to_module.h +++ b/src/common/dwarf_cu_to_module.h @@ -156,7 +156,8 @@ class DwarfCUToModule: public RootDIEHandler { uint64_t string_section_length, const uint8_t* line_string_section, uint64_t line_string_length, - Module* module, vector<Module::Line>* lines) = 0; + Module* module, vector<Module::Line>* lines, + map<uint32_t, Module::File*>* files) = 0; }; // The interface DwarfCUToModule uses to report warnings. 
The member @@ -289,6 +290,7 @@ class DwarfCUToModule: public RootDIEHandler { struct Specification; class GenericDIEHandler; class FuncHandler; + class InlineHandler; class NamedScopeHandler; // A map from section offsets to specifications. @@ -309,6 +311,8 @@ class DwarfCUToModule: public RootDIEHandler { // lines belong to which functions, beyond their addresses.) void AssignLinesToFunctions(); + void AssignFilesToInlines(); + // The only reason cu_context_ and child_context_ are pointers is // that we want to keep their definitions private to // dwarf_cu_to_module.cc, instead of listing them all here. They are @@ -335,6 +339,9 @@ class DwarfCUToModule: public RootDIEHandler { // during parsing. Then, in Finish, we call AssignLinesToFunctions // to dole them out to the appropriate functions. vector<Module::Line> lines_; + + // The map from file index to File* in this CU. + std::map<uint32_t, Module::File*> files_; }; } // namespace google_breakpad diff --git a/src/common/dwarf_cu_to_module_unittest.cc b/src/common/dwarf_cu_to_module_unittest.cc index 2ce69d73..cb943ae3 100644 --- a/src/common/dwarf_cu_to_module_unittest.cc +++ b/src/common/dwarf_cu_to_module_unittest.cc @@ -67,12 +67,13 @@ using ::testing::ValuesIn; class MockLineToModuleHandler: public DwarfCUToModule::LineToModuleHandler { public: MOCK_METHOD1(StartCompilationUnit, void(const string& compilation_dir)); - MOCK_METHOD8(ReadProgram, void(const uint8_t* program, uint64_t length, + MOCK_METHOD9(ReadProgram, void(const uint8_t* program, uint64_t length, const uint8_t* string_section, uint64_t string_section_length, const uint8_t* line_string_section, uint64_t line_string_section_length, - Module* module, vector<Module::Line>* lines)); + Module* module, vector<Module::Line>* lines, + std::map<uint32_t, Module::File*>* files)); }; class MockWarningReporter: public DwarfCUToModule::WarningReporter { @@ -122,7 +123,8 @@ class CUFixtureBase { uint64_t string_section_length, const uint8_t* 
line_string_section, uint64_t line_string_section_length, - Module *module, vector<Module::Line>* lines) { + Module *module, vector<Module::Line>* lines, + std::map<uint32_t, Module::File*>* files) { lines->insert(lines->end(), lines_->begin(), lines_->end()); } private: @@ -155,7 +157,7 @@ class CUFixtureBase { // By default, expect the line program reader not to be invoked. We // may override this in StartCU. EXPECT_CALL(line_reader_, StartCompilationUnit(_)).Times(0); - EXPECT_CALL(line_reader_, ReadProgram(_,_,_,_,_,_,_,_)).Times(0); + EXPECT_CALL(line_reader_, ReadProgram(_,_,_,_,_,_,_,_,_)).Times(0); // The handler will consult this section map to decide what to // pass to our line reader. @@ -341,7 +343,7 @@ void CUFixtureBase::StartCU() { EXPECT_CALL(line_reader_, ReadProgram(&dummy_line_program_[0], dummy_line_size_, _,_,_,_, - &module_, _)) + &module_, _,_)) .Times(AtMost(1)) .WillOnce(DoAll(Invoke(appender_), Return())); ASSERT_TRUE(root_handler_ @@ -1517,7 +1519,7 @@ TEST_F(Specifications, InterCU) { DwarfCUToModule::FileContext fc("dwarf-filename", &m, true); EXPECT_CALL(reporter_, UncoveredFunction(_)).WillOnce(Return()); MockLineToModuleHandler lr; - EXPECT_CALL(lr, ReadProgram(_,_,_,_,_,_,_,_)).Times(0); + EXPECT_CALL(lr, ReadProgram(_,_,_,_,_,_,_,_,_)).Times(0); // Kludge: satisfy reporter_'s expectation. reporter_.SetCUName("compilation-unit-name"); @@ -1576,7 +1578,7 @@ TEST_F(Specifications, UnhandledInterCU) { DwarfCUToModule::FileContext fc("dwarf-filename", &m, false); EXPECT_CALL(reporter_, UncoveredFunction(_)).WillOnce(Return()); MockLineToModuleHandler lr; - EXPECT_CALL(lr, ReadProgram(_,_,_,_,_,_,_,_)).Times(0); + EXPECT_CALL(lr, ReadProgram(_,_,_,_,_,_,_,_,_)).Times(0); // Kludge: satisfy reporter_'s expectation. 
reporter_.SetCUName("compilation-unit-name"); diff --git a/src/common/dwarf_line_to_module.cc b/src/common/dwarf_line_to_module.cc index fe808c08..83bb8f15 100644 --- a/src/common/dwarf_line_to_module.cc +++ b/src/common/dwarf_line_to_module.cc @@ -100,7 +100,7 @@ void DwarfLineToModule::DefineFile(const string& name, int32_t file_num, // Find a Module::File object of the given name, and add it to the // file table. - files_[file_num] = module_->FindFile(full_name); + (*files_)[file_num] = module_->FindFile(full_name); } void DwarfLineToModule::AddLine(uint64_t address, uint64_t length, @@ -122,7 +122,7 @@ void DwarfLineToModule::AddLine(uint64_t address, uint64_t length, } // Find the source file being referred to. - Module::File *file = files_[file_num]; + Module::File *file = (*files_)[file_num]; if (!file) { if (!warned_bad_file_number_) { fprintf(stderr, "warning: DWARF line number data refers to " diff --git a/src/common/dwarf_line_to_module.h b/src/common/dwarf_line_to_module.h index 8e7a0b0d..da2c5f0e 100644 --- a/src/common/dwarf_line_to_module.h +++ b/src/common/dwarf_line_to_module.h @@ -120,16 +120,19 @@ class DwarfLineToModule: public LineInfoHandler { // end of the address space, we clip it. It's up to our client to // sort out which lines belong to which functions; we don't add them // to any particular function in MODULE ourselves. 
- DwarfLineToModule(Module *module, const string& compilation_dir, - vector<Module::Line>* lines) + DwarfLineToModule(Module* module, + const string& compilation_dir, + vector<Module::Line>* lines, + std::map<uint32_t, Module::File*>* files) : module_(module), compilation_dir_(compilation_dir), lines_(lines), + files_(files), highest_file_number_(-1), omitted_line_end_(0), warned_bad_file_number_(false), warned_bad_directory_number_(false) { } - + ~DwarfLineToModule() { } void DefineDir(const string& name, uint32_t dir_num); @@ -167,12 +170,12 @@ class DwarfLineToModule: public LineInfoHandler { DirectoryTable directories_; // A table mapping file numbers to Module::File pointers. - FileTable files_; + FileTable* files_; // The highest file number we've seen so far, or -1 if we've seen // none. Used for dynamically defined file numbers. int32_t highest_file_number_; - + // This is the ending address of the last line we omitted, or zero if we // didn't omit the previous line. It is zero before we have received any // AddLine calls. 
diff --git a/src/common/dwarf_line_to_module_unittest.cc b/src/common/dwarf_line_to_module_unittest.cc index 90b6570d..34cb02ed 100644 --- a/src/common/dwarf_line_to_module_unittest.cc +++ b/src/common/dwarf_line_to_module_unittest.cc @@ -45,7 +45,8 @@ using google_breakpad::Module; TEST(SimpleModule, One) { Module m("name", "os", "architecture", "id"); vector<Module::Line> lines; - DwarfLineToModule h(&m, "/", &lines); + std::map<uint32_t, Module::File*> cu_files; + DwarfLineToModule h(&m, "/", &lines, &cu_files); h.DefineFile("file1", 0x30bf0f27, 0, 0, 0); h.AddLine(0x6fd126fbf74f2680LL, 0x63c9a14cf556712bLL, 0x30bf0f27, @@ -66,7 +67,8 @@ TEST(SimpleModule, One) { TEST(SimpleModule, Many) { Module m("name", "os", "architecture", "id"); vector<Module::Line> lines; - DwarfLineToModule h(&m, "/", &lines); + std::map<uint32_t, Module::File*> cu_files; + DwarfLineToModule h(&m, "/", &lines, &cu_files); h.DefineDir("directory1", 0x838299ab); h.DefineDir("directory2", 0xf85de023); @@ -126,7 +128,8 @@ TEST(SimpleModule, Many) { TEST(Filenames, Absolute) { Module m("name", "os", "architecture", "id"); vector<Module::Line> lines; - DwarfLineToModule h(&m, "/", &lines); + std::map<uint32_t, Module::File*> cu_files; + DwarfLineToModule h(&m, "/", &lines, &cu_files); h.DefineDir("directory1", 1); h.DefineFile("/absolute", 1, 1, 0, 0); @@ -144,7 +147,8 @@ TEST(Filenames, Absolute) { TEST(Filenames, Relative) { Module m("name", "os", "architecture", "id"); vector<Module::Line> lines; - DwarfLineToModule h(&m, "/", &lines); + std::map<uint32_t, Module::File*> cu_files; + DwarfLineToModule h(&m, "/", &lines, &cu_files); h.DefineDir("directory1", 1); h.DefineFile("relative", 1, 1, 0, 0); @@ -162,7 +166,8 @@ TEST(Filenames, Relative) { TEST(Filenames, StrangeFile) { Module m("name", "os", "architecture", "id"); vector<Module::Line> lines; - DwarfLineToModule h(&m, "/", &lines); + std::map<uint32_t, Module::File*> cu_files; + DwarfLineToModule h(&m, "/", &lines, &cu_files); 
h.DefineDir("directory1", 1); h.DefineFile("", 1, 1, 0, 0); @@ -175,7 +180,8 @@ TEST(Filenames, StrangeFile) { TEST(Filenames, StrangeDirectory) { Module m("name", "os", "architecture", "id"); vector<Module::Line> lines; - DwarfLineToModule h(&m, "/", &lines); + std::map<uint32_t, Module::File*> cu_files; + DwarfLineToModule h(&m, "/", &lines, &cu_files); h.DefineDir("", 1); h.DefineFile("file1", 1, 1, 0, 0); @@ -188,7 +194,8 @@ TEST(Filenames, StrangeDirectory) { TEST(Filenames, StrangeDirectoryAndFile) { Module m("name", "os", "architecture", "id"); vector<Module::Line> lines; - DwarfLineToModule h(&m, "/", &lines); + std::map<uint32_t, Module::File*> cu_files; + DwarfLineToModule h(&m, "/", &lines, &cu_files); h.DefineDir("", 1); h.DefineFile("", 1, 1, 0, 0); @@ -203,7 +210,8 @@ TEST(Filenames, StrangeDirectoryAndFile) { TEST(Filenames, DirectoryZeroFileIsRelativeToCompilationDir) { Module m("name", "os", "architecture", "id"); vector<Module::Line> lines; - DwarfLineToModule h(&m, "src/build", &lines); + std::map<uint32_t, Module::File*> cu_files; + DwarfLineToModule h(&m, "src/build", &lines, &cu_files); h.DefineDir("Dir", 1); h.DefineFile("File", 1, 0, 0, 0); @@ -219,7 +227,8 @@ TEST(Filenames, DirectoryZeroFileIsRelativeToCompilationDir) { TEST(Filenames, IncludeDirectoryRelativeToDirectoryZero) { Module m("name", "os", "architecture", "id"); vector<Module::Line> lines; - DwarfLineToModule h(&m, "src/build", &lines); + std::map<uint32_t, Module::File*> cu_files; + DwarfLineToModule h(&m, "src/build", &lines, &cu_files); h.DefineDir("Dir", 1); h.DefineFile("File", 1, 1, 0, 0); @@ -235,7 +244,8 @@ TEST(Filenames, IncludeDirectoryRelativeToDirectoryZero) { TEST(Filenames, IncludeDirectoryAbsolute) { Module m("name", "os", "architecture", "id"); vector<Module::Line> lines; - DwarfLineToModule h(&m, "src/build", &lines); + std::map<uint32_t, Module::File*> cu_files; + DwarfLineToModule h(&m, "src/build", &lines, &cu_files); h.DefineDir("/Dir", 1); 
h.DefineFile("File", 1, 1, 0, 0); @@ -251,7 +261,8 @@ TEST(Filenames, IncludeDirectoryAbsolute) { TEST(ModuleErrors, DirectoryZero) { Module m("name", "os", "architecture", "id"); vector<Module::Line> lines; - DwarfLineToModule h(&m, "/", &lines); + std::map<uint32_t, Module::File*> cu_files; + DwarfLineToModule h(&m, "/", &lines, &cu_files); h.DefineDir("directory0", 0); // should be ignored h.DefineFile("relative", 1, 0, 0, 0); @@ -267,7 +278,8 @@ TEST(ModuleErrors, DirectoryZero) { TEST(ModuleErrors, BadFileNumber) { Module m("name", "os", "architecture", "id"); vector<Module::Line> lines; - DwarfLineToModule h(&m, "/", &lines); + std::map<uint32_t, Module::File*> cu_files; + DwarfLineToModule h(&m, "/", &lines, &cu_files); h.DefineFile("relative", 1, 0, 0, 0); h.AddLine(1, 1, 2, 0, 0); // bad file number @@ -281,7 +293,8 @@ TEST(ModuleErrors, BadFileNumber) { TEST(ModuleErrors, BadDirectoryNumber) { Module m("name", "os", "architecture", "id"); vector<Module::Line> lines; - DwarfLineToModule h(&m, "/", &lines); + std::map<uint32_t, Module::File*> cu_files; + DwarfLineToModule h(&m, "/", &lines, &cu_files); h.DefineDir("directory1", 1); h.DefineFile("baddirnumber1", 1, 2, 0, 0); // bad directory number @@ -296,7 +309,8 @@ TEST(ModuleErrors, BadDirectoryNumber) { TEST(ModuleErrors, EmptyLine) { Module m("name", "os", "architecture", "id"); vector<Module::Line> lines; - DwarfLineToModule h(&m, "/", &lines); + std::map<uint32_t, Module::File*> cu_files; + DwarfLineToModule h(&m, "/", &lines, &cu_files); h.DefineFile("filename1", 1, 0, 0, 0); h.AddLine(1, 0, 1, 0, 0); @@ -309,7 +323,8 @@ TEST(ModuleErrors, EmptyLine) { TEST(ModuleErrors, BigLine) { Module m("name", "os", "architecture", "id"); vector<Module::Line> lines; - DwarfLineToModule h(&m, "/", &lines); + std::map<uint32_t, Module::File*> cu_files; + DwarfLineToModule h(&m, "/", &lines, &cu_files); h.DefineFile("filename1", 1, 0, 0, 0); h.AddLine(0xffffffffffffffffULL, 2, 1, 0, 0); @@ -326,7 +341,8 @@ 
TEST(ModuleErrors, BigLine) { TEST(Omitted, DroppedThenGood) { Module m("name", "os", "architecture", "id"); vector<Module::Line> lines; - DwarfLineToModule h(&m, "/", &lines); + std::map<uint32_t, Module::File*> cu_files; + DwarfLineToModule h(&m, "/", &lines, &cu_files); h.DefineFile("filename1", 1, 0, 0, 0); h.AddLine(0, 10, 1, 83816211, 0); // should be omitted @@ -339,7 +355,8 @@ TEST(Omitted, DroppedThenGood) { TEST(Omitted, GoodThenDropped) { Module m("name", "os", "architecture", "id"); vector<Module::Line> lines; - DwarfLineToModule h(&m, "/", &lines); + std::map<uint32_t, Module::File*> cu_files; + DwarfLineToModule h(&m, "/", &lines, &cu_files); h.DefineFile("filename1", 1, 0, 0, 0); h.AddLine(0x9dd6a372, 10, 1, 41454594, 0); // should be recorded @@ -352,7 +369,8 @@ TEST(Omitted, GoodThenDropped) { TEST(Omitted, Mix1) { Module m("name", "os", "architecture", "id"); vector<Module::Line> lines; - DwarfLineToModule h(&m, "/", &lines); + std::map<uint32_t, Module::File*> cu_files; + DwarfLineToModule h(&m, "/", &lines, &cu_files); h.DefineFile("filename1", 1, 0, 0, 0); h.AddLine(0x679ed72f, 10, 1, 58932642, 0); // should be recorded @@ -373,7 +391,8 @@ TEST(Omitted, Mix1) { TEST(Omitted, Mix2) { Module m("name", "os", "architecture", "id"); vector<Module::Line> lines; - DwarfLineToModule h(&m, "/", &lines); + std::map<uint32_t, Module::File*> cu_files; + DwarfLineToModule h(&m, "/", &lines, &cu_files); h.DefineFile("filename1", 1, 0, 0, 0); h.AddLine(0, 0xf2, 1, 58802211, 0); // should be omitted diff --git a/src/common/linux/dump_symbols.cc b/src/common/linux/dump_symbols.cc index 5909d6ba..92a260ba 100644 --- a/src/common/linux/dump_symbols.cc +++ b/src/common/linux/dump_symbols.cc @@ -260,13 +260,16 @@ class DumperLineToModule: public DwarfCUToModule::LineToModuleHandler { void StartCompilationUnit(const string& compilation_dir) { compilation_dir_ = compilation_dir; } - void ReadProgram(const uint8_t* program, uint64_t length, + void ReadProgram(const 
uint8_t* program, + uint64_t length, const uint8_t* string_section, uint64_t string_section_length, const uint8_t* line_string_section, uint64_t line_string_section_length, - Module* module, std::vector<Module::Line>* lines) { - DwarfLineToModule handler(module, compilation_dir_, lines); + Module* module, + std::vector<Module::Line>* lines, + std::map<uint32_t, Module::File*>* files) { + DwarfLineToModule handler(module, compilation_dir_, lines, files); google_breakpad::LineInfo parser(program, length, byte_reader_, string_section, string_section_length, line_string_section, diff --git a/src/common/mac/dump_syms.cc b/src/common/mac/dump_syms.cc index e30d8ea9..3592e4bb 100644 --- a/src/common/mac/dump_syms.cc +++ b/src/common/mac/dump_syms.cc @@ -351,15 +351,18 @@ class DumpSymbols::DumperLineToModule: compilation_dir_ = compilation_dir; } - void ReadProgram(const uint8_t* program, uint64_t length, + void ReadProgram(const uint8_t* program, + uint64_t length, const uint8_t* string_section, uint64_t string_section_length, const uint8_t* line_string_section, uint64_t line_string_section_length, - Module* module, vector<Module::Line>* lines) { - DwarfLineToModule handler(module, compilation_dir_, lines); - LineInfo parser(program, length, byte_reader_, - nullptr, 0, nullptr, 0, &handler); + Module* module, + vector<Module::Line>* lines, + std::map<uint32_t, Module::File*>* files) { + DwarfLineToModule handler(module, compilation_dir_, lines, files); + LineInfo parser(program, length, byte_reader_, nullptr, 0, + nullptr, 0, &handler); parser.Start(); } private: diff --git a/src/common/module.cc b/src/common/module.cc index 0ecf6ca6..eccd01f0 100644 --- a/src/common/module.cc +++ b/src/common/module.cc @@ -38,14 +38,16 @@ #include <stdio.h> #include <string.h> +#include <functional> #include <iostream> +#include <memory> #include <utility> namespace google_breakpad { using std::dec; using std::hex; - +using std::unique_ptr; Module::Module(const string& name, const 
string& os, const string& architecture, const string& id, @@ -214,6 +216,13 @@ void Module::AssignSourceIds() { line_it != func->lines.end(); ++line_it) line_it->file->source_id = 0; } + // Also mark all files cited by inline functions by setting each one's source + // id to zero. + for (InlineOrigin* origin : inline_origins_) + // There are some artificial inline functions which don't belong to + // any file. Those will have file id -1. + if (origin->file) + origin->file->source_id = 0; // Finally, assign source ids to those files that have been marked. // We could have just assigned source id numbers while traversing @@ -227,6 +236,33 @@ void Module::AssignSourceIds() { } } +static void InlineDFS( + vector<unique_ptr<Module::Inline>>& inlines, + std::function<void(unique_ptr<Module::Inline>&)> const& forEach) { + for (unique_ptr<Module::Inline>& in : inlines) { + forEach(in); + InlineDFS(in->child_inlines, forEach); + } +} + +void Module::CreateInlineOrigins() { + // Only add origins that have file and deduplicate origins with same name and + // file id by doing a DFS. + auto addInlineOrigins = [&](unique_ptr<Inline> &in) { + auto it = inline_origins_.find(in->origin); + if (it == inline_origins_.end()) + inline_origins_.insert(in->origin); + else + in->origin = *it; + }; + for (Function* func : functions_) + InlineDFS(func->inlines, addInlineOrigins); + int next_id = 0; + for (InlineOrigin* origin: inline_origins_) { + origin->id = next_id++; + } +} + bool Module::ReportError() { fprintf(stderr, "error writing symbol file: %s\n", strerror(errno)); @@ -267,6 +303,8 @@ bool Module::Write(std::ostream& stream, SymbolData symbol_data) { } if (symbol_data & SYMBOLS_AND_FILES) { + if (symbol_data & INLINES) + CreateInlineOrigins(); AssignSourceIds(); // Write out files. @@ -279,8 +317,17 @@ bool Module::Write(std::ostream& stream, SymbolData symbol_data) { return ReportError(); } } + // Write out inline origins. 
+ if (symbol_data & INLINES) { + for (InlineOrigin* origin : inline_origins_) { + stream << "INLINE_ORIGIN " << origin->id << " " << origin->getFileID() + << " " << origin->name << "\n"; + if (!stream.good()) + return ReportError(); + } + } - // Write out functions and their lines. + // Write out functions and their inlines and lines. for (FunctionSet::const_iterator func_it = functions_.begin(); func_it != functions_.end(); ++func_it) { Function* func = *func_it; @@ -296,6 +343,21 @@ bool Module::Write(std::ostream& stream, SymbolData symbol_data) { if (!stream.good()) return ReportError(); + // Write out inlines. + if (symbol_data & INLINES) { + auto write_inline = [&](unique_ptr<Inline>& in) { + stream << "INLINE "; + stream << in->inline_nest_level << " " << in->call_site_line << " " + << in->origin->id << hex; + for (const Range& r : in->ranges) + stream << " " << (r.address - load_address_) << " " << r.size; + stream << dec << "\n"; + }; + InlineDFS(func->inlines, write_inline); + if (!stream.good()) + return ReportError(); + } + while ((line_it != func->lines.end()) && (line_it->address >= range_it->address) && (line_it->address < (range_it->address + range_it->size))) { diff --git a/src/common/module.h b/src/common/module.h index f2fff490..e8678914 100644 --- a/src/common/module.h +++ b/src/common/module.h @@ -41,6 +41,7 @@ #include <iostream> #include <limits> #include <map> +#include <memory> #include <set> #include <string> #include <vector> @@ -66,6 +67,8 @@ class Module { static constexpr uint64_t kMaxAddress = std::numeric_limits<Address>::max(); struct File; struct Function; + struct InlineOrigin; + struct Inline; struct Line; struct Extern; @@ -120,6 +123,50 @@ class Module { // Source lines belonging to this function, sorted by increasing // address. vector<Line> lines; + + // Inlined call sites belonging to this function. 
+ vector<std::unique_ptr<Inline>> inlines; + }; + + struct InlineOrigin { + InlineOrigin(const string& name): id(-1), name(name), file(NULL) {} + + // A unique id for each InlineOrigin object. INLINE records use the id to + // refer to its INLINE_ORIGIN record. + int id; + + // The inlined function's name. + string name; + + File* file; + + int getFileID() const { return file ? file->source_id : -1; } + }; + + // An inlined call site. + struct Inline { + Inline(InlineOrigin* origin, + const vector<Range>& ranges, + int call_site_line, + int inline_nest_level, + vector<std::unique_ptr<Inline>> child_inlines) + : origin(origin), + ranges(ranges), + call_site_line(call_site_line), + inline_nest_level(inline_nest_level), + child_inlines(std::move(child_inlines)) {} + + InlineOrigin* origin; + + // The list of addresses and sizes. + vector<Range> ranges; + + int call_site_line; + + int inline_nest_level; + + // A list of inlines which are children of this inline. + vector<std::unique_ptr<Inline>> child_inlines; }; // A source line. @@ -179,6 +226,14 @@ class Module { } }; + struct InlineOriginCompare { + bool operator() (const InlineOrigin* lhs, const InlineOrigin* rhs) const { + if (lhs->getFileID() == rhs->getFileID()) + return lhs->name < rhs->name; + return lhs->getFileID() < rhs->getFileID(); + } + }; + struct ExternCompare { bool operator() (const Extern* lhs, const Extern* rhs) const { return lhs->address < rhs->address; @@ -275,6 +330,10 @@ class Module { // symbol file, at which point we omit any unused files. void AssignSourceIds(); + // This function should be called before AssignSourceIds() to get the set of + // valid InlineOrigins*. + void CreateInlineOrigins(); + // Call AssignSourceIds, and write this module to STREAM in the // breakpad symbol format. Return true if all goes well, or false if // an error occurs. This method writes out: @@ -334,6 +393,9 @@ class Module { // A set containing Function structures, sorted by address. 
typedef set<Function*, FunctionCompare> FunctionSet; + // A set containing InlineOrigin structures, sorted by file id, then name. + typedef set<InlineOrigin*, InlineOriginCompare> InlineOriginSet; + // A set containing Extern structures, sorted by address. typedef set<Extern*, ExternCompare> ExternSet; @@ -342,6 +404,7 @@ class Module { // point to. FileByNameMap files_; // This module's source files. FunctionSet functions_; // This module's functions. + InlineOriginSet inline_origins_; // This module's inline origins. // The module owns all the call frame info entries that have been // added to it. diff --git a/src/tools/linux/dump_syms/dump_syms.cc b/src/tools/linux/dump_syms/dump_syms.cc index a562bffb..b0f56e95 100644 --- a/src/tools/linux/dump_syms/dump_syms.cc +++ b/src/tools/linux/dump_syms/dump_syms.cc @@ -50,6 +50,7 @@ int usage(const char* self) { fprintf(stderr, "Options:\n"); fprintf(stderr, " -i: Output module header information only.\n"); fprintf(stderr, " -c Do not generate CFI section\n"); + fprintf(stderr, " -d Generate INLINE/INLINE_ORIGIN records\n"); fprintf(stderr, " -r Do not handle inter-compilation " "unit references\n"); fprintf(stderr, " -v Print all warnings to stderr\n"); @@ -64,6 +65,7 @@ int main(int argc, char** argv) { return usage(argv[0]); bool header_only = false; bool cfi = true; + bool handle_inlines = false; bool handle_inter_cu_refs = true; bool log_to_stderr = false; std::string obj_name; @@ -75,6 +77,8 @@ int main(int argc, char** argv) { header_only = true; } else if (strcmp("-c", argv[arg_index]) == 0) { cfi = false; + } else if (strcmp("-d", argv[arg_index]) == 0) { + handle_inlines = true; } else if (strcmp("-r", argv[arg_index]) == 0) { handle_inter_cu_refs = false; } else if (strcmp("-v", argv[arg_index]) == 0) { @@ -127,8 +131,8 @@ int main(int argc, char** argv) { return 1; } } else { - SymbolData symbol_data = - INLINES | (cfi ? CFI : NO_DATA) | SYMBOLS_AND_FILES; + SymbolData symbol_data = (handle_inlines ? 
INLINES : NO_DATA) | + (cfi ? CFI : NO_DATA) | SYMBOLS_AND_FILES; google_breakpad::DumpOptions options(symbol_data, handle_inter_cu_refs); if (!WriteSymbolFile(binary, obj_name, obj_os, debug_dirs, options, std::cout)) {