diff --git a/src/common/dwarf/dwarf2reader.cc b/src/common/dwarf/dwarf2reader.cc
index be004134..e9af9b24 100644
--- a/src/common/dwarf/dwarf2reader.cc
+++ b/src/common/dwarf/dwarf2reader.cc
@@ -76,7 +76,7 @@ CompilationUnit::CompilationUnit(const string& path,
       addr_buffer_(NULL), addr_buffer_length_(0),
       is_split_dwarf_(false), dwo_id_(0), dwo_name_(),
       skeleton_dwo_id_(0), ranges_base_(0), addr_base_(0),
-      have_checked_for_dwp_(false), dwp_path_(),
+      str_offsets_base_(0), have_checked_for_dwp_(false), dwp_path_(),
       dwp_byte_reader_(), dwp_reader_() {}
 
 // Initialize a compilation unit from a .dwo or .dwp file.
@@ -454,7 +454,7 @@ void CompilationUnit::ProcessFormStringIndex(
     uint64_t dieoffset, enum DwarfAttribute attr, enum DwarfForm form,
     uint64_t str_index) {
   const uint8_t* offset_ptr =
-      str_offsets_buffer_ + str_index * reader_->OffsetSize();
+      str_offsets_buffer_ + str_offsets_base_ + str_index * reader_->OffsetSize();
   const uint64_t offset = reader_->ReadOffset(offset_ptr);
   if (offset >= string_buffer_length_) {
     return;
@@ -464,6 +464,162 @@ void CompilationUnit::ProcessFormStringIndex(
   ProcessAttributeString(dieoffset, attr, form, str);
 }
 
+// Special function for pre-processing the DW_AT_str_offsets_base in a
+// DW_TAG_compile_unit die (for DWARF v5).  We must make sure to find and
+// process the DW_AT_str_offsets_base attribute before attempting to read
+// any string attribute in the compile unit.
+//
+// Only a DW_FORM_sec_offset value belonging to DW_AT_str_offsets_base is
+// reported (through ProcessAttributeUnsigned); every other attribute is
+// merely stepped over.  Returns a pointer just past the attribute value,
+// or NULL if the form cannot be handled.
+const uint8_t* CompilationUnit::ProcessStrOffsetBaseAttribute(
+    uint64_t dieoffset, const uint8_t* start, enum DwarfAttribute attr,
+    enum DwarfForm form, uint64_t implicit_const) {
+  size_t len;
+
+  switch (form) {
+    // DW_FORM_indirect is never used because it is such a space
+    // waster.
+    case DW_FORM_indirect:
+      form = static_cast<enum DwarfForm>(reader_->ReadUnsignedLEB128(start,
+                                                                     &len));
+      start += len;
+      return ProcessStrOffsetBaseAttribute(dieoffset, start, attr, form,
+                                           implicit_const);
+
+    case DW_FORM_flag_present:
+      return start;
+    case DW_FORM_data1:
+    case DW_FORM_flag:
+      return start + 1;
+    case DW_FORM_data2:
+      return start + 2;
+    case DW_FORM_data4:
+      return start + 4;
+    case DW_FORM_data8:
+      return start + 8;
+    case DW_FORM_data16:
+      // This form is designed for an md5 checksum inside line tables.
+      return start + 16;
+    case DW_FORM_string: {
+      const char* str = reinterpret_cast<const char*>(start);
+      return start + strlen(str) + 1;
+    }
+    case DW_FORM_udata:
+      reader_->ReadUnsignedLEB128(start, &len);
+      return start + len;
+    case DW_FORM_sdata:
+      reader_->ReadSignedLEB128(start, &len);
+      return start + len;
+    case DW_FORM_addr:
+      reader_->ReadAddress(start);
+      return start + reader_->AddressSize();
+
+    // This is the important one here!
+    case DW_FORM_sec_offset:
+      if (attr == dwarf2reader::DW_AT_str_offsets_base)
+        ProcessAttributeUnsigned(dieoffset, attr, form,
+                                 reader_->ReadOffset(start));
+      else
+        reader_->ReadOffset(start);
+      return start + reader_->OffsetSize();
+
+    case DW_FORM_ref1:
+      return start + 1;
+    case DW_FORM_ref2:
+      return start + 2;
+    case DW_FORM_ref4:
+      return start + 4;
+    case DW_FORM_ref8:
+      return start + 8;
+    case DW_FORM_ref_udata:
+      reader_->ReadUnsignedLEB128(start, &len);
+      return start + len;
+    case DW_FORM_ref_addr:
+      // DWARF2 and 3/4 differ on whether ref_addr is address size or
+      // offset size.
+      assert(header_.version >= 2);
+      if (header_.version == 2) {
+        reader_->ReadAddress(start);
+        return start + reader_->AddressSize();
+      } else if (header_.version >= 3) {
+        reader_->ReadOffset(start);
+        return start + reader_->OffsetSize();
+      }
+      break;
+    case DW_FORM_ref_sig8:
+      return start + 8;
+    case DW_FORM_implicit_const:
+      return start;
+    case DW_FORM_block1: {
+      uint64_t datalen = reader_->ReadOneByte(start);
+      return start + 1 + datalen;
+    }
+    case DW_FORM_block2: {
+      uint64_t datalen = reader_->ReadTwoBytes(start);
+      return start + 2 + datalen;
+    }
+    case DW_FORM_block4: {
+      uint64_t datalen = reader_->ReadFourBytes(start);
+      return start + 4 + datalen;
+    }
+    case DW_FORM_block:
+    case DW_FORM_exprloc: {
+      uint64_t datalen = reader_->ReadUnsignedLEB128(start, &len);
+      return start + datalen + len;
+    }
+    case DW_FORM_strp:
+    case DW_FORM_line_strp:
+      // The string offset itself is not needed here; just step over it.
+      reader_->ReadOffset(start);
+      return start + reader_->OffsetSize();
+    case DW_FORM_strp_sup:
+      return start + 4;
+    case DW_FORM_ref_sup4:
+      return start + 4;
+    case DW_FORM_ref_sup8:
+      return start + 8;
+    case DW_FORM_loclistx:
+      reader_->ReadUnsignedLEB128(start, &len);
+      return start + len;
+    case DW_FORM_strx:
+    case DW_FORM_GNU_str_index:
+      reader_->ReadUnsignedLEB128(start, &len);
+      return start + len;
+    case DW_FORM_strx1: {
+      return start + 1;
+    }
+    case DW_FORM_strx2: {
+      return start + 2;
+    }
+    case DW_FORM_strx3: {
+      return start + 3;
+    }
+    case DW_FORM_strx4: {
+      return start + 4;
+    }
+
+    case DW_FORM_addrx:
+    case DW_FORM_GNU_addr_index:
+      reader_->ReadUnsignedLEB128(start, &len);
+      return start + len;
+    case DW_FORM_addrx1:
+      return start + 1;
+    case DW_FORM_addrx2:
+      return start + 2;
+    case DW_FORM_addrx3:
+      return start + 3;
+    case DW_FORM_addrx4:
+      return start + 4;
+    case DW_FORM_rnglistx:
+      reader_->ReadUnsignedLEB128(start, &len);
+      return start + len;
+  }
+  fprintf(stderr, "Unhandled form type\n");
+  return NULL;
+}
+
 // If one really wanted, you could merge SkipAttribute and
 // ProcessAttribute
 // This is all boring data manipulation and calling of the handler.
@@ -699,6 +855,25 @@ const uint8_t* CompilationUnit::ProcessAttribute(
 const uint8_t* CompilationUnit::ProcessDIE(uint64_t dieoffset,
                                            const uint8_t* start,
                                            const Abbrev& abbrev) {
+  // With DWARF v5, the compile_unit die may contain a
+  // DW_AT_str_offsets_base.  If it does, that attribute must be found
+  // and processed before trying to process the other attributes; otherwise
+  // the string values will all come out incorrect.
+  if (abbrev.tag == DW_TAG_compile_unit && header_.version == 5) {
+    const uint8_t* start_copy = start;
+    for (AttributeList::const_iterator i = abbrev.attributes.begin();
+         i != abbrev.attributes.end();
+         i++) {
+      start_copy = ProcessStrOffsetBaseAttribute(dieoffset, start_copy,
+                                                 i->attr_, i->form_,
+                                                 i->value_);
+      // A NULL result means the form was not handled: the position of the
+      // remaining attributes is unknown, so stop this pre-scan.
+      if (start_copy == NULL)
+        break;
+    }
+  }
+
   for (AttributeList::const_iterator i = abbrev.attributes.begin();
        i != abbrev.attributes.end();
        i++) {
diff --git a/src/common/dwarf/dwarf2reader.h b/src/common/dwarf/dwarf2reader.h
index 4f90d920..92a05efe 100644
--- a/src/common/dwarf/dwarf2reader.h
+++ b/src/common/dwarf/dwarf2reader.h
@@ -541,6 +541,15 @@ class CompilationUnit {
                                  enum DwarfForm form,
                                  uint64_t implicit_const);
 
+  // Special version of ProcessAttribute, for finding str_offsets_base in
+  // DW_TAG_compile_unit, for DWARF v5.  Returns a pointer just past the
+  // attribute value, or NULL if the form cannot be handled.
+  const uint8_t* ProcessStrOffsetBaseAttribute(uint64_t dieoffset,
+                                               const uint8_t* start,
+                                               enum DwarfAttribute attr,
+                                               enum DwarfForm form,
+                                               uint64_t implicit_const);
+
   // Called when we have an attribute with unsigned data to give to
   // our handler. The attribute is for the DIE at OFFSET from the
   // beginning of compilation unit, has a name of ATTR, a form of
@@ -557,6 +566,9 @@ class CompilationUnit {
     else if (attr == DW_AT_GNU_addr_base || attr == DW_AT_addr_base) {
       addr_base_ = data;
     }
+    else if (attr == DW_AT_str_offsets_base) {
+      str_offsets_base_ = data;
+    }
     else if (attr == DW_AT_GNU_ranges_base || attr == DW_AT_rnglists_base) {
       ranges_base_ = data;
     }
@@ -726,6 +738,9 @@ class CompilationUnit {
   // The value of the DW_AT_GNU_addr_base attribute, if any.
   uint64_t addr_base_;
 
+  // The value of DW_AT_str_offsets_base attribute, if any.
+  uint64_t str_offsets_base_;
+
   // True if we have already looked for a .dwp file.
   bool have_checked_for_dwp_;
 
diff --git a/src/common/dwarf/dwarf2reader_die_unittest.cc b/src/common/dwarf/dwarf2reader_die_unittest.cc
index f93a8e4a..e329630c 100644
--- a/src/common/dwarf/dwarf2reader_die_unittest.cc
+++ b/src/common/dwarf/dwarf2reader_die_unittest.cc
@@ -319,6 +319,90 @@ TEST_P(DwarfForms, addr) {
   ParseCompilationUnit(GetParam());
 }
 
+TEST_P(DwarfForms, strx1) {
+  if (GetParam().version != 5) {
+    return;
+  }
+  Label abbrev_table = abbrevs.Here();
+  abbrevs.Abbrev(1, dwarf2reader::DW_TAG_compile_unit,
+                 dwarf2reader::DW_children_no)
+      .Attribute(dwarf2reader::DW_AT_name, dwarf2reader::DW_FORM_strx1)
+      .Attribute(dwarf2reader::DW_AT_low_pc, dwarf2reader::DW_FORM_addr)
+      .Attribute(dwarf2reader::DW_AT_str_offsets_base,
+                 dwarf2reader::DW_FORM_sec_offset)
+      .EndAbbrev()
+      .EndTable();
+
+  info.set_format_size(GetParam().format_size);
+  info.set_endianness(GetParam().endianness);
+  info.Header(GetParam().version, abbrev_table, GetParam().address_size,
+              dwarf2reader::DW_UT_compile)
+      .ULEB128(1)           // abbrev index
+      .D8(2);               // string index
+
+  uint64_t value;
+  uint64_t offsets_base;
+  if (GetParam().address_size == 4) {
+    value = 0xc8e9ffcc;
+    offsets_base = 8;
+    info.D32(value);                     // low pc
+    info.D32(offsets_base);              // str_offsets_base
+  } else {
+    value = 0xe942517fc2768564ULL;
+    offsets_base = 16;
+    info.D64(value);                     // low_pc
+    info.D64(offsets_base);              // str_offsets_base
+  }
+  info.Finish();
+
+  Section debug_strings;
+  // no header, just a series of null-terminated strings.
+  debug_strings.AppendCString("apple");     // offset = 0
+  debug_strings.AppendCString("bird");      // offset = 6
+  debug_strings.AppendCString("canary");    // offset = 11
+  debug_strings.AppendCString("dinosaur");  // offset = 18
+
+  Section str_offsets;
+  str_offsets.set_endianness(GetParam().endianness);
+  // Header for .debug_str_offsets
+  if (GetParam().address_size == 4) {
+    str_offsets.D32(24);  // section length (4 bytes)
+  } else {
+    str_offsets.D32(0xffffffff);
+    str_offsets.D64(48);  // section length (12 bytes)
+  }
+  str_offsets.D16(GetParam().version);  // version (2 bytes)
+  str_offsets.D16(0);                   // padding (2 bytes)
+
+  // .debug_str_offsets data (the offsets)
+  if (GetParam().address_size == 4) {
+    str_offsets.D32(0);
+    str_offsets.D32(6);
+    str_offsets.D32(11);
+    str_offsets.D32(18);
+  } else {
+    str_offsets.D64(0);
+    str_offsets.D64(6);
+    str_offsets.D64(11);
+    str_offsets.D64(18);
+  }
+
+
+  ExpectBeginCompilationUnit(GetParam(), dwarf2reader::DW_TAG_compile_unit);
+  EXPECT_CALL(handler, ProcessAttributeString(_, dwarf2reader::DW_AT_name,
+                                              dwarf2reader::DW_FORM_strx1,
+                                              "bird"))
+      .WillOnce(Return());
+  EXPECT_CALL(handler, ProcessAttributeUnsigned(_, dwarf2reader::DW_AT_low_pc,
+                                                dwarf2reader::DW_FORM_addr,
+                                                value))
+      .InSequence(s)
+      .WillOnce(Return());
+  ExpectEndCompilationUnit();
+
+  ParseCompilationUnit(GetParam());
+}
+
 TEST_P(DwarfForms, block2_empty) {
   StartSingleAttributeDIE(GetParam(), (DwarfTag) 0x16e4d2f7,
                           (DwarfAttribute) 0xe52c4463,
diff --git a/src/common/dwarf_cu_to_module.cc b/src/common/dwarf_cu_to_module.cc
index e70adc11..d6f86550 100644
--- a/src/common/dwarf_cu_to_module.cc
+++ b/src/common/dwarf_cu_to_module.cc
@@ -181,7 +181,8 @@ struct DwarfCUToModule::CUContext {
         high_pc(0),
         ranges_form(dwarf2reader::DW_FORM_sec_offset),
         ranges_data(0),
-        ranges_base(0) { }
+        ranges_base(0),
+        str_offsets_base(0) { }
 
   ~CUContext() {
     for (vector<Module::Function*>::iterator it = functions.begin();
@@ -224,6 +225,9 @@ struct DwarfCUToModule::CUContext {
   // form DW_FORM_addrxX is relative to this offset.
   uint64_t addr_base;
 
+  // Offset into this CU's contribution to .debug_str_offsets.
+  uint64_t str_offsets_base;
+
   // Collect all the data from the CU that a RangeListReader needs to read a
   // range.
   bool AssembleRangeListInfo(
@@ -909,6 +913,9 @@ void DwarfCUToModule::ProcessAttributeUnsigned(enum DwarfAttribute attr,
     case dwarf2reader::DW_AT_GNU_addr_base:
       cu_context_->addr_base = data;
       break;
+    case dwarf2reader::DW_AT_str_offsets_base:
+      cu_context_->str_offsets_base = data;
+      break;
 
     default:
       break;
diff --git a/src/common/module_unittest.cc b/src/common/module_unittest.cc
index cede09c3..7b1b9cda 100644
--- a/src/common/module_unittest.cc
+++ b/src/common/module_unittest.cc
@@ -613,4 +613,9 @@ TEST(Write, OutOfRangeAddresses) {
                "STACK CFI INIT 2000 100 \n",
                s.str().c_str());
 
+  // Cleanup - Prevent Memory Leak errors.
+  delete (extern1);
+  delete (function);
+  delete (entry3);
+  delete (entry1);
 }