linux, dump_syms: Filter module entries outside specified ranges

Partitioned libraries generated with lld and llvm-objcopy currently
contain a superset of debug information, beyond what applies to the
library itself. This is because objcopy cannot split up debug
information by partition - instead, it places a copy of all debug
information into each partition.

Until lld or objcopy is able to split up debug information by partition,
let dump_syms do the next best thing:

- Find the address ranges of all PT_LOAD segments in the lib.
- Supply these to the Module being generated.
- Filter additions to the Module based on these ranges.

Bug: 990190
Change-Id: Ib5f279f42e3f6ea79eed9665efbcc23c3c5d25dc
Reviewed-on: https://chromium-review.googlesource.com/c/breakpad/breakpad/+/1884699
Reviewed-by: Joshua Peraza <jperaza@chromium.org>
This commit is contained in:
Christopher Grant 2019-10-29 14:56:38 -04:00 committed by Joshua Peraza
parent 17958ef62c
commit 862c9f47ef
4 changed files with 120 additions and 0 deletions

View file

@ -182,6 +182,23 @@ typename ElfClass::Addr GetLoadingAddress(
return 0;
}
// Collect one Module::Range (virtual address, in-memory size) for every
// PT_LOAD entry among the first |nheader| entries of |program_headers|.
// Entries of any other type are ignored; the ranges are returned in program
// header order.
template <typename ElfClass>
vector<Module::Range> GetPtLoadSegmentRanges(
    const typename ElfClass::Phdr* program_headers,
    int nheader) {
  vector<Module::Range> load_ranges;
  for (int index = 0; index < nheader; ++index) {
    const typename ElfClass::Phdr& phdr = program_headers[index];
    if (phdr.p_type != PT_LOAD) {
      continue;
    }
    load_ranges.push_back(Module::Range(phdr.p_vaddr, phdr.p_memsz));
  }
  return load_ranges;
}
#ifndef NO_STABS_SUPPORT
template<typename ElfClass>
bool LoadStabs(const typename ElfClass::Ehdr* elf_header,
@ -649,6 +666,14 @@ bool LoadSymbols(const string& obj_file,
module->SetLoadAddress(loading_addr);
info->set_loading_addr(loading_addr, obj_file);
// Allow filtering of extraneous debug information in partitioned libraries.
// Such libraries contain debug information for all libraries extracted from
// the same combined library, implying extensive duplication.
vector<Module::Range> address_ranges = GetPtLoadSegmentRanges<ElfClass>(
GetOffset<ElfClass, Phdr>(elf_header, elf_header->e_phoff),
elf_header->e_phnum);
module->SetAddressRanges(address_ranges);
const Shdr* sections =
GetOffset<ElfClass, Shdr>(elf_header, elf_header->e_shoff);
const Shdr* section_names = sections + elf_header->e_shstrndx;

View file

@ -76,11 +76,19 @@ void Module::SetLoadAddress(Address address) {
load_address_ = address;
}
// Replaces the module's stored address ranges with a copy of RANGES.
// AddressIsInModule consults these ranges; when the stored list is empty it
// accepts every address, so passing an empty vector turns filtering off.
void Module::SetAddressRanges(const vector<Range>& ranges) {
address_ranges_ = ranges;
}
void Module::AddFunction(Function *function) {
// FUNC lines must not hold an empty name, so catch the problem early if
// callers try to add one.
assert(!function->name.empty());
if (!AddressIsInModule(function->address)) {
return;
}
// FUNCs are better than PUBLICs as they come with sizes, so remove an extern
// with the same address if present.
Extern ext(function->address);
@ -123,10 +131,18 @@ void Module::AddFunctions(vector<Function *>::iterator begin,
}
// Appends STACK_FRAME_ENTRY to the module's stack frame entries, unless its
// address lies outside the address ranges given to SetAddressRanges, in which
// case the entry is discarded.
//
// NOTE(review): callers visible here (e.g. the unit tests) pass a
// heap-allocated entry and never free it themselves, i.e. ownership is handed
// to the module. A discarded entry is therefore deleted here; otherwise it
// would be referenced by nobody and leak. Callers must not touch the pointer
// after this call.
void Module::AddStackFrameEntry(StackFrameEntry *stack_frame_entry) {
  if (!AddressIsInModule(stack_frame_entry->address)) {
    // Filtered out: free it now, since it will never be stored in
    // stack_frame_entries_.
    delete stack_frame_entry;
    return;
  }
  stack_frame_entries_.push_back(stack_frame_entry);
}
void Module::AddExtern(Extern *ext) {
if (!AddressIsInModule(ext->address)) {
return;
}
std::pair<ExternSet::iterator,bool> ret = externs_.insert(ext);
if (!ret.second) {
// Free the duplicate that was not inserted because this Module
@ -232,6 +248,19 @@ bool Module::WriteRuleMap(const RuleMap &rule_map, std::ostream &stream) {
return stream.good();
}
// Returns true if ADDRESS lies inside one of the ranges supplied through
// SetAddressRanges, or if no ranges have been supplied (filtering disabled).
// Each range covers [segment.address, segment.address + segment.size).
bool Module::AddressIsInModule(Address address) const {
  // An empty range list means no filtering was requested.
  if (address_ranges_.empty()) {
    return true;
  }
  for (const auto& segment : address_ranges_) {
    // Compare the offset into the segment against its size rather than
    // computing the end address: segment.address + segment.size can wrap
    // around for a range that reaches the top of the address space, which
    // would wrongly reject every address in that range.
    if (address >= segment.address &&
        address - segment.address < segment.size) {
      return true;
    }
  }
  return false;
}
bool Module::Write(std::ostream &stream, SymbolData symbol_data) {
stream << "MODULE " << os_ << " " << architecture_ << " "
<< id_ << " " << name_ << "\n";

View file

@ -205,6 +205,14 @@ class Module {
// Write is used.
void SetLoadAddress(Address load_address);
// Sets address filtering on elements added to the module. This allows
// libraries with extraneous debug symbols to generate symbol files containing
// only relevant symbols. For example, an LLD-generated partition library may
// contain debug information pertaining to all partitions derived from a
// single "combined" library. Filtering applies only to elements added after
// this method is called.
void SetAddressRanges(const vector<Range>& ranges);
// Add FUNCTION to the module. FUNCTION's name must not be empty.
// This module owns all Function objects added with this function:
// destroying the module destroys them as well.
@ -302,6 +310,10 @@ class Module {
// if an error occurs, return false, and leave errno set.
static bool WriteRuleMap(const RuleMap &rule_map, std::ostream &stream);
// Returns true if the specified address resides within a specified address
// range, or if no ranges have been specified.
bool AddressIsInModule(Address address) const;
// Module header entries.
string name_, os_, architecture_, id_, code_id_;
@ -310,6 +322,10 @@ class Module {
// address.
Address load_address_;
// The set of valid address ranges of the module. If specified, attempts to
// add elements residing outside these ranges will be silently filtered.
vector<Range> address_ranges_;
// Relation for maps whose keys are strings shared with some other
// structure.
struct CompareStringPtrs {

View file

@ -564,3 +564,53 @@ TEST(Construct, FunctionsAndThumbExternsWithSameAddress) {
"PUBLIC cc00 0 arm_func\n",
contents.c_str());
}
// Checks that stack frame entries, functions and externs whose addresses lie
// outside the module's configured address ranges never reach the output.
TEST(Write, OutOfRangeAddresses) {
  stringstream output;
  Module module(MODULE_NAME, MODULE_OS, MODULE_ARCH, MODULE_ID);

  // Restrict the module to a single allowed range, standing in for one
  // PT_LOAD segment: [0x2000, 0x3000).
  vector<Module::Range> allowed_ranges;
  allowed_ranges.push_back(Module::Range(0x2000ULL, 0x1000ULL));
  module.SetAddressRanges(allowed_ranges);

  // Three stack frames: one below, one at the start of, and one just past the
  // allowed range. Only the middle one is expected in the output.
  const unsigned long long kFrameAddresses[] = {
    0x1000ULL, 0x2000ULL, 0x3000ULL,
  };
  for (const unsigned long long frame_address : kFrameAddresses) {
    Module::StackFrameEntry* frame = new Module::StackFrameEntry();
    frame->address = frame_address;
    frame->size = 0x100ULL;
    module.AddStackFrameEntry(frame);
  }

  // A function located outside the allowed range.
  Module::File* source_file = module.FindFile("file_name.cc");
  Module::Function* outside_function = new Module::Function(
      "function_name", 0x4000ULL);
  outside_function->ranges.push_back(Module::Range(0x4000ULL, 0x1000ULL));
  outside_function->parameter_size = 0x100ULL;
  Module::Line source_line = { 0x4000ULL, 0x100ULL, source_file, 67519080 };
  outside_function->lines.push_back(source_line);
  module.AddFunction(outside_function);

  // An extern located outside the allowed range.
  Module::Extern* outside_extern = new Module::Extern(0x5000ULL);
  outside_extern->name = "_xyz";
  module.AddExtern(outside_extern);

  module.Write(output, ALL_SYMBOL_DATA);

  // Only the module header and the in-range stack frame survive filtering.
  EXPECT_STREQ("MODULE os-name architecture id-string name with spaces\n"
               "STACK CFI INIT 2000 100 \n",
               output.str().c_str());
}