From 1f87c4a732fa5175218549b4d7053112dbd57894 Mon Sep 17 00:00:00 2001 From: "ted.mielczarek@gmail.com" Date: Wed, 23 Jan 2013 18:01:28 +0000 Subject: [PATCH] Include the compilation directory for FILE entries, making them absolute instead of relative A=Ryan Sleevi R=mark,ted at https://breakpad.appspot.com/385001/ git-svn-id: http://google-breakpad.googlecode.com/svn/trunk@1106 4c0a9323-5329-0410-9bdc-e9ce6186880e --- src/common/dwarf_cu_to_module.cc | 18 +- src/common/dwarf_cu_to_module.h | 24 ++- src/common/dwarf_cu_to_module_unittest.cc | 51 +++-- src/common/dwarf_line_to_module.cc | 27 ++- src/common/dwarf_line_to_module.h | 8 +- src/common/dwarf_line_to_module_unittest.cc | 98 ++++++--- src/common/linux/dump_symbols.cc | 12 +- src/common/mac/dump_syms.mm | 14 +- src/tools/python/filter_syms.py | 204 ++++++++++++++++++ .../python/tests/filter_syms_unittest.py | 138 ++++++++++++ 10 files changed, 518 insertions(+), 76 deletions(-) create mode 100644 src/tools/python/filter_syms.py create mode 100644 src/tools/python/tests/filter_syms_unittest.py diff --git a/src/common/dwarf_cu_to_module.cc b/src/common/dwarf_cu_to_module.cc index 127beacb..55f69130 100644 --- a/src/common/dwarf_cu_to_module.cc +++ b/src/common/dwarf_cu_to_module.cc @@ -623,7 +623,7 @@ void DwarfCUToModule::WarningReporter::UnnamedFunction(uint64 offset) { } DwarfCUToModule::DwarfCUToModule(FileContext *file_context, - LineToModuleFunctor *line_reader, + LineToModuleHandler *line_reader, WarningReporter *reporter) : line_reader_(line_reader), has_source_line_info_(false) { cu_context_ = new CUContext(file_context, reporter); @@ -666,8 +666,16 @@ void DwarfCUToModule::ProcessAttributeUnsigned(enum DwarfAttribute attr, void DwarfCUToModule::ProcessAttributeString(enum DwarfAttribute attr, enum DwarfForm form, const string &data) { - if (attr == dwarf2reader::DW_AT_name) - cu_context_->reporter->SetCUName(data); + switch (attr) { + case dwarf2reader::DW_AT_name: + 
cu_context_->reporter->SetCUName(data); + break; + case dwarf2reader::DW_AT_comp_dir: + line_reader_->StartCompilationUnit(data); + break; + default: + break; + } } bool DwarfCUToModule::EndAttributes() { @@ -744,8 +752,8 @@ void DwarfCUToModule::ReadSourceLines(uint64 offset) { cu_context_->reporter->BadLineInfoOffset(offset); return; } - (*line_reader_)(section_start + offset, section_length - offset, - cu_context_->file_context->module, &lines_); + line_reader_->ReadProgram(section_start + offset, section_length - offset, + cu_context_->file_context->module, &lines_); } namespace { diff --git a/src/common/dwarf_cu_to_module.h b/src/common/dwarf_cu_to_module.h index c7072b0f..85453316 100644 --- a/src/common/dwarf_cu_to_module.h +++ b/src/common/dwarf_cu_to_module.h @@ -92,21 +92,27 @@ class DwarfCUToModule: public dwarf2reader::RootDIEHandler { FilePrivate *file_private; }; - // An abstract base class for functors that handle DWARF line data + // An abstract base class for handlers that handle DWARF line data // for DwarfCUToModule. DwarfCUToModule could certainly just use // dwarf2reader::LineInfo itself directly, but decoupling things // this way makes unit testing a little easier. - class LineToModuleFunctor { + class LineToModuleHandler { public: - LineToModuleFunctor() { } - virtual ~LineToModuleFunctor() { } + LineToModuleHandler() { } + virtual ~LineToModuleHandler() { } + + // Called at the beginning of a new compilation unit, prior to calling + // ReadProgram(). compilation_dir will indicate the path that the + // current compilation unit was compiled in, consistent with the + // DW_AT_comp_dir DIE. + virtual void StartCompilationUnit(const string& compilation_dir) = 0; // Populate MODULE and LINES with source file names and code/line // mappings, given a pointer to some DWARF line number data // PROGRAM, and an overestimate of its size. Add no zero-length // lines to LINES. 
- virtual void operator()(const char *program, uint64 length, - Module *module, vector *lines) = 0; + virtual void ReadProgram(const char *program, uint64 length, + Module *module, vector *lines) = 0; }; // The interface DwarfCUToModule uses to report warnings. The member @@ -186,7 +192,7 @@ class DwarfCUToModule: public dwarf2reader::RootDIEHandler { // unit's line number data. Use REPORTER to report problems with the // data we find. DwarfCUToModule(FileContext *file_context, - LineToModuleFunctor *line_reader, + LineToModuleHandler *line_reader, WarningReporter *reporter); ~DwarfCUToModule(); @@ -247,8 +253,8 @@ class DwarfCUToModule: public dwarf2reader::RootDIEHandler { // owned by this DwarfCUToModule: the constructor sets them, and the // destructor deletes them. - // The functor to use to handle line number data. - LineToModuleFunctor *line_reader_; + // The handler to use to handle line number data. + LineToModuleHandler *line_reader_; // This compilation unit's context. CUContext *cu_context_; diff --git a/src/common/dwarf_cu_to_module_unittest.cc b/src/common/dwarf_cu_to_module_unittest.cc index 37061d2a..81e629b0 100644 --- a/src/common/dwarf_cu_to_module_unittest.cc +++ b/src/common/dwarf_cu_to_module_unittest.cc @@ -62,14 +62,11 @@ using ::testing::ValuesIn; // Mock classes. 
-class MockLineToModuleFunctor: public DwarfCUToModule::LineToModuleFunctor { +class MockLineToModuleHandler: public DwarfCUToModule::LineToModuleHandler { public: - MOCK_METHOD4(mock_apply, void(const char *program, uint64 length, - Module *module, vector *lines)); - void operator()(const char *program, uint64 length, - Module *module, vector *lines) { - mock_apply(program, length, module, lines); - } + MOCK_METHOD1(StartCompilationUnit, void(const string& compilation_dir)); + MOCK_METHOD4(ReadProgram, void(const char* program, uint64 length, + Module *module, vector *lines)); }; class MockWarningReporter: public DwarfCUToModule::WarningReporter { @@ -102,10 +99,10 @@ class CUFixtureBase { // appender(line_program, length, module, line_vector); // // will append lines to the end of line_vector. We can use this with - // MockLineToModuleFunctor like this: + // MockLineToModuleHandler like this: // - // MockLineToModuleFunctor l2m; - // EXPECT_CALL(l2m, mock_apply(_,_,_,_)) + // MockLineToModuleHandler l2m; + // EXPECT_CALL(l2m, ReadProgram(_,_,_,_)) // .WillOnce(DoAll(Invoke(appender), Return())); // // in which case calling l2m with some line vector will append lines. @@ -143,7 +140,8 @@ class CUFixtureBase { // By default, expect the line program reader not to be invoked. We // may override this in StartCU. - EXPECT_CALL(line_reader_, mock_apply(_,_,_,_)).Times(0); + EXPECT_CALL(line_reader_, StartCompilationUnit(_)).Times(0); + EXPECT_CALL(line_reader_, ReadProgram(_,_,_,_)).Times(0); // The handler will consult this section map to decide what to // pass to our line reader. @@ -153,7 +151,7 @@ class CUFixtureBase { // Add a line with the given address, size, filename, and line // number to the end of the statement list the handler will receive - // when it invokes its LineToModuleFunctor. Call this before calling + // when it invokes its LineToModuleHandler. Call this before calling // StartCU. 
void PushLine(Module::Address address, Module::Address size, const string &filename, int line_number); @@ -271,13 +269,17 @@ class CUFixtureBase { // report it as an unsigned value. bool language_signed_; + // If this is not empty, we'll give the CU a DW_AT_comp_dir attribute that + // indicates the path that this compilation unit was compiled in. + string compilation_dir_; + // If this is not empty, we'll give the CU a DW_AT_stmt_list // attribute that, when passed to line_reader_, adds these lines to the // provided lines array. vector lines_; // Mock line program reader. - MockLineToModuleFunctor line_reader_; + MockLineToModuleHandler line_reader_; AppendLinesFunctor appender_; static const char dummy_line_program_[]; static const size_t dummy_line_size_; @@ -311,6 +313,10 @@ void CUFixtureBase::PushLine(Module::Address address, Module::Address size, } void CUFixtureBase::StartCU() { + if (!compilation_dir_.empty()) + EXPECT_CALL(line_reader_, + StartCompilationUnit(compilation_dir_)).Times(1); + // If we have lines, make the line reader expect to be invoked at // most once. (Hey, if the handler can pass its tests without // bothering to read the line number data, that's great.) @@ -318,8 +324,8 @@ void CUFixtureBase::StartCU() { // initial expectation (no calls) in force. 
if (!lines_.empty()) EXPECT_CALL(line_reader_, - mock_apply(&dummy_line_program_[0], dummy_line_size_, - &module_, _)) + ReadProgram(&dummy_line_program_[0], dummy_line_size_, + &module_, _)) .Times(AtMost(1)) .WillOnce(DoAll(Invoke(appender_), Return())); @@ -333,6 +339,10 @@ void CUFixtureBase::StartCU() { root_handler_.ProcessAttributeString(dwarf2reader::DW_AT_name, dwarf2reader::DW_FORM_strp, "compilation-unit-name"); + if (!compilation_dir_.empty()) + root_handler_.ProcessAttributeString(dwarf2reader::DW_AT_comp_dir, + dwarf2reader::DW_FORM_strp, + compilation_dir_); if (!lines_.empty()) root_handler_.ProcessAttributeUnsigned(dwarf2reader::DW_AT_stmt_list, dwarf2reader::DW_FORM_ref4, @@ -626,6 +636,13 @@ void CUFixtureBase::TestLine(int i, int j, class SimpleCU: public CUFixtureBase, public Test { }; +TEST_F(SimpleCU, CompilationDir) { + compilation_dir_ = "/src/build/"; + + StartCU(); + root_handler_.Finish(); +} + TEST_F(SimpleCU, OneFunc) { PushLine(0x938cf8c07def4d34ULL, 0x55592d727f6cd01fLL, "line-file", 246571772); @@ -1414,8 +1431,8 @@ TEST_F(Specifications, InterCU) { Module m("module-name", "module-os", "module-arch", "module-id"); DwarfCUToModule::FileContext fc("dwarf-filename", &m); EXPECT_CALL(reporter_, UncoveredFunction(_)).WillOnce(Return()); - MockLineToModuleFunctor lr; - EXPECT_CALL(lr, mock_apply(_,_,_,_)).Times(0); + MockLineToModuleHandler lr; + EXPECT_CALL(lr, ReadProgram(_,_,_,_)).Times(0); // Kludge: satisfy reporter_'s expectation. 
reporter_.SetCUName("compilation-unit-name"); diff --git a/src/common/dwarf_line_to_module.cc b/src/common/dwarf_line_to_module.cc index 962848d1..258b0b60 100644 --- a/src/common/dwarf_line_to_module.cc +++ b/src/common/dwarf_line_to_module.cc @@ -48,13 +48,17 @@ static bool PathIsAbsolute(const string &path) { return (path.size() >= 1 && path[0] == '/'); } +static bool HasTrailingSlash(const string &path) { + return (path.size() >= 1 && path[path.size() - 1] == '/'); +} + // If PATH is an absolute path, return PATH. If PATH is a relative path, // treat it as relative to BASE and return the combined path. static string ExpandPath(const string &path, const string &base) { - if (PathIsAbsolute(path)) + if (PathIsAbsolute(path) || base.empty()) return path; - return base + "/" + path; + return base + (HasTrailingSlash(base) ? "" : "/") + path; } namespace google_breakpad { @@ -63,7 +67,7 @@ void DwarfLineToModule::DefineDir(const string &name, uint32 dir_num) { // Directory number zero is reserved to mean the compilation // directory. Silently ignore attempts to redefine it. if (dir_num != 0) - directories_[dir_num] = name; + directories_[dir_num] = ExpandPath(name, compilation_dir_); } void DwarfLineToModule::DefineFile(const string &name, int32 file_num, @@ -74,25 +78,26 @@ void DwarfLineToModule::DefineFile(const string &name, int32 file_num, else if (file_num > highest_file_number_) highest_file_number_ = file_num; - string full_name; - if (dir_num != 0) { + string dir_name; + if (dir_num == 0) { + // Directory number zero is the compilation directory, and is stored as + // an attribute on the compilation unit, rather than in the program table. 
+ dir_name = compilation_dir_; + } else { DirectoryTable::const_iterator directory_it = directories_.find(dir_num); if (directory_it != directories_.end()) { - full_name = ExpandPath(name, directory_it->second); + dir_name = directory_it->second; } else { if (!warned_bad_directory_number_) { fprintf(stderr, "warning: DWARF line number data refers to undefined" " directory numbers\n"); warned_bad_directory_number_ = true; } - full_name = name; // just treat name as relative } - } else { - // Directory number zero is the compilation directory; we just report - // relative paths in that case. - full_name = name; } + string full_name = ExpandPath(name, dir_name); + // Find a Module::File object of the given name, and add it to the // file table. files_[file_num] = module_->FindFile(full_name); diff --git a/src/common/dwarf_line_to_module.h b/src/common/dwarf_line_to_module.h index 9382e40d..1fdd4cb7 100644 --- a/src/common/dwarf_line_to_module.h +++ b/src/common/dwarf_line_to_module.h @@ -120,8 +120,10 @@ class DwarfLineToModule: public dwarf2reader::LineInfoHandler { // end of the address space, we clip it. It's up to our client to // sort out which lines belong to which functions; we don't add them // to any particular function in MODULE ourselves. - DwarfLineToModule(Module *module, vector *lines) + DwarfLineToModule(Module *module, const string& compilation_dir, + vector *lines) : module_(module), + compilation_dir_(compilation_dir), lines_(lines), highest_file_number_(-1), omitted_line_end_(0), @@ -146,6 +148,10 @@ class DwarfLineToModule: public dwarf2reader::LineInfoHandler { // client. Module *module_; + // The compilation directory for the current compilation unit whose + // lines are being accumulated. + string compilation_dir_; + // The vector of lines we're accumulating. Owned by our client. 
// // In a Module, as in a breakpad symbol file, lines belong to diff --git a/src/common/dwarf_line_to_module_unittest.cc b/src/common/dwarf_line_to_module_unittest.cc index 1e123e97..7c0fcfd3 100644 --- a/src/common/dwarf_line_to_module_unittest.cc +++ b/src/common/dwarf_line_to_module_unittest.cc @@ -45,7 +45,7 @@ using google_breakpad::Module; TEST(SimpleModule, One) { Module m("name", "os", "architecture", "id"); vector lines; - DwarfLineToModule h(&m, &lines); + DwarfLineToModule h(&m, "/", &lines); h.DefineFile("file1", 0x30bf0f27, 0, 0, 0); h.AddLine(0x6fd126fbf74f2680LL, 0x63c9a14cf556712bLL, 0x30bf0f27, @@ -54,7 +54,7 @@ TEST(SimpleModule, One) { vector files; m.GetFiles(&files); EXPECT_EQ(1U, files.size()); - EXPECT_STREQ("file1", files[0]->name.c_str()); + EXPECT_STREQ("/file1", files[0]->name.c_str()); EXPECT_EQ(1U, lines.size()); EXPECT_EQ(0x6fd126fbf74f2680ULL, lines[0].address); @@ -66,7 +66,7 @@ TEST(SimpleModule, One) { TEST(SimpleModule, Many) { Module m("name", "os", "architecture", "id"); vector lines; - DwarfLineToModule h(&m, &lines); + DwarfLineToModule h(&m, "/", &lines); h.DefineDir("directory1", 0x838299ab); h.DefineDir("directory2", 0xf85de023); @@ -89,11 +89,11 @@ TEST(SimpleModule, Many) { vector files; m.GetFiles(&files); ASSERT_EQ(5U, files.size()); - EXPECT_STREQ("directory1/file1", files[0]->name.c_str()); - EXPECT_STREQ("directory1/file2", files[1]->name.c_str()); - EXPECT_STREQ("directory2/file1", files[2]->name.c_str()); - EXPECT_STREQ("directory2/file2", files[3]->name.c_str()); - EXPECT_STREQ("file3", files[4]->name.c_str()); + EXPECT_STREQ("/directory1/file1", files[0]->name.c_str()); + EXPECT_STREQ("/directory1/file2", files[1]->name.c_str()); + EXPECT_STREQ("/directory2/file1", files[2]->name.c_str()); + EXPECT_STREQ("/directory2/file2", files[3]->name.c_str()); + EXPECT_STREQ("/file3", files[4]->name.c_str()); ASSERT_EQ(5U, lines.size()); @@ -126,7 +126,7 @@ TEST(SimpleModule, Many) { TEST(Filenames, Absolute) { Module 
m("name", "os", "architecture", "id"); vector lines; - DwarfLineToModule h(&m, &lines); + DwarfLineToModule h(&m, "/", &lines); h.DefineDir("directory1", 1); h.DefineFile("/absolute", 1, 1, 0, 0); @@ -144,7 +144,7 @@ TEST(Filenames, Absolute) { TEST(Filenames, Relative) { Module m("name", "os", "architecture", "id"); vector lines; - DwarfLineToModule h(&m, &lines); + DwarfLineToModule h(&m, "/", &lines); h.DefineDir("directory1", 1); h.DefineFile("relative", 1, 1, 0, 0); @@ -154,7 +154,7 @@ TEST(Filenames, Relative) { vector files; m.GetFiles(&files); ASSERT_EQ(1U, files.size()); - EXPECT_STREQ("directory1/relative", files[0]->name.c_str()); + EXPECT_STREQ("/directory1/relative", files[0]->name.c_str()); ASSERT_EQ(1U, lines.size()); EXPECT_TRUE(lines[0].file == files[0]); } @@ -162,20 +162,20 @@ TEST(Filenames, Relative) { TEST(Filenames, StrangeFile) { Module m("name", "os", "architecture", "id"); vector lines; - DwarfLineToModule h(&m, &lines); + DwarfLineToModule h(&m, "/", &lines); h.DefineDir("directory1", 1); h.DefineFile("", 1, 1, 0, 0); h.AddLine(1, 1, 1, 0, 0); ASSERT_EQ(1U, lines.size()); - EXPECT_STREQ("directory1/", lines[0].file->name.c_str()); + EXPECT_STREQ("/directory1/", lines[0].file->name.c_str()); } TEST(Filenames, StrangeDirectory) { Module m("name", "os", "architecture", "id"); vector lines; - DwarfLineToModule h(&m, &lines); + DwarfLineToModule h(&m, "/", &lines); h.DefineDir("", 1); h.DefineFile("file1", 1, 1, 0, 0); @@ -188,7 +188,7 @@ TEST(Filenames, StrangeDirectory) { TEST(Filenames, StrangeDirectoryAndFile) { Module m("name", "os", "architecture", "id"); vector lines; - DwarfLineToModule h(&m, &lines); + DwarfLineToModule h(&m, "/", &lines); h.DefineDir("", 1); h.DefineFile("", 1, 1, 0, 0); @@ -198,12 +198,60 @@ TEST(Filenames, StrangeDirectoryAndFile) { EXPECT_STREQ("/", lines[0].file->name.c_str()); } +// We should use the compilation directory when encountering a file for +// directory number zero. 
+TEST(Filenames, DirectoryZeroFileIsRelativeToCompilationDir) { + Module m("name", "os", "architecture", "id"); + vector lines; + DwarfLineToModule h(&m, "src/build", &lines); + + h.DefineDir("Dir", 1); + h.DefineFile("File", 1, 0, 0, 0); + + h.AddLine(1, 1, 1, 0, 0); + + ASSERT_EQ(1U, lines.size()); + EXPECT_STREQ("src/build/File", lines[0].file->name.c_str()); +} + +// We should treat non-absolute directories as relative to the compilation +// directory. +TEST(Filenames, IncludeDirectoryRelativeToDirectoryZero) { + Module m("name", "os", "architecture", "id"); + vector lines; + DwarfLineToModule h(&m, "src/build", &lines); + + h.DefineDir("Dir", 1); + h.DefineFile("File", 1, 1, 0, 0); + + h.AddLine(1, 1, 1, 0, 0); + + ASSERT_EQ(1U, lines.size()); + EXPECT_STREQ("src/build/Dir/File", lines[0].file->name.c_str()); +} + +// We should treat absolute directories as absolute, and not relative to +// the compilation dir. +TEST(Filenames, IncludeDirectoryAbsolute) { + Module m("name", "os", "architecture", "id"); + vector lines; + DwarfLineToModule h(&m, "src/build", &lines); + + h.DefineDir("/Dir", 1); + h.DefineFile("File", 1, 1, 0, 0); + + h.AddLine(1, 1, 1, 0, 0); + + ASSERT_EQ(1U, lines.size()); + EXPECT_STREQ("/Dir/File", lines[0].file->name.c_str()); +} + // We should silently ignore attempts to define directory number zero, // since that is always the compilation directory. TEST(ModuleErrors, DirectoryZero) { Module m("name", "os", "architecture", "id"); vector lines; - DwarfLineToModule h(&m, &lines); + DwarfLineToModule h(&m, "/", &lines); h.DefineDir("directory0", 0); // should be ignored h.DefineFile("relative", 1, 0, 0, 0); @@ -211,7 +259,7 @@ TEST(ModuleErrors, DirectoryZero) { h.AddLine(1, 1, 1, 0, 0); ASSERT_EQ(1U, lines.size()); - EXPECT_STREQ("relative", lines[0].file->name.c_str()); + EXPECT_STREQ("/relative", lines[0].file->name.c_str()); } // We should refuse to add lines with bogus file numbers. 
We should @@ -219,7 +267,7 @@ TEST(ModuleErrors, DirectoryZero) { TEST(ModuleErrors, BadFileNumber) { Module m("name", "os", "architecture", "id"); vector lines; - DwarfLineToModule h(&m, &lines); + DwarfLineToModule h(&m, "/", &lines); h.DefineFile("relative", 1, 0, 0, 0); h.AddLine(1, 1, 2, 0, 0); // bad file number @@ -233,7 +281,7 @@ TEST(ModuleErrors, BadFileNumber) { TEST(ModuleErrors, BadDirectoryNumber) { Module m("name", "os", "architecture", "id"); vector lines; - DwarfLineToModule h(&m, &lines); + DwarfLineToModule h(&m, "/", &lines); h.DefineDir("directory1", 1); h.DefineFile("baddirnumber1", 1, 2, 0, 0); // bad directory number @@ -248,7 +296,7 @@ TEST(ModuleErrors, BadDirectoryNumber) { TEST(ModuleErrors, EmptyLine) { Module m("name", "os", "architecture", "id"); vector lines; - DwarfLineToModule h(&m, &lines); + DwarfLineToModule h(&m, "/", &lines); h.DefineFile("filename1", 1, 0, 0, 0); h.AddLine(1, 0, 1, 0, 0); @@ -261,7 +309,7 @@ TEST(ModuleErrors, EmptyLine) { TEST(ModuleErrors, BigLine) { Module m("name", "os", "architecture", "id"); vector lines; - DwarfLineToModule h(&m, &lines); + DwarfLineToModule h(&m, "/", &lines); h.DefineFile("filename1", 1, 0, 0, 0); h.AddLine(0xffffffffffffffffULL, 2, 1, 0, 0); @@ -278,7 +326,7 @@ TEST(ModuleErrors, BigLine) { TEST(Omitted, DroppedThenGood) { Module m("name", "os", "architecture", "id"); vector lines; - DwarfLineToModule h(&m, &lines); + DwarfLineToModule h(&m, "/", &lines); h.DefineFile("filename1", 1, 0, 0, 0); h.AddLine(0, 10, 1, 83816211, 0); // should be omitted @@ -291,7 +339,7 @@ TEST(Omitted, DroppedThenGood) { TEST(Omitted, GoodThenDropped) { Module m("name", "os", "architecture", "id"); vector lines; - DwarfLineToModule h(&m, &lines); + DwarfLineToModule h(&m, "/", &lines); h.DefineFile("filename1", 1, 0, 0, 0); h.AddLine(0x9dd6a372, 10, 1, 41454594, 0); // should be recorded @@ -304,7 +352,7 @@ TEST(Omitted, GoodThenDropped) { TEST(Omitted, Mix1) { Module m("name", "os", "architecture", 
"id"); vector lines; - DwarfLineToModule h(&m, &lines); + DwarfLineToModule h(&m, "/", &lines); h.DefineFile("filename1", 1, 0, 0, 0); h.AddLine(0x679ed72f, 10, 1, 58932642, 0); // should be recorded @@ -325,7 +373,7 @@ TEST(Omitted, Mix1) { TEST(Omitted, Mix2) { Module m("name", "os", "architecture", "id"); vector lines; - DwarfLineToModule h(&m, &lines); + DwarfLineToModule h(&m, "/", &lines); h.DefineFile("filename1", 1, 0, 0, 0); h.AddLine(0, 0xf2, 1, 58802211, 0); // should be omitted diff --git a/src/common/linux/dump_symbols.cc b/src/common/linux/dump_symbols.cc index 4a85ab8f..b739bf71 100644 --- a/src/common/linux/dump_symbols.cc +++ b/src/common/linux/dump_symbols.cc @@ -186,18 +186,22 @@ bool LoadStabs(const typename ElfClass::Ehdr* elf_header, // A line-to-module loader that accepts line number info parsed by // dwarf2reader::LineInfo and populates a Module and a line vector // with the results. -class DumperLineToModule: public DwarfCUToModule::LineToModuleFunctor { +class DumperLineToModule: public DwarfCUToModule::LineToModuleHandler { public: // Create a line-to-module converter using BYTE_READER. 
explicit DumperLineToModule(dwarf2reader::ByteReader *byte_reader) : byte_reader_(byte_reader) { } - void operator()(const char *program, uint64 length, - Module *module, std::vector *lines) { - DwarfLineToModule handler(module, lines); + void StartCompilationUnit(const string& compilation_dir) { + compilation_dir_ = compilation_dir; + } + void ReadProgram(const char *program, uint64 length, + Module *module, std::vector *lines) { + DwarfLineToModule handler(module, compilation_dir_, lines); dwarf2reader::LineInfo parser(program, length, byte_reader_, &handler); parser.Start(); } private: + string compilation_dir_; dwarf2reader::ByteReader *byte_reader_; }; diff --git a/src/common/mac/dump_syms.mm b/src/common/mac/dump_syms.mm index d79afe26..e26b05ea 100644 --- a/src/common/mac/dump_syms.mm +++ b/src/common/mac/dump_syms.mm @@ -227,18 +227,24 @@ string DumpSymbols::Identifier() { // dwarf2reader::LineInfo and populates a Module and a line vector // with the results. class DumpSymbols::DumperLineToModule: - public DwarfCUToModule::LineToModuleFunctor { + public DwarfCUToModule::LineToModuleHandler { public: // Create a line-to-module converter using BYTE_READER. 
DumperLineToModule(dwarf2reader::ByteReader *byte_reader) : byte_reader_(byte_reader) { } - void operator()(const char *program, uint64 length, - Module *module, vector *lines) { - DwarfLineToModule handler(module, lines); + + void StartCompilationUnit(const string& compilation_dir) { + compilation_dir_ = compilation_dir; + } + + void ReadProgram(const char *program, uint64 length, + Module *module, vector *lines) { + DwarfLineToModule handler(module, compilation_dir_, lines); dwarf2reader::LineInfo parser(program, length, byte_reader_, &handler); parser.Start(); } private: + string compilation_dir_; dwarf2reader::ByteReader *byte_reader_; // WEAK }; diff --git a/src/tools/python/filter_syms.py b/src/tools/python/filter_syms.py new file mode 100644 index 00000000..738cb3e9 --- /dev/null +++ b/src/tools/python/filter_syms.py @@ -0,0 +1,204 @@ +#!/usr/bin/env python +# Copyright (c) 2012 Google Inc. +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above +# copyright notice, this list of conditions and the following disclaimer +# in the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Google Inc. nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +"""Normalizes and de-duplicates paths within Breakpad symbol files. + +When using DWARF for storing debug symbols, some file information will be +stored relative to the current working directory of the current compilation +unit, and may be further relativized based upon how the file was #included. + +This helper can be used to parse the Breakpad symbol file generated from such +DWARF files and normalize and de-duplicate the FILE records found within, +updating any references to the FILE records in the other record types. +""" + +import macpath +import ntpath +import optparse +import os +import posixpath +import sys + +class BreakpadParseError(Exception): + """Unsupported Breakpad symbol record exception class.""" + pass + +class SymbolFileParser(object): + """Parser for Breakpad symbol files. + + The format of these files is documented at + https://code.google.com/p/google-breakpad/wiki/SymbolFiles + """ + + def __init__(self, input_stream, output_stream, ignored_prefixes=None, + path_handler=os.path): + """Inits a SymbolFileParser to read symbol records from |input_stream| and + write the processed output to |output_stream|. + + |ignored_prefixes| contains a list of optional path prefixes that + should be stripped from the final, normalized path outputs. 
+ + For example, if the Breakpad symbol file had all paths starting with a + common prefix, such as: + FILE 1 /b/build/src/foo.cc + FILE 2 /b/build/src/bar.cc + Then adding "/b/build/src" as an ignored prefix would result in an output + file that contained: + FILE 1 foo.cc + FILE 2 bar.cc + + Note that |ignored_prefixes| does not necessarily contain file system + paths, as the contents of the DWARF DW_AT_comp_dir attribute are dependent + upon the host system and compiler, and may contain additional information + such as hostname or compiler version. + """ + + self.unique_files = {} + self.duplicate_files = {} + self.input_stream = input_stream + self.output_stream = output_stream + self.ignored_prefixes = ignored_prefixes or [] + self.path_handler = path_handler + + def Process(self): + """Processes the Breakpad symbol file.""" + for line in self.input_stream: + parsed = self._ParseRecord(line.rstrip()) + if parsed: + self.output_stream.write(parsed + '\n') + + def _ParseRecord(self, record): + """Parses a single Breakpad symbol record - a single line from the symbol + file. + + Returns: + The modified string to write to the output file, or None if no line + should be written. + """ + record_type = record.partition(' ')[0] + if record_type == 'FILE': + return self._ParseFileRecord(record) + elif self._IsLineRecord(record_type): + return self._ParseLineRecord(record) + else: + # Simply pass the record through unaltered. + return record + + def _NormalizePath(self, path): + """Normalizes a file path to its canonical form. + + As this may not execute on the machine or file system originally + responsible for compilation, it may be necessary to further correct paths + for symlinks, junctions, or other such file system indirections. + + Returns: + A unique, canonical representation for the file path. + """ + return self.path_handler.normpath(path) + + def _AdjustPath(self, path): + """Adjusts the supplied path after performing path de-duplication. 
+ + This may be used to perform secondary adjustments, such as removing a + common prefix, such as "/D/build", or replacing the file system path with + information from the version control system. + + Returns: + The actual path to use when writing the FILE record. + """ + return path[len(filter(path.startswith, + self.ignored_prefixes + [''])[0]):] + + def _ParseFileRecord(self, file_record): + """Parses and corrects a FILE record.""" + file_info = file_record[5:].split(' ', 3) + if len(file_info) > 2: + raise BreakpadParseError('Unsupported FILE record: ' + file_record) + file_index = int(file_info[0]) + file_name = self._NormalizePath(file_info[1]) + existing_file_index = self.unique_files.get(file_name) + if existing_file_index is None: + self.unique_files[file_name] = file_index + file_info[1] = self._AdjustPath(file_name) + return 'FILE ' + ' '.join(file_info) + else: + self.duplicate_files[file_index] = existing_file_index + return None + + def _IsLineRecord(self, record_type): + """Determines if the current record type is a Line record""" + try: + line = int(record_type, 16) + except (ValueError, TypeError): + return False + return True + + def _ParseLineRecord(self, line_record): + """Parses and corrects a Line record.""" + line_info = line_record.split(' ', 5) + if len(line_info) > 4: + raise BreakpadParseError('Unsupported Line record: ' + line_record) + file_index = int(line_info[3]) + line_info[3] = str(self.duplicate_files.get(file_index, file_index)) + return ' '.join(line_info) + +def main(): + option_parser = optparse.OptionParser() + option_parser.add_option("-p", "--prefix", + action="append", dest="prefixes", type="string", + default=[], + help="A path prefix that should be removed from " + "all FILE lines. 
May be repeated to specify " + "multiple prefixes.") + option_parser.add_option("-t", "--path_type", + action="store", type="choice", dest="path_handler", + choices=['win32', 'posix'], + help="Indicates how file paths should be " + "interpreted. The default is to treat paths " + "the same as the OS running Python (eg: " + "os.path)") + options, args = option_parser.parse_args() + if args: + option_parser.error('Unknown argument: %s' % args) + + path_handler = { 'win32': ntpath, + 'posix': posixpath }.get(options.path_handler, os.path) + try: + symbol_parser = SymbolFileParser(sys.stdin, sys.stdout, options.prefixes, + path_handler) + symbol_parser.Process() + except BreakpadParseError, e: + print >> sys.stderr, 'Got an error while processing symbol file' + print >> sys.stderr, str(e) + return 1 + return 0 + +if __name__ == '__main__': + sys.exit(main()) diff --git a/src/tools/python/tests/filter_syms_unittest.py b/src/tools/python/tests/filter_syms_unittest.py new file mode 100644 index 00000000..b111f349 --- /dev/null +++ b/src/tools/python/tests/filter_syms_unittest.py @@ -0,0 +1,138 @@ +#!/usr/bin/env python +# Copyright (c) 2012 Google Inc. +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above +# copyright notice, this list of conditions and the following disclaimer +# in the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Google Inc. nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. 
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+"""Unit tests for filter_syms.py"""
+
+import cStringIO
+import ntpath
+import os
+import StringIO
+import sys
+import unittest
+
+ROOT_DIR = os.path.dirname(os.path.abspath(__file__))
+sys.path.insert(0, os.path.join(ROOT_DIR, '..'))
+
+# Imported via the sys.path entry added above (the parent of this directory).
+import filter_syms
+
+class FilterSysmsTest(unittest.TestCase):
+  def assertParsed(self, input_data, ignored_prefixes, expected):
+    """Filters input_data with ntpath path rules and checks the output."""
+    source = cStringIO.StringIO(input_data)
+    sink = cStringIO.StringIO()
+    filter_syms.SymbolFileParser(source, sink, ignored_prefixes,
+                                 ntpath).Process()
+    self.assertEqual(sink.getvalue(), expected)
+
+  def testDuplicateFiles(self):
+    """FILE records naming the same normalized path collapse into the first
+    one, and Line records are renumbered to reference it."""
+
+    symbols = \
+"""MODULE windows x86 111111111111111111111111111111111 module1.pdb
+INFO CODE_ID FFFFFFFF module1.exe
+FILE 1 foo/../file1_1.cc
+FILE 2 bar/../file1_1.cc
+FILE 3 baz/../file1_1.cc
+FUNC 1000 c 0 Function1_1
+1000 8 45 2
+1008 4 46 3
+100c 4 44 1
+"""
+    filtered = \
+"""MODULE windows x86 111111111111111111111111111111111 module1.pdb
+INFO CODE_ID FFFFFFFF module1.exe
+FILE 1 file1_1.cc
+FUNC 1000 c 0 Function1_1
+1000 8 45 1
+1008 4 46 1
+100c 4 44 1
+"""
+    self.assertParsed(symbols, [], filtered)
+
+  def testIgnoredPrefix(self):
+    """An ignored prefix is stripped from the paths of FILE records."""
+
+    symbols = \
+"""MODULE windows x86 111111111111111111111111111111111 module1.pdb
+INFO CODE_ID FFFFFFFF module1.exe
+FILE 1 /src/build/foo/../file1_1.cc
+FILE 2 /src/build/bar/../file1_2.cc
+FILE 3 /src/build/baz/../file1_2.cc
+FUNC 1000 c 0 Function1_1
+1000 8 45 2
+1008 4 46 3
+100c 4 44 1
+"""
+    filtered = \
+"""MODULE windows x86 111111111111111111111111111111111 module1.pdb
+INFO CODE_ID FFFFFFFF module1.exe
+FILE 1 file1_1.cc
+FILE 2 file1_2.cc
+FUNC 1000 c 0 Function1_1
+1000 8 45 2
+1008 4 46 2
+100c 4 44 1
+"""
+    prefixes = ['\\src\\build\\']
+    self.assertParsed(symbols, prefixes, filtered)
+
+  def testIgnoredPrefixesDuplicateFiles(self):
+    """Duplicates are detected on the full path, BEFORE prefixes are removed,
+    so distinct paths that become equal once stripped are both kept."""
+
+    symbols = \
+"""MODULE windows x86 111111111111111111111111111111111 module1.pdb
+INFO CODE_ID FFFFFFFF module1.exe
+FILE 1 /src/build/foo/../file1_1.cc
+FILE 2 /src/build/bar/../file1_2.cc
+FILE 3 D:/src/build2/baz/../file1_2.cc
+FUNC 1000 c 0 Function1_1
+1000 8 45 2
+1008 4 46 3
+100c 4 44 1
+"""
+    filtered = \
+"""MODULE windows x86 111111111111111111111111111111111 module1.pdb
+INFO CODE_ID FFFFFFFF module1.exe
+FILE 1 file1_1.cc
+FILE 2 file1_2.cc
+FILE 3 file1_2.cc
+FUNC 1000 c 0 Function1_1
+1000 8 45 2
+1008 4 46 3
+100c 4 44 1
+"""
+    prefixes = ['\\src\\build\\', 'D:\\src\\build2\\']
+    self.assertParsed(symbols, prefixes, filtered)
+
+if __name__ == '__main__':
+  unittest.main()
\ No newline at end of file