From 1f87c4a732fa5175218549b4d7053112dbd57894 Mon Sep 17 00:00:00 2001
From: "ted.mielczarek@gmail.com"
 <ted.mielczarek@gmail.com@4c0a9323-5329-0410-9bdc-e9ce6186880e>
Date: Wed, 23 Jan 2013 18:01:28 +0000
Subject: [PATCH] Include the compilation directory for FILE entries, making
 them absolute instead of relative A=Ryan Sleevi <rsleevi@chromium.org>
 R=mark,ted at https://breakpad.appspot.com/385001/

git-svn-id: http://google-breakpad.googlecode.com/svn/trunk@1106 4c0a9323-5329-0410-9bdc-e9ce6186880e
---
 src/common/dwarf_cu_to_module.cc              |  18 +-
 src/common/dwarf_cu_to_module.h               |  24 ++-
 src/common/dwarf_cu_to_module_unittest.cc     |  51 +++--
 src/common/dwarf_line_to_module.cc            |  27 ++-
 src/common/dwarf_line_to_module.h             |   8 +-
 src/common/dwarf_line_to_module_unittest.cc   |  98 ++++++---
 src/common/linux/dump_symbols.cc              |  12 +-
 src/common/mac/dump_syms.mm                   |  14 +-
 src/tools/python/filter_syms.py               | 204 ++++++++++++++++++
 .../python/tests/filter_syms_unittest.py      | 138 ++++++++++++
 10 files changed, 518 insertions(+), 76 deletions(-)
 create mode 100644 src/tools/python/filter_syms.py
 create mode 100644 src/tools/python/tests/filter_syms_unittest.py

diff --git a/src/common/dwarf_cu_to_module.cc b/src/common/dwarf_cu_to_module.cc
index 127beacb..55f69130 100644
--- a/src/common/dwarf_cu_to_module.cc
+++ b/src/common/dwarf_cu_to_module.cc
@@ -623,7 +623,7 @@ void DwarfCUToModule::WarningReporter::UnnamedFunction(uint64 offset) {
 }
 
 DwarfCUToModule::DwarfCUToModule(FileContext *file_context,
-                                 LineToModuleFunctor *line_reader,
+                                 LineToModuleHandler *line_reader,
                                  WarningReporter *reporter)
     : line_reader_(line_reader), has_source_line_info_(false) { 
   cu_context_ = new CUContext(file_context, reporter);
@@ -666,8 +666,16 @@ void DwarfCUToModule::ProcessAttributeUnsigned(enum DwarfAttribute attr,
 void DwarfCUToModule::ProcessAttributeString(enum DwarfAttribute attr,
                                              enum DwarfForm form,
                                              const string &data) {
-  if (attr == dwarf2reader::DW_AT_name)
-    cu_context_->reporter->SetCUName(data);
+  switch (attr) {
+    case dwarf2reader::DW_AT_name:
+      cu_context_->reporter->SetCUName(data);
+      break;
+    case dwarf2reader::DW_AT_comp_dir:
+      line_reader_->StartCompilationUnit(data);
+      break;
+    default:
+      break;
+  }
 }
 
 bool DwarfCUToModule::EndAttributes() {
@@ -744,8 +752,8 @@ void DwarfCUToModule::ReadSourceLines(uint64 offset) {
     cu_context_->reporter->BadLineInfoOffset(offset);
     return;
   }
-  (*line_reader_)(section_start + offset, section_length - offset,
-                  cu_context_->file_context->module, &lines_);
+  line_reader_->ReadProgram(section_start + offset, section_length - offset,
+                            cu_context_->file_context->module, &lines_);
 }
 
 namespace {
diff --git a/src/common/dwarf_cu_to_module.h b/src/common/dwarf_cu_to_module.h
index c7072b0f..85453316 100644
--- a/src/common/dwarf_cu_to_module.h
+++ b/src/common/dwarf_cu_to_module.h
@@ -92,21 +92,27 @@ class DwarfCUToModule: public dwarf2reader::RootDIEHandler {
     FilePrivate *file_private;
   };
 
-  // An abstract base class for functors that handle DWARF line data
+  // An abstract base class for handlers that process DWARF line data
   // for DwarfCUToModule. DwarfCUToModule could certainly just use
   // dwarf2reader::LineInfo itself directly, but decoupling things
   // this way makes unit testing a little easier.
-  class LineToModuleFunctor {
+  class LineToModuleHandler {
    public:
-    LineToModuleFunctor() { }
-    virtual ~LineToModuleFunctor() { }
+    LineToModuleHandler() { }
+    virtual ~LineToModuleHandler() { }
+
+    // Called at the beginning of a new compilation unit, prior to calling
+    // ReadProgram(). compilation_dir will indicate the path that the
+    // current compilation unit was compiled in, consistent with the
+    // DW_AT_comp_dir attribute.
+    virtual void StartCompilationUnit(const string& compilation_dir) = 0;
 
     // Populate MODULE and LINES with source file names and code/line
     // mappings, given a pointer to some DWARF line number data
     // PROGRAM, and an overestimate of its size. Add no zero-length
     // lines to LINES.
-    virtual void operator()(const char *program, uint64 length,
-                            Module *module, vector<Module::Line> *lines) = 0;
+    virtual void ReadProgram(const char *program, uint64 length,
+                             Module *module, vector<Module::Line> *lines) = 0;
   };
 
   // The interface DwarfCUToModule uses to report warnings. The member
@@ -186,7 +192,7 @@ class DwarfCUToModule: public dwarf2reader::RootDIEHandler {
   // unit's line number data. Use REPORTER to report problems with the
   // data we find.
   DwarfCUToModule(FileContext *file_context,
-                  LineToModuleFunctor *line_reader,
+                  LineToModuleHandler *line_reader,
                   WarningReporter *reporter);
   ~DwarfCUToModule();
 
@@ -247,8 +253,8 @@ class DwarfCUToModule: public dwarf2reader::RootDIEHandler {
   // owned by this DwarfCUToModule: the constructor sets them, and the
   // destructor deletes them.
 
-  // The functor to use to handle line number data.
-  LineToModuleFunctor *line_reader_;
+  // The handler used to process line number data.
+  LineToModuleHandler *line_reader_;
 
   // This compilation unit's context.
   CUContext *cu_context_;
diff --git a/src/common/dwarf_cu_to_module_unittest.cc b/src/common/dwarf_cu_to_module_unittest.cc
index 37061d2a..81e629b0 100644
--- a/src/common/dwarf_cu_to_module_unittest.cc
+++ b/src/common/dwarf_cu_to_module_unittest.cc
@@ -62,14 +62,11 @@ using ::testing::ValuesIn;
 
 // Mock classes.
 
-class MockLineToModuleFunctor: public DwarfCUToModule::LineToModuleFunctor {
+class MockLineToModuleHandler: public DwarfCUToModule::LineToModuleHandler {
  public:
-  MOCK_METHOD4(mock_apply, void(const char *program, uint64 length,
-                                Module *module, vector<Module::Line> *lines));
-  void operator()(const char *program, uint64 length,
-                  Module *module, vector<Module::Line> *lines) {
-    mock_apply(program, length, module, lines);
-  }
+  MOCK_METHOD1(StartCompilationUnit, void(const string& compilation_dir));
+  MOCK_METHOD4(ReadProgram, void(const char* program, uint64 length,
+                                 Module *module, vector<Module::Line> *lines));
 };
 
 class MockWarningReporter: public DwarfCUToModule::WarningReporter {
@@ -102,10 +99,10 @@ class CUFixtureBase {
   //   appender(line_program, length, module, line_vector);
   //
   // will append lines to the end of line_vector.  We can use this with
-  // MockLineToModuleFunctor like this:
+  // MockLineToModuleHandler like this:
   //
-  //   MockLineToModuleFunctor l2m;
-  //   EXPECT_CALL(l2m, mock_apply(_,_,_,_))
+  //   MockLineToModuleHandler l2m;
+  //   EXPECT_CALL(l2m, ReadProgram(_,_,_,_))
   //       .WillOnce(DoAll(Invoke(appender), Return()));
   //
   // in which case calling l2m with some line vector will append lines.
@@ -143,7 +140,8 @@ class CUFixtureBase {
 
     // By default, expect the line program reader not to be invoked. We
     // may override this in StartCU.
-    EXPECT_CALL(line_reader_, mock_apply(_,_,_,_)).Times(0);
+    EXPECT_CALL(line_reader_, StartCompilationUnit(_)).Times(0);
+    EXPECT_CALL(line_reader_, ReadProgram(_,_,_,_)).Times(0);
 
     // The handler will consult this section map to decide what to
     // pass to our line reader.
@@ -153,7 +151,7 @@ class CUFixtureBase {
 
   // Add a line with the given address, size, filename, and line
   // number to the end of the statement list the handler will receive
-  // when it invokes its LineToModuleFunctor. Call this before calling
+  // when it invokes its LineToModuleHandler. Call this before calling
   // StartCU.
   void PushLine(Module::Address address, Module::Address size,
                 const string &filename, int line_number);
@@ -271,13 +269,17 @@ class CUFixtureBase {
   // report it as an unsigned value.
   bool language_signed_;
 
+  // If this is not empty, we'll give the CU a DW_AT_comp_dir attribute that
+  // indicates the path that this compilation unit was compiled in.
+  string compilation_dir_;
+
   // If this is not empty, we'll give the CU a DW_AT_stmt_list
   // attribute that, when passed to line_reader_, adds these lines to the
   // provided lines array.
   vector<Module::Line> lines_;
 
   // Mock line program reader.
-  MockLineToModuleFunctor line_reader_;
+  MockLineToModuleHandler line_reader_;
   AppendLinesFunctor appender_;
   static const char dummy_line_program_[];
   static const size_t dummy_line_size_;
@@ -311,6 +313,10 @@ void CUFixtureBase::PushLine(Module::Address address, Module::Address size,
 }
 
 void CUFixtureBase::StartCU() {
+  if (!compilation_dir_.empty())
+    EXPECT_CALL(line_reader_,
+                StartCompilationUnit(compilation_dir_)).Times(1);
+
   // If we have lines, make the line reader expect to be invoked at
   // most once. (Hey, if the handler can pass its tests without
   // bothering to read the line number data, that's great.)
@@ -318,8 +324,8 @@ void CUFixtureBase::StartCU() {
   // initial expectation (no calls) in force.
   if (!lines_.empty())
     EXPECT_CALL(line_reader_,
-                mock_apply(&dummy_line_program_[0], dummy_line_size_,
-                           &module_, _))
+                ReadProgram(&dummy_line_program_[0], dummy_line_size_,
+                            &module_, _))
         .Times(AtMost(1))
         .WillOnce(DoAll(Invoke(appender_), Return()));
 
@@ -333,6 +339,10 @@ void CUFixtureBase::StartCU() {
   root_handler_.ProcessAttributeString(dwarf2reader::DW_AT_name,
                                        dwarf2reader::DW_FORM_strp,
                                        "compilation-unit-name");
+  if (!compilation_dir_.empty())
+    root_handler_.ProcessAttributeString(dwarf2reader::DW_AT_comp_dir,
+                                         dwarf2reader::DW_FORM_strp,
+                                         compilation_dir_);
   if (!lines_.empty())
     root_handler_.ProcessAttributeUnsigned(dwarf2reader::DW_AT_stmt_list,
                                            dwarf2reader::DW_FORM_ref4,
@@ -626,6 +636,13 @@ void CUFixtureBase::TestLine(int i, int j,
 class SimpleCU: public CUFixtureBase, public Test {
 };
 
+TEST_F(SimpleCU, CompilationDir) {
+  compilation_dir_ = "/src/build/";
+
+  StartCU();
+  root_handler_.Finish();
+}
+
 TEST_F(SimpleCU, OneFunc) {
   PushLine(0x938cf8c07def4d34ULL, 0x55592d727f6cd01fLL, "line-file", 246571772);
 
@@ -1414,8 +1431,8 @@ TEST_F(Specifications, InterCU) {
   Module m("module-name", "module-os", "module-arch", "module-id");
   DwarfCUToModule::FileContext fc("dwarf-filename", &m);
   EXPECT_CALL(reporter_, UncoveredFunction(_)).WillOnce(Return());
-  MockLineToModuleFunctor lr;
-  EXPECT_CALL(lr, mock_apply(_,_,_,_)).Times(0);
+  MockLineToModuleHandler lr;
+  EXPECT_CALL(lr, ReadProgram(_,_,_,_)).Times(0);
 
   // Kludge: satisfy reporter_'s expectation.
   reporter_.SetCUName("compilation-unit-name");
diff --git a/src/common/dwarf_line_to_module.cc b/src/common/dwarf_line_to_module.cc
index 962848d1..258b0b60 100644
--- a/src/common/dwarf_line_to_module.cc
+++ b/src/common/dwarf_line_to_module.cc
@@ -48,13 +48,17 @@ static bool PathIsAbsolute(const string &path) {
   return (path.size() >= 1 && path[0] == '/');
 }
 
+static bool HasTrailingSlash(const string &path) {
+  return (path.size() >= 1 && path[path.size() - 1] == '/');
+}
+
 // If PATH is an absolute path, return PATH.  If PATH is a relative path,
 // treat it as relative to BASE and return the combined path.
 static string ExpandPath(const string &path,
                          const string &base) {
-  if (PathIsAbsolute(path))
+  if (PathIsAbsolute(path) || base.empty())
     return path;
-  return base + "/" + path;
+  return base + (HasTrailingSlash(base) ? "" : "/") + path;
 }
 
 namespace google_breakpad {
@@ -63,7 +67,7 @@ void DwarfLineToModule::DefineDir(const string &name, uint32 dir_num) {
   // Directory number zero is reserved to mean the compilation
   // directory. Silently ignore attempts to redefine it.
   if (dir_num != 0)
-    directories_[dir_num] = name;
+    directories_[dir_num] = ExpandPath(name, compilation_dir_);
 }
 
 void DwarfLineToModule::DefineFile(const string &name, int32 file_num,
@@ -74,25 +78,26 @@ void DwarfLineToModule::DefineFile(const string &name, int32 file_num,
   else if (file_num > highest_file_number_)
     highest_file_number_ = file_num;
 
-  string full_name;
-  if (dir_num != 0) {
+  string dir_name;
+  if (dir_num == 0) {
+    // Directory number zero is the compilation directory, and is stored as
+    // an attribute on the compilation unit, rather than in the program table.
+    dir_name = compilation_dir_;
+  } else {
     DirectoryTable::const_iterator directory_it = directories_.find(dir_num);
     if (directory_it != directories_.end()) {
-      full_name = ExpandPath(name, directory_it->second);
+      dir_name = directory_it->second;
     } else {
       if (!warned_bad_directory_number_) {
         fprintf(stderr, "warning: DWARF line number data refers to undefined"
                 " directory numbers\n");
         warned_bad_directory_number_ = true;
       }
-      full_name = name; // just treat name as relative
     }
-  } else {
-    // Directory number zero is the compilation directory; we just report
-    // relative paths in that case.
-    full_name = name;
   }
 
+  string full_name = ExpandPath(name, dir_name);
+
   // Find a Module::File object of the given name, and add it to the
   // file table.
   files_[file_num] = module_->FindFile(full_name);
diff --git a/src/common/dwarf_line_to_module.h b/src/common/dwarf_line_to_module.h
index 9382e40d..1fdd4cb7 100644
--- a/src/common/dwarf_line_to_module.h
+++ b/src/common/dwarf_line_to_module.h
@@ -120,8 +120,10 @@ class DwarfLineToModule: public dwarf2reader::LineInfoHandler {
   // end of the address space, we clip it. It's up to our client to
   // sort out which lines belong to which functions; we don't add them
   // to any particular function in MODULE ourselves.
-  DwarfLineToModule(Module *module, vector<Module::Line> *lines)
+  DwarfLineToModule(Module *module, const string& compilation_dir,
+                    vector<Module::Line> *lines)
       : module_(module),
+        compilation_dir_(compilation_dir),
         lines_(lines),
         highest_file_number_(-1),
         omitted_line_end_(0),
@@ -146,6 +148,10 @@ class DwarfLineToModule: public dwarf2reader::LineInfoHandler {
   // client.
   Module *module_;
 
+  // The compilation directory for the current compilation unit whose
+  // lines are being accumulated.
+  string compilation_dir_;
+
   // The vector of lines we're accumulating. Owned by our client.
   //
   // In a Module, as in a breakpad symbol file, lines belong to
diff --git a/src/common/dwarf_line_to_module_unittest.cc b/src/common/dwarf_line_to_module_unittest.cc
index 1e123e97..7c0fcfd3 100644
--- a/src/common/dwarf_line_to_module_unittest.cc
+++ b/src/common/dwarf_line_to_module_unittest.cc
@@ -45,7 +45,7 @@ using google_breakpad::Module;
 TEST(SimpleModule, One) {
   Module m("name", "os", "architecture", "id");
   vector<Module::Line> lines;
-  DwarfLineToModule h(&m, &lines);
+  DwarfLineToModule h(&m, "/", &lines);
 
   h.DefineFile("file1", 0x30bf0f27, 0, 0, 0);
   h.AddLine(0x6fd126fbf74f2680LL, 0x63c9a14cf556712bLL, 0x30bf0f27,
@@ -54,7 +54,7 @@ TEST(SimpleModule, One) {
   vector<Module::File *> files;
   m.GetFiles(&files);
   EXPECT_EQ(1U, files.size());
-  EXPECT_STREQ("file1", files[0]->name.c_str());
+  EXPECT_STREQ("/file1", files[0]->name.c_str());
 
   EXPECT_EQ(1U, lines.size());
   EXPECT_EQ(0x6fd126fbf74f2680ULL, lines[0].address);
@@ -66,7 +66,7 @@ TEST(SimpleModule, One) {
 TEST(SimpleModule, Many) {
   Module m("name", "os", "architecture", "id");
   vector<Module::Line> lines;
-  DwarfLineToModule h(&m, &lines);
+  DwarfLineToModule h(&m, "/", &lines);
 
   h.DefineDir("directory1", 0x838299ab);
   h.DefineDir("directory2", 0xf85de023);
@@ -89,11 +89,11 @@ TEST(SimpleModule, Many) {
   vector<Module::File *> files;
   m.GetFiles(&files);
   ASSERT_EQ(5U, files.size());
-  EXPECT_STREQ("directory1/file1", files[0]->name.c_str());
-  EXPECT_STREQ("directory1/file2", files[1]->name.c_str());
-  EXPECT_STREQ("directory2/file1", files[2]->name.c_str());
-  EXPECT_STREQ("directory2/file2", files[3]->name.c_str());
-  EXPECT_STREQ("file3",            files[4]->name.c_str());
+  EXPECT_STREQ("/directory1/file1", files[0]->name.c_str());
+  EXPECT_STREQ("/directory1/file2", files[1]->name.c_str());
+  EXPECT_STREQ("/directory2/file1", files[2]->name.c_str());
+  EXPECT_STREQ("/directory2/file2", files[3]->name.c_str());
+  EXPECT_STREQ("/file3",            files[4]->name.c_str());
 
   ASSERT_EQ(5U, lines.size());
 
@@ -126,7 +126,7 @@ TEST(SimpleModule, Many) {
 TEST(Filenames, Absolute) {
   Module m("name", "os", "architecture", "id");
   vector<Module::Line> lines;
-  DwarfLineToModule h(&m, &lines);
+  DwarfLineToModule h(&m, "/", &lines);
 
   h.DefineDir("directory1", 1);
   h.DefineFile("/absolute", 1, 1, 0, 0);
@@ -144,7 +144,7 @@ TEST(Filenames, Absolute) {
 TEST(Filenames, Relative) {
   Module m("name", "os", "architecture", "id");
   vector<Module::Line> lines;
-  DwarfLineToModule h(&m, &lines);
+  DwarfLineToModule h(&m, "/", &lines);
 
   h.DefineDir("directory1", 1);
   h.DefineFile("relative", 1, 1, 0, 0);
@@ -154,7 +154,7 @@ TEST(Filenames, Relative) {
   vector<Module::File *> files;
   m.GetFiles(&files);
   ASSERT_EQ(1U, files.size());
-  EXPECT_STREQ("directory1/relative", files[0]->name.c_str());
+  EXPECT_STREQ("/directory1/relative", files[0]->name.c_str());
   ASSERT_EQ(1U, lines.size());
   EXPECT_TRUE(lines[0].file == files[0]);
 }
@@ -162,20 +162,20 @@ TEST(Filenames, Relative) {
 TEST(Filenames, StrangeFile) {
   Module m("name", "os", "architecture", "id");
   vector<Module::Line> lines;
-  DwarfLineToModule h(&m, &lines);
+  DwarfLineToModule h(&m, "/", &lines);
 
   h.DefineDir("directory1", 1);
   h.DefineFile("", 1, 1, 0, 0);
   h.AddLine(1, 1, 1, 0, 0);
 
   ASSERT_EQ(1U, lines.size());
-  EXPECT_STREQ("directory1/", lines[0].file->name.c_str());
+  EXPECT_STREQ("/directory1/", lines[0].file->name.c_str());
 }
 
 TEST(Filenames, StrangeDirectory) {
   Module m("name", "os", "architecture", "id");
   vector<Module::Line> lines;
-  DwarfLineToModule h(&m, &lines);
+  DwarfLineToModule h(&m, "/", &lines);
 
   h.DefineDir("", 1);
   h.DefineFile("file1", 1, 1, 0, 0);
@@ -188,7 +188,7 @@ TEST(Filenames, StrangeDirectory) {
 TEST(Filenames, StrangeDirectoryAndFile) {
   Module m("name", "os", "architecture", "id");
   vector<Module::Line> lines;
-  DwarfLineToModule h(&m, &lines);
+  DwarfLineToModule h(&m, "/", &lines);
 
   h.DefineDir("", 1);
   h.DefineFile("", 1, 1, 0, 0);
@@ -198,12 +198,60 @@ TEST(Filenames, StrangeDirectoryAndFile) {
   EXPECT_STREQ("/", lines[0].file->name.c_str());
 }
 
+// We should use the compilation directory when encountering a file for
+// directory number zero.
+TEST(Filenames, DirectoryZeroFileIsRelativeToCompilationDir) {
+  Module m("name", "os", "architecture", "id");
+  vector<Module::Line> lines;
+  DwarfLineToModule h(&m, "src/build", &lines);
+
+  h.DefineDir("Dir", 1);
+  h.DefineFile("File", 1, 0, 0, 0);
+
+  h.AddLine(1, 1, 1, 0, 0);
+
+  ASSERT_EQ(1U, lines.size());
+  EXPECT_STREQ("src/build/File", lines[0].file->name.c_str());
+}
+
+// We should treat non-absolute directories as relative to the compilation
+// directory.
+TEST(Filenames, IncludeDirectoryRelativeToDirectoryZero) {
+  Module m("name", "os", "architecture", "id");
+  vector<Module::Line> lines;
+  DwarfLineToModule h(&m, "src/build", &lines);
+
+  h.DefineDir("Dir", 1);
+  h.DefineFile("File", 1, 1, 0, 0);
+
+  h.AddLine(1, 1, 1, 0, 0);
+
+  ASSERT_EQ(1U, lines.size());
+  EXPECT_STREQ("src/build/Dir/File", lines[0].file->name.c_str());
+}
+
+// We should treat absolute directories as absolute, and not relative to
+// the compilation dir.
+TEST(Filenames, IncludeDirectoryAbsolute) {
+  Module m("name", "os", "architecture", "id");
+  vector<Module::Line> lines;
+  DwarfLineToModule h(&m, "src/build", &lines);
+
+  h.DefineDir("/Dir", 1);
+  h.DefineFile("File", 1, 1, 0, 0);
+
+  h.AddLine(1, 1, 1, 0, 0);
+
+  ASSERT_EQ(1U, lines.size());
+  EXPECT_STREQ("/Dir/File", lines[0].file->name.c_str());
+}
+
 // We should silently ignore attempts to define directory number zero,
 // since that is always the compilation directory.
 TEST(ModuleErrors, DirectoryZero) {
   Module m("name", "os", "architecture", "id");
   vector<Module::Line> lines;
-  DwarfLineToModule h(&m, &lines);
+  DwarfLineToModule h(&m, "/", &lines);
 
   h.DefineDir("directory0", 0); // should be ignored
   h.DefineFile("relative", 1, 0, 0, 0);
@@ -211,7 +259,7 @@ TEST(ModuleErrors, DirectoryZero) {
   h.AddLine(1, 1, 1, 0, 0);
 
   ASSERT_EQ(1U, lines.size());
-  EXPECT_STREQ("relative", lines[0].file->name.c_str());
+  EXPECT_STREQ("/relative", lines[0].file->name.c_str());
 }
 
 // We should refuse to add lines with bogus file numbers. We should
@@ -219,7 +267,7 @@ TEST(ModuleErrors, DirectoryZero) {
 TEST(ModuleErrors, BadFileNumber) {
   Module m("name", "os", "architecture", "id");
   vector<Module::Line> lines;
-  DwarfLineToModule h(&m, &lines);
+  DwarfLineToModule h(&m, "/", &lines);
 
   h.DefineFile("relative", 1, 0, 0, 0);
   h.AddLine(1, 1, 2, 0, 0); // bad file number
@@ -233,7 +281,7 @@ TEST(ModuleErrors, BadFileNumber) {
 TEST(ModuleErrors, BadDirectoryNumber) {
   Module m("name", "os", "architecture", "id");
   vector<Module::Line> lines;
-  DwarfLineToModule h(&m, &lines);
+  DwarfLineToModule h(&m, "/", &lines);
 
   h.DefineDir("directory1", 1);
   h.DefineFile("baddirnumber1", 1, 2, 0, 0); // bad directory number
@@ -248,7 +296,7 @@ TEST(ModuleErrors, BadDirectoryNumber) {
 TEST(ModuleErrors, EmptyLine) {
   Module m("name", "os", "architecture", "id");
   vector<Module::Line> lines;
-  DwarfLineToModule h(&m, &lines);
+  DwarfLineToModule h(&m, "/", &lines);
 
   h.DefineFile("filename1", 1, 0, 0, 0);
   h.AddLine(1, 0, 1, 0, 0);
@@ -261,7 +309,7 @@ TEST(ModuleErrors, EmptyLine) {
 TEST(ModuleErrors, BigLine) {
   Module m("name", "os", "architecture", "id");
   vector<Module::Line> lines;
-  DwarfLineToModule h(&m, &lines);
+  DwarfLineToModule h(&m, "/", &lines);
 
   h.DefineFile("filename1", 1, 0, 0, 0);
   h.AddLine(0xffffffffffffffffULL, 2, 1, 0, 0);
@@ -278,7 +326,7 @@ TEST(ModuleErrors, BigLine) {
 TEST(Omitted, DroppedThenGood) {
   Module m("name", "os", "architecture", "id");
   vector<Module::Line> lines;
-  DwarfLineToModule h(&m, &lines);
+  DwarfLineToModule h(&m, "/", &lines);
 
   h.DefineFile("filename1", 1, 0, 0, 0);
   h.AddLine(0,  10, 1, 83816211, 0);   // should be omitted
@@ -291,7 +339,7 @@ TEST(Omitted, DroppedThenGood) {
 TEST(Omitted, GoodThenDropped) {
   Module m("name", "os", "architecture", "id");
   vector<Module::Line> lines;
-  DwarfLineToModule h(&m, &lines);
+  DwarfLineToModule h(&m, "/", &lines);
 
   h.DefineFile("filename1", 1, 0, 0, 0);
   h.AddLine(0x9dd6a372, 10, 1, 41454594, 0);   // should be recorded
@@ -304,7 +352,7 @@ TEST(Omitted, GoodThenDropped) {
 TEST(Omitted, Mix1) {
   Module m("name", "os", "architecture", "id");
   vector<Module::Line> lines;
-  DwarfLineToModule h(&m, &lines);
+  DwarfLineToModule h(&m, "/", &lines);
 
   h.DefineFile("filename1", 1, 0, 0, 0);
   h.AddLine(0x679ed72f,  10,   1, 58932642, 0);   // should be recorded
@@ -325,7 +373,7 @@ TEST(Omitted, Mix1) {
 TEST(Omitted, Mix2) {
   Module m("name", "os", "architecture", "id");
   vector<Module::Line> lines;
-  DwarfLineToModule h(&m, &lines);
+  DwarfLineToModule h(&m, "/", &lines);
 
   h.DefineFile("filename1", 1, 0, 0, 0);
   h.AddLine(0,           0xf2, 1, 58802211, 0);   // should be omitted
diff --git a/src/common/linux/dump_symbols.cc b/src/common/linux/dump_symbols.cc
index 4a85ab8f..b739bf71 100644
--- a/src/common/linux/dump_symbols.cc
+++ b/src/common/linux/dump_symbols.cc
@@ -186,18 +186,22 @@ bool LoadStabs(const typename ElfClass::Ehdr* elf_header,
 // A line-to-module loader that accepts line number info parsed by
 // dwarf2reader::LineInfo and populates a Module and a line vector
 // with the results.
-class DumperLineToModule: public DwarfCUToModule::LineToModuleFunctor {
+class DumperLineToModule: public DwarfCUToModule::LineToModuleHandler {
  public:
   // Create a line-to-module converter using BYTE_READER.
   explicit DumperLineToModule(dwarf2reader::ByteReader *byte_reader)
       : byte_reader_(byte_reader) { }
-  void operator()(const char *program, uint64 length,
-                  Module *module, std::vector<Module::Line> *lines) {
-    DwarfLineToModule handler(module, lines);
+  void StartCompilationUnit(const string& compilation_dir) {
+    compilation_dir_ = compilation_dir;
+  }
+  void ReadProgram(const char *program, uint64 length,
+                   Module *module, std::vector<Module::Line> *lines) {
+    DwarfLineToModule handler(module, compilation_dir_, lines);
     dwarf2reader::LineInfo parser(program, length, byte_reader_, &handler);
     parser.Start();
   }
  private:
+  string compilation_dir_;
   dwarf2reader::ByteReader *byte_reader_;
 };
 
diff --git a/src/common/mac/dump_syms.mm b/src/common/mac/dump_syms.mm
index d79afe26..e26b05ea 100644
--- a/src/common/mac/dump_syms.mm
+++ b/src/common/mac/dump_syms.mm
@@ -227,18 +227,24 @@ string DumpSymbols::Identifier() {
 // dwarf2reader::LineInfo and populates a Module and a line vector
 // with the results.
 class DumpSymbols::DumperLineToModule:
-      public DwarfCUToModule::LineToModuleFunctor {
+      public DwarfCUToModule::LineToModuleHandler {
  public:
   // Create a line-to-module converter using BYTE_READER.
   DumperLineToModule(dwarf2reader::ByteReader *byte_reader)
       : byte_reader_(byte_reader) { }
-  void operator()(const char *program, uint64 length,
-                  Module *module, vector<Module::Line> *lines) {
-    DwarfLineToModule handler(module, lines);
+
+  void StartCompilationUnit(const string& compilation_dir) {
+    compilation_dir_ = compilation_dir;
+  }
+
+  void ReadProgram(const char *program, uint64 length,
+                   Module *module, vector<Module::Line> *lines) {
+    DwarfLineToModule handler(module, compilation_dir_, lines);
     dwarf2reader::LineInfo parser(program, length, byte_reader_, &handler);
     parser.Start();
   }
  private:
+  string compilation_dir_;
   dwarf2reader::ByteReader *byte_reader_;  // WEAK
 };
 
diff --git a/src/tools/python/filter_syms.py b/src/tools/python/filter_syms.py
new file mode 100644
index 00000000..738cb3e9
--- /dev/null
+++ b/src/tools/python/filter_syms.py
@@ -0,0 +1,204 @@
+#!/usr/bin/env python
+# Copyright (c) 2012 Google Inc.
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met:
+#
+#     * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+#     * Redistributions in binary form must reproduce the above
+# copyright notice, this list of conditions and the following disclaimer
+# in the documentation and/or other materials provided with the
+# distribution.
+#     * Neither the name of Google Inc. nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+"""Normalizes and de-duplicates paths within Breakpad symbol files.
+
+When using DWARF for storing debug symbols, some file information will be
+stored relative to the current working directory of the current compilation
+unit, and may be further relativized based upon how the file was #included.
+
+This helper can be used to parse the Breakpad symbol file generated from such
+DWARF files and normalize and de-duplicate the FILE records found within,
+updating any references to the FILE records in the other record types.
+"""
+
+import macpath
+import ntpath
+import optparse
+import os
+import posixpath
+import sys
+
+class BreakpadParseError(Exception):
+  """Unsupported Breakpad symbol record exception class."""
+  pass
+
+class SymbolFileParser(object):
+  """Parser for Breakpad symbol files.
+
+  The format of these files is documented at
+  https://code.google.com/p/google-breakpad/wiki/SymbolFiles
+  """
+
+  def __init__(self, input_stream, output_stream, ignored_prefixes=None,
+               path_handler=os.path):
+    """Inits a SymbolFileParser to read symbol records from |input_stream| and
+    write the processed output to |output_stream|.
+    
+    |ignored_prefixes| contains a list of optional path prefixes that
+    should be stripped from the final, normalized path outputs.
+    
+    For example, if the Breakpad symbol file had all paths starting with a
+    common prefix, such as:
+      FILE 1 /b/build/src/foo.cc
+      FILE 2 /b/build/src/bar.cc
+    Then adding "/b/build/src" as an ignored prefix would result in an output
+    file that contained:
+      FILE 1 foo.cc
+      FILE 2 bar.cc
+    
+    Note that |ignored_prefixes| does not necessarily contain file system
+    paths, as the contents of the DW_AT_comp_dir attribute are dependent
+    upon the host system and compiler, and may contain additional information
+    such as hostname or compiler version.
+    """
+
+    self.unique_files = {}
+    self.duplicate_files = {}
+    self.input_stream = input_stream
+    self.output_stream = output_stream
+    self.ignored_prefixes = ignored_prefixes or []
+    self.path_handler = path_handler
+
+  def Process(self):
+    """Processes the Breakpad symbol file."""
+    for line in self.input_stream:
+      parsed = self._ParseRecord(line.rstrip())
+      if parsed:
+        self.output_stream.write(parsed + '\n')
+
+  def _ParseRecord(self, record):
+    """Parses a single Breakpad symbol record - a single line from the symbol
+    file.
+
+    Returns:
+        The modified string to write to the output file, or None if no line
+        should be written.
+    """
+    record_type = record.partition(' ')[0]
+    if record_type == 'FILE':
+      return self._ParseFileRecord(record)
+    elif self._IsLineRecord(record_type):
+      return self._ParseLineRecord(record)
+    else:
+      # Simply pass the record through unaltered.
+      return record
+
+  def _NormalizePath(self, path):
+    """Normalizes a file path to its canonical form.
+
+    As this may not execute on the machine or file system originally
+    responsible for compilation, it may be necessary to further correct paths
+    for symlinks, junctions, or other such file system indirections.
+
+    Returns:
+        A unique, canonical representation for the file path.
+    """
+    return self.path_handler.normpath(path)
+
+  def _AdjustPath(self, path):
+    """Adjusts the supplied path after performing path de-duplication.
+
+    This may be used to perform secondary adjustments, such as removing a
+    common prefix (e.g. "/D/build"), or replacing the file system path with
+    information from the version control system.
+
+    Returns:
+        The actual path to use when writing the FILE record.
+    """
+    return path[len(filter(path.startswith,
+                           self.ignored_prefixes + [''])[0]):]
+
+  def _ParseFileRecord(self, file_record):
+    """Parses and corrects a FILE record."""
+    file_info = file_record[5:].split(' ', 3)
+    if len(file_info) > 2:
+      raise BreakpadParseError('Unsupported FILE record: ' + file_record)
+    file_index = int(file_info[0])
+    file_name = self._NormalizePath(file_info[1])
+    existing_file_index = self.unique_files.get(file_name)
+    if existing_file_index is None:
+      self.unique_files[file_name] = file_index
+      file_info[1] = self._AdjustPath(file_name)
+      return 'FILE ' + ' '.join(file_info)
+    else:
+      self.duplicate_files[file_index] = existing_file_index
+      return None
+
+  def _IsLineRecord(self, record_type):
+    """Determines if the current record type is a Line record."""
+    try:
+      line = int(record_type, 16)
+    except (ValueError, TypeError):
+      return False
+    return True
+
+  def _ParseLineRecord(self, line_record):
+    """Parses and corrects a Line record."""
+    line_info = line_record.split(' ', 5)
+    if len(line_info) > 4:
+      raise BreakpadParseError('Unsupported Line record: ' + line_record)
+    file_index = int(line_info[3])
+    line_info[3] = str(self.duplicate_files.get(file_index, file_index))
+    return ' '.join(line_info)
+
+def main():
+  option_parser = optparse.OptionParser()
+  option_parser.add_option("-p", "--prefix",
+                           action="append", dest="prefixes", type="string",
+                           default=[],
+                           help="A path prefix that should be removed from "
+                                "all FILE lines. May be repeated to specify "
+                                "multiple prefixes.")
+  option_parser.add_option("-t", "--path_type",
+                           action="store", type="choice", dest="path_handler",
+                           choices=['win32', 'posix'],
+                           help="Indicates how file paths should be "
+                                "interpreted. The default is to treat paths "
+                                "the same as the OS running Python (eg: "
+                                "os.path)")
+  options, args = option_parser.parse_args()
+  if args:
+    option_parser.error('Unknown argument: %s' % args)
+
+  path_handler = { 'win32': ntpath,
+                   'posix': posixpath }.get(options.path_handler, os.path)
+  try:
+    symbol_parser = SymbolFileParser(sys.stdin, sys.stdout, options.prefixes,
+                                     path_handler)
+    symbol_parser.Process()
+  except BreakpadParseError, e:
+    print >> sys.stderr, 'Got an error while processing symbol file'
+    print >> sys.stderr, str(e)
+    return 1
+  return 0
+
+if __name__ == '__main__':
+  sys.exit(main())
diff --git a/src/tools/python/tests/filter_syms_unittest.py b/src/tools/python/tests/filter_syms_unittest.py
new file mode 100644
index 00000000..b111f349
--- /dev/null
+++ b/src/tools/python/tests/filter_syms_unittest.py
@@ -0,0 +1,138 @@
+#!/usr/bin/env python
+# Copyright (c) 2012 Google Inc.
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met:
+#
+#     * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+#     * Redistributions in binary form must reproduce the above
+# copyright notice, this list of conditions and the following disclaimer
+# in the documentation and/or other materials provided with the
+# distribution.
+#     * Neither the name of Google Inc. nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+"""Unit tests for filter_syms.py"""
+
+import cStringIO
+import ntpath
+import os
+import StringIO
+import sys
+import unittest
+
+ROOT_DIR = os.path.dirname(os.path.abspath(__file__))
+sys.path.insert(0, os.path.join(ROOT_DIR, '..'))
+
+# filter_syms.py lives in the parent directory, added to sys.path above.
+import filter_syms
+
+class FilterSysmsTest(unittest.TestCase):
+  def assertParsed(self, input_data, ignored_prefixes, expected):
+    input_io = cStringIO.StringIO(input_data)
+    output_io = cStringIO.StringIO()
+    parser = filter_syms.SymbolFileParser(input_io, output_io,
+                                          ignored_prefixes, ntpath)
+    parser.Process()
+    self.assertEqual(output_io.getvalue(), expected)
+    
+  def testDuplicateFiles(self):
+    """Tests that duplicate files in FILE records are correctly removed and
+    that Line records are updated."""
+
+    INPUT = \
+"""MODULE windows x86 111111111111111111111111111111111 module1.pdb
+INFO CODE_ID FFFFFFFF module1.exe
+FILE 1 foo/../file1_1.cc
+FILE 2 bar/../file1_1.cc
+FILE 3 baz/../file1_1.cc
+FUNC 1000 c 0 Function1_1
+1000 8 45 2
+1008 4 46 3
+100c 4 44 1
+"""
+    EXPECTED_OUTPUT = \
+"""MODULE windows x86 111111111111111111111111111111111 module1.pdb
+INFO CODE_ID FFFFFFFF module1.exe
+FILE 1 file1_1.cc
+FUNC 1000 c 0 Function1_1
+1000 8 45 1
+1008 4 46 1
+100c 4 44 1
+"""
+    self.assertParsed(INPUT, [], EXPECTED_OUTPUT)
+
+  def testIgnoredPrefix(self):
+    """Tests that prefixes in FILE records are correctly removed."""
+
+    INPUT = \
+"""MODULE windows x86 111111111111111111111111111111111 module1.pdb
+INFO CODE_ID FFFFFFFF module1.exe
+FILE 1 /src/build/foo/../file1_1.cc
+FILE 2 /src/build/bar/../file1_2.cc
+FILE 3 /src/build/baz/../file1_2.cc
+FUNC 1000 c 0 Function1_1
+1000 8 45 2
+1008 4 46 3
+100c 4 44 1
+"""
+    EXPECTED_OUTPUT = \
+"""MODULE windows x86 111111111111111111111111111111111 module1.pdb
+INFO CODE_ID FFFFFFFF module1.exe
+FILE 1 file1_1.cc
+FILE 2 file1_2.cc
+FUNC 1000 c 0 Function1_1
+1000 8 45 2
+1008 4 46 2
+100c 4 44 1
+"""
+    IGNORED_PREFIXES = ['\\src\\build\\']
+    self.assertParsed(INPUT, IGNORED_PREFIXES, EXPECTED_OUTPUT)
+
+  def testIgnoredPrefixesDuplicateFiles(self):
+    """Tests that de-duplication of FILE records happens BEFORE prefixes
+    are removed."""
+
+    INPUT = \
+"""MODULE windows x86 111111111111111111111111111111111 module1.pdb
+INFO CODE_ID FFFFFFFF module1.exe
+FILE 1 /src/build/foo/../file1_1.cc
+FILE 2 /src/build/bar/../file1_2.cc
+FILE 3 D:/src/build2/baz/../file1_2.cc
+FUNC 1000 c 0 Function1_1
+1000 8 45 2
+1008 4 46 3
+100c 4 44 1
+"""
+    EXPECTED_OUTPUT = \
+"""MODULE windows x86 111111111111111111111111111111111 module1.pdb
+INFO CODE_ID FFFFFFFF module1.exe
+FILE 1 file1_1.cc
+FILE 2 file1_2.cc
+FILE 3 file1_2.cc
+FUNC 1000 c 0 Function1_1
+1000 8 45 2
+1008 4 46 3
+100c 4 44 1
+"""
+    IGNORED_PREFIXES = ['\\src\\build\\', 'D:\\src\\build2\\']
+    self.assertParsed(INPUT, IGNORED_PREFIXES, EXPECTED_OUTPUT)
+
+if __name__ == '__main__':  # True only when run as a script, not imported.
+  unittest.main()  # Runs the TestCase classes defined in this module.
\ No newline at end of file