Adding stricter validation checks to various symbol parser functions.

More specifically, the validation of the following record types is improved:
 - FILE records
 - FUNC records
 - Line records
 - PUBLIC records

Adding unit tests.
Review URL: https://breakpad.appspot.com/632003

git-svn-id: http://google-breakpad.googlecode.com/svn/trunk@1217 4c0a9323-5329-0410-9bdc-e9ce6186880e
This commit is contained in:
ivan.penkov@gmail.com 2013-09-25 18:25:13 +00:00
parent 5bce3b4d77
commit bd71bdd742
3 changed files with 510 additions and 78 deletions

View file

@ -81,6 +81,64 @@ class BasicSourceLineResolver : public SourceLineResolverBase {
void operator=(const BasicSourceLineResolver&);
};
// Static-only collection of parsers for the individual record types found in
// a Breakpad symbol file.  Each parser modifies its input buffer, which is
// why the buffer is taken as a non-const char*, and returns true on success.
// String results are never copied: they point inside the input buffer.
class SymbolParseHelper {
 public:
  // FILE record parser.
  //   Format: FILE <id> <filename>
  // |file_line| is modified while it is parsed.  On success <id> is stored
  // in |*index| and |*filename| points at <filename> inside |file_line|
  // (no allocation is done).
  static bool ParseFile(char *file_line,   // in
                        long *index,       // out
                        char **filename);  // out

  // FUNC record parser.
  //   Format: FUNC <address> <size> <stack_param_size> <name>
  // |function_line| is modified while it is parsed.  On success the three
  // numeric fields are stored in |*address|, |*size| and
  // |*stack_param_size|, and |*name| points at <name> inside
  // |function_line| (no allocation is done).
  static bool ParseFunction(char *function_line,     // in
                            uint64_t *address,       // out
                            uint64_t *size,          // out
                            long *stack_param_size,  // out
                            char **name);            // out

  // Line record parser.
  //   Format: <address> <size> <line number> <source file id>
  // |line_line| is modified while it is parsed.  On success the four fields
  // are stored in |*address|, |*size|, |*line_number| and |*source_file|.
  static bool ParseLine(char *line_line,     // in
                        uint64_t *address,   // out
                        uint64_t *size,      // out
                        long *line_number,   // out
                        long *source_file);  // out

  // PUBLIC record parser.
  //   Format: PUBLIC <address> <stack_param_size> <name>
  // |public_line| is modified while it is parsed.  On success the numeric
  // fields are stored in |*address| and |*stack_param_size|, and |*name|
  // points at <name> inside |public_line| (no allocation is done).
  static bool ParsePublicSymbol(char *public_line,       // in
                                uint64_t *address,       // out
                                long *stack_param_size,  // out
                                char **name);            // out

 private:
  // Success check applied to the end pointer produced by strtoull/strtol.
  static bool IsValidAfterNumber(char *after_number);

  // Not instantiable and not copyable: the class only offers static methods.
  SymbolParseHelper();
  SymbolParseHelper(const SymbolParseHelper&);
  void operator=(const SymbolParseHelper&);
};
} // namespace google_breakpad
#endif // GOOGLE_BREAKPAD_PROCESSOR_BASIC_SOURCE_LINE_RESOLVER_H__

View file

@ -32,12 +32,13 @@
// See basic_source_line_resolver.h and basic_source_line_resolver_types.h
// for documentation.
#include <assert.h>
#include <stdio.h>
#include <string.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <limits>
#include <map>
#include <utility>
#include <vector>
@ -308,87 +309,49 @@ CFIFrameInfo *BasicSourceLineResolver::Module::FindCFIFrameInfo(
}
// Handles one FILE record ("FILE <id> <filename>").
// Parsing and validation are delegated to SymbolParseHelper::ParseFile, which
// asserts that |file_line| still starts with the "FILE " prefix, so the
// prefix must not be skipped here.  On success the <id> -> <filename> pair is
// inserted into files_ and true is returned; a record that fails to parse
// yields false and leaves files_ untouched.
bool BasicSourceLineResolver::Module::ParseFile(char *file_line) {
  long index;
  char *filename;
  if (!SymbolParseHelper::ParseFile(file_line, &index, &filename)) {
    return false;
  }
  // |filename| points into |file_line|; string() takes its own copy.
  files_.insert(make_pair(index, string(filename)));
  return true;
}
// Handles one FUNC record ("FUNC <address> <size> <stack_param_size> <name>").
// Parsing and validation are delegated to SymbolParseHelper::ParseFunction,
// which asserts that |function_line| still starts with the "FUNC " prefix, so
// the prefix must not be skipped here.  Returns a Function created with new
// on success and NULL when the record is rejected.
BasicSourceLineResolver::Function*
BasicSourceLineResolver::Module::ParseFunction(char *function_line) {
  uint64_t address;
  uint64_t size;
  long stack_param_size;
  char *name;
  if (!SymbolParseHelper::ParseFunction(function_line, &address, &size,
                                        &stack_param_size, &name)) {
    return NULL;
  }
  return new Function(name, address, size, stack_param_size);
}
// Handles one line record ("<address> <size> <line number> <source file id>").
// Parsing and validation are delegated to SymbolParseHelper::ParseLine; this
// method must not tokenize |line_line| itself, because tokenizing modifies
// the buffer the helper is about to read.  Returns a Line created with new on
// success and NULL when the record is rejected.
BasicSourceLineResolver::Line* BasicSourceLineResolver::Module::ParseLine(
    char *line_line) {
  uint64_t address;
  uint64_t size;
  long line_number;
  long source_file;
  if (!SymbolParseHelper::ParseLine(line_line, &address, &size, &line_number,
                                    &source_file)) {
    return NULL;
  }
  // Note the argument order: Line takes the source file id before the line
  // number.
  return new Line(address, size, source_file, line_number);
}
bool BasicSourceLineResolver::Module::ParsePublicSymbol(char *public_line) {
// PUBLIC <address> <stack_param_size> <name>
// Skip "PUBLIC " prefix.
public_line += 7;
vector<char*> tokens;
if (!Tokenize(public_line, kWhitespace, 3, &tokens)) {
return false;
}
uint64_t address = strtoull(tokens[0], NULL, 16);
int stack_param_size = strtoull(tokens[1], NULL, 16);
char *name = tokens[2];
uint64_t address;
long stack_param_size;
char *name;
if (SymbolParseHelper::ParsePublicSymbol(public_line, &address,
&stack_param_size, &name)) {
// A few public symbols show up with an address of 0. This has been seen
// in the dumped output of ntdll.pdb for symbols such as _CIlog, _CIpow,
// RtlDescribeChunkLZNT1, and RtlReserveChunkLZNT1. They would conflict
@ -402,6 +365,8 @@ bool BasicSourceLineResolver::Module::ParsePublicSymbol(char *public_line) {
linked_ptr<PublicSymbol> symbol(new PublicSymbol(name, address,
stack_param_size));
return public_symbols_.Store(address, symbol);
}
return false;
}
bool BasicSourceLineResolver::Module::ParseStackInfo(char *stack_info_line) {
@ -495,4 +460,150 @@ bool BasicSourceLineResolver::Module::ParseCFIFrameInfo(
return true;
}
// static
// Parses "FILE <id> <filename>".  |file_line| must start with "FILE " and is
// tokenized in place.  On success <id> is stored in |*index| and |*filename|
// points at the file name inside |file_line|.  Returns false when the record
// is malformed; |*index| may already have been written in that case.
bool SymbolParseHelper::ParseFile(char *file_line, long *index,
                                  char **filename) {
  // FILE <id> <filename>
  assert(strncmp(file_line, "FILE ", 5) == 0);
  file_line += 5;  // skip prefix

  vector<char*> tokens;
  if (!Tokenize(file_line, kWhitespace, 2, &tokens)) {
    return false;
  }

  // <id> is a non-negative decimal number.  The maximum long value is
  // rejected because that is what strtol returns when the input overflows.
  char *after_number;
  *index = strtol(tokens[0], &after_number, 10);
  if (!IsValidAfterNumber(after_number) || *index < 0 ||
      *index == std::numeric_limits<long>::max()) {
    return false;
  }

  // Check the token itself; the out-parameter |filename| is never the thing
  // that can be missing here.
  *filename = tokens[1];
  if (!*filename) {
    return false;
  }

  return true;
}
// static
// Parses "FUNC <address> <size> <stack_param_size> <name>".  |function_line|
// must start with "FUNC " and is tokenized in place.  All three numeric
// fields are hexadecimal and must be non-negative.  On success they are
// stored in |*address|, |*size| and |*stack_param_size|, and |*name| points
// at the function name inside |function_line|.  Returns false when the record
// is malformed; some outputs may already have been written in that case.
bool SymbolParseHelper::ParseFunction(char *function_line, uint64_t *address,
                                      uint64_t *size, long *stack_param_size,
                                      char **name) {
  // FUNC <address> <size> <stack_param_size> <name>
  assert(strncmp(function_line, "FUNC ", 5) == 0);
  function_line += 5;  // skip prefix

  vector<char*> tokens;
  if (!Tokenize(function_line, kWhitespace, 4, &tokens)) {
    return false;
  }

  // strtoull accepts a leading minus sign and returns the negated value as an
  // unsigned number, so "-5" would be read as a huge address or size.  Reject
  // such input explicitly; the strspn call skips the leading white space that
  // strtoull itself would skip.
  if (tokens[0][strspn(tokens[0], " \t\n\v\f\r")] == '-' ||
      tokens[1][strspn(tokens[1], " \t\n\v\f\r")] == '-') {
    return false;
  }

  // The maximum value of each type is rejected because that is what
  // strtoull/strtol return when the input overflows.
  char *after_number;
  *address = strtoull(tokens[0], &after_number, 16);
  if (!IsValidAfterNumber(after_number) ||
      *address == std::numeric_limits<unsigned long long>::max()) {
    return false;
  }

  *size = strtoull(tokens[1], &after_number, 16);
  if (!IsValidAfterNumber(after_number) ||
      *size == std::numeric_limits<unsigned long long>::max()) {
    return false;
  }

  *stack_param_size = strtol(tokens[2], &after_number, 16);
  if (!IsValidAfterNumber(after_number) ||
      *stack_param_size == std::numeric_limits<long>::max() ||
      *stack_param_size < 0) {
    return false;
  }

  *name = tokens[3];
  return true;
}
// static
// Parses "<address> <size> <line number> <source file id>".  |line_line| is
// tokenized in place.  <address> and <size> are hexadecimal, <line number>
// and <source file id> are decimal, and all four must be non-negative.  On
// success they are stored in |*address|, |*size|, |*line_number| and
// |*source_file|.  Returns false when the record is malformed; some outputs
// may already have been written in that case.
bool SymbolParseHelper::ParseLine(char *line_line, uint64_t *address,
                                  uint64_t *size, long *line_number,
                                  long *source_file) {
  // <address> <size> <line number> <source file id>
  vector<char*> tokens;
  if (!Tokenize(line_line, kWhitespace, 4, &tokens)) {
    return false;
  }

  // strtoull accepts a leading minus sign and returns the negated value as an
  // unsigned number, so "-5" would be read as a huge address or size.  Reject
  // such input explicitly; the strspn call skips the leading white space that
  // strtoull itself would skip.
  if (tokens[0][strspn(tokens[0], " \t\n\v\f\r")] == '-' ||
      tokens[1][strspn(tokens[1], " \t\n\v\f\r")] == '-') {
    return false;
  }

  // The maximum value of each type is rejected because that is what
  // strtoull/strtol return when the input overflows.
  char *after_number;
  *address = strtoull(tokens[0], &after_number, 16);
  if (!IsValidAfterNumber(after_number) ||
      *address == std::numeric_limits<unsigned long long>::max()) {
    return false;
  }

  *size = strtoull(tokens[1], &after_number, 16);
  if (!IsValidAfterNumber(after_number) ||
      *size == std::numeric_limits<unsigned long long>::max()) {
    return false;
  }

  *line_number = strtol(tokens[2], &after_number, 10);
  if (!IsValidAfterNumber(after_number) ||
      *line_number == std::numeric_limits<long>::max()) {
    return false;
  }

  *source_file = strtol(tokens[3], &after_number, 10);
  if (!IsValidAfterNumber(after_number) || *source_file < 0 ||
      *source_file == std::numeric_limits<long>::max()) {
    return false;
  }

  // Valid line numbers normally start from 1, however there are functions that
  // are associated with a source file but not associated with any line number
  // (block helper function) and for such functions the symbol file contains 0
  // for the line numbers.  Hence, 0 should be treated as a valid line number.
  // For more information on block helper functions, please, take a look at:
  //   http://clang.llvm.org/docs/Block-ABI-Apple.html
  if (*line_number < 0) {
    return false;
  }

  return true;
}
// static
// Parses "PUBLIC <address> <stack_param_size> <name>".  |public_line| must
// start with "PUBLIC " and is tokenized in place.  Both numeric fields are
// hexadecimal and must be non-negative.  On success they are stored in
// |*address| and |*stack_param_size|, and |*name| points at the symbol name
// inside |public_line|.  Returns false when the record is malformed; some
// outputs may already have been written in that case.
bool SymbolParseHelper::ParsePublicSymbol(char *public_line,
                                          uint64_t *address,
                                          long *stack_param_size,
                                          char **name) {
  // PUBLIC <address> <stack_param_size> <name>
  assert(strncmp(public_line, "PUBLIC ", 7) == 0);
  public_line += 7;  // skip prefix

  vector<char*> tokens;
  if (!Tokenize(public_line, kWhitespace, 3, &tokens)) {
    return false;
  }

  // strtoull accepts a leading minus sign and returns the negated value as an
  // unsigned number, so "-5" would be read as a huge address.  Reject such
  // input explicitly; the strspn call skips the leading white space that
  // strtoull itself would skip.
  if (tokens[0][strspn(tokens[0], " \t\n\v\f\r")] == '-') {
    return false;
  }

  // The maximum value of each type is rejected because that is what
  // strtoull/strtol return when the input overflows.
  char *after_number;
  *address = strtoull(tokens[0], &after_number, 16);
  if (!IsValidAfterNumber(after_number) ||
      *address == std::numeric_limits<unsigned long long>::max()) {
    return false;
  }

  *stack_param_size = strtol(tokens[1], &after_number, 16);
  if (!IsValidAfterNumber(after_number) ||
      *stack_param_size == std::numeric_limits<long>::max() ||
      *stack_param_size < 0) {
    return false;
  }

  *name = tokens[2];
  return true;
}
// static
// Decides whether a strtol/strtoull conversion consumed a whole token.
// |after_number| is the end pointer reported by the conversion; it is
// accepted when it is non-NULL and the character it points at is found in
// kWhitespace by strchr.  strchr also matches the terminating '\0', so a
// number that runs to the end of the string is accepted as well.
bool SymbolParseHelper::IsValidAfterNumber(char *after_number) {
  if (after_number == NULL) {
    return false;
  }
  return strchr(kWhitespace, *after_number) != NULL;
}
} // namespace google_breakpad

View file

@ -53,6 +53,7 @@ using google_breakpad::StackFrame;
using google_breakpad::WindowsFrameInfo;
using google_breakpad::linked_ptr;
using google_breakpad::scoped_ptr;
using google_breakpad::SymbolParseHelper;
class TestCodeModule : public CodeModule {
public:
@ -405,6 +406,268 @@ TEST_F(TestBasicSourceLineResolver, TestUnload)
ASSERT_TRUE(resolver.HasModule(&module1));
}
// Well-formed FILE records must be accepted.  Record layout:
//   FILE <id> <filename>
TEST(SymbolParseHelper, ParseFileValid) {
  long parsed_index;
  char *parsed_name;

  // The file name may contain spaces.
  char simple_record[] = "FILE 1 file name";
  ASSERT_TRUE(SymbolParseHelper::ParseFile(simple_record, &parsed_index,
                                           &parsed_name));
  EXPECT_EQ(1, parsed_index);
  EXPECT_EQ("file name", string(parsed_name));

  // An index of 0 is legal.
  char zero_index_record[] = "FILE 0 file name";
  ASSERT_TRUE(SymbolParseHelper::ParseFile(zero_index_record, &parsed_index,
                                           &parsed_name));
  EXPECT_EQ(0, parsed_index);
  EXPECT_EQ("file name", string(parsed_name));
}
// Malformed FILE records must be rejected.  Record layout:
//   FILE <id> <filename>
TEST(SymbolParseHelper, ParseFileInvalid) {
  long parsed_index;
  char *parsed_name;

  // No file name after the index.
  char missing_name[] = "FILE 1 ";
  ASSERT_FALSE(SymbolParseHelper::ParseFile(missing_name, &parsed_index,
                                            &parsed_name));

  // Index is not a number.
  char bad_index[] = "FILE x1 file name";
  ASSERT_FALSE(SymbolParseHelper::ParseFile(bad_index, &parsed_index,
                                            &parsed_name));

  // Index is too large.
  char huge_index[] = "FILE 123123123123123123123123 file name";
  ASSERT_FALSE(SymbolParseHelper::ParseFile(huge_index, &parsed_index,
                                            &parsed_name));

  // Index is negative.
  char negative_index[] = "FILE -2 file name";
  ASSERT_FALSE(SymbolParseHelper::ParseFile(negative_index, &parsed_index,
                                            &parsed_name));
}
// Well-formed FUNC records must be accepted.  Record layout:
//   FUNC <address> <size> <stack_param_size> <name>
TEST(SymbolParseHelper, ParseFunctionValid) {
  uint64_t parsed_address;
  uint64_t parsed_size;
  long parsed_param_size;
  char *parsed_name;

  // Single-digit fields; the function name may contain spaces.
  char simple_record[] = "FUNC 1 2 3 function name";
  ASSERT_TRUE(SymbolParseHelper::ParseFunction(simple_record, &parsed_address,
                                               &parsed_size,
                                               &parsed_param_size,
                                               &parsed_name));
  EXPECT_EQ(1ULL, parsed_address);
  EXPECT_EQ(2ULL, parsed_size);
  EXPECT_EQ(3, parsed_param_size);
  EXPECT_EQ("function name", string(parsed_name));

  // Address, size and parameter size are all read as hexadecimal.
  char hex_record[] = "FUNC a1 a2 a3 function name";
  ASSERT_TRUE(SymbolParseHelper::ParseFunction(hex_record, &parsed_address,
                                               &parsed_size,
                                               &parsed_param_size,
                                               &parsed_name));
  EXPECT_EQ(0xa1ULL, parsed_address);
  EXPECT_EQ(0xa2ULL, parsed_size);
  EXPECT_EQ(0xa3, parsed_param_size);
  EXPECT_EQ("function name", string(parsed_name));

  // Zero is legal for every numeric field.
  char all_zero_record[] = "FUNC 0 0 0 function name";
  ASSERT_TRUE(SymbolParseHelper::ParseFunction(all_zero_record,
                                               &parsed_address, &parsed_size,
                                               &parsed_param_size,
                                               &parsed_name));
  EXPECT_EQ(0ULL, parsed_address);
  EXPECT_EQ(0ULL, parsed_size);
  EXPECT_EQ(0, parsed_param_size);
  EXPECT_EQ("function name", string(parsed_name));
}
// Malformed FUNC records must be rejected.  Record layout:
//   FUNC <address> <size> <stack_param_size> <name>
TEST(SymbolParseHelper, ParseFunctionInvalid) {
  uint64_t parsed_address;
  uint64_t parsed_size;
  long parsed_param_size;
  char *parsed_name;

  // No function name after the numeric fields.
  char missing_name[] = "FUNC 1 2 3 ";
  ASSERT_FALSE(SymbolParseHelper::ParseFunction(missing_name, &parsed_address,
                                                &parsed_size,
                                                &parsed_param_size,
                                                &parsed_name));

  // Address has trailing garbage.
  char bad_address[] = "FUNC 1z 2 3 function name";
  ASSERT_FALSE(SymbolParseHelper::ParseFunction(bad_address, &parsed_address,
                                                &parsed_size,
                                                &parsed_param_size,
                                                &parsed_name));

  // Address is too large.
  char huge_address[] = "FUNC 123123123123123123123123123 2 3 function name";
  ASSERT_FALSE(SymbolParseHelper::ParseFunction(huge_address, &parsed_address,
                                                &parsed_size,
                                                &parsed_param_size,
                                                &parsed_name));

  // Size is not a number.
  char bad_size[] = "FUNC 1 z2 3 function name";
  ASSERT_FALSE(SymbolParseHelper::ParseFunction(bad_size, &parsed_address,
                                                &parsed_size,
                                                &parsed_param_size,
                                                &parsed_name));

  // Size is too large.
  char huge_size[] = "FUNC 1 231231231231231231231231232 3 function name";
  ASSERT_FALSE(SymbolParseHelper::ParseFunction(huge_size, &parsed_address,
                                                &parsed_size,
                                                &parsed_param_size,
                                                &parsed_name));

  // Parameter size has trailing garbage.
  char bad_param_size[] = "FUNC 1 2 3z function name";
  ASSERT_FALSE(SymbolParseHelper::ParseFunction(bad_param_size,
                                                &parsed_address, &parsed_size,
                                                &parsed_param_size,
                                                &parsed_name));

  // Parameter size is too large.
  char huge_param_size[] =
      "FUNC 1 2 312312312312312312312312323 function name";
  ASSERT_FALSE(SymbolParseHelper::ParseFunction(huge_param_size,
                                                &parsed_address, &parsed_size,
                                                &parsed_param_size,
                                                &parsed_name));

  // Parameter size is negative.
  char negative_param_size[] = "FUNC 1 2 -5 function name";
  ASSERT_FALSE(SymbolParseHelper::ParseFunction(negative_param_size,
                                                &parsed_address, &parsed_size,
                                                &parsed_param_size,
                                                &parsed_name));
}
// Well-formed line records must be accepted.  Record layout:
//   <address> <size> <line number> <source file id>
TEST(SymbolParseHelper, ParseLineValid) {
  uint64_t parsed_address;
  uint64_t parsed_size;
  long parsed_line;
  long parsed_file_id;

  // Single-digit fields.
  char simple_record[] = "1 2 3 4";
  ASSERT_TRUE(SymbolParseHelper::ParseLine(simple_record, &parsed_address,
                                           &parsed_size, &parsed_line,
                                           &parsed_file_id));
  EXPECT_EQ(1ULL, parsed_address);
  EXPECT_EQ(2ULL, parsed_size);
  EXPECT_EQ(3, parsed_line);
  EXPECT_EQ(4, parsed_file_id);

  // Address and size are read as hexadecimal; text after the source file id
  // does not cause a rejection.
  char hex_record[] = "a1 a2 3 4 // some comment";
  ASSERT_TRUE(SymbolParseHelper::ParseLine(hex_record, &parsed_address,
                                           &parsed_size, &parsed_line,
                                           &parsed_file_id));
  EXPECT_EQ(0xa1ULL, parsed_address);
  EXPECT_EQ(0xa2ULL, parsed_size);
  EXPECT_EQ(3, parsed_line);
  EXPECT_EQ(4, parsed_file_id);

  // A line number of 0 is legal.
  char zero_line_record[] = "a1 a2 0 4 // some comment";
  ASSERT_TRUE(SymbolParseHelper::ParseLine(zero_line_record, &parsed_address,
                                           &parsed_size, &parsed_line,
                                           &parsed_file_id));
  EXPECT_EQ(0xa1ULL, parsed_address);
  EXPECT_EQ(0xa2ULL, parsed_size);
  EXPECT_EQ(0, parsed_line);
  EXPECT_EQ(4, parsed_file_id);
}
// Malformed line records must be rejected.  Record layout:
//   <address> <size> <line number> <source file id>
TEST(SymbolParseHelper, ParseLineInvalid) {
  uint64_t parsed_address;
  uint64_t parsed_size;
  long parsed_line;
  long parsed_file_id;

  // Source file id is missing.
  char missing_file_id[] = "1 2 3";
  ASSERT_FALSE(SymbolParseHelper::ParseLine(missing_file_id, &parsed_address,
                                            &parsed_size, &parsed_line,
                                            &parsed_file_id));

  // Address has trailing garbage.
  char bad_address[] = "1z 2 3 4";
  ASSERT_FALSE(SymbolParseHelper::ParseLine(bad_address, &parsed_address,
                                            &parsed_size, &parsed_line,
                                            &parsed_file_id));

  // Address is too large.
  char huge_address[] = "123123123123123123123123 2 3 4";
  ASSERT_FALSE(SymbolParseHelper::ParseLine(huge_address, &parsed_address,
                                            &parsed_size, &parsed_line,
                                            &parsed_file_id));

  // Size is not a number.
  char bad_size[] = "1 z2 3 4";
  ASSERT_FALSE(SymbolParseHelper::ParseLine(bad_size, &parsed_address,
                                            &parsed_size, &parsed_line,
                                            &parsed_file_id));

  // Size is too large.
  char huge_size[] = "1 123123123123123123123123 3 4";
  ASSERT_FALSE(SymbolParseHelper::ParseLine(huge_size, &parsed_address,
                                            &parsed_size, &parsed_line,
                                            &parsed_file_id));

  // Line number is not a number.
  char bad_line_number[] = "1 2 z3 4";
  ASSERT_FALSE(SymbolParseHelper::ParseLine(bad_line_number, &parsed_address,
                                            &parsed_size, &parsed_line,
                                            &parsed_file_id));

  // Line number is negative.
  char negative_line_number[] = "1 2 -1 4";
  ASSERT_FALSE(SymbolParseHelper::ParseLine(negative_line_number,
                                            &parsed_address, &parsed_size,
                                            &parsed_line, &parsed_file_id));

  // Line number is too large.
  char huge_line_number[] = "1 2 123123123123123123123 4";
  ASSERT_FALSE(SymbolParseHelper::ParseLine(huge_line_number, &parsed_address,
                                            &parsed_size, &parsed_line,
                                            &parsed_file_id));

  // Source file id is not a decimal number.
  char bad_file_id[] = "1 2 3 f";
  ASSERT_FALSE(SymbolParseHelper::ParseLine(bad_file_id, &parsed_address,
                                            &parsed_size, &parsed_line,
                                            &parsed_file_id));
}
// Well-formed PUBLIC records must be accepted.  Record layout:
//   PUBLIC <address> <stack_param_size> <name>
TEST(SymbolParseHelper, ParsePublicSymbolValid) {
  uint64_t parsed_address;
  long parsed_param_size;
  char *parsed_name;

  // Single-digit fields; the name itself may look like a number.
  char simple_record[] = "PUBLIC 1 2 3";
  ASSERT_TRUE(SymbolParseHelper::ParsePublicSymbol(simple_record,
                                                   &parsed_address,
                                                   &parsed_param_size,
                                                   &parsed_name));
  EXPECT_EQ(1ULL, parsed_address);
  EXPECT_EQ(2, parsed_param_size);
  EXPECT_EQ("3", string(parsed_name));

  // Address and parameter size are read as hexadecimal.
  char hex_record[] = "PUBLIC a1 a2 function name";
  ASSERT_TRUE(SymbolParseHelper::ParsePublicSymbol(hex_record,
                                                   &parsed_address,
                                                   &parsed_param_size,
                                                   &parsed_name));
  EXPECT_EQ(0xa1ULL, parsed_address);
  EXPECT_EQ(0xa2, parsed_param_size);
  EXPECT_EQ("function name", string(parsed_name));

  // An address of 0 is legal.
  char zero_address_record[] = "PUBLIC 0 a2 function name";
  ASSERT_TRUE(SymbolParseHelper::ParsePublicSymbol(zero_address_record,
                                                   &parsed_address,
                                                   &parsed_param_size,
                                                   &parsed_name));
  EXPECT_EQ(0ULL, parsed_address);
  EXPECT_EQ(0xa2, parsed_param_size);
  EXPECT_EQ("function name", string(parsed_name));
}
// Malformed PUBLIC records must be rejected.  Record layout:
//   PUBLIC <address> <stack_param_size> <name>
TEST(SymbolParseHelper, ParsePublicSymbolInvalid) {
  uint64_t parsed_address;
  long parsed_param_size;
  char *parsed_name;

  // No symbol name after the numeric fields.
  char missing_name[] = "PUBLIC 1 2 ";
  ASSERT_FALSE(SymbolParseHelper::ParsePublicSymbol(missing_name,
                                                    &parsed_address,
                                                    &parsed_param_size,
                                                    &parsed_name));

  // Address has trailing garbage.
  char bad_address[] = "PUBLIC 1z 2 3";
  ASSERT_FALSE(SymbolParseHelper::ParsePublicSymbol(bad_address,
                                                    &parsed_address,
                                                    &parsed_param_size,
                                                    &parsed_name));

  // Address is too large.
  char huge_address[] = "PUBLIC 123123123123123123123123 2 3";
  ASSERT_FALSE(SymbolParseHelper::ParsePublicSymbol(huge_address,
                                                    &parsed_address,
                                                    &parsed_param_size,
                                                    &parsed_name));

  // Parameter size is not a number.
  char bad_param_size[] = "PUBLIC 1 z2 3";
  ASSERT_FALSE(SymbolParseHelper::ParsePublicSymbol(bad_param_size,
                                                    &parsed_address,
                                                    &parsed_param_size,
                                                    &parsed_name));

  // Parameter size is too large.
  char huge_param_size[] = "PUBLIC 1 123123123123123123123123123 3";
  ASSERT_FALSE(SymbolParseHelper::ParsePublicSymbol(huge_param_size,
                                                    &parsed_address,
                                                    &parsed_param_size,
                                                    &parsed_name));

  // Parameter size is negative.
  char negative_param_size[] = "PUBLIC 1 -5 3";
  ASSERT_FALSE(SymbolParseHelper::ParsePublicSymbol(negative_param_size,
                                                    &parsed_address,
                                                    &parsed_param_size,
                                                    &parsed_name));
}
} // namespace
int main(int argc, char *argv[]) {