Handle program strings with the assignment operator smashed against the next

token.

Patch by Benjamin Smedberg <bsmedberg@gmail.com>


git-svn-id: http://google-breakpad.googlecode.com/svn/trunk@926 4c0a9323-5329-0410-9bdc-e9ce6186880e
This commit is contained in:
mark@chromium.org 2012-02-23 22:41:36 +00:00
parent 907f95c5bd
commit 26c31918f7
3 changed files with 152 additions and 121 deletions

View file

@ -66,6 +66,141 @@ class AutoStackClearer {
};
template<typename ValueType>
bool PostfixEvaluator<ValueType>::EvaluateToken(
const string &token,
const string &expression,
DictionaryValidityType *assigned) {
// There are enough binary operations that do exactly the same thing
// (other than the specific operation, of course) that it makes sense
// to share as much code as possible.
enum BinaryOperation {
BINARY_OP_NONE = 0,
BINARY_OP_ADD,
BINARY_OP_SUBTRACT,
BINARY_OP_MULTIPLY,
BINARY_OP_DIVIDE_QUOTIENT,
BINARY_OP_DIVIDE_MODULUS,
BINARY_OP_ALIGN
};
BinaryOperation operation = BINARY_OP_NONE;
if (token == "+")
operation = BINARY_OP_ADD;
else if (token == "-")
operation = BINARY_OP_SUBTRACT;
else if (token == "*")
operation = BINARY_OP_MULTIPLY;
else if (token == "/")
operation = BINARY_OP_DIVIDE_QUOTIENT;
else if (token == "%")
operation = BINARY_OP_DIVIDE_MODULUS;
else if (token == "@")
operation = BINARY_OP_ALIGN;
if (operation != BINARY_OP_NONE) {
// Get the operands.
ValueType operand1 = ValueType();
ValueType operand2 = ValueType();
if (!PopValues(&operand1, &operand2)) {
BPLOG(ERROR) << "Could not PopValues to get two values for binary "
"operation " << token << ": " << expression;
return false;
}
// Perform the operation.
ValueType result;
switch (operation) {
case BINARY_OP_ADD:
result = operand1 + operand2;
break;
case BINARY_OP_SUBTRACT:
result = operand1 - operand2;
break;
case BINARY_OP_MULTIPLY:
result = operand1 * operand2;
break;
case BINARY_OP_DIVIDE_QUOTIENT:
result = operand1 / operand2;
break;
case BINARY_OP_DIVIDE_MODULUS:
result = operand1 % operand2;
break;
case BINARY_OP_ALIGN:
result =
operand1 & (static_cast<ValueType>(-1) ^ (operand2 - 1));
break;
case BINARY_OP_NONE:
// This will not happen, but compilers will want a default or
// BINARY_OP_NONE case.
BPLOG(ERROR) << "Not reached!";
return false;
break;
}
// Save the result.
PushValue(result);
} else if (token == "^") {
// ^ for unary dereference. Can't dereference without memory.
if (!memory_) {
BPLOG(ERROR) << "Attempt to dereference without memory: " <<
expression;
return false;
}
ValueType address;
if (!PopValue(&address)) {
BPLOG(ERROR) << "Could not PopValue to get value to derefence: " <<
expression;
return false;
}
ValueType value;
if (!memory_->GetMemoryAtAddress(address, &value)) {
BPLOG(ERROR) << "Could not dereference memory at address " <<
HexString(address) << ": " << expression;
return false;
}
PushValue(value);
} else if (token == "=") {
// = for assignment.
ValueType value;
if (!PopValue(&value)) {
BPLOG(INFO) << "Could not PopValue to get value to assign: " <<
expression;
return false;
}
// Assignment is only meaningful when assigning into an identifier.
// The identifier must name a variable, not a constant. Variables
// begin with '$'.
string identifier;
if (PopValueOrIdentifier(NULL, &identifier) != POP_RESULT_IDENTIFIER) {
BPLOG(ERROR) << "PopValueOrIdentifier returned a value, but an "
"identifier is needed to assign " <<
HexString(value) << ": " << expression;
return false;
}
if (identifier.empty() || identifier[0] != '$') {
BPLOG(ERROR) << "Can't assign " << HexString(value) << " to " <<
identifier << ": " << expression;
return false;
}
(*dictionary_)[identifier] = value;
if (assigned)
(*assigned)[identifier] = true;
} else {
// The token is not an operator, it's a literal value or an identifier.
// Push it onto the stack as-is. Use push_back instead of PushValue
// because PushValue pushes ValueType as a string, but token is already
// a string.
stack_.push_back(token);
}
return true;
}
template<typename ValueType>
bool PostfixEvaluator<ValueType>::EvaluateInternal(
const string &expression,
@ -74,132 +209,21 @@ bool PostfixEvaluator<ValueType>::EvaluateInternal(
istringstream stream(expression);
string token;
while (stream >> token) {
// There are enough binary operations that do exactly the same thing
// (other than the specific operation, of course) that it makes sense
// to share as much code as possible.
enum BinaryOperation {
BINARY_OP_NONE = 0,
BINARY_OP_ADD,
BINARY_OP_SUBTRACT,
BINARY_OP_MULTIPLY,
BINARY_OP_DIVIDE_QUOTIENT,
BINARY_OP_DIVIDE_MODULUS,
BINARY_OP_ALIGN
};
BinaryOperation operation = BINARY_OP_NONE;
if (token == "+")
operation = BINARY_OP_ADD;
else if (token == "-")
operation = BINARY_OP_SUBTRACT;
else if (token == "*")
operation = BINARY_OP_MULTIPLY;
else if (token == "/")
operation = BINARY_OP_DIVIDE_QUOTIENT;
else if (token == "%")
operation = BINARY_OP_DIVIDE_MODULUS;
else if (token == "@")
operation = BINARY_OP_ALIGN;
if (operation != BINARY_OP_NONE) {
// Get the operands.
ValueType operand1 = ValueType();
ValueType operand2 = ValueType();
if (!PopValues(&operand1, &operand2)) {
BPLOG(ERROR) << "Could not PopValues to get two values for binary "
"operation " << token << ": " << expression;
// Normally, tokens are whitespace-separated, but occasionally, the
// assignment operator is smashed up against the next token, i.e.
// $T0 $ebp 128 + =$eip $T0 4 + ^ =$ebp $T0 ^ =
// This has been observed in program strings produced by MSVS 2010 in LTO
// mode.
if (token.size() > 1 && token[0] == '=') {
if (!EvaluateToken("=", expression, assigned)) {
return false;
}
// Perform the operation.
ValueType result;
switch (operation) {
case BINARY_OP_ADD:
result = operand1 + operand2;
break;
case BINARY_OP_SUBTRACT:
result = operand1 - operand2;
break;
case BINARY_OP_MULTIPLY:
result = operand1 * operand2;
break;
case BINARY_OP_DIVIDE_QUOTIENT:
result = operand1 / operand2;
break;
case BINARY_OP_DIVIDE_MODULUS:
result = operand1 % operand2;
break;
case BINARY_OP_ALIGN:
result =
operand1 & (static_cast<ValueType>(-1) ^ (operand2 - 1));
break;
case BINARY_OP_NONE:
// This will not happen, but compilers will want a default or
// BINARY_OP_NONE case.
BPLOG(ERROR) << "Not reached!";
return false;
break;
}
// Save the result.
PushValue(result);
} else if (token == "^") {
// ^ for unary dereference. Can't dereference without memory.
if (!memory_) {
BPLOG(ERROR) << "Attempt to dereference without memory: " <<
expression;
if (!EvaluateToken(token.substr(1), expression, assigned)) {
return false;
}
ValueType address;
if (!PopValue(&address)) {
BPLOG(ERROR) << "Could not PopValue to get value to derefence: " <<
expression;
return false;
}
ValueType value;
if (!memory_->GetMemoryAtAddress(address, &value)) {
BPLOG(ERROR) << "Could not dereference memory at address " <<
HexString(address) << ": " << expression;
return false;
}
PushValue(value);
} else if (token == "=") {
// = for assignment.
ValueType value;
if (!PopValue(&value)) {
BPLOG(INFO) << "Could not PopValue to get value to assign: " <<
expression;
return false;
}
// Assignment is only meaningful when assigning into an identifier.
// The identifier must name a variable, not a constant. Variables
// begin with '$'.
string identifier;
if (PopValueOrIdentifier(NULL, &identifier) != POP_RESULT_IDENTIFIER) {
BPLOG(ERROR) << "PopValueOrIdentifier returned a value, but an "
"identifier is needed to assign " <<
HexString(value) << ": " << expression;
return false;
}
if (identifier.empty() || identifier[0] != '$') {
BPLOG(ERROR) << "Can't assign " << HexString(value) << " to " <<
identifier << ": " << expression;
return false;
}
(*dictionary_)[identifier] = value;
if (assigned)
(*assigned)[identifier] = true;
} else {
// The token is not an operator, it's a literal value or an identifier.
// Push it onto the stack as-is. Use push_back instead of PushValue
// because PushValue pushes ValueType as a string, but token is already
// a string.
stack_.push_back(token);
} else if (!EvaluateToken(token, expression, assigned)) {
return false;
}
}

View file

@ -153,6 +153,10 @@ class PostfixEvaluator {
bool EvaluateInternal(const string &expression,
DictionaryValidityType *assigned);
bool EvaluateToken(const string &token,
const string &expression,
DictionaryValidityType *assigned);
// The dictionary mapping constant and variable identifiers (strings) to
// values. Keys beginning with '$' are treated as variable names, and
// PostfixEvaluator is free to create and modify these keys. Weak pointer.

View file

@ -150,6 +150,7 @@ static bool RunTests() {
{ "$rDivM 9 6 % =", true }, // $rDivM = 9 % 6 = 3
{ "$rDeref 9 ^ =", true }, // $rDeref = ^9 = 10 (FakeMemoryRegion)
{ "$rAlign 36 8 @ =", true }, // $rAlign = 36 @ 8
{ "$rAdd3 2 2 + =$rMul2 9 6 * =", true } // smashed-equals tokenization
};
map<string, unsigned int> validate_data_0;
validate_data_0["$rAdd"] = 8;
@ -160,6 +161,8 @@ static bool RunTests() {
validate_data_0["$rDivM"] = 3;
validate_data_0["$rDeref"] = 10;
validate_data_0["$rAlign"] = 32;
validate_data_0["$rAdd3"] = 4;
validate_data_0["$rMul2"] = 54;
// The second test set simulates a couple of MSVC program strings.
// The data is fudged a little bit because the tests use FakeMemoryRegion