From fd98b2af3773437487af0df22c428f3db630949a Mon Sep 17 00:00:00 2001 From: waylonis Date: Thu, 11 Jan 2007 01:49:07 +0000 Subject: [PATCH] Add classes to: walk mach-o files, look for identifiers, and return a 16 byte unique identifier. Fixes #106. git-svn-id: http://google-breakpad.googlecode.com/svn/trunk@102 4c0a9323-5329-0410-9bdc-e9ce6186880e --- src/common/mac/file_id.cc | 104 +++++++++++ src/common/mac/file_id.h | 78 +++++++++ src/common/mac/macho_id.cc | 312 +++++++++++++++++++++++++++++++++ src/common/mac/macho_id.h | 124 +++++++++++++ src/common/mac/macho_walker.cc | 207 ++++++++++++++++++++++ src/common/mac/macho_walker.h | 94 ++++++++++ 6 files changed, 919 insertions(+) create mode 100644 src/common/mac/file_id.cc create mode 100644 src/common/mac/file_id.h create mode 100644 src/common/mac/macho_id.cc create mode 100644 src/common/mac/macho_id.h create mode 100644 src/common/mac/macho_walker.cc create mode 100644 src/common/mac/macho_walker.h diff --git a/src/common/mac/file_id.cc b/src/common/mac/file_id.cc new file mode 100644 index 00000000..f74f861e --- /dev/null +++ b/src/common/mac/file_id.cc @@ -0,0 +1,104 @@ +// Copyright (c) 2006, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// file_id.cc: Return a unique identifier for a file +// +// See file_id.h for documentation +// +// Author: Dan Waylonis + +#include +#include +#include + +#include "common/mac/file_id.h" +#include "common/mac/macho_id.h" + +using MacFileUtilities::MachoID; + +namespace google_airbag { + +FileID::FileID(const char *path) { + strlcpy(path_, path, sizeof(path_)); +} + +bool FileID::FileIdentifier(unsigned char identifier[16]) { + int fd = open(path_, O_RDONLY); + if (fd == -1) + return false; + + MD5_CTX md5; + MD5_Init(&md5); + + // Read 4k x 2 bytes at a time. This is faster than just 4k bytes, but + // doesn't seem to be an unreasonable size for the stack. + unsigned char buffer[4096 * 2]; + size_t buffer_size = sizeof(buffer); + while ((buffer_size = read(fd, buffer, buffer_size) > 0)) { + MD5_Update(&md5, buffer, buffer_size); + } + + close(fd); + MD5_Final(identifier, &md5); + + return true; +} + +bool FileID::MachoIdentifier(int cpu_type, unsigned char identifier[16]) { + MachoID macho(path_); + + if (macho.UUIDCommand(cpu_type, identifier)) + return true; + + if (macho.IDCommand(cpu_type, identifier)) + return true; + + return macho.MD5(cpu_type, identifier); +} + +// static +void FileID::ConvertIdentifierToString(const unsigned char identifier[16], + char *buffer, int buffer_length) { + int buffer_idx = 0; + for (int idx = 0; (buffer_idx < buffer_length) && (idx < 16); ++idx) { + int hi = (identifier[idx] >> 4) & 0x0F; + int lo = (identifier[idx]) & 0x0F; + + if (idx == 4 || idx == 6 || idx == 8 || idx == 10) + buffer[buffer_idx++] = '-'; + + buffer[buffer_idx++] = (hi >= 10) ? 'A' + hi - 10 : '0' + hi; + buffer[buffer_idx++] = (lo >= 10) ? 'A' + lo - 10 : '0' + lo; + } + + // NULL terminate + buffer[(buffer_idx < buffer_length) ? buffer_idx : buffer_idx - 1] = 0; +} + +} // namespace google_airbag diff --git a/src/common/mac/file_id.h b/src/common/mac/file_id.h new file mode 100644 index 00000000..663aa369 --- /dev/null +++ b/src/common/mac/file_id.h @@ -0,0 +1,78 @@ +// Copyright (c) 2006, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// file_id.h: Return a unique identifier for a file +// +// Author: Dan Waylonis + +#ifndef COMMON_MAC_FILE_ID_H__ +#define COMMON_MAC_FILE_ID_H__ + +#include + +namespace google_airbag { + +class FileID { + public: + FileID(const char *path); + ~FileID() {}; + + // Load the identifier for the file path specified in the constructor into + // |identifier|. Return false if the identifier could not be created for the + // file. + // The current implementation will return the MD5 hash of the file's bytes. + bool FileIdentifier(unsigned char identifier[16]); + + // Treat the file as a mach-o file that will contain one or more archicture. + // Accepted values for |cpu_type| (e.g., CPU_TYPE_X86 or CPU_TYPE_POWERPC) + // are listed in /usr/include/mach/machine.h. + // If |cpu_type| is 0, then the native cpu type is used. + // Returns false if opening the file failed or if the |cpu_type| is not + // present in the file. + // Return the unique identifier in |identifier|. + // The current implementation will look for the (in order of priority): + // LC_UUID, LC_ID_DYLIB, or MD5 hash of the given |cpu_type|. + bool MachoIdentifier(int cpu_type, unsigned char identifier[16]); + + // Convert the |identifier| data to a NULL terminated string. The string will + // be formatted as a UUID (e.g., 22F065BB-FC9C-49F7-80FE-26A7CEBD7BCE). + // The |buffer| should be at least 37 bytes long to receive all of the data + // and termination. Shorter buffers will contain truncated data. + static void ConvertIdentifierToString(const unsigned char identifier[16], + char *buffer, int buffer_length); + + private: + // Storage for the path specified + char path_[PATH_MAX]; +}; + +} // namespace google_airbag + +#endif // COMMON_MAC_FILE_ID_H__ + diff --git a/src/common/mac/macho_id.cc b/src/common/mac/macho_id.cc new file mode 100644 index 00000000..f152d275 --- /dev/null +++ b/src/common/mac/macho_id.cc @@ -0,0 +1,312 @@ +// Copyright (c) 2006, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// macho_id.cc: Functions to gather identifying information from a macho file +// +// See macho_id.h for documentation +// +// Author: Dan Waylonis + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "common/mac/macho_id.h" +#include "common/mac/macho_walker.h" + +namespace MacFileUtilities { + +MachoID::MachoID(const char *path) { + strlcpy(path_, path, sizeof(path_)); + file_ = open(path, O_RDONLY); +} + +MachoID::~MachoID() { + if (file_ != -1) + close(file_); +} + +// The CRC info is from http://en.wikipedia.org/wiki/Adler-32 +// With optimizations from http://www.zlib.net/ + +// The largest prime smaller than 65536 +#define MOD_ADLER 65521 +// MAX_BLOCK is the largest n such that 255n(n+1)/2 + (n+1)(MAX_BLOCK-1) <= 2^32-1 +#define MAX_BLOCK 5552 + +void MachoID::UpdateCRC(unsigned char *bytes, size_t size) { +// Unrolled loops for summing +#define DO1(buf,i) {sum1 += (buf)[i]; sum2 += sum1;} +#define DO2(buf,i) DO1(buf,i); DO1(buf,i+1); +#define DO4(buf,i) DO2(buf,i); DO2(buf,i+2); +#define DO8(buf,i) DO4(buf,i); DO4(buf,i+4); +#define DO16(buf) DO8(buf,0); DO8(buf,8); + // Split up the crc + uint32_t sum1 = crc_ & 0xFFFF; + uint32_t sum2 = (crc_ >> 16) & 0xFFFF; + + // Do large blocks + while (size >= MAX_BLOCK) { + size -= MAX_BLOCK; + int block_count = MAX_BLOCK / 16; + do { + DO16(bytes); + bytes += 16; + } while (--block_count); + sum1 %= MOD_ADLER; + sum2 %= MOD_ADLER; + } + + // Do remaining bytes + if (size) { + while (size >= 16) { + size -= 16; + DO16(bytes); + bytes += 16; + } + while (size--) { + sum1 += *bytes++; + sum2 += sum1; + } + sum1 %= MOD_ADLER; + sum2 %= MOD_ADLER; + crc_ = (sum2 << 16) | sum1; + } +} + +void MachoID::UpdateMD5(unsigned char *bytes, size_t size) { + MD5_Update(&md5_context_, bytes, size); +} + +void MachoID::UpdateSHA1(unsigned char *bytes, size_t size) { + SHA_Update(&sha1_context_, bytes, size); +} + +void MachoID::Update(unsigned char *bytes, size_t size) { + if (update_function_) + (this->*update_function_)(bytes, size); +} + +bool MachoID::UUIDCommand(int cpu_type, unsigned char bytes[16]) { + struct uuid_command uuid_cmd; + MachoWalker walker(path_, UUIDWalkerCB, &uuid_cmd); + + uuid_cmd.cmd = 0; + if (!walker.WalkHeader(cpu_type)) + return false; + + // If we found the command, we'll have initialized the uuid_command + // structure + if (uuid_cmd.cmd == LC_UUID) { + memcpy(bytes, uuid_cmd.uuid, sizeof(uuid_cmd.uuid)); + return true; + } + + return false; +} + +bool MachoID::IDCommand(int cpu_type, unsigned char identifier[16]) { + struct dylib_command dylib_cmd; + MachoWalker walker(path_, IDWalkerCB, &dylib_cmd); + + dylib_cmd.cmd = 0; + if (!walker.WalkHeader(cpu_type)) + return false; + + // If we found the command, we'll have initialized the dylib_command + // structure + if (dylib_cmd.cmd == LC_ID_DYLIB) { + // Take the timestamp, version, and compatability version bytes to form + // the first 12 bytes, pad the rest with zeros + identifier[0] = (dylib_cmd.dylib.timestamp >> 24) & 0xFF; + identifier[1] = (dylib_cmd.dylib.timestamp >> 16) & 0xFF; + identifier[2] = (dylib_cmd.dylib.timestamp >> 8) & 0xFF; + identifier[3] = dylib_cmd.dylib.timestamp & 0xFF; + identifier[4] = (dylib_cmd.dylib.current_version >> 24) & 0xFF; + identifier[5] = (dylib_cmd.dylib.current_version >> 16) & 0xFF; + identifier[6] = (dylib_cmd.dylib.current_version >> 8) & 0xFF; + identifier[7] = dylib_cmd.dylib.current_version & 0xFF; + identifier[8] = (dylib_cmd.dylib.compatibility_version >> 24) & 0xFF; + identifier[9] = (dylib_cmd.dylib.compatibility_version >> 16) & 0xFF; + identifier[10] = (dylib_cmd.dylib.compatibility_version >> 8) & 0xFF; + identifier[11] = dylib_cmd.dylib.compatibility_version & 0xFF; + identifier[12] = identifier[13] = identifier[14] = identifier[15] = 0; + + return true; + } + + return false; +} + +uint32_t MachoID::Adler32(int cpu_type) { + MachoWalker walker(path_, WalkerCB, this); + update_function_ = &MachoID::UpdateCRC; + crc_ = 0; + + if (!walker.WalkHeader(cpu_type)) + return 0; + + return crc_; +} + +bool MachoID::MD5(int cpu_type, unsigned char identifier[16]) { + MachoWalker walker(path_, WalkerCB, this); + update_function_ = &MachoID::UpdateMD5; + + if (MD5_Init(&md5_context_)) { + if (!walker.WalkHeader(cpu_type)) + return false; + + MD5_Final(identifier, &md5_context_); + return true; + } + + return false; +} + +bool MachoID::SHA1(int cpu_type, unsigned char identifier[16]) { + MachoWalker walker(path_, WalkerCB, this); + update_function_ = &MachoID::UpdateSHA1; + + if (SHA_Init(&sha1_context_)) { + if (!walker.WalkHeader(cpu_type)) + return false; + + SHA_Final(identifier, &sha1_context_); + return true; + } + + return false; +} + +// static +bool MachoID::WalkerCB(MachoWalker *walker, load_command *cmd, off_t offset, + bool swap, void *context) { + MachoID *macho_id = (MachoID *)context; + off_t seg_offset = 0; + size_t seg_size = 0; + + if (cmd->cmd == LC_SEGMENT) { + struct segment_command seg; + + if (!walker->ReadBytes(&seg, sizeof(seg), offset)) + return false; + + if (swap) + swap_segment_command(&seg, NXHostByteOrder()); + + seg_offset = seg.fileoff; + seg_size = seg.filesize; + } else if (cmd->cmd == LC_SEGMENT_64) { + struct segment_command_64 seg64; + + if (!walker->ReadBytes(&seg64, sizeof(seg64), offset)) + return false; + + if (swap) + swap_segment_command_64(&seg64, NXHostByteOrder()); + + seg_offset = seg64.fileoff; + seg_size = seg64.filesize; + } + + // If this was a non-zero segment, read the bytes, update the hash + if (seg_size) { + // Read 4k x 2 bytes at a time. This is faster than just 4k bytes, but + // doesn't seem to be an unreasonable size for the stack. + unsigned char buffer[4096 * 2]; + size_t buffer_size; + off_t file_offset = seg_offset; + while (seg_size > 0) { + if (seg_size > sizeof(buffer)) { + buffer_size = sizeof(buffer); + seg_size -= buffer_size; + } else { + buffer_size = seg_size; + seg_size = 0; + } + + if (!walker->ReadBytes(buffer, buffer_size, file_offset)) + return false; + + macho_id->Update(buffer, buffer_size); + file_offset += buffer_size; + } + } + + // Continue processing + return true; +} + +// static +bool MachoID::UUIDWalkerCB(MachoWalker *walker, load_command *cmd, off_t offset, + bool swap, void *context) { + if (cmd->cmd == LC_UUID) { + struct uuid_command *uuid_cmd = (struct uuid_command *)context; + + if (!walker->ReadBytes(uuid_cmd, sizeof(struct uuid_command), offset)) + return false; + + if (swap) + swap_uuid_command(uuid_cmd, NXHostByteOrder()); + + return false; + } + + // Continue processing + return true; +} + +// static +bool MachoID::IDWalkerCB(MachoWalker *walker, load_command *cmd, off_t offset, + bool swap, void *context) { + if (cmd->cmd == LC_ID_DYLIB) { + struct dylib_command *dylib_cmd = (struct dylib_command *)context; + + if (!walker->ReadBytes(dylib_cmd, sizeof(struct dylib_command), offset)) + return false; + + if (swap) + swap_dylib_command(dylib_cmd, NXHostByteOrder()); + + return false; + } + + // Continue processing + return true; +} + +} // namespace MacFileUtilities diff --git a/src/common/mac/macho_id.h b/src/common/mac/macho_id.h new file mode 100644 index 00000000..c5261255 --- /dev/null +++ b/src/common/mac/macho_id.h @@ -0,0 +1,124 @@ +// Copyright (c) 2006, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// macho_id.h: Functions to gather identifying information from a macho file +// +// Author: Dan Waylonis + +#ifndef COMMON_MAC_MACHO_ID_H__ +#define COMMON_MAC_MACHO_ID_H__ + +#include +#include +#include +#include + +namespace MacFileUtilities { + +class MachoWalker; + +class MachoID { + public: + MachoID(const char *path); + ~MachoID(); + + // For the given |cpu_type|, return a UUID from the LC_UUID command. + // Return false if there isn't a LC_UUID command. + bool UUIDCommand(int cpu_type, unsigned char identifier[16]); + + // For the given |cpu_type|, return a UUID from the LC_ID_DYLIB command. + // Return false if there isn't a LC_ID_DYLIB command. + bool IDCommand(int cpu_type, unsigned char identifier[16]); + + // For the given |cpu_type|, return the Adler32 CRC for the mach-o data + // segment(s). + // Return 0 on error (e.g., if the file is not a mach-o file) + uint32_t Adler32(int cpu_type); + + // For the given |cpu_type|, return the MD5 for the mach-o data segment(s). + // Return true on success, false otherwise + bool MD5(int cpu_type, unsigned char identifier[16]); + + // For the given |cpu_type|, return the SHA1 for the mach-o data segment(s). + // Return true on success, false otherwise + bool SHA1(int cpu_type, unsigned char identifier[16]); + + private: + // Signature of class member function to be called with data read from file + typedef void (MachoID::*UpdateFunction)(unsigned char *bytes, size_t size); + + // Update the CRC value by examining |size| |bytes| and applying the algorithm + // to each byte. + void UpdateCRC(unsigned char *bytes, size_t size); + + // Update the MD5 value by examining |size| |bytes| and applying the algorithm + // to each byte. + void UpdateMD5(unsigned char *bytes, size_t size); + + // Update the SHA1 value by examining |size| |bytes| and applying the + // algorithm to each byte. + void UpdateSHA1(unsigned char *bytes, size_t size); + + // Bottleneck for update routines + void Update(unsigned char *bytes, size_t size); + + // The callback from the MachoWalker for CRC, MD5, and SHA1 + static bool WalkerCB(MachoWalker *walker, load_command *cmd, off_t offset, + bool swap, void *context); + + // The callback from the MachoWalker for LC_UUID + static bool UUIDWalkerCB(MachoWalker *walker, load_command *cmd, off_t offset, + bool swap, void *context); + + // The callback from the MachoWalker for LC_ID_DYLIB + static bool IDWalkerCB(MachoWalker *walker, load_command *cmd, off_t offset, + bool swap, void *context); + + // File path + char path_[PATH_MAX]; + + // File descriptor + int file_; + + // The current crc value + uint32_t crc_; + + // The MD5 context + MD5_CTX md5_context_; + + // The SHA1 context + SHA_CTX sha1_context_; + + // The current update to call from the Update callback + UpdateFunction update_function_; +}; + +} // namespace MacFileUtilities + +#endif // COMMON_MAC_MACHO_ID_H__ diff --git a/src/common/mac/macho_walker.cc b/src/common/mac/macho_walker.cc new file mode 100644 index 00000000..96dca406 --- /dev/null +++ b/src/common/mac/macho_walker.cc @@ -0,0 +1,207 @@ +// Copyright (c) 2006, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// macho_walker.cc: Iterate over the load commands in a mach-o file +// +// See macho_walker.h for documentation +// +// Author: Dan Waylonis + +#include +#include +#include +#include +#include +#include + +#include "common/mac/macho_walker.h" + +namespace MacFileUtilities { + +MachoWalker::MachoWalker(const char *path, LoadCommandCallback callback, + void *context) + : callback_(callback), + callback_context_(context) { + file_ = open(path, O_RDONLY); +} + +MachoWalker::~MachoWalker() { + if (file_ != -1) + close(file_); +} + +int MachoWalker::ValidateCPUType(int cpu_type) { + // If the user didn't specify, try to use the local architecture. If that + // fails, use the base type for the executable. + if (cpu_type == 0) { + const NXArchInfo *arch = NXGetLocalArchInfo(); + if (arch) + cpu_type = arch->cputype; + else +#if __ppc__ + cpu_type = CPU_TYPE_POWERPC; +#elif __i386__ + cpu_type = CPU_TYPE_X86; +#else +#error Unknown architecture -- are you on a PDP-11? +#endif + } + + return cpu_type; +} + +bool MachoWalker::WalkHeader(int cpu_type) { + int valid_cpu_type = ValidateCPUType(cpu_type); + off_t offset; + if (FindHeader(valid_cpu_type, offset)) { + if (cpu_type & CPU_ARCH_ABI64) + return WalkHeader64AtOffset(offset); + + return WalkHeaderAtOffset(offset); + } + + return false; +} + +bool MachoWalker::ReadBytes(void *buffer, size_t size, off_t offset) { + return pread(file_, buffer, size, offset) == (ssize_t)size; +} + +bool MachoWalker::FindHeader(int cpu_type, off_t &offset) { + int valid_cpu_type = ValidateCPUType(cpu_type); + // Read the magic bytes that's common amongst all mach-o files + uint32_t magic; + if (!ReadBytes(&magic, sizeof(magic), 0)) + return false; + + offset = sizeof(magic); + + // Figure out what type of file we've got + bool is_fat; + if (magic == FAT_MAGIC || magic == FAT_CIGAM) { + is_fat = true; + } + else if (magic != MH_MAGIC && magic != MH_CIGAM && magic != MH_MAGIC_64 && + magic != MH_CIGAM_64) { + return false; + } + + if (!is_fat) { + // If we don't have a fat header, check if the cpu type matches the single + // header + cpu_type_t header_cpu_type; + if (!ReadBytes(&header_cpu_type, sizeof(header_cpu_type), offset)) + return false; + + if (NXHostByteOrder() != NX_BigEndian) + header_cpu_type = NXSwapLong(header_cpu_type); + + if (valid_cpu_type != header_cpu_type) + return false; + + offset = 0; + return true; + } else { + // Read the fat header and find an appropriate architecture + offset = 0; + struct fat_header fat; + if (!ReadBytes(&fat, sizeof(fat), offset)) + return false; + + if (NXHostByteOrder() != NX_BigEndian) + swap_fat_header(&fat, NXHostByteOrder()); + + offset += sizeof(fat); + + // Search each architecture for the desired one + struct fat_arch arch; + for (uint32_t i = 0; i < fat.nfat_arch; ++i) { + if (!ReadBytes(&arch, sizeof(arch), offset)) + return false; + + if (NXHostByteOrder() != NX_BigEndian) + swap_fat_arch(&arch, 1, NXHostByteOrder()); + + if (arch.cputype == valid_cpu_type) { + offset = arch.offset; + return true; + } + + offset += sizeof(arch); + } + } + + return false; +} + +bool MachoWalker::WalkHeaderAtOffset(off_t offset) { + struct mach_header header; + if (!ReadBytes(&header, sizeof(header), offset)) + return false; + + bool swap = (header.magic == MH_CIGAM); + if (swap) + swap_mach_header(&header, NXHostByteOrder()); + + return WalkHeaderCore(offset + sizeof(header), header.ncmds, swap); +} + +bool MachoWalker::WalkHeader64AtOffset(off_t offset) { + struct mach_header_64 header; + if (!ReadBytes(&header, sizeof(header), offset)) + return false; + + bool swap = (header.magic == MH_CIGAM_64); + if (swap) + swap_mach_header_64(&header, NXHostByteOrder()); + + return WalkHeaderCore(offset + sizeof(header), header.ncmds, swap); +} + +bool MachoWalker::WalkHeaderCore(off_t offset, uint32_t number_of_commands, + bool swap) { + for (uint32_t i = 0; i < number_of_commands; ++i) { + struct load_command cmd; + if (!ReadBytes(&cmd, sizeof(cmd), offset)) + return false; + + if (swap) + swap_load_command(&cmd, NXHostByteOrder()); + + // Call the user callback + if (callback_ && !callback_(this, &cmd, offset, swap, callback_context_)) + break; + + offset += cmd.cmdsize; + } + + return true; +} + +} // namespace MacFileUtilities diff --git a/src/common/mac/macho_walker.h b/src/common/mac/macho_walker.h new file mode 100644 index 00000000..0d55dd79 --- /dev/null +++ b/src/common/mac/macho_walker.h @@ -0,0 +1,94 @@ +// Copyright (c) 2006, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// macho_walker.h: Iterate over the load commands in a mach-o file +// +// Author: Dan Waylonis + +#ifndef COMMON_MAC_MACHO_WALKER_H__ +#define COMMON_MAC_MACHO_WALKER_H__ + +#include +#include + +namespace MacFileUtilities { + +class MachoWalker { + public: + // A callback function executed when a new load command is read. If no + // further processing of load commands is desired, return false. Otherwise, + // return true. + // |cmd| is the current command, and |offset| is the location relative to the + // beginning of the file (not header) where the command was read. If |swap| + // is set, then any command data (other than the returned load_command) should + // be swapped when read + typedef bool (*LoadCommandCallback)(MachoWalker *walker, load_command *cmd, + off_t offset, bool swap, void *context); + + MachoWalker(const char *path, LoadCommandCallback callback, void *context); + MachoWalker(int file_descriptor, LoadCommandCallback callback, void *context); + ~MachoWalker(); + + // Begin walking the header for |cpu_type|. If |cpu_type| is 0, then the + // native cpu type is used. Otherwise, accepted values are listed in + // /usr/include/mach/machine.h (e.g., CPU_TYPE_X86 or CPU_TYPE_POWERPC). + // Returns false if opening the file failed or if the |cpu_type| is not + // present in the file. + bool WalkHeader(int cpu_type); + + // Locate (if any) the header offset for |cpu_type| and return in |offset|. + // Return true if found, false otherwise. + bool FindHeader(int cpu_type, off_t &offset); + + // Read |size| bytes from the opened file at |offset| into |buffer| + bool ReadBytes(void *buffer, size_t size, off_t offset); + + private: + // Validate the |cpu_type| + int ValidateCPUType(int cpu_type); + + // Process an individual header starting at |offset| from the start of the + // file. Return true if successful, false otherwise. + bool WalkHeaderAtOffset(off_t offset); + bool WalkHeader64AtOffset(off_t offset); + + // Bottleneck for walking the load commands + bool WalkHeaderCore(off_t offset, uint32_t number_of_commands, bool swap); + + // File descriptor to the opened file + int file_; + + // User specified callback & context + LoadCommandCallback callback_; + void *callback_context_; +}; + +} // namespace MacFileUtilities + +#endif // COMMON_MAC_MACHO_WALKER_H__