mirror of
https://github.com/yuzu-emu/breakpad.git
synced 2024-12-23 23:45:27 +00:00
fix races in CrashGenerator::CreateChildCrash
The current CreateChildCrash logic is racy when it comes to creating a crash dump for two reasons: The main thread that calls kill() on a different thread is guaranteed the signal will be *queued* when it returns, but not *delivered*. If the kernel doesn't automatically schedule the receiving thread, but instead lets the main thread run to the exit() call, then the signal never triggers a coredump and the whole process simply exits. The main thread is using kill() to try to deliver a signal to a specific thread, but that function is for sending signals to a process. That means the kernel is free to deliver the signal to any thread in the process and not just the one requested. This manifests itself as the pr_pid in the coredump not being the one expected. Instead, we must use tkill() with the tid (which we already took care of gathering) to deliver to a specific thread. These are a lot easier to see on a UMP system as contention is heavier. BUG=chromium:207918 TEST=`dumper_unittest` still passes, and doesn't flake out in a UMP system TEST=`linux_client_unittest` still passes R=benchan@chromium.org Review URL: https://breakpad.appspot.com/1304005 git-svn-id: http://google-breakpad.googlecode.com/svn/trunk@1299 4c0a9323-5329-0410-9bdc-e9ce6186880e
This commit is contained in:
parent
33a84041e7
commit
0ac94ba617
|
@ -74,14 +74,8 @@ TEST(LinuxCoreDumperTest, VerifyDumpWithMultipleThreads) {
|
||||||
const unsigned kCrashThread = 1;
|
const unsigned kCrashThread = 1;
|
||||||
const int kCrashSignal = SIGABRT;
|
const int kCrashSignal = SIGABRT;
|
||||||
pid_t child_pid;
|
pid_t child_pid;
|
||||||
// TODO(benchan): Revert to use ASSERT_TRUE once the flakiness in
|
ASSERT_TRUE(crash_generator.CreateChildCrash(kNumOfThreads, kCrashThread,
|
||||||
// CrashGenerator is identified and fixed.
|
kCrashSignal, &child_pid));
|
||||||
if (!crash_generator.CreateChildCrash(kNumOfThreads, kCrashThread,
|
|
||||||
kCrashSignal, &child_pid)) {
|
|
||||||
fprintf(stderr, "LinuxCoreDumperTest.VerifyDumpWithMultipleThreads test "
|
|
||||||
"is skipped due to no core dump generated\n");
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
const string core_file = crash_generator.GetCoreFilePath();
|
const string core_file = crash_generator.GetCoreFilePath();
|
||||||
const string procfs_path = crash_generator.GetDirectoryOfProcFilesCopy();
|
const string procfs_path = crash_generator.GetDirectoryOfProcFilesCopy();
|
||||||
|
|
|
@ -138,14 +138,8 @@ TEST(ElfCoreDumpTest, ValidCoreFile) {
|
||||||
const unsigned kNumOfThreads = 3;
|
const unsigned kNumOfThreads = 3;
|
||||||
const unsigned kCrashThread = 1;
|
const unsigned kCrashThread = 1;
|
||||||
const int kCrashSignal = SIGABRT;
|
const int kCrashSignal = SIGABRT;
|
||||||
// TODO(benchan): Revert to use ASSERT_TRUE once the flakiness in
|
ASSERT_TRUE(crash_generator.CreateChildCrash(kNumOfThreads, kCrashThread,
|
||||||
// CrashGenerator is identified and fixed.
|
kCrashSignal, NULL));
|
||||||
if (!crash_generator.CreateChildCrash(kNumOfThreads, kCrashThread,
|
|
||||||
kCrashSignal, NULL)) {
|
|
||||||
fprintf(stderr, "ElfCoreDumpTest.ValidCoreFile test is skipped "
|
|
||||||
"due to no core dump generated");
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
pid_t expected_crash_thread_id = crash_generator.GetThreadId(kCrashThread);
|
pid_t expected_crash_thread_id = crash_generator.GetThreadId(kCrashThread);
|
||||||
set<pid_t> expected_thread_ids;
|
set<pid_t> expected_thread_ids;
|
||||||
for (unsigned i = 0; i < kNumOfThreads; ++i) {
|
for (unsigned i = 0; i < kNumOfThreads; ++i) {
|
||||||
|
|
|
@ -65,15 +65,26 @@ const char* const kProcFilesToCopy[] = {
|
||||||
const size_t kNumProcFilesToCopy =
|
const size_t kNumProcFilesToCopy =
|
||||||
sizeof(kProcFilesToCopy) / sizeof(kProcFilesToCopy[0]);
|
sizeof(kProcFilesToCopy) / sizeof(kProcFilesToCopy[0]);
|
||||||
|
|
||||||
|
int gettid() {
|
||||||
|
// Glibc does not provide a wrapper for this.
|
||||||
|
return syscall(__NR_gettid);
|
||||||
|
}
|
||||||
|
|
||||||
|
int tkill(pid_t tid, int sig) {
|
||||||
|
// Glibc does not provide a wrapper for this.
|
||||||
|
return syscall(__NR_tkill, tid, sig);
|
||||||
|
}
|
||||||
|
|
||||||
// Core file size limit set to 1 MB, which is big enough for test purposes.
|
// Core file size limit set to 1 MB, which is big enough for test purposes.
|
||||||
const rlim_t kCoreSizeLimit = 1024 * 1024;
|
const rlim_t kCoreSizeLimit = 1024 * 1024;
|
||||||
|
|
||||||
void *thread_function(void *data) {
|
void *thread_function(void *data) {
|
||||||
ThreadData* thread_data = reinterpret_cast<ThreadData*>(data);
|
ThreadData* thread_data = reinterpret_cast<ThreadData*>(data);
|
||||||
volatile pid_t thread_id = syscall(__NR_gettid);
|
volatile pid_t thread_id = gettid();
|
||||||
*(thread_data->thread_id_ptr) = thread_id;
|
*(thread_data->thread_id_ptr) = thread_id;
|
||||||
int result = pthread_barrier_wait(thread_data->barrier);
|
int result = pthread_barrier_wait(thread_data->barrier);
|
||||||
if (result != 0 && result != PTHREAD_BARRIER_SERIAL_THREAD) {
|
if (result != 0 && result != PTHREAD_BARRIER_SERIAL_THREAD) {
|
||||||
|
perror("Failed to wait for sync barrier");
|
||||||
exit(1);
|
exit(1);
|
||||||
}
|
}
|
||||||
while (true) {
|
while (true) {
|
||||||
|
@ -160,11 +171,16 @@ bool CrashGenerator::SetCoreFileSizeLimit(rlim_t limit) const {
|
||||||
bool CrashGenerator::CreateChildCrash(
|
bool CrashGenerator::CreateChildCrash(
|
||||||
unsigned num_threads, unsigned crash_thread, int crash_signal,
|
unsigned num_threads, unsigned crash_thread, int crash_signal,
|
||||||
pid_t* child_pid) {
|
pid_t* child_pid) {
|
||||||
if (num_threads == 0 || crash_thread >= num_threads)
|
if (num_threads == 0 || crash_thread >= num_threads) {
|
||||||
|
fprintf(stderr, "CrashGenerator: Invalid thread counts; num_threads=%u"
|
||||||
|
" crash_thread=%u\n", num_threads, crash_thread);
|
||||||
return false;
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
if (!MapSharedMemory(num_threads * sizeof(pid_t)))
|
if (!MapSharedMemory(num_threads * sizeof(pid_t))) {
|
||||||
|
perror("CrashGenerator: Unable to map shared memory");
|
||||||
return false;
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
pid_t pid = fork();
|
pid_t pid = fork();
|
||||||
if (pid == 0) {
|
if (pid == 0) {
|
||||||
|
@ -183,9 +199,22 @@ bool CrashGenerator::CreateChildCrash(
|
||||||
fprintf(stderr, "CrashGenerator: Failed to copy proc files\n");
|
fprintf(stderr, "CrashGenerator: Failed to copy proc files\n");
|
||||||
exit(1);
|
exit(1);
|
||||||
}
|
}
|
||||||
if (kill(*GetThreadIdPointer(crash_thread), crash_signal) == -1) {
|
if (tkill(*GetThreadIdPointer(crash_thread), crash_signal) == -1) {
|
||||||
perror("CrashGenerator: Failed to kill thread by signal");
|
perror("CrashGenerator: Failed to kill thread by signal");
|
||||||
|
} else {
|
||||||
|
// At this point, we've queued the signal for delivery, but there's no
|
||||||
|
// guarantee when it'll be delivered. We don't want the main thread to
|
||||||
|
// race and exit before the thread we signaled is processed. So sleep
|
||||||
|
// long enough that we won't flake even under fairly high load.
|
||||||
|
// TODO: See if we can't be a bit more deterministic. There doesn't
|
||||||
|
// seem to be an API to check on signal delivery status, so we can't
|
||||||
|
// really poll and wait for the kernel to declare the signal has been
|
||||||
|
// delivered. If it has, and things worked, we'd be killed, so the
|
||||||
|
// sleep length doesn't really matter.
|
||||||
|
sleep(10 * 60);
|
||||||
}
|
}
|
||||||
|
} else {
|
||||||
|
perror("CrashGenerator: Failed to set core limit");
|
||||||
}
|
}
|
||||||
exit(1);
|
exit(1);
|
||||||
} else if (pid == -1) {
|
} else if (pid == -1) {
|
||||||
|
@ -200,8 +229,8 @@ bool CrashGenerator::CreateChildCrash(
|
||||||
}
|
}
|
||||||
if (!WIFSIGNALED(status) || WTERMSIG(status) != crash_signal) {
|
if (!WIFSIGNALED(status) || WTERMSIG(status) != crash_signal) {
|
||||||
fprintf(stderr, "CrashGenerator: Child process not killed by the expected signal\n"
|
fprintf(stderr, "CrashGenerator: Child process not killed by the expected signal\n"
|
||||||
" exit status=0x%x signaled=%s sig=%d expected=%d\n",
|
" exit status=0x%x pid=%u signaled=%s sig=%d expected=%d\n",
|
||||||
status, WIFSIGNALED(status) ? "true" : "false",
|
status, pid, WIFSIGNALED(status) ? "true" : "false",
|
||||||
WTERMSIG(status), crash_signal);
|
WTERMSIG(status), crash_signal);
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in a new issue