117 lines
4.7 KiB
Diff
117 lines
4.7 KiB
Diff
From 9f4fcc77233442d38d2fc64f5861e5950ffcaab3 Mon Sep 17 00:00:00 2001
|
|
From: Jeremy Drake <github@jdrake.com>
|
|
Date: Mon, 11 Nov 2024 20:09:49 -0800
|
|
Subject: [PATCH 41/N] cygthread: suspend thread before terminating.
|
|
|
|
This addresses an extremely difficult to debug deadlock when running
|
|
under emulation on ARM64.
|
|
|
|
A relatively easy way to trigger this bug is to call `fork()`, then within the
|
|
child process immediately call another `fork()` and then `exit()` the
|
|
intermediate process.
|
|
|
|
It would seem that there is a "code emulation" lock on the wait thread at
|
|
this stage, and if the thread is terminated too early, that lock still exists
|
|
albeit without a thread, and nothing moves forward.
|
|
|
|
It seems that a `SuspendThread()` combined with a `GetThreadContext()`
|
|
(to force the thread to _actually_ be suspended, for more details see
|
|
https://devblogs.microsoft.com/oldnewthing/20150205-00/?p=44743)
|
|
makes sure the thread is "booted" from emulation before it is suspended.
|
|
|
|
Hopefully this means it won't be holding any locks or otherwise leave
|
|
emulation in a bad state when the thread is terminated.
|
|
|
|
Also, attempt to use `CancelSynchonousIo()` (as seen in `flock.cc`) to avoid
|
|
the need for `TerminateThread()` altogether. This doesn't always work,
|
|
however, so was not a complete fix for the deadlock issue.
|
|
|
|
Addresses: https://cygwin.com/pipermail/cygwin-developers/2024-May/012694.html
|
|
Signed-off-by: Jeremy Drake <cygwin@jdrake.com>
|
|
---
|
|
winsup/cygwin/cygthread.cc | 14 ++++++++++++++
|
|
winsup/cygwin/pinfo.cc | 10 +++++++---
|
|
winsup/cygwin/sigproc.cc | 12 ++++++++++--
|
|
3 files changed, 31 insertions(+), 5 deletions(-)
|
|
|
|
diff --git a/winsup/cygwin/cygthread.cc b/winsup/cygwin/cygthread.cc
|
|
index 54918e7..4f16097 100644
|
|
--- a/winsup/cygwin/cygthread.cc
|
|
+++ b/winsup/cygwin/cygthread.cc
|
|
@@ -302,6 +302,20 @@ cygthread::terminate_thread ()
|
|
if (!inuse)
|
|
goto force_notterminated;
|
|
|
|
+ if (_my_tls._ctinfo != this)
|
|
+ {
|
|
+ CONTEXT context;
|
|
+ context.ContextFlags = CONTEXT_CONTROL;
|
|
+ /* SuspendThread makes sure a thread is "booted" from emulation before
|
|
+ it is suspended. As such, the emulator hopefully won't be in a bad
|
|
+ state (aka, holding any locks) when the thread is terminated. */
|
|
+ SuspendThread (h);
|
|
+ /* We need to call GetThreadContext, even though we don't care about the
|
|
+ context, because SuspendThread is asynchronous and GetThreadContext
|
|
+ will make sure the thread is *really* suspended before returning */
|
|
+ GetThreadContext (h, &context);
|
|
+ }
|
|
+
|
|
TerminateThread (h, 0);
|
|
WaitForSingleObject (h, INFINITE);
|
|
CloseHandle (h);
|
|
diff --git a/winsup/cygwin/pinfo.cc b/winsup/cygwin/pinfo.cc
|
|
index a5f5d6e..4bb1946 100644
|
|
--- a/winsup/cygwin/pinfo.cc
|
|
+++ b/winsup/cygwin/pinfo.cc
|
|
@@ -1262,13 +1262,17 @@ proc_waiter (void *arg)
|
|
|
|
for (;;)
|
|
{
|
|
- DWORD nb;
|
|
+ DWORD nb, err;
|
|
char buf = '\0';
|
|
|
|
if (!ReadFile (vchild.rd_proc_pipe, &buf, 1, &nb, NULL)
|
|
- && GetLastError () != ERROR_BROKEN_PIPE)
|
|
+ && (err = GetLastError ()) != ERROR_BROKEN_PIPE)
|
|
{
|
|
- system_printf ("error on read of child wait pipe %p, %E", vchild.rd_proc_pipe);
|
|
+ /* ERROR_OPERATION_ABORTED is expected due to the possibility that
|
|
+ CancelSynchronousIo interruped the ReadFile call, so don't output
|
|
+ that error */
|
|
+ if (err != ERROR_OPERATION_ABORTED)
|
|
+ system_printf ("error on read of child wait pipe %p, %E", vchild.rd_proc_pipe);
|
|
break;
|
|
}
|
|
|
|
diff --git a/winsup/cygwin/sigproc.cc b/winsup/cygwin/sigproc.cc
|
|
index 858a7fd..a6d06a8 100644
|
|
--- a/winsup/cygwin/sigproc.cc
|
|
+++ b/winsup/cygwin/sigproc.cc
|
|
@@ -413,7 +413,11 @@ proc_terminate ()
|
|
to 1 iff it is a Cygwin process. */
|
|
if (!have_execed || !have_execed_cygwin)
|
|
chld_procs[i]->ppid = 1;
|
|
- if (chld_procs[i].wait_thread)
|
|
+ /* Attempt to exit the wait_thread cleanly via CancelSynchronousIo
|
|
+ before falling back to the (explicitly dangerous) cross-thread
|
|
+ termination */
|
|
+ if (chld_procs[i].wait_thread
|
|
+ && !CancelSynchronousIo (chld_procs[i].wait_thread->thread_handle ()))
|
|
chld_procs[i].wait_thread->terminate_thread ();
|
|
/* Release memory associated with this process unless it is 'myself'.
|
|
'myself' is only in the chld_procs table when we've execed. We
|
|
@@ -1198,7 +1202,11 @@ remove_proc (int ci)
|
|
{
|
|
if (have_execed)
|
|
{
|
|
- if (_my_tls._ctinfo != chld_procs[ci].wait_thread)
|
|
+ /* Attempt to exit the wait_thread cleanly via CancelSynchronousIo
|
|
+ before falling back to the (explicitly dangerous) cross-thread
|
|
+ termination */
|
|
+ if (_my_tls._ctinfo != chld_procs[ci].wait_thread
|
|
+ && !CancelSynchronousIo (chld_procs[ci].wait_thread->thread_handle ()))
|
|
chld_procs[ci].wait_thread->terminate_thread ();
|
|
}
|
|
else if (chld_procs[ci] && chld_procs[ci]->exists ())
|