diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml
index 6b7b8d8fa52aa6f9690bacd6b855e0476a1a3c95..8fb2694c7a72bb8aeeaef99036e12382bfaa8e64 100644
--- a/.gitlab-ci.yml
+++ b/.gitlab-ci.yml
@@ -112,7 +112,7 @@ mysql-39:
 
 diffcov:
   script:
-  - git config --global --add safe.directory /builds/mailman/mailman
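+  # CI_PROJECT_NAMESPACE expands to the namespace the job runs under,
+  # so the safe.directory entry is also correct when run from a fork.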
+  - git config --global --add safe.directory "/builds/${CI_PROJECT_NAMESPACE}/mailman"
   - git config safe.directory
   - tox -e py37-diffcov
   only:
diff --git a/src/mailman/bin/master.py b/src/mailman/bin/master.py
index aceeb23cc4bb391a22b3009d6e7ecd9c051599f8..5a4816d3edc285ff2e521cfadc9b188958bd463d 100644
--- a/src/mailman/bin/master.py
+++ b/src/mailman/bin/master.py
@@ -257,6 +257,7 @@ class Loop:
     """Main control loop class."""
 
     def __init__(self, lock=None, restartable=None, config_file=None):
+        self._log = logging.getLogger('mailman.runner')
         self._lock = lock
         self._restartable = restartable
         self._config_file = config_file
@@ -264,7 +265,6 @@ class Loop:
 
     def install_signal_handlers(self):
         """Install various signals handlers for control from the master."""
-        log = logging.getLogger('mailman.runner')
         # Set up our signal handlers.  Also set up a SIGALRM handler to
         # refresh the lock once per day.  The lock lifetime is 1 day + 6 hours
         # so this should be plenty.
@@ -273,32 +273,64 @@ class Loop:
             signal.alarm(SECONDS_IN_A_DAY)
         signal.signal(signal.SIGALRM, sigalrm_handler)
         signal.alarm(SECONDS_IN_A_DAY)
-        # SIGHUP tells the runners to close and reopen their log files.
-        def sighup_handler(signum, frame):                        # noqa: E306
-            reopen()
-            for pid in self._kids:
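+        # The handlers are bound methods rather than nested functions,
+        # so the test suite can also invoke them directly (see
+        # bin/tests/test_master.py).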
+        signal.signal(signal.SIGHUP, self._sighup_handler)
+        signal.signal(signal.SIGUSR1, self._sigusr1_handler)
+        signal.signal(signal.SIGTERM, self._sigterm_handler)
+        signal.signal(signal.SIGINT, self._sigint_handler)
+
+    def _sighup_handler(self, signum, frame):
+        """Handles SIGHUP.
+
+        SIGHUP tells the runners to close and reopen their log files.
+        """
+        reopen()
+        for pid in self._kids:
+            try:
                 os.kill(pid, signal.SIGHUP)
-            log.info('Master watcher caught SIGHUP.  Re-opening log files.')
-        signal.signal(signal.SIGHUP, sighup_handler)
-        # SIGUSR1 is used by 'mailman restart'.
-        def sigusr1_handler(signum, frame):                       # noqa: E306
-            for pid in self._kids:
+            except ProcessLookupError:                       # pragma: nocover
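+                # The runner may already have exited on its own, in
+                # which case there is nothing left to signal.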
+                pass
+        self._log.info('Master watcher caught SIGHUP.  Re-opening log files.')
+
+    def _sigusr1_handler(self, signum, frame):
+        """Handles SIGUSR1.
+
+        SIGUSR1 is used by 'mailman restart'.
+        """
+        for pid in self._kids:
+            try:
                 os.kill(pid, signal.SIGUSR1)
-            log.info('Master watcher caught SIGUSR1.  Exiting.')
-        signal.signal(signal.SIGUSR1, sigusr1_handler)
-        # SIGTERM is what init will kill this process with when changing run
-        # levels.  It's also the signal 'mailman stop' uses.
-        def sigterm_handler(signum, frame):                       # noqa: E306
-            for pid in self._kids:
+            except ProcessLookupError:                       # pragma: nocover
+                pass
+        self._log.info('Master watcher caught SIGUSR1.  Restarting.')
+
+    def _sigterm_handler(self, signum, frame):
+        """Handles SIGTERM.
+
+        SIGTERM is what init will kill this process with when changing
+        run levels.  It's also the signal 'mailman stop' uses.
+        """
+        # Clear the flag so that we won't try to restart the runners.
+        self._restartable = False
+        for pid in self._kids:
+            try:
                 os.kill(pid, signal.SIGTERM)
-            log.info('Master watcher caught SIGTERM.  Exiting.')
-        signal.signal(signal.SIGTERM, sigterm_handler)
-        # SIGINT is what control-C gives.
-        def sigint_handler(signum, frame):                        # noqa: E306
-            for pid in self._kids:
+            except ProcessLookupError:                       # pragma: nocover
+                pass
+        self._log.info('Master watcher caught SIGTERM.  Exiting.')
+
+    def _sigint_handler(self, signum, frame):
+        """Handles SIGINT.
+
+        SIGINT is what control-C gives.
+        """
+        # Clear the flag so that we won't try to restart the runners.
+        self._restartable = False
+        for pid in self._kids:
+            try:
                 os.kill(pid, signal.SIGINT)
-            log.info('Master watcher caught SIGINT.  Restarting.')
-        signal.signal(signal.SIGINT, sigint_handler)
+            except ProcessLookupError:                       # pragma: nocover
+                pass
+        self._log.info('Master watcher caught SIGINT.  Exiting.')
 
     def _start_runner(self, spec):
         """Start a runner.
@@ -392,8 +424,14 @@ class Loop:
         """Sleep until a signal is received."""
         # Sleep until a signal is received.  This prevents the master from
         # exiting immediately even if there are no runners (as happens in the
-        # test suite).
+        # test suite).  We install a handler for SIGCHLD so that pause(2)
+        # will return control to us.
+        signal.signal(signal.SIGCHLD,
+                      lambda sig, stack: None)               # pragma: nocover
         signal.pause()
+        # Note: We cannot set the signal handler to SIG_IGN again,
+        # because since POSIX.1-2001 this changes the semantics of
+        # wait(2) to not return until all children have exited.
 
     def loop(self):
         """Main loop.
@@ -434,11 +472,20 @@ class Loop:
             # runaway restarts (e.g.  if the subprocess had a syntax error!)
             rname, slice_number, count, restarts = self._kids.pop(pid)
             config_name = 'runner.' + rname
-            restart = False
-            if why == signal.SIGUSR1 and self._restartable:  # pragma: nocover
-                restart = True
+            restart = self._restartable
+            # Don't restart if the runner explicitly asks not to be
+            # restarted.
+            if why == signal.SIGTERM:
+                # Note: See bin/runner.py and core/runner.py where
+                # this signaling is used.
+                restart = False
             # Have we hit the maximum number of restarts?
-            restarts += 1
+            if why != signal.SIGUSR1:
+                # Note: Explicit restarts using SIGUSR1 should not be
+                # counted here, because max_restarts only limits the
+                # number of automated restarts (see
+                # mailman/config/schema.cfg).
+                restarts += 1
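+            # In short: a runner that exited because of SIGTERM is never
+            # restarted, an explicit SIGUSR1 restart is not counted, and
+            # every other exit counts towards max_restarts below.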
             max_restarts = int(getattr(config, config_name).max_restarts)
             if restarts > max_restarts:
                 restart = False
diff --git a/src/mailman/bin/tests/test_master.py b/src/mailman/bin/tests/test_master.py
index e5e9b203ce4e0e295ce2915f11cbe202e48a0483..092c337c9c398ee8f9fe6b023755c383c5e3ebb4 100644
--- a/src/mailman/bin/tests/test_master.py
+++ b/src/mailman/bin/tests/test_master.py
@@ -18,6 +18,8 @@
 """Test master watcher utilities."""
 
 import os
+import time
+import signal
 import tempfile
 import unittest
 
@@ -29,6 +31,7 @@ from importlib_resources import path
 from io import StringIO
 from mailman.bin import master
 from mailman.config import config
+from mailman.testing.helpers import LogFileMark, TestableMaster
 from mailman.testing.layers import ConfigLayer
 from unittest.mock import patch
 
@@ -135,3 +138,216 @@ Exiting.
             # We created a non-restartable loop.
             start_mock.assert_called_once_with([('in', 1, 1)])
             loop_mock.assert_called_once_with()
+
+    def test_sighup_handler(self):
+        """Invokes the SIGHUP handler.
+
+        This sends SIGHUP to the runners.  The runner log lets us
+        verify that the signal was received; checking that the log
+        files were actually reopened would require a specialized
+        runner.
+        """
+        m = TestableMaster()
+        mark = LogFileMark('mailman.runner')
+        m.start('command')
+        self.assertEqual(len(list(m._kids)), 1)
+
+        # We need to give the runner some time to install the signal
+        # handler.  If we send SIGHUP before it does, the default
+        # action is to terminate the process.  We wait until we see
+        # that the runner has done so by inspecting the log file.
+        # This is race free, and bounded in time.
+        start = time.time()
+        while ("runner started." not in mark.read()
+               and time.time() - start < 10):
+            time.sleep(0.1)
+
+        mark = LogFileMark('mailman.runner')
+        m._sighup_handler(None, None)
+
+        # Check if the runner reopened the log.
+        start = time.time()
+        needle = "command runner caught SIGHUP.  Reopening logs."
+        while (needle not in mark.read()
+               and time.time() - start < 10):
+            time.sleep(0.1)
+        self.assertIn(needle, mark.read())
+
+        # Just to make sure it didn't die.
+        self.assertEqual(len(list(m._kids)), 1)
+        m.stop()
+
+    def test_sigusr1_handler(self):
+        """Invokes the SIGUSR1 handler.
+
+        We then check whether the runners restart.
+        """
+        m = TestableMaster()
+        m._restartable = True
+        mark = LogFileMark('mailman.runner')
+        m.start('command')
+        kids = list(m._kids)
+        self.assertEqual(len(kids), 1)
+        old_kid = kids[0]
+
+        # We need to give the runner some time to install the signal
+        # handler.  If we send SIGUSR1 before it does, the default
+        # action is to terminate the process.  We wait until we see
+        # that the runner has done so by inspecting the log file.
+        # This is race free, and bounded in time.
+        start = time.time()
+        while ("runner started." not in mark.read()
+               and time.time() - start < 10):
+            time.sleep(0.1)
+
+        # Invoke the handler in a loop.  This is race free, and
+        # bounded in time.
+        start = time.time()
+        mark = LogFileMark('mailman.runner')
+        while old_kid in set(m._kids) and time.time() - start < 10:
+            # We must not send signals in rapid succession, because
+            # the behavior of signals arriving while the process is in
+            # the signal handler varies.  Linux implements System V
+            # semantics, which means the default signal action is
+            # restored for the duration of the signal handler.  In
+            # this case it means to terminate the process.
+            time.sleep(1)
+            m._sigusr1_handler(None, None)
+
+        # Check if the runner got the signal.
+        start = time.time()
+        needle = "command runner caught SIGUSR1.  Stopping."
+        while (needle not in mark.read()
+               and time.time() - start < 10):
+            time.sleep(0.1)
+        self.assertIn(needle, mark.read())
+
+        new_kids = list(m._kids)
+        self.assertEqual(len(new_kids), 1)
+        self.assertNotEqual(old_kid, new_kids[0])
+        m._restartable = False
+        for pid in new_kids:
+            try:
+                os.kill(pid, signal.SIGTERM)
+            except ProcessLookupError:
+                pass
+        m.thread.join()
+        m.cleanup()
+
+    def test_sigterm_handler(self):
+        """Invokes the SIGTERM handler.
+
+        We then check whether the runners are actually stopped.
+        """
+        m = TestableMaster()
+        mark = LogFileMark('mailman.runner')
+        m.start('command')
+        kids = list(m._kids)
+        self.assertEqual(len(kids), 1)
+        old_kid = kids[0]
+
+        # We need to give the runner some time to install the signal
+        # handler.  If we send SIGTERM before it does, the default
+        # action is to terminate the process, which exits with a
+        # slightly different status code.  We wait until we see that
+        # the runner has done so by inspecting the log file.  This is
+        # race free, and bounded in time.
+        start = time.time()
+        while ("runner started." not in mark.read()
+               and time.time() - start < 10):
+            time.sleep(0.1)
+
+        # Invoke the handler in a loop.  This is race free, and
+        # bounded in time.
+        start = time.time()
+        mark = LogFileMark('mailman.runner')
+        while old_kid in set(m._kids) and time.time() - start < 10:
+            time.sleep(0.1)
+            m._sigterm_handler(None, None)
+
+        # Check if the runner got the signal.
+        start = time.time()
+        needle = "command runner caught SIGTERM.  Stopping."
+        while (needle not in mark.read()
+               and time.time() - start < 10):
+            time.sleep(0.1)
+        self.assertIn(needle, mark.read())
+
+        m.thread.join()
+        self.assertEqual(len(list(m._kids)), 0)
+        m.cleanup()
+
+    def test_sigint_handler(self):
+        """Invokes the SIGINT handler.
+
+        We then check whether the runners are actually stopped.
+        """
+        m = TestableMaster()
+        mark = LogFileMark('mailman.runner')
+        m.start('command')
+        kids = list(m._kids)
+        self.assertEqual(len(kids), 1)
+        old_kid = kids[0]
+
+        # We need to give the runner some time to install the signal
+        # handler.  If we send SIGINT before it does, the default
+        # action is to terminate the process, which exits with a
+        # slightly different status code.  We wait until we see that
+        # the runner has done so by inspecting the log file.  This is
+        # race free, and bounded in time.
+        start = time.time()
+        while ("runner started." not in mark.read()
+               and time.time() - start < 10):
+            time.sleep(0.1)
+
+        # Invoke the handler in a loop.  This is race free, and
+        # bounded in time.
+        start = time.time()
+        mark = LogFileMark('mailman.runner')
+        while old_kid in set(m._kids) and time.time() - start < 10:
+            time.sleep(0.1)
+            m._sigint_handler(None, None)
+
+        # Check if the runner got the signal.
+        start = time.time()
+        needle = "command runner caught SIGINT.  Stopping."
+        while (needle not in mark.read()
+               and time.time() - start < 10):
+            time.sleep(0.1)
+        self.assertIn(needle, mark.read())
+
+        m.thread.join()
+        self.assertEqual(len(list(m._kids)), 0)
+        m.cleanup()
+
+    def test_runner_restart_on_sigkill(self):
+        """Kills a runner with SIGKILL and see if it restarted."""
+        m = TestableMaster()
+        m._restartable = True
+        m.start('command')
+        kids = list(m._kids)
+        self.assertEqual(len(kids), 1)
+        old_kid = kids[0]
+
+        # Kill it.  No need to wait for anything as this cannot be
+        # caught anyway.
+        os.kill(old_kid, signal.SIGKILL)
+
+        # But, we need to wait for the master to collect it.
+        start = time.time()
+        while old_kid in set(m._kids) and time.time() - start < 10:
+            time.sleep(0.1)
+
+        new_kids = list(m._kids)
+        self.assertEqual(len(new_kids), 1)
+        self.assertNotEqual(old_kid, new_kids[0])
+        m._restartable = False
+        for pid in new_kids:
+            try:
+                os.kill(pid, signal.SIGTERM)
+            except ProcessLookupError:
+                pass
+        m.thread.join()
+        m.cleanup()
diff --git a/src/mailman/docs/NEWS.rst b/src/mailman/docs/NEWS.rst
index 98bb1e05f8c023806b03587c1f7553356c5ff69f..ac218c0938b66bbe8d3ff4bc32e9bb5d422a994d 100644
--- a/src/mailman/docs/NEWS.rst
+++ b/src/mailman/docs/NEWS.rst
@@ -24,6 +24,8 @@ Bugs fixed
 ----------
 * ``config.mta.remove_dkim_headers`` now applies to messages to -owner.
   (Closes #1053)
+* The master process now robustly detects when runner processes
+  terminate and restarts them.  (Closes #898)
 
 .. _news-3.3.8:
 
diff --git a/src/mailman/testing/layers.py b/src/mailman/testing/layers.py
index 84e5735ec27966147afa1596a6cbf306dabe9691..6ea9f67f9e62ca7c0a34a6bb4b2a19580a5bbccc 100644
--- a/src/mailman/testing/layers.py
+++ b/src/mailman/testing/layers.py
@@ -162,12 +162,16 @@ class ConfigLayer(MockAndMonkeyLayer):
             # debugging because it's just too verbose.  Unfortunately, if you
             # do want that level of debugging you currently have to manually
             # modify this conditional.
-            if cls.stderr and sub_name != 'database':
+            if sub_name != 'database':
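+                # Every sub-logger gets an explicit log file path; the
+                # propagate/debug settings are only added when stderr
+                # output was requested.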
                 test_config += expand(dedent("""
                 [logging.$name]
-                propagate: yes
-                level: debug
-                """), None, dict(name=sub_name, path=path))
+                path: $path
+                $extra_verbose
+                """), None, dict(name=sub_name, path=path,
+                                 extra_verbose=dedent("""
+                                     propagate: yes
+                                     level: debug
+                                     """) if cls.stderr else ""))
         # The root logger will already have a handler, but it's not the right
         # handler.  Remove that and set our own.
         if cls.stderr: