Fix subprocess model saving on Windows

On Windows, interrupts for subprocesses work differently than on OSX/Linux: child subprocesses and their pipes may close while the parent process is still running during a keyboard interrupt (Ctrl+C). To handle this, this change adds handling for EOFError and BrokenPipeError exceptions when interacting with subprocess environments. It also adds management for parallel runs started with the "num-runs" option, so that the threads for each run are joined and KeyboardInterrupts are handled. These changes made the "_win_handler" we used to specially manage interrupts on Windows unnecessary, so it has been removed.
Unity-Technologies · eshvk · Apr 12, 2019 · Feb 12, 2019 · Feb 19, 2019 · Feb 20, 2019
commit 15fcf95f86c837facb39b03381f9e379449f3d18
diff --git a/ml-agents-envs/mlagents/envs/subprocess_environment.py b/ml-agents-envs/mlagents/envs/subprocess_environment.py
@@ -27,15 +27,24 @@ class UnityEnvWorker(NamedTuple):
     conn: Connection
 
     def send(self, name: str, payload=None):
-        cmd = EnvironmentCommand(name, payload)
-        self.conn.send(cmd)
+        try:
+            cmd = EnvironmentCommand(name, payload)
+            self.conn.send(cmd)
+        except (BrokenPipeError, EOFError):
+            raise KeyboardInterrupt
 
     def recv(self) -> EnvironmentResponse:
-        response: EnvironmentResponse = self.conn.recv()
-        return response
+        try:
+            response: EnvironmentResponse = self.conn.recv()
+            return response
+        except (BrokenPipeError, EOFError):
+            raise KeyboardInterrupt
 
     def close(self):
-        self.conn.send(EnvironmentCommand("close"))
+        try:
+            self.conn.send(EnvironmentCommand('close'))
+        except (BrokenPipeError, EOFError):
+            pass
         self.process.join()
 
 
@@ -87,10 +96,10 @@ def create_worker(
             env_factory: Callable[[int], BaseUnityEnvironment]
     ) -> UnityEnvWorker:
         parent_conn, child_conn = Pipe()
+
         # Need to use cloudpickle for the env factory function since function objects aren't picklable
         # on Windows as of Python 3.6.
         pickled_env_factory = cloudpickle.dumps(env_factory)
-
         child_process = Process(target=worker, args=(child_conn, pickled_env_factory, worker_id))
         child_process.start()
         return UnityEnvWorker(child_process, worker_id, parent_conn)

diff --git a/ml-agents/mlagents/trainers/learn.py b/ml-agents/mlagents/trainers/learn.py
@@ -51,7 +51,7 @@ def run_training(sub_id: int, run_seed: int, run_options, process_queue):
     trainer_config_path = run_options['<trainer-config-path>']
     # Recognize and use docker volume if one is passed as an argument
     if not docker_target_name:
-        model_path = './models/{run_id}'.format(run_id=run_id)
+        model_path = './models/{run_id}-{sub_id}'.format(run_id=run_id, sub_id=sub_id)
         summaries_dir = './summaries'
     else:
         trainer_config_path = \
@@ -63,9 +63,10 @@ def run_training(sub_id: int, run_seed: int, run_options, process_queue):
                 '/{docker_target_name}/{curriculum_folder}'.format(
                     docker_target_name=docker_target_name,
                     curriculum_folder=curriculum_folder)
-        model_path = '/{docker_target_name}/models/{run_id}'.format(
+        model_path = '/{docker_target_name}/models/{run_id}-{sub_id}'.format(
             docker_target_name=docker_target_name,
-            run_id=run_id)
+            run_id=run_id,
+            sub_id=sub_id)
         summaries_dir = '/{docker_target_name}/summaries'.format(
             docker_target_name=docker_target_name)
 
@@ -274,6 +275,14 @@ def main():
             while process_queue.get() is not True:
                 continue
 
+    # Wait for jobs to complete.  Otherwise we'll have an extra
+    # unhandled KeyboardInterrupt if we end early.
+    try:
+        for job in jobs:
+            job.join()
+    except KeyboardInterrupt:
+        pass
+
 # For python debugger to directly run this script
 if __name__ == "__main__":
     main()
diff --git a/ml-agents/mlagents/trainers/tests/test_learn.py b/ml-agents/mlagents/trainers/tests/test_learn.py
@@ -41,7 +41,7 @@ def test_run_training(load_config, create_environment_factory, subproc_env_mock)
         with patch.object(TrainerController, "start_learning", MagicMock()):
             learn.run_training(0, 0, basic_options(), MagicMock())
             mock_init.assert_called_once_with(
-                './models/ppo',
+                './models/ppo-0',
                 './summaries',
                 'ppo-0',
                 50000,
@@ -74,5 +74,5 @@ def test_docker_target_path(load_config, create_environment_factory, subproc_env
         with patch.object(TrainerController, "start_learning", MagicMock()):
             learn.run_training(0, 0, options_with_docker_target, MagicMock())
             mock_init.assert_called_once()
-            assert(mock_init.call_args[0][0] == '/dockertarget/models/ppo')
+            assert(mock_init.call_args[0][0] == '/dockertarget/models/ppo-0')
             assert(mock_init.call_args[0][1] == '/dockertarget/summaries')
diff --git a/ml-agents/mlagents/trainers/trainer_controller.py b/ml-agents/mlagents/trainers/trainer_controller.py
@@ -6,9 +6,6 @@
 import logging
 import shutil
 import sys
-if sys.platform.startswith('win'):
-    import win32api
-    import win32con
 from typing import *
 
 import numpy as np
@@ -104,18 +101,6 @@ def _save_model_when_interrupted(self, steps=0):
                          'while the graph is generated.')
         self._save_model(steps)
 
-    def _win_handler(self, event):
-        """
-        This function gets triggered after ctrl-c or ctrl-break is pressed
-        under Windows platform.
-        """
-        if event in (win32con.CTRL_C_EVENT, win32con.CTRL_BREAK_EVENT):
-            self._save_model_when_interrupted(self.global_step)
-            self._export_graph()
-            sys.exit()
-            return True
-        return False
-
     def _write_training_metrics(self):
         """
         Write all CSV metrics
@@ -223,9 +208,6 @@ def start_learning(self, env: BaseUnityEnvironment, trainer_config):
             for brain_name, trainer in self.trainers.items():
                 trainer.write_tensorboard_text('Hyperparameters',
                                                trainer.parameters)
-            if sys.platform.startswith('win'):
-                # Add the _win_handler function to the windows console's handler function list
-                win32api.SetConsoleCtrlHandler(self._win_handler, True)
         try:
             curr_info = self._reset_env(env)
             while any([t.get_step <= t.get_max_steps \