Skip to content

Commit cc6c22f

Browse files
author
Ervin T
authored
Add more extensive tests for BC trainer (#2506)
* Add more extensive tests for BC trainer
* Break up tests for BC trainer
1 parent d2ceb9f commit cc6c22f

File tree

2 files changed

+52
-9
lines changed

2 files changed

+52
-9
lines changed

ml-agents/mlagents/trainers/tests/mock_brain.py

Lines changed: 6 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -91,12 +91,13 @@ def setup_mock_unityenvironment(mock_env, mock_brain, mock_braininfo):
9191
:Mock mock_brain: A mock Brain object that specifies the params of this environment.
9292
:Mock mock_braininfo: A mock BrainInfo object that will be returned at each step and reset.
9393
"""
94+
brain_name = mock_brain.brain_name
9495
mock_env.return_value.academy_name = "MockAcademy"
95-
mock_env.return_value.brains = {"MockBrain": mock_brain}
96-
mock_env.return_value.external_brain_names = ["MockBrain"]
97-
mock_env.return_value.brain_names = ["MockBrain"]
98-
mock_env.return_value.reset.return_value = {"MockBrain": mock_braininfo}
99-
mock_env.return_value.step.return_value = {"MockBrain": mock_braininfo}
96+
mock_env.return_value.brains = {brain_name: mock_brain}
97+
mock_env.return_value.external_brain_names = [brain_name]
98+
mock_env.return_value.brain_names = [brain_name]
99+
mock_env.return_value.reset.return_value = {brain_name: mock_braininfo}
100+
mock_env.return_value.step.return_value = {brain_name: mock_braininfo}
100101

101102

102103
def simulate_rollout(env, policy, buffer_init_samples):

ml-agents/mlagents/trainers/tests/test_bc.py

Lines changed: 46 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -18,9 +18,9 @@
1818
def dummy_config():
1919
return yaml.safe_load(
2020
"""
21-
hidden_units: 128
21+
hidden_units: 32
2222
learning_rate: 3.0e-4
23-
num_layers: 2
23+
num_layers: 1
2424
use_recurrent: false
2525
sequence_length: 32
2626
memory_size: 32
@@ -32,8 +32,8 @@ def dummy_config():
3232
)
3333

3434

35-
@mock.patch("mlagents.envs.UnityEnvironment")
36-
def test_bc_trainer(mock_env, dummy_config):
35+
def create_bc_trainer(dummy_config):
36+
mock_env = mock.Mock()
3737
mock_brain = mb.create_mock_3dball_brain()
3838
mock_braininfo = mb.create_mock_braininfo(num_agents=12, num_vector_observations=8)
3939
mb.setup_mock_unityenvironment(mock_env, mock_brain, mock_braininfo)
@@ -49,12 +49,54 @@ def test_bc_trainer(mock_env, dummy_config):
4949
mock_brain, trainer_parameters, training=True, load=False, seed=0, run_id=0
5050
)
5151
trainer.demonstration_buffer = mb.simulate_rollout(env, trainer.policy, 100)
52+
return trainer, env
53+
54+
55+
def test_bc_trainer_step(dummy_config):
56+
trainer, env = create_bc_trainer(dummy_config)
57+
# Test get_step
58+
assert trainer.get_step == 0
59+
# Test update policy
5260
trainer.update_policy()
5361
assert len(trainer.stats["Losses/Cloning Loss"]) > 0
62+
# Test increment step
5463
trainer.increment_step(1)
5564
assert trainer.step == 1
5665

5766

67+
def test_bc_trainer_add_proc_experiences(dummy_config):
68+
trainer, env = create_bc_trainer(dummy_config)
69+
# Test add_experiences
70+
returned_braininfo = env.step()
71+
trainer.add_experiences(
72+
returned_braininfo, returned_braininfo, {}
73+
) # Take action outputs is not used
74+
for agent_id in returned_braininfo["Ball3DBrain"].agents:
75+
assert trainer.evaluation_buffer[agent_id].last_brain_info is not None
76+
assert trainer.episode_steps[agent_id] > 0
77+
assert trainer.cumulative_rewards[agent_id] > 0
78+
# Test process_experiences by setting done
79+
returned_braininfo["Ball3DBrain"].local_done = 12 * [True]
80+
trainer.process_experiences(returned_braininfo, returned_braininfo)
81+
for agent_id in returned_braininfo["Ball3DBrain"].agents:
82+
assert trainer.episode_steps[agent_id] == 0
83+
assert trainer.cumulative_rewards[agent_id] == 0
84+
85+
86+
def test_bc_trainer_end_episode(dummy_config):
87+
trainer, env = create_bc_trainer(dummy_config)
88+
returned_braininfo = env.step()
89+
trainer.add_experiences(
90+
returned_braininfo, returned_braininfo, {}
91+
) # Take action outputs is not used
92+
trainer.process_experiences(returned_braininfo, returned_braininfo)
93+
# Should set everything to 0
94+
trainer.end_episode()
95+
for agent_id in returned_braininfo["Ball3DBrain"].agents:
96+
assert trainer.episode_steps[agent_id] == 0
97+
assert trainer.cumulative_rewards[agent_id] == 0
98+
99+
58100
@mock.patch("mlagents.envs.UnityEnvironment.executable_launcher")
59101
@mock.patch("mlagents.envs.UnityEnvironment.get_communicator")
60102
def test_bc_policy_evaluate(mock_communicator, mock_launcher, dummy_config):

0 commit comments

Comments
 (0)