18
18
def dummy_config ():
19
19
return yaml .safe_load (
20
20
"""
21
- hidden_units: 128
21
+ hidden_units: 32
22
22
learning_rate: 3.0e-4
23
- num_layers: 2
23
+ num_layers: 1
24
24
use_recurrent: false
25
25
sequence_length: 32
26
26
memory_size: 32
@@ -32,8 +32,8 @@ def dummy_config():
32
32
)
33
33
34
34
35
- @ mock . patch ( "mlagents.envs.UnityEnvironment" )
36
- def test_bc_trainer ( mock_env , dummy_config ):
35
+ def create_bc_trainer ( dummy_config ):
36
+ mock_env = mock . Mock ()
37
37
mock_brain = mb .create_mock_3dball_brain ()
38
38
mock_braininfo = mb .create_mock_braininfo (num_agents = 12 , num_vector_observations = 8 )
39
39
mb .setup_mock_unityenvironment (mock_env , mock_brain , mock_braininfo )
@@ -49,12 +49,54 @@ def test_bc_trainer(mock_env, dummy_config):
49
49
mock_brain , trainer_parameters , training = True , load = False , seed = 0 , run_id = 0
50
50
)
51
51
trainer .demonstration_buffer = mb .simulate_rollout (env , trainer .policy , 100 )
52
+ return trainer , env
53
+
54
+
55
def test_bc_trainer_step(dummy_config):
    """Check step tracking and the policy update of a BC trainer.

    The trainer is obtained from ``create_bc_trainer``. The test asserts that
    ``get_step`` reads 0 before any work is done, that ``update_policy``
    leaves at least one entry under ``stats["Losses/Cloning Loss"]``, and that
    ``increment_step(1)`` results in ``step == 1``.
    """
    bc_trainer, _env = create_bc_trainer(dummy_config)

    # Nothing has been stepped yet, so the counter must read zero.
    assert bc_trainer.get_step == 0

    # A policy update has to record a cloning-loss statistic.
    bc_trainer.update_policy()
    cloning_losses = bc_trainer.stats["Losses/Cloning Loss"]
    assert len(cloning_losses) > 0

    # One increment of size 1 moves the counter to 1.
    bc_trainer.increment_step(1)
    assert bc_trainer.step == 1
56
65
57
66
67
def test_bc_trainer_add_proc_experiences(dummy_config):
    """Exercise ``add_experiences`` and then ``process_experiences``.

    After adding one environment step, every agent listed for the
    "Ball3DBrain" brain must have a stored ``last_brain_info`` and positive
    ``episode_steps`` / ``cumulative_rewards``. After all agents are flagged
    as done and the experiences are processed, both per-agent counters must
    be back at 0.
    """
    brain_name = "Ball3DBrain"
    bc_trainer, environment = create_bc_trainer(dummy_config)

    step_info = environment.step()
    # The same brain info serves as both current and next info; the third
    # argument (take-action outputs) is not used, hence the empty dict.
    bc_trainer.add_experiences(step_info, step_info, {})
    for agent in step_info[brain_name].agents:
        assert bc_trainer.evaluation_buffer[agent].last_brain_info is not None
        assert bc_trainer.episode_steps[agent] > 0
        assert bc_trainer.cumulative_rewards[agent] > 0

    # Flag the agents as done so that processing resets their counters.
    step_info[brain_name].local_done = 12 * [True]
    bc_trainer.process_experiences(step_info, step_info)
    for agent in step_info[brain_name].agents:
        assert bc_trainer.episode_steps[agent] == 0
        assert bc_trainer.cumulative_rewards[agent] == 0
84
+
85
+
86
def test_bc_trainer_end_episode(dummy_config):
    """Check that ``end_episode`` zeroes the per-agent episode counters.

    One environment step is added and processed first; after
    ``end_episode`` is called, ``episode_steps`` and ``cumulative_rewards``
    must be 0 for every agent listed for the "Ball3DBrain" brain.
    """
    brain_name = "Ball3DBrain"
    bc_trainer, environment = create_bc_trainer(dummy_config)

    step_info = environment.step()
    # The third argument (take-action outputs) is not used, hence the
    # empty dict.
    bc_trainer.add_experiences(step_info, step_info, {})
    bc_trainer.process_experiences(step_info, step_info)

    # Ending the episode should set everything to 0.
    bc_trainer.end_episode()
    for agent in step_info[brain_name].agents:
        assert bc_trainer.episode_steps[agent] == 0
        assert bc_trainer.cumulative_rewards[agent] == 0
98
+
99
+
58
100
@mock .patch ("mlagents.envs.UnityEnvironment.executable_launcher" )
59
101
@mock .patch ("mlagents.envs.UnityEnvironment.get_communicator" )
60
102
def test_bc_policy_evaluate (mock_communicator , mock_launcher , dummy_config ):
0 commit comments