@@ -65,23 +65,6 @@ def __init__(self, num_dummies=3):
65
65
self .primary_agent = None # to be set explicitly
66
66
self .enforce_deadline = False
67
67
68
- # Step data (updated after each environment step)
69
- self .step_data = {
70
- 't' : 0 ,
71
- 'deadline' : 0 ,
72
- 'waypoint' : None ,
73
- 'inputs' : None ,
74
- 'action' : None ,
75
- 'reward' : 0.0
76
- }
77
-
78
- # Trial data (updated at the end of each trial)
79
- self .trial_data = {
80
- 'net_reward' : 0.0 , # total reward earned in current trial
81
- 'final_deadline' : None , # deadline value (time remaining)
82
- 'success' : 0 # whether the agent reached the destination in time
83
- }
84
-
85
68
def create_agent (self , agent_class , * args , ** kwargs ):
86
69
agent = agent_class (self , * args , ** kwargs )
87
70
self .agent_states [agent ] = {'location' : random .choice (self .intersections .keys ()), 'heading' : (0 , 1 )}
@@ -120,11 +103,6 @@ def reset(self):
120
103
'destination' : destination if agent is self .primary_agent else None ,
121
104
'deadline' : deadline if agent is self .primary_agent else None }
122
105
agent .reset (destination = (destination if agent is self .primary_agent else None ))
123
- if agent is self .primary_agent :
124
- # Reset metrics for this trial (step data will be set during the step)
125
- self .trial_data ['net_reward' ] = 0.0
126
- self .trial_data ['final_deadline' ] = deadline
127
- self .trial_data ['success' ] = 0
128
106
129
107
def step (self ):
130
108
#print "Environment.step(): t = {}".format(self.t) # [debug]
@@ -231,22 +209,11 @@ def act(self, agent, action):
231
209
if state ['location' ] == state ['destination' ]:
232
210
if state ['deadline' ] >= 0 :
233
211
reward += 10 # bonus
234
- self .trial_data ['success' ] = 1
235
212
self .done = True
236
213
print "Environment.act(): Primary agent has reached destination!" # [debug]
237
214
self .status_text = "state: {}\n action: {}\n reward: {}" .format (agent .get_state (), action , reward )
238
215
#print "Environment.act() [POST]: location: {}, heading: {}, action: {}, reward: {}".format(location, heading, action, reward) # [debug]
239
216
240
- # Update metrics
241
- self .step_data ['t' ] = self .t
242
- self .trial_data ['final_deadline' ] = self .step_data ['deadline' ] = state ['deadline' ]
243
- self .step_data ['waypoint' ] = agent .get_next_waypoint ()
244
- self .step_data ['inputs' ] = inputs
245
- self .step_data ['action' ] = action
246
- self .step_data ['reward' ] = reward
247
- self .trial_data ['net_reward' ] += reward
248
- print "Environment.act(): Step data: {}" .format (self .step_data ) # [debug]
249
-
250
217
return reward
251
218
252
219
def compute_dist (self , a , b ):
0 commit comments