Merge pull request udacity#40 from udacity/smartcab

lcrucks · web-flow · commit 5d3de7f4afbb · 2016-06-23T13:40:26.000-07:00
smartcab: Fix penalty logic and add metrics reporting (experimental)
diff --git a/projects/smartcab/smartcab/analysis.py b/projects/smartcab/smartcab/analysis.py
@@ -0,0 +1,116 @@
+import time
+from collections import OrderedDict
+
+import numpy as np
+import pandas as pd
+import matplotlib.pyplot as plt
+
+class Metric(object):
+    """Named sequence of x and y values, with optional plotting helpers.
+
+    Samples are stored in two parallel lists, `xdata` and `ydata`. Once
+    `plot()` has been called, `plot_obj` holds the single object unpacked
+    from the return value of `ax.plot()`; `refresh()` pushes the current
+    samples into it.
+    """
+
+    def __init__(self, name):
+        """Create an empty metric; `name` is also used as the plot label."""
+        self.name = name
+        self.plot_obj = None  # stays None until plot() is called
+        self.reset()
+
+    def collect(self, x, y):
+        """Append a single (x, y) sample."""
+        self.xdata.append(x)
+        self.ydata.append(y)
+
+    def plot(self, ax):
+        """Draw the samples on `ax` with the 'o-' format and keep the resulting plot object."""
+        self.plot_obj, = ax.plot(self.xdata, self.ydata, 'o-', label=self.name)
+
+    def refresh(self):
+        """Push the current samples into the plotted object.
+
+        Does nothing if the metric has not been plotted yet, so callers may
+        refresh unconditionally.
+        """
+        if self.plot_obj is None:
+            return
+        self.plot_obj.set_data(self.xdata, self.ydata)
+
+    def reset(self):
+        """Discard all collected samples (the plot object, if any, is kept)."""
+        self.xdata = []
+        self.ydata = []
+
+
+class Reporter(object):
+    """Collect metrics, analyze and report summary statistics.
+
+    Metrics live in `self.metrics`, an OrderedDict mapping name -> Metric in
+    insertion order. When `live_plot` is true, a matplotlib figure is created
+    during construction and its lines are updated as samples arrive.
+    """
+
+    def __init__(self, metrics=None, live_plot=False):
+        """Create the initial metrics and, if requested, the live plot.
+
+        metrics: iterable of metric names to create up front (default: none).
+        live_plot: if true, turn on matplotlib interactive mode (when not
+            already on) and draw the figure immediately.
+        """
+        if metrics is None:  # avoid sharing one mutable default list across calls
+            metrics = []
+        self.metrics = OrderedDict()
+        self.live_plot = live_plot
+        self.fig = None  # figure and axes are created on the first plot() call
+        self.ax = None
+
+        for name in metrics:
+            self.metrics[name] = Metric(name)
+
+        if self.live_plot:
+            if not plt.isinteractive():
+                plt.ion()
+            self.plot()
+
+        print "Reporter.__init__(): Initialized with metrics: {}".format(metrics)  # [debug]
+
+    def collect(self, name, x, y):
+        """Record sample (x, y) for metric `name`, creating the metric on first use."""
+        if name not in self.metrics:
+            self.metrics[name] = Metric(name)
+            if self.live_plot:
+                self.metrics[name].plot(self.ax)
+                self.ax.legend()  # add new metric to legend
+            print "Reporter.collect(): New metric added: {}".format(name)  # [debug]
+        self.metrics[name].collect(x, y)
+        if self.live_plot:
+            self.metrics[name].refresh()
+
+    def plot(self):
+        """Create the figure on the first call, otherwise update existing lines; then redraw."""
+        if self.fig is None or self.ax is None:
+            self.fig, self.ax = plt.subplots()
+            for name in self.metrics:
+                self.metrics[name].plot(self.ax)
+            self.ax.grid()
+            self.ax.legend()
+        else:
+            for name in self.metrics:
+                self.metrics[name].refresh()
+        self.refresh_plot()
+
+    def refresh_plot(self):
+        """Rescale the axes to the current data and redraw the figure.
+
+        Does nothing if no figure has been created yet (e.g. live_plot is
+        off and plot() was never called), since there is nothing to redraw.
+        """
+        if self.fig is None or self.ax is None:
+            return
+        self.ax.relim()
+        self.ax.autoscale_view()
+        self.fig.canvas.draw()
+        self.fig.canvas.flush_events()
+        plt.draw()
+
+    def show_plot(self):
+        """Turn interactive mode off, bring the plot up to date and call plt.show()."""
+        if plt.isinteractive():
+            plt.ioff()
+        self.plot()
+        plt.show()
+
+    def summary(self):
+        """Return a list with one pandas Series per metric, in insertion order.
+
+        Each Series holds the metric's y values, indexed by its x values and
+        named after the metric.
+        """
+        return [pd.Series(metric.ydata, index=metric.xdata, name=name) for name, metric in self.metrics.items()]
+
+    def reset(self):
+        """Clear the samples of every metric, updating plotted lines when live plotting."""
+        for name in self.metrics:
+            self.metrics[name].reset()
+            if self.live_plot:
+                self.metrics[name].refresh()
+
+
+def test_reporter():
+    """Demo/smoke test: feed random samples into a live-plotting Reporter and print a summary.
+
+    Collects a 'reward' sample on each of 100 steps and a 'flubber' sample on
+    every step where step % 10 == 1, then prints per-metric statistics and
+    shows the final plot.
+    """
+    plt.ion()
+    reporter = Reporter(metrics=['reward', 'flubber'], live_plot=True)
+    for step in xrange(100):
+        reporter.collect('reward', step, np.random.random())
+        is_flubber_step = (step % 10 == 1)
+        if is_flubber_step:
+            flubber_value = np.random.random() * 2 + 1
+            reporter.collect('flubber', step, flubber_value)
+            reporter.refresh_plot()
+        time.sleep(0.01)
+    reporter.plot()
+
+    # Report statistics for every collected metric
+    all_series = reporter.summary()
+    print "Summary ({} metrics):-".format(len(all_series))
+    for series in all_series:
+        print "Name: {}, samples: {}, type: {}".format(series.name, len(series), series.dtype)
+        print "Mean: {}, s.d.: {}".format(series.mean(), series.std())
+
+    # Leave interactive mode before the final show() call
+    plt.ioff()
+    plt.show()
+
+
+# Run the Reporter demo only when this module is executed as a script (not on import).
+if __name__ == '__main__':
+    test_reporter()
diff --git a/projects/smartcab/smartcab/environment.py b/projects/smartcab/smartcab/environment.py
@@ -31,7 +31,10 @@ class Environment(object):
     valid_headings = [(1, 0), (0, -1), (-1, 0), (0, 1)]  # ENWS
     hard_time_limit = -100  # even if enforce_deadline is False, end trial when deadline reaches this value (to avoid deadlocks)
 
-    def __init__(self):
+    def __init__(self, num_dummies=3):
+        self.num_dummies = num_dummies  # no. of dummy agents
+        
+        # Initialize simulation variables
         self.done = False
         self.t = 0
         self.agent_states = OrderedDict()
@@ -55,14 +58,30 @@ def __init__(self):
                     self.roads.append((a, b))
 
         # Dummy agents
-        self.num_dummies = 3  # no. of dummy agents
         for i in xrange(self.num_dummies):
             self.create_agent(DummyAgent)
 
-        # Primary agent
+        # Primary agent and associated parameters
         self.primary_agent = None  # to be set explicitly
         self.enforce_deadline = False
 
+        # Step data (updated after each environment step)
+        self.step_data = {
+            't': 0,
+            'deadline': 0,
+            'waypoint': None,
+            'inputs': None,
+            'action': None,
+            'reward': 0.0
+        }
+
+        # Trial data (updated at the end of each trial)
+        self.trial_data = {
+            'net_reward': 0.0,  # total reward earned in current trial
+            'final_deadline': None,  # deadline value (time remaining)
+            'success': 0  # whether the agent reached the destination in time
+        }
+
     def create_agent(self, agent_class, *args, **kwargs):
         agent = agent_class(self, *args, **kwargs)
         self.agent_states[agent] = {'location': random.choice(self.intersections.keys()), 'heading': (0, 1)}
@@ -101,6 +120,11 @@ def reset(self):
                 'destination': destination if agent is self.primary_agent else None,
                 'deadline': deadline if agent is self.primary_agent else None}
             agent.reset(destination=(destination if agent is self.primary_agent else None))
+            if agent is self.primary_agent:
+                # Reset metrics for this trial (step data will be set during the step)
+                self.trial_data['net_reward'] = 0.0
+                self.trial_data['final_deadline'] = deadline
+                self.trial_data['success'] = 0
 
     def step(self):
         #print "Environment.step(): t = {}".format(self.t)  # [debug]
@@ -113,7 +137,9 @@ def step(self):
         for agent in self.agent_states.iterkeys():
             agent.update(self.t)
 
-        self.t += 1
+        if self.done:
+            return  # primary agent might have reached destination
+
         if self.primary_agent is not None:
             agent_deadline = self.agent_states[self.primary_agent]['deadline']
             if agent_deadline <= self.hard_time_limit:
@@ -124,6 +150,8 @@ def step(self):
                 print "Environment.step(): Primary agent ran out of time! Trial aborted."
             self.agent_states[self.primary_agent]['deadline'] = agent_deadline - 1
 
+        self.t += 1
+
     def sense(self, agent):
         assert agent in self.agent_states, "Unknown agent!"
 
@@ -150,7 +178,7 @@ def sense(self, agent):
                 if left != 'forward':  # we don't want to override left == 'forward'
                     left = other_heading
 
-        return {'light': light, 'oncoming': oncoming, 'left': left, 'right': right}  # TODO: make this a namedtuple
+        return {'light': light, 'oncoming': oncoming, 'left': left, 'right': right}
 
     def get_deadline(self, agent):
         return self.agent_states[agent]['deadline'] if agent is self.primary_agent else None
@@ -163,7 +191,7 @@ def act(self, agent, action):
         location = state['location']
         heading = state['heading']
         light = 'green' if (self.intersections[location].state and heading[1] != 0) or ((not self.intersections[location].state) and heading[0] != 0) else 'red'
-        sense = self.sense(agent)
+        inputs = self.sense(agent)
 
         # Move agent if within bounds and obeys traffic rules
         reward = 0  # reward/penalty
@@ -172,12 +200,12 @@ def act(self, agent, action):
             if light != 'green':
                 move_okay = False
         elif action == 'left':
-            if light == 'green' and (sense['oncoming'] == None or sense['oncoming'] == 'left'):
+            if light == 'green' and (inputs['oncoming'] == None or inputs['oncoming'] == 'left'):
                 heading = (heading[1], -heading[0])
             else:
                 move_okay = False
         elif action == 'right':
-            if light == 'green' or sense['left'] != 'straight':
+            if light == 'green' or (inputs['oncoming'] != 'left' and inputs['left'] != 'forward'):
                 heading = (-heading[1], heading[0])
             else:
                 move_okay = False
@@ -203,11 +231,22 @@ def act(self, agent, action):
             if state['location'] == state['destination']:
                 if state['deadline'] >= 0:
                     reward += 10  # bonus
+                    self.trial_data['success'] = 1
                 self.done = True
                 print "Environment.act(): Primary agent has reached destination!"  # [debug]
             self.status_text = "state: {}\naction: {}\nreward: {}".format(agent.get_state(), action, reward)
             #print "Environment.act() [POST]: location: {}, heading: {}, action: {}, reward: {}".format(location, heading, action, reward)  # [debug]
 
+            # Update metrics
+            self.step_data['t'] = self.t
+            self.trial_data['final_deadline'] = self.step_data['deadline'] = state['deadline']
+            self.step_data['waypoint'] = agent.get_next_waypoint()
+            self.step_data['inputs'] = inputs
+            self.step_data['action'] = action
+            self.step_data['reward'] = reward
+            self.trial_data['net_reward'] += reward
+            print "Environment.act(): Step data: {}".format(self.step_data)  # [debug]
+
         return reward
 
     def compute_dist(self, a, b):
diff --git a/projects/smartcab/smartcab/simulator.py b/projects/smartcab/smartcab/simulator.py
@@ -3,6 +3,10 @@
 import random
 import importlib
 
+import numpy as np
+
+from analysis import Reporter
+
 class Simulator(object):
     """Simulates agents in a dynamic smartcab environment.
 
@@ -21,7 +25,7 @@ class Simulator(object):
         'orange'  : (255, 128,   0)
     }
 
-    def __init__(self, env, size=None, update_delay=1.0, display=True):
+    def __init__(self, env, size=None, update_delay=1.0, display=True, live_plot=False):
         self.env = env
         self.size = size if size is not None else ((self.env.grid_size[0] + 1) * self.env.block_size, (self.env.grid_size[1] + 1) * self.env.block_size)
         self.width, self.height = self.size
@@ -34,7 +38,7 @@ def __init__(self, env, size=None, update_delay=1.0, display=True):
         self.start_time = None
         self.current_time = 0.0
         self.last_updated = 0.0
-        self.update_delay = update_delay
+        self.update_delay = update_delay  # duration between each step (in secs)
 
         self.display = display
         if self.display:
@@ -59,8 +63,14 @@ def __init__(self, env, size=None, update_delay=1.0, display=True):
                 self.display = False
                 print "Simulator.__init__(): Error initializing GUI objects; display disabled.\n{}: {}".format(e.__class__.__name__, e)
 
+        # Setup metrics to report
+        self.live_plot = live_plot
+        self.rep = Reporter(metrics=['net_reward', 'avg_net_reward', 'final_deadline', 'success'], live_plot=self.live_plot)
+        self.avg_net_reward_window = 10
+
     def run(self, n_trials=1):
         self.quit = False
+        self.rep.reset()
         for trial in xrange(n_trials):
             print "Simulator.run(): Trial {}".format(trial)  # [debug]
             self.env.reset()
@@ -90,6 +100,7 @@ def run(self, n_trials=1):
                     # Update environment
                     if self.current_time - self.last_updated >= self.update_delay:
                         self.env.step()
+                        # TODO: Log step data
                         self.last_updated = self.current_time
 
                     # Render GUI and sleep
@@ -105,6 +116,22 @@ def run(self, n_trials=1):
             if self.quit:
                 break
 
+            # Collect/update metrics
+            self.rep.collect('net_reward', trial, self.env.trial_data['net_reward'])  # total reward obtained in this trial
+            self.rep.collect('avg_net_reward', trial, np.mean(self.rep.metrics['net_reward'].ydata[-self.avg_net_reward_window:]))  # rolling mean of reward
+            self.rep.collect('final_deadline', trial, self.env.trial_data['final_deadline'])  # final deadline value (time remaining)
+            self.rep.collect('success', trial, self.env.trial_data['success'])
+            if self.live_plot:
+                self.rep.refresh_plot()  # autoscales axes, draws stuff and flushes events
+
+        # Report final metrics
+        if self.display:
+            self.pygame.display.quit()  # need to shutdown pygame before showing metrics plot
+            # TODO: Figure out why having both game and plot displays makes things crash!
+
+        if self.live_plot:
+            self.rep.show_plot()  # holds till user closes plot window
+
     def render(self):
         # Clear screen
         self.screen.fill(self.bg_color)