+% Manuel Meraz
+% EECS 270 Robot Algorithms
+% Markov Decision Processes Controller for Simple Inverted Pendulum
+
+% S = set of states, where row 1 is the discretized set of thetas and row 2
+% is the discretized set of thetaDots (each row has length numStates)
+
+% vS = set of state vectors, with one column vector for every possible
+% (theta, thetaDot) combination, so it has numStates^2 columns
+
+% If a previous policy has been generated, load it from the Policies.mat file
try
    Policies = load('Policies.mat', 'Policies').Policies;
catch

noise.covariance = eye(dimensions) * 0.1;
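% (eye(dimensions)*0.1: isotropic noise covariance, i.e. an independent
% variance of 0.1 on each state dimension)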

% State is a struct containing all state parameters
-state.stateBounds = [-pi/4, pi/4; -1, 1];
+setPoint = pi/2;
+state.stateBounds = [setPoint-pi/4, setPoint+pi/4; -5, 5];
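+% Keep theta within pi/4 of the upright set point pi/2; the thetaDot bounds
+% are widened from the previous [-1, 1] to [-5, 5]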
state.numStates = 3;

A = [-100, -10, -2, 0, 2, 10, 100];
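% (A is presumably the discrete action set: the candidate control inputs the
% policy can choose from at each step)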

% Set of states
-S = [linspace(-pi/4, pi/4, state.numStates); linspace(-5, 5, state.numStates)];
+S = [linspace(state.stateBounds(1,1), state.stateBounds(1,2), state.numStates); ...
+     linspace(state.stateBounds(2,1), state.stateBounds(2,2), state.numStates)];

% Generate all possible state vectors
[Thetas, ThetaDots] = meshgrid(S(1,:), S(2,:));
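% (vS is presumably assembled from these grids column-wise, e.g.
% vS = [Thetas(:)'; ThetaDots(:)'], in code elided from this diff)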

end

-theta = 0.0001;
+theta = pi/6;
thetaDot = 0;
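+% Initial state: theta = pi/6 at rest; note pi/6 lies below the lower bound
+% setPoint - pi/4 = pi/4, so the controller must recover immediately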
data(1,1) = theta;
-%data(1,2) = thetaDot;
-%data(1,3) = getReward([theta;thetaDot]);
+data(1,2) = thetaDot;
+data(1,3) = getReward([theta;thetaDot]);

+u = 0;
s = mapToDiscreteValue(S, [theta;thetaDot]);
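% (mapToDiscreteValue presumably snaps the continuous [theta; thetaDot]
% onto the nearest grid point in S)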
e = 0.00001;
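% (e appears to be a small tolerance for the floating-point comparisons used
% when matching the discretized state against policy entries)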
for i = 1:length(Policy)

end
end

+i = 2;
+FAIL = false;
+maxThetaDot = thetaDot;
+minThetaDot = thetaDot;
+meanTheta = theta;
+maxTheta = theta;
+minTheta = theta;
for i = 2:maxIterations
    sPrime = simulateOneStep(theta, thetaDot, dt, u);
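    % (simulateOneStep presumably integrates the pendulum dynamics forward
    % by dt under control input u, returning the next [theta; thetaDot])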
-    theta = sPrime(1,1);
+    theta = sPrime(1,1);
+    meanTheta += theta;
+
+    % Acquire max and min theta
+    if theta > maxTheta
+        maxTheta = theta;
+    end
+
+    if theta < minTheta
+        minTheta = theta;
+    end
+
+    if theta < setPoint - pi/4 || theta > setPoint + pi/4
+        FAIL = true;
+        break;
+    end
    thetaDot = sPrime(2,1);
+
+    % Acquire max and min thetaDot
+    if thetaDot > maxThetaDot
+        maxThetaDot = thetaDot;
+    end
+
+    if thetaDot < minThetaDot
+        minThetaDot = thetaDot;
+    end
+
    data(i,1) = theta;
-    %data(i,2) = thetaDot;
-    %data(i, 3) = getReward([theta;thetaDot]);
+    data(i,2) = thetaDot;
+    data(i,3) = getReward([theta;thetaDot]);

    sPrime = mapToDiscreteValue(S, [theta;thetaDot]);
    thetaD = sPrime(1,1);

end
end

-setenv("GNUTERM","qt");
+meanTheta /= i;
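+% Exactly i theta samples were accumulated (the loop can break early),
+% hence the division by i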
+
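+% No trailing semicolon, so Octave echoes the learned policy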
+Policy
+
+if FAIL
+    fprintf('Pendulum went out of bounds after %d iterations!\n\n\n', i)
+else
+    fprintf('Pendulum was controlled beautifully for %d iterations!\n\n\n', i)
+end
+
+fprintf('Max Theta: %f\nMin Theta: %f\nMax ThetaDot: %f\nMin ThetaDot: %f\nMean Theta: %f\n', ...
+        maxTheta, minTheta, maxThetaDot, minThetaDot, meanTheta)
+
+fprintf('\n\n\n')
+
figure('Position',[0,0,1300,700]);
h = plot(data, 'linewidth', 1);
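% plot(data) draws one line per column: theta, thetaDot, and reward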
set(gca, "linewidth", 4, "fontsize", 12)
title("Inverted Pendulum controlled with MDP");
%legend('Theta', 'ThetaDot', 'Force', 'Reward');
-%legend('Theta', 'ThetaDot', 'Reward');
-legend('Theta');
+legend('Theta', 'ThetaDot', 'Reward');
+%legend('Theta');
pause();