+% Manuel Meraz
+% EECS 270 Robot Algorithms
+% Markov Decision Processes Controller for Simple Inverted Pendulum
+
+% S = set of states, where row 1 is the discretized set of thetas and row 2
+% is the discretized set of thetaDots (each row has length numStates)
+
+% vS = set of state vectors, with one column vector for every possible
+% (theta, thetaDot) combination, so it has numStates^2 columns
+
+% If a previous policy has been generated, load it from the Policies.mat file
try
    Policies = load('Policies.mat', 'Policies').Policies;
catch

noise.covariance = eye(dimensions) * 0.1;
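% (eye(dimensions)*0.1: isotropic noise covariance, i.e. an independent
% variance of 0.1 on each state dimension)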

% State is a struct containing all state parameters
-state.stateBounds = [-pi/4, pi/4; -1, 1];
+setPoint = pi/2;
+state.stateBounds = [setPoint-pi/4, setPoint+pi/4; -5, 5];
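+% Keep theta within pi/4 of the upright set point pi/2; the thetaDot bounds
+% are widened from the previous [-1, 1] to [-5, 5]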
state.numStates = 3;

A = [-100, -10, -2, 0, 2, 10, 100];
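% (A is presumably the discrete action set: the candidate control inputs the
% policy can choose from at each step)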

% Set of states
-S = [linspace(-pi/4, pi/4, state.numStates); linspace(-5, 5, state.numStates)];
+S = [linspace(state.stateBounds(1,1), state.stateBounds(1,2), state.numStates); ...
+     linspace(state.stateBounds(2,1), state.stateBounds(2,2), state.numStates)];

% Generate all possible state vectors
[Thetas, ThetaDots] = meshgrid(S(1,:), S(2,:));
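% (vS is presumably assembled from these grids column-wise, e.g.
% vS = [Thetas(:)'; ThetaDots(:)'], in code elided from this diff)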

end

-theta = 0.0001;
+theta = pi/6;
thetaDot = 0;
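+% Initial state: theta = pi/6 at rest; note pi/6 lies below the lower bound
+% setPoint - pi/4 = pi/4, so the controller must recover immediately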
data(1,1) = theta;
-%data(1,2) = thetaDot;
-%data(1,3) = getReward([theta;thetaDot]);
+data(1,2) = thetaDot;
+data(1,3) = getReward([theta;thetaDot]);

+u = 0;
s = mapToDiscreteValue(S, [theta;thetaDot]);
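% (mapToDiscreteValue presumably snaps the continuous [theta; thetaDot]
% onto the nearest grid point in S)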
e = 0.00001;
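% (e appears to be a small tolerance for the floating-point comparisons used
% when matching the discretized state against policy entries)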
for i = 1:length(Policy)

end
end

+i = 2;
+FAIL = false;
+maxThetaDot = thetaDot;
+minThetaDot = thetaDot;
+meanTheta = theta;
+maxTheta = theta;
+minTheta = theta;
for i = 2:maxIterations
    sPrime = simulateOneStep(theta, thetaDot, dt, u);
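    % (simulateOneStep presumably integrates the pendulum dynamics forward
    % by dt under control input u, returning the next [theta; thetaDot])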
-    theta = sPrime(1,1);
+    theta = sPrime(1,1);
+    meanTheta += theta;
+
+    % Acquire max and min theta
+    if theta > maxTheta
+        maxTheta = theta;
+    end
+
+    if theta < minTheta
+        minTheta = theta;
+    end
+
+    if theta < setPoint - pi/4 || theta > setPoint + pi/4
+        FAIL = true;
+        break;
+    end
    thetaDot = sPrime(2,1);
+
+    % Acquire max and min thetaDot
+    if thetaDot > maxThetaDot
+        maxThetaDot = thetaDot;
+    end
+
+    if thetaDot < minThetaDot
+        minThetaDot = thetaDot;
+    end
+
    data(i,1) = theta;
-    %data(i,2) = thetaDot;
-    %data(i, 3) = getReward([theta;thetaDot]);
+    data(i,2) = thetaDot;
+    data(i,3) = getReward([theta;thetaDot]);

    sPrime = mapToDiscreteValue(S, [theta;thetaDot]);
    thetaD = sPrime(1,1);

end
end

-setenv("GNUTERM","qt");
+meanTheta /= i;
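+% Exactly i theta samples were accumulated (the loop can break early),
+% hence the division by i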
+
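+% No trailing semicolon, so Octave echoes the learned policy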
+Policy
+
+if FAIL
+    fprintf('Pendulum went out of bounds after %d iterations!\n\n\n', i)
+else
+    fprintf('Pendulum was controlled beautifully for %d iterations!\n\n\n', i)
+end
+
+fprintf('Max Theta: %f\nMin Theta: %f\nMax ThetaDot: %f\nMin ThetaDot: %f\nMean Theta: %f\n', ...
+        maxTheta, minTheta, maxThetaDot, minThetaDot, meanTheta)
+
+fprintf('\n\n\n')
+
figure('Position',[0,0,1300,700]);
h = plot(data, 'linewidth', 1);
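% plot(data) draws one line per column: theta, thetaDot, and reward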
set(gca, "linewidth", 4, "fontsize", 12)
title("Inverted Pendulum controlled with MDP");
%legend('Theta', 'ThetaDot', 'Force', 'Reward');
-%legend('Theta', 'ThetaDot', 'Reward');
-legend('Theta');
+legend('Theta', 'ThetaDot', 'Reward');
+%legend('Theta');
pause();