Skip to content

Commit dc08252

Browse files
committed
setPoint implemented
1 parent c3aa97d commit dc08252

File tree

6 files changed

+77
-21
lines changed

6 files changed

+77
-21
lines changed

Policies.mat

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
# Created by Octave 4.2.1, Mon Oct 09 15:43:44 2017 PDT <manny@dixy>
1+
# Created by Octave 4.2.1, Mon Oct 09 17:20:14 2017 PDT <manny@dixy>
22
# name: Policies
33
# type: cell
44
# rows: 1
@@ -21,9 +21,9 @@
2121
# type: matrix
2222
# rows: 3
2323
# columns: 9
24-
-0.7853981633974483 -0.7853981633974483 -0.7853981633974483 0 0 0 0.7853981633974483 0.7853981633974483 0.7853981633974483
24+
0.7853981633974483 0.7853981633974483 0.7853981633974483 1.570796326794897 1.570796326794897 1.570796326794897 2.356194490192345 2.356194490192345 2.356194490192345
2525
-5 0 5 -5 0 5 -5 0 5
26-
100 100 -100 100 -10 -100 -100 -100 -100
26+
100 100 -100 -100 -100 -100 100 100 -100
2727

2828

2929

StablePendulum.ps

-101 KB
Binary file not shown.

getReward.m

Lines changed: 5 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,22 +1,20 @@
11
function r = getReward(s)
22
theta = s(1,1);
33
thetaDot = s(2,1);
4+
setState = pi/2;
45
r = 0;
56

6-
if abs(theta) > pi/4
7+
if theta > setState + pi/4 || theta < setState - pi/4
78
r = -3;
8-
elseif abs(theta) > pi/5
9+
elseif theta > setState + pi/5 || theta < setState - pi/5
910
r = -2;
10-
elseif abs(theta) > pi/6
11+
elseif theta > setState + pi/6 || theta < setState - pi/5
1112
r = -1;
1213
else
1314
r = 1;
1415
end
1516

16-
if thetaDot < 0 && thetaDot > 0
17-
r += 1
18-
19-
if theta <= 0.1 && theta >= -0.1
17+
if theta <= setState + 0.1 && theta >= setState -0.1
2018
r *= 3;
2119
end
2220

main.m

Lines changed: 69 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,15 @@
1+
% Manuel Meraz
2+
% EECS 270 Robot Algorithms
3+
% Markov Decision Process Controller for Simple Inverted Pendulum
4+
% S = Set of states, where row 1 is the discretized set of thetas and row 2
5+
% is the discretized set of thetaDots (length is numStates)
6+
7+
% vS = Set of states where every possible combination has a given column vector
8+
% where its length is numStates^2
9+
10+
%
11+
12+
% If previous policy has been generated, load it from the Policies.mat file
113
try
214
Policies = load('Policies.mat', 'Policies').Policies;
315
catch
@@ -16,7 +28,8 @@
1628
noise.covariance = eye(dimensions) * 0.1;
1729

1830
% State is a struct containing all state parameters
19-
state.stateBounds = [-pi/4, pi/4; -1, 1];
31+
setPoint = pi/2;
32+
state.stateBounds = [setPoint-pi/4, setPoint+pi/4; -5, 5];
2033
state.numStates = 3;
2134

2235

@@ -32,7 +45,8 @@
3245
A = [-100, -10, -2, 0, 2, 10, 100];
3346

3447
% Set of states
35-
S = [linspace(-pi/4, pi/4, state.numStates); linspace(-5, 5, state.numStates)];
48+
S = [linspace(state.stateBounds(1,1), state.stateBounds(1,2), state.numStates);...
49+
linspace(state.stateBounds(2,1), state.stateBounds(2,2), state.numStates)];
3650

3751
% Generate all possible state vectors
3852
[Thetas, ThetaDots] = meshgrid(S(1,:), S(2,:));
@@ -51,12 +65,13 @@
5165

5266
end
5367

54-
theta = 0.0001;
68+
theta = pi/6;
5569
thetaDot = 0;
5670
data(1,1) = theta;
57-
%data(1,2) = thetaDot;
58-
%data(1,3) = getReward([theta;thetaDot]);
71+
data(1,2) = thetaDot;
72+
data(1,3) = getReward([theta;thetaDot]);
5973

74+
u = 0;
6075
s = mapToDiscreteValue(S, [theta;thetaDot]);
6176
e = 0.00001;
6277
for i = 1:length(Policy)
@@ -69,13 +84,41 @@
6984
end
7085
end
7186

87+
i = 2;
88+
FAIL = false;
89+
maxThetaDot = minThetaDot = thetaDot;
90+
meanTheta = maxTheta = minTheta = theta;
7291
for i = 2:maxIterations
7392
sPrime = simulateOneStep(theta, thetaDot, dt, u);
74-
theta = sPrime(1,1);
93+
meanTheta += theta = sPrime(1,1);
94+
95+
% Acquire max and min theta
96+
if theta > maxTheta
97+
maxTheta = theta;
98+
end
99+
100+
if theta < minTheta
101+
minTheta = theta;
102+
end
103+
104+
if theta < setPoint - pi/4 || theta > setPoint + pi/4
105+
FAIL = true;
106+
break;
107+
end
75108
thetaDot = sPrime(2,1);
109+
110+
% Acquire max and min thetaDot
111+
if thetaDot > maxThetaDot
112+
maxThetaDot = thetaDot;
113+
end
114+
115+
if thetaDot < minThetaDot
116+
minThetaDot = thetaDot;
117+
end
118+
76119
data (i,1) = theta;
77-
%data(i,2) = thetaDot;
78-
%data(i, 3) = getReward([theta;thetaDot]);
120+
data(i,2) = thetaDot;
121+
data(i, 3) = getReward([theta;thetaDot]);
79122

80123
sPrime = mapToDiscreteValue(S, [theta;thetaDot]);
81124
thetaD = sPrime(1,1);
@@ -91,12 +134,27 @@
91134
end
92135
end
93136

94-
setenv("GNUTERM","qt");
137+
meanTheta /= maxIterations;
138+
139+
Policy
140+
141+
if FAIL
142+
143+
fprintf('Pendulum went out of bounds! Pendulum went out of bounds after %d iterations!\n\n\n', i)
144+
else
145+
fprintf('Pendulum ran was controlled beautifully for %d iterations! \n\n\n', i)
146+
end
147+
148+
fprintf('Max Theta: %d\nMin Theta: %d\nMax ThetaDot: %d\nMin ThetaDot: %d\nMean Theta: %d\n',...
149+
maxTheta, minTheta, maxThetaDot, minThetaDot, meanTheta)
150+
151+
fprintf('\n\n\n')
152+
95153
figure('Position',[0,0,1300,700]);
96154
h = plot(data, 'linewidth', 1);
97155
set(gca, "linewidth", 4, "fontsize", 12)
98156
title("Inverted Pendulum controlled with MDP");
99157
%legend('Theta', 'ThetaDot', 'Force', 'Reward');
100-
%legend('Theta', 'ThetaDot', 'Reward');
101-
legend('Theta');
158+
legend('Theta', 'ThetaDot', 'Reward');
159+
%legend('Theta');
102160
pause();

octave-workspace

-4.1 KB
Binary file not shown.

untitled.pdf

-66.9 KB
Binary file not shown.

0 commit comments

Comments
 (0)