Skip to content

Commit 6f77cda

Browse files
committed
added RLLibViz configure file
1 parent 5776cd6 commit 6f77cda

File tree

2 files changed

+223
-0
lines changed

2 files changed

+223
-0
lines changed
Lines changed: 196 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,196 @@
1+
/*
2+
* NonMarkovPoleBalancing.h
3+
*
4+
* Created on: Oct 19, 2013
5+
* Author: sam
6+
*/
7+
8+
#ifndef NONMARKOVPOLEBALANCING_H_
9+
#define NONMARKOVPOLEBALANCING_H_
10+
11+
#include "Environment.h"
12+
13+
/**
14+
* Incremental Evolution of Complex General Behavior
15+
* Faustino Gomez and Risto Miikkulainen, 1996
16+
*
17+
*/
18+
class NonMarkovPoleBalancing: public Environment<float>
19+
{
20+
protected:
21+
int nbUnjointedPoles;
22+
float stepTime; // s
23+
float x; // m
24+
float xDot; // ms^{-1}
25+
float g; // ms^{-2}
26+
float M; // Kg
27+
float muc; // coefficient of friction of cart on track
28+
29+
Range<float>* xRange;
30+
Range<float>* thetaRange;
31+
Range<float>* actionRange;
32+
33+
public:
34+
DenseVector<float>* theta;
35+
DenseVector<float>* thetaDot;
36+
DenseVector<float>* thetaDotDot;
37+
DenseVector<float>* l; // half length of i^{th} pole
38+
DenseVector<float>* f; // effective force
39+
DenseVector<float>* m; // mass of i^{th} pole
40+
DenseVector<float>* mm; // effective mass
41+
DenseVector<float>* mup; // coefficient of friction of i^{th} pole's hinge
42+
43+
public:
44+
NonMarkovPoleBalancing(const int& nbUnjointedPoles = 1) :
45+
Environment<float>(1 + nbUnjointedPoles, 3, 1), nbUnjointedPoles(nbUnjointedPoles), stepTime(
46+
0.01), x(0), xDot(0), g(-9.81), M(1.0), muc(0), xRange(new Range<float>(-2.4f, 2.4f)), actionRange(
47+
new Range<float>(-10.0f, 10.0f)), theta(new DenseVector<float>(nbUnjointedPoles)), thetaDot(
48+
new DenseVector<float>(nbUnjointedPoles)), thetaDotDot(
49+
new DenseVector<float>(nbUnjointedPoles)), l(new DenseVector<float>(nbUnjointedPoles)), f(
50+
new DenseVector<float>(nbUnjointedPoles)), m(new DenseVector<float>(nbUnjointedPoles)), mm(
51+
new DenseVector<float>(nbUnjointedPoles)), mup(new DenseVector<float>(nbUnjointedPoles))
52+
{
53+
if (nbUnjointedPoles == 2)
54+
{
55+
thetaRange = new Range<float>(-15.0f / 180.0f * M_PI, 15.0f / 180.0f * M_PI);
56+
l->at(0) = 0.5;
57+
l->at(1) = 0.05;
58+
m->at(0) = 0.1;
59+
m->at(1) = 0.01;
60+
muc = 0.0005f;
61+
mup->at(0) = mup->at(1) = 0.000002f;
62+
}
63+
else
64+
{
65+
thetaRange = new Range<float>(-12.0f / 180.0f * M_PI, 12.0f / 180.0f * M_PI);
66+
l->at(0) = 0.5; // Kg
67+
m->at(0) = 0.1; // Kg
68+
muc = 0.0;
69+
}
70+
discreteActions->push_back(0, actionRange->min());
71+
discreteActions->push_back(1, 0.0);
72+
discreteActions->push_back(2, actionRange->max());
73+
74+
// subject to change
75+
continuousActions->push_back(0, 0.0);
76+
77+
for (int i = 0; i < getVars().dimension(); i++)
78+
resolutions->at(i) = 6.0;
79+
80+
}
81+
82+
~NonMarkovPoleBalancing()
83+
{
84+
delete xRange;
85+
delete thetaRange;
86+
delete actionRange;
87+
delete theta;
88+
delete thetaDot;
89+
delete thetaDotDot;
90+
delete l;
91+
delete f;
92+
delete m;
93+
delete mm;
94+
delete mup;
95+
}
96+
97+
private:
98+
void adjustTheta()
99+
{
100+
for (int i = 0; i < nbUnjointedPoles; i++)
101+
{
102+
if (theta->at(i) >= M_PI)
103+
theta->at(i) -= 2.0 * M_PI;
104+
if (theta->at(i) < -M_PI)
105+
theta->at(i) += 2.0 * M_PI;
106+
}
107+
}
108+
109+
public:
110+
void updateRTStep()
111+
{
112+
DenseVector<float>& vars = *output->o_tp1;
113+
vars[0] = (x - xRange->min()) * resolutions->at(0) / xRange->length();
114+
observations->at(0) = x;
115+
for (int i = 0; i < nbUnjointedPoles; i++)
116+
{
117+
vars[i + 1] = (theta->at(i) - thetaRange->min()) * resolutions->at(i + 1)
118+
/ thetaRange->length();
119+
observations->at(1) = theta->at(i);
120+
}
121+
}
122+
123+
void initialize()
124+
{
125+
x = 0.0;
126+
for (int i = 0; i < nbUnjointedPoles; i++)
127+
theta->at(i) = 0.0f;
128+
129+
adjustTheta();
130+
updateRTStep();
131+
}
132+
133+
void step(const Action& a)
134+
{
135+
float totalEffectiveForce = 0;
136+
float totalEffectiveMass = 0;
137+
for (int i = 0; i < nbUnjointedPoles; i++)
138+
{
139+
float effectiveForce = 0;
140+
effectiveForce += m->at(i) * l->at(i) * std::pow(thetaDot->at(i), 2) * ::sin(theta->at(i));
141+
effectiveForce += 0.75f * m->at(i) * ::cos(theta->at(i))
142+
* ((mup->at(i) * theta->at(i)) / (m->at(i) * l->at(i)) + g * ::sin(theta->at(i)));
143+
f->at(i) = effectiveForce;
144+
mm->at(i) = m->at(i) * (1.0f - 0.75f * std::pow(::cos(theta->at(i)), 2));
145+
totalEffectiveForce += f->at(i);
146+
totalEffectiveMass += mm->at(i);
147+
}
148+
149+
float torque = actionRange->bound(a.at());
150+
float xAcc = (torque - muc * Signum::valueOf(xDot) + totalEffectiveForce)
151+
/ (M + totalEffectiveMass);
152+
153+
// Update the four state variables, using Euler's method.
154+
x = xRange->bound(x + xDot * stepTime);
155+
xDot += xAcc * stepTime;
156+
157+
for (int i = 0; i < nbUnjointedPoles; i++)
158+
{
159+
thetaDotDot->at(i) = -0.75f
160+
* (xAcc * ::cos(theta->at(i)) + g * ::sin(theta->at(i))
161+
+ (mup->at(i) * theta->at(i)) / (m->at(i) * l->at(i))) / l->at(i);
162+
163+
// Update the four state variables, using Euler's method.
164+
theta->at(i) = thetaRange->bound(theta->at(i) + thetaDot->at(i) * stepTime);
165+
thetaDot->at(i) += thetaDotDot->at(i) * stepTime;
166+
}
167+
168+
adjustTheta();
169+
updateRTStep();
170+
}
171+
172+
bool endOfEpisode() const
173+
{
174+
bool value = true;
175+
for (int i = 0; i < nbUnjointedPoles; i++)
176+
value *= thetaRange->in(theta->at(i));
177+
value = value && xRange->in(x);
178+
return !value;
179+
}
180+
181+
float r() const
182+
{
183+
float value = 0;
184+
for (int i = 0; i < nbUnjointedPoles; i++)
185+
value += ::cos(theta->at(i));
186+
return value;
187+
}
188+
189+
float z() const
190+
{
191+
return 0.0;
192+
}
193+
194+
};
195+
196+
#endif /* NONMARKOVPOLEBALANCING_H_ */

visualization/RLLibViz/configure

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
#!/bin/bash
2+
3+
echo "RLLibVizContinuousGridworld"
4+
5+
qmake RLLibVizContinuousGridworld.pro
6+
7+
make
8+
9+
make clean
10+
11+
echo "RLLibVizMountainCar"
12+
13+
qmake RLLibVizMountainCar.pro
14+
15+
make
16+
17+
make clean
18+
19+
echo "RLLibVizSwingPendulum"
20+
21+
qmake RLLibVizSwingPendulum.pro
22+
23+
make
24+
25+
make clean
26+
27+
rm Makefile

0 commit comments

Comments
 (0)