|
| 1 | +/* |
| 2 | + * NonMarkovPoleBalancing.h |
| 3 | + * |
| 4 | + * Created on: Oct 19, 2013 |
| 5 | + * Author: sam |
| 6 | + */ |
| 7 | + |
| 8 | +#ifndef NONMARKOVPOLEBALANCING_H_ |
| 9 | +#define NONMARKOVPOLEBALANCING_H_ |
| 10 | + |
| 11 | +#include "Environment.h" |
| 12 | + |
| 13 | +/** |
| 14 | + * Incremental Evolution of Complex General Behavior |
| 15 | + * Faustino Gomez and Risto Miikkulainen, 1996 |
| 16 | + * |
| 17 | + */ |
| 18 | +class NonMarkovPoleBalancing: public Environment<float> |
| 19 | +{ |
| 20 | + protected: |
| 21 | + int nbUnjointedPoles; |
| 22 | + float stepTime; // s |
| 23 | + float x; // m |
| 24 | + float xDot; // ms^{-1} |
| 25 | + float g; // ms^{-2} |
| 26 | + float M; // Kg |
| 27 | + float muc; // coefficient of friction of cart on track |
| 28 | + |
| 29 | + Range<float>* xRange; |
| 30 | + Range<float>* thetaRange; |
| 31 | + Range<float>* actionRange; |
| 32 | + |
| 33 | + public: |
| 34 | + DenseVector<float>* theta; |
| 35 | + DenseVector<float>* thetaDot; |
| 36 | + DenseVector<float>* thetaDotDot; |
| 37 | + DenseVector<float>* l; // half length of i^{th} pole |
| 38 | + DenseVector<float>* f; // effective force |
| 39 | + DenseVector<float>* m; // mass of i^{th} pole |
| 40 | + DenseVector<float>* mm; // effective mass |
| 41 | + DenseVector<float>* mup; // coefficient of friction of i^{th} pole's hinge |
| 42 | + |
| 43 | + public: |
| 44 | + NonMarkovPoleBalancing(const int& nbUnjointedPoles = 1) : |
| 45 | + Environment<float>(1 + nbUnjointedPoles, 3, 1), nbUnjointedPoles(nbUnjointedPoles), stepTime( |
| 46 | + 0.01), x(0), xDot(0), g(-9.81), M(1.0), muc(0), xRange(new Range<float>(-2.4f, 2.4f)), actionRange( |
| 47 | + new Range<float>(-10.0f, 10.0f)), theta(new DenseVector<float>(nbUnjointedPoles)), thetaDot( |
| 48 | + new DenseVector<float>(nbUnjointedPoles)), thetaDotDot( |
| 49 | + new DenseVector<float>(nbUnjointedPoles)), l(new DenseVector<float>(nbUnjointedPoles)), f( |
| 50 | + new DenseVector<float>(nbUnjointedPoles)), m(new DenseVector<float>(nbUnjointedPoles)), mm( |
| 51 | + new DenseVector<float>(nbUnjointedPoles)), mup(new DenseVector<float>(nbUnjointedPoles)) |
| 52 | + { |
| 53 | + if (nbUnjointedPoles == 2) |
| 54 | + { |
| 55 | + thetaRange = new Range<float>(-15.0f / 180.0f * M_PI, 15.0f / 180.0f * M_PI); |
| 56 | + l->at(0) = 0.5; |
| 57 | + l->at(1) = 0.05; |
| 58 | + m->at(0) = 0.1; |
| 59 | + m->at(1) = 0.01; |
| 60 | + muc = 0.0005f; |
| 61 | + mup->at(0) = mup->at(1) = 0.000002f; |
| 62 | + } |
| 63 | + else |
| 64 | + { |
| 65 | + thetaRange = new Range<float>(-12.0f / 180.0f * M_PI, 12.0f / 180.0f * M_PI); |
| 66 | + l->at(0) = 0.5; // Kg |
| 67 | + m->at(0) = 0.1; // Kg |
| 68 | + muc = 0.0; |
| 69 | + } |
| 70 | + discreteActions->push_back(0, actionRange->min()); |
| 71 | + discreteActions->push_back(1, 0.0); |
| 72 | + discreteActions->push_back(2, actionRange->max()); |
| 73 | + |
| 74 | + // subject to change |
| 75 | + continuousActions->push_back(0, 0.0); |
| 76 | + |
| 77 | + for (int i = 0; i < getVars().dimension(); i++) |
| 78 | + resolutions->at(i) = 6.0; |
| 79 | + |
| 80 | + } |
| 81 | + |
| 82 | + ~NonMarkovPoleBalancing() |
| 83 | + { |
| 84 | + delete xRange; |
| 85 | + delete thetaRange; |
| 86 | + delete actionRange; |
| 87 | + delete theta; |
| 88 | + delete thetaDot; |
| 89 | + delete thetaDotDot; |
| 90 | + delete l; |
| 91 | + delete f; |
| 92 | + delete m; |
| 93 | + delete mm; |
| 94 | + delete mup; |
| 95 | + } |
| 96 | + |
| 97 | + private: |
| 98 | + void adjustTheta() |
| 99 | + { |
| 100 | + for (int i = 0; i < nbUnjointedPoles; i++) |
| 101 | + { |
| 102 | + if (theta->at(i) >= M_PI) |
| 103 | + theta->at(i) -= 2.0 * M_PI; |
| 104 | + if (theta->at(i) < -M_PI) |
| 105 | + theta->at(i) += 2.0 * M_PI; |
| 106 | + } |
| 107 | + } |
| 108 | + |
| 109 | + public: |
| 110 | + void updateRTStep() |
| 111 | + { |
| 112 | + DenseVector<float>& vars = *output->o_tp1; |
| 113 | + vars[0] = (x - xRange->min()) * resolutions->at(0) / xRange->length(); |
| 114 | + observations->at(0) = x; |
| 115 | + for (int i = 0; i < nbUnjointedPoles; i++) |
| 116 | + { |
| 117 | + vars[i + 1] = (theta->at(i) - thetaRange->min()) * resolutions->at(i + 1) |
| 118 | + / thetaRange->length(); |
| 119 | + observations->at(1) = theta->at(i); |
| 120 | + } |
| 121 | + } |
| 122 | + |
| 123 | + void initialize() |
| 124 | + { |
| 125 | + x = 0.0; |
| 126 | + for (int i = 0; i < nbUnjointedPoles; i++) |
| 127 | + theta->at(i) = 0.0f; |
| 128 | + |
| 129 | + adjustTheta(); |
| 130 | + updateRTStep(); |
| 131 | + } |
| 132 | + |
| 133 | + void step(const Action& a) |
| 134 | + { |
| 135 | + float totalEffectiveForce = 0; |
| 136 | + float totalEffectiveMass = 0; |
| 137 | + for (int i = 0; i < nbUnjointedPoles; i++) |
| 138 | + { |
| 139 | + float effectiveForce = 0; |
| 140 | + effectiveForce += m->at(i) * l->at(i) * std::pow(thetaDot->at(i), 2) * ::sin(theta->at(i)); |
| 141 | + effectiveForce += 0.75f * m->at(i) * ::cos(theta->at(i)) |
| 142 | + * ((mup->at(i) * theta->at(i)) / (m->at(i) * l->at(i)) + g * ::sin(theta->at(i))); |
| 143 | + f->at(i) = effectiveForce; |
| 144 | + mm->at(i) = m->at(i) * (1.0f - 0.75f * std::pow(::cos(theta->at(i)), 2)); |
| 145 | + totalEffectiveForce += f->at(i); |
| 146 | + totalEffectiveMass += mm->at(i); |
| 147 | + } |
| 148 | + |
| 149 | + float torque = actionRange->bound(a.at()); |
| 150 | + float xAcc = (torque - muc * Signum::valueOf(xDot) + totalEffectiveForce) |
| 151 | + / (M + totalEffectiveMass); |
| 152 | + |
| 153 | + // Update the four state variables, using Euler's method. |
| 154 | + x = xRange->bound(x + xDot * stepTime); |
| 155 | + xDot += xAcc * stepTime; |
| 156 | + |
| 157 | + for (int i = 0; i < nbUnjointedPoles; i++) |
| 158 | + { |
| 159 | + thetaDotDot->at(i) = -0.75f |
| 160 | + * (xAcc * ::cos(theta->at(i)) + g * ::sin(theta->at(i)) |
| 161 | + + (mup->at(i) * theta->at(i)) / (m->at(i) * l->at(i))) / l->at(i); |
| 162 | + |
| 163 | + // Update the four state variables, using Euler's method. |
| 164 | + theta->at(i) = thetaRange->bound(theta->at(i) + thetaDot->at(i) * stepTime); |
| 165 | + thetaDot->at(i) += thetaDotDot->at(i) * stepTime; |
| 166 | + } |
| 167 | + |
| 168 | + adjustTheta(); |
| 169 | + updateRTStep(); |
| 170 | + } |
| 171 | + |
| 172 | + bool endOfEpisode() const |
| 173 | + { |
| 174 | + bool value = true; |
| 175 | + for (int i = 0; i < nbUnjointedPoles; i++) |
| 176 | + value *= thetaRange->in(theta->at(i)); |
| 177 | + value = value && xRange->in(x); |
| 178 | + return !value; |
| 179 | + } |
| 180 | + |
| 181 | + float r() const |
| 182 | + { |
| 183 | + float value = 0; |
| 184 | + for (int i = 0; i < nbUnjointedPoles; i++) |
| 185 | + value += ::cos(theta->at(i)); |
| 186 | + return value; |
| 187 | + } |
| 188 | + |
| 189 | + float z() const |
| 190 | + { |
| 191 | + return 0.0; |
| 192 | + } |
| 193 | + |
| 194 | +}; |
| 195 | + |
| 196 | +#endif /* NONMARKOVPOLEBALANCING_H_ */ |
0 commit comments