|
3 | 3 | { |
4 | 4 | "cell_type": "code", |
5 | 5 | "execution_count": 1, |
6 | | - "metadata": { |
7 | | - "collapsed": false |
8 | | - }, |
| 6 | + "metadata": {}, |
9 | 7 | "outputs": [], |
10 | 8 | "source": [ |
11 | 9 | "%matplotlib inline\n", |
|
31 | 29 | { |
32 | 30 | "cell_type": "code", |
33 | 31 | "execution_count": 2, |
34 | | - "metadata": { |
35 | | - "collapsed": false |
36 | | - }, |
| 32 | + "metadata": {}, |
37 | 33 | "outputs": [ |
38 | 34 | { |
39 | 35 | "name": "stderr", |
|
50 | 46 | { |
51 | 47 | "cell_type": "code", |
52 | 48 | "execution_count": 3, |
53 | | - "metadata": { |
54 | | - "collapsed": false |
55 | | - }, |
| 49 | + "metadata": {}, |
56 | 50 | "outputs": [ |
57 | 51 | { |
58 | 52 | "data": { |
|
74 | 68 | "scaler = sklearn.preprocessing.StandardScaler()\n", |
75 | 69 | "scaler.fit(observation_examples)\n", |
76 | 70 | "\n", |
77 | | - "# Used to converte a state to a featurizes represenation.\n", |
| 71 | + "# Used to convert a state to a featurized representation.\n", |
78 | 72 | "# We use RBF kernels with different variances to cover different parts of the space\n", |
79 | 73 | "featurizer = sklearn.pipeline.FeatureUnion([\n", |
80 | 74 | " (\"rbf1\", RBFSampler(gamma=5.0, n_components=100)),\n", |
|
88 | 82 | { |
89 | 83 | "cell_type": "code", |
90 | 84 | "execution_count": 4, |
91 | | - "metadata": { |
92 | | - "collapsed": false |
93 | | - }, |
| 85 | + "metadata": {}, |
94 | 86 | "outputs": [], |
95 | 87 | "source": [ |
96 | 88 | "class Estimator():\n", |
|
151 | 143 | { |
152 | 144 | "cell_type": "code", |
153 | 145 | "execution_count": 5, |
154 | | - "metadata": { |
155 | | - "collapsed": false |
156 | | - }, |
| 146 | + "metadata": {}, |
157 | 147 | "outputs": [], |
158 | 148 | "source": [ |
159 | 149 | "def make_epsilon_greedy_policy(estimator, epsilon, nA):\n", |
|
182 | 172 | { |
183 | 173 | "cell_type": "code", |
184 | 174 | "execution_count": 14, |
185 | | - "metadata": { |
186 | | - "collapsed": false |
187 | | - }, |
| 175 | + "metadata": {}, |
188 | 176 | "outputs": [], |
189 | 177 | "source": [ |
190 | 178 | "def q_learning(env, estimator, num_episodes, discount_factor=1.0, epsilon=0.1, epsilon_decay=1.0):\n", |
|
196 | 184 | " env: OpenAI environment.\n", |
197 | 185 | " estimator: Action-Value function estimator\n", |
198 | 186 | " num_episodes: Number of episodes to run for.\n", |
199 | | - " discount_factor: Lambda time discount factor.\n", |
| 187 | + " discount_factor: Gamma discount factor.\n", |
200 | 188 | " epsilon: Chance to sample a random action. Float between 0 and 1.\n", |
201 | 189 | " epsilon_decay: Each episode, epsilon is decayed by this factor\n", |
202 | 190 | " \n", |
|
283 | 271 | { |
284 | 272 | "cell_type": "code", |
285 | 273 | "execution_count": 16, |
286 | | - "metadata": { |
287 | | - "collapsed": false |
288 | | - }, |
| 274 | + "metadata": {}, |
289 | 275 | "outputs": [ |
290 | 276 | { |
291 | 277 | "name": "stdout", |
|
305 | 291 | { |
306 | 292 | "cell_type": "code", |
307 | 293 | "execution_count": 17, |
308 | | - "metadata": { |
309 | | - "collapsed": false |
310 | | - }, |
| 294 | + "metadata": {}, |
311 | 295 | "outputs": [ |
312 | 296 | { |
313 | 297 | "data": { |
|
384 | 368 | "name": "python", |
385 | 369 | "nbconvert_exporter": "python", |
386 | 370 | "pygments_lexer": "ipython3", |
387 | | - "version": "3.5.1" |
| 371 | + "version": "3.5.2" |
388 | 372 | } |
389 | 373 | }, |
390 | 374 | "nbformat": 4, |
391 | | - "nbformat_minor": 0 |
| 375 | + "nbformat_minor": 1 |
392 | 376 | } |
0 commit comments