Skip to content

Commit 3fce6b5

Browse files
author
Alex
committed
Updated Readme. Changed Lambda to Gamma
1 parent 74d301c commit 3fce6b5

File tree

3 files changed

+25
-47
lines changed

3 files changed

+25
-47
lines changed

FA/Q-Learning with Value Function Approximation Solution.ipynb

Lines changed: 12 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -3,9 +3,7 @@
33
{
44
"cell_type": "code",
55
"execution_count": 1,
6-
"metadata": {
7-
"collapsed": false
8-
},
6+
"metadata": {},
97
"outputs": [],
108
"source": [
119
"%matplotlib inline\n",
@@ -31,9 +29,7 @@
3129
{
3230
"cell_type": "code",
3331
"execution_count": 2,
34-
"metadata": {
35-
"collapsed": false
36-
},
32+
"metadata": {},
3733
"outputs": [
3834
{
3935
"name": "stderr",
@@ -50,9 +46,7 @@
5046
{
5147
"cell_type": "code",
5248
"execution_count": 3,
53-
"metadata": {
54-
"collapsed": false
55-
},
49+
"metadata": {},
5650
"outputs": [
5751
{
5852
"data": {
@@ -74,7 +68,7 @@
7468
"scaler = sklearn.preprocessing.StandardScaler()\n",
7569
"scaler.fit(observation_examples)\n",
7670
"\n",
77-
"# Used to converte a state to a featurizes represenation.\n",
71+
"# Used to convert a state to a featurized representation.\n",
7872
"# We use RBF kernels with different variances to cover different parts of the space\n",
7973
"featurizer = sklearn.pipeline.FeatureUnion([\n",
8074
" (\"rbf1\", RBFSampler(gamma=5.0, n_components=100)),\n",
@@ -88,9 +82,7 @@
8882
{
8983
"cell_type": "code",
9084
"execution_count": 4,
91-
"metadata": {
92-
"collapsed": false
93-
},
85+
"metadata": {},
9486
"outputs": [],
9587
"source": [
9688
"class Estimator():\n",
@@ -151,9 +143,7 @@
151143
{
152144
"cell_type": "code",
153145
"execution_count": 5,
154-
"metadata": {
155-
"collapsed": false
156-
},
146+
"metadata": {},
157147
"outputs": [],
158148
"source": [
159149
"def make_epsilon_greedy_policy(estimator, epsilon, nA):\n",
@@ -182,9 +172,7 @@
182172
{
183173
"cell_type": "code",
184174
"execution_count": 14,
185-
"metadata": {
186-
"collapsed": false
187-
},
175+
"metadata": {},
188176
"outputs": [],
189177
"source": [
190178
"def q_learning(env, estimator, num_episodes, discount_factor=1.0, epsilon=0.1, epsilon_decay=1.0):\n",
@@ -196,7 +184,7 @@
196184
" env: OpenAI environment.\n",
197185
" estimator: Action-Value function estimator\n",
198186
" num_episodes: Number of episodes to run for.\n",
199-
" discount_factor: Lambda time discount factor.\n",
187+
" discount_factor: Gamma discount factor.\n",
200188
" epsilon: Chance to sample a random action. Float between 0 and 1.\n",
201189
" epsilon_decay: Each episode, epsilon is decayed by this factor\n",
202190
" \n",
@@ -283,9 +271,7 @@
283271
{
284272
"cell_type": "code",
285273
"execution_count": 16,
286-
"metadata": {
287-
"collapsed": false
288-
},
274+
"metadata": {},
289275
"outputs": [
290276
{
291277
"name": "stdout",
@@ -305,9 +291,7 @@
305291
{
306292
"cell_type": "code",
307293
"execution_count": 17,
308-
"metadata": {
309-
"collapsed": false
310-
},
294+
"metadata": {},
311295
"outputs": [
312296
{
313297
"data": {
@@ -384,9 +368,9 @@
384368
"name": "python",
385369
"nbconvert_exporter": "python",
386370
"pygments_lexer": "ipython3",
387-
"version": "3.5.1"
371+
"version": "3.5.2"
388372
}
389373
},
390374
"nbformat": 4,
391-
"nbformat_minor": 0
375+
"nbformat_minor": 1
392376
}

FA/Q-Learning with Value Function Approximation.ipynb

Lines changed: 11 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44
"cell_type": "code",
55
"execution_count": 1,
66
"metadata": {
7-
"collapsed": false
7+
"collapsed": true
88
},
99
"outputs": [],
1010
"source": [
@@ -31,9 +31,7 @@
3131
{
3232
"cell_type": "code",
3333
"execution_count": 2,
34-
"metadata": {
35-
"collapsed": false
36-
},
34+
"metadata": {},
3735
"outputs": [
3836
{
3937
"name": "stderr",
@@ -50,9 +48,7 @@
5048
{
5149
"cell_type": "code",
5250
"execution_count": 3,
53-
"metadata": {
54-
"collapsed": false
55-
},
51+
"metadata": {},
5652
"outputs": [
5753
{
5854
"data": {
@@ -89,7 +85,7 @@
8985
"cell_type": "code",
9086
"execution_count": 4,
9187
"metadata": {
92-
"collapsed": false
88+
"collapsed": true
9389
},
9490
"outputs": [],
9591
"source": [
@@ -149,7 +145,7 @@
149145
"cell_type": "code",
150146
"execution_count": 5,
151147
"metadata": {
152-
"collapsed": false
148+
"collapsed": true
153149
},
154150
"outputs": [],
155151
"source": [
@@ -180,7 +176,7 @@
180176
"cell_type": "code",
181177
"execution_count": 18,
182178
"metadata": {
183-
"collapsed": false
179+
"collapsed": true
184180
},
185181
"outputs": [],
186182
"source": [
@@ -193,7 +189,7 @@
193189
" env: OpenAI environment.\n",
194190
" estimator: Action-Value function estimator\n",
195191
" num_episodes: Number of episodes to run for.\n",
196-
" discount_factor: Lambda time discount factor.\n",
192+
" discount_factor: Gamma discount factor.\n",
197193
" epsilon: Chance to sample a random action. Float between 0 and 1.\n",
198194
" epsilon_decay: Each episode, epsilon is decayed by this factor\n",
199195
" \n",
@@ -237,9 +233,7 @@
237233
{
238234
"cell_type": "code",
239235
"execution_count": 20,
240-
"metadata": {
241-
"collapsed": false
242-
},
236+
"metadata": {},
243237
"outputs": [
244238
{
245239
"name": "stdout",
@@ -259,9 +253,7 @@
259253
{
260254
"cell_type": "code",
261255
"execution_count": 21,
262-
"metadata": {
263-
"collapsed": false
264-
},
256+
"metadata": {},
265257
"outputs": [
266258
{
267259
"data": {
@@ -326,9 +318,9 @@
326318
"name": "python",
327319
"nbconvert_exporter": "python",
328320
"pygments_lexer": "ipython3",
329-
"version": "3.4.3"
321+
"version": "3.5.2"
330322
}
331323
},
332324
"nbformat": 4,
333-
"nbformat_minor": 0
325+
"nbformat_minor": 1
334326
}

FA/README.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,8 @@
3535

3636
### Exercises
3737

38+
- Get familiar with the [Mountain Car Playground](MountainCar%20Playground.ipynb)
39+
3840
- Solve Mountain Car Problem using Q-Learning with Linear Function Approximation
3941
- [Exercise](Q-Learning%20with%20Value%20Function%20Approximation.ipynb)
4042
- [Solution](Q-Learning%20with%20Value%20Function%20Approximation%20Solution.ipynb)

0 commit comments

Comments
 (0)