
Commit dee1e01

Alex committed:

DQN: Fixed typos. Changed lambda to gamma. Updated README
1 parent: f45bcbf

File tree: 5 files changed (+50, -66 lines)

DQN/Deep Q Learning Solution.ipynb

Lines changed: 22 additions & 10 deletions
@@ -30,7 +30,9 @@
   {
    "cell_type": "code",
    "execution_count": null,
-   "metadata": {},
+   "metadata": {
+    "collapsed": true
+   },
    "outputs": [],
    "source": [
     "env = gym.envs.make(\"Breakout-v0\")"
@@ -39,7 +41,9 @@
   {
    "cell_type": "code",
    "execution_count": null,
-   "metadata": {},
+   "metadata": {
+    "collapsed": true
+   },
    "outputs": [],
    "source": [
     "# Atari Actions: 0 (noop), 1 (fire), 2 (left) and 3 (right) are valid actions\n",
@@ -56,7 +60,7 @@
    "source": [
     "class StateProcessor():\n",
     "    \"\"\"\n",
-    "    Processes a raw Atari iamges. Resizes it and converts it to grayscale.\n",
+    "    Processes a raw Atari images. Resizes it and converts it to grayscale.\n",
     "    \"\"\"\n",
     "    def __init__(self):\n",
     "        # Build the Tensorflow graph\n",
@@ -83,7 +87,9 @@
   {
    "cell_type": "code",
    "execution_count": null,
-   "metadata": {},
+   "metadata": {
+    "collapsed": true
+   },
    "outputs": [],
    "source": [
     "class Estimator():\n",
@@ -193,7 +199,9 @@
   {
    "cell_type": "code",
    "execution_count": null,
-   "metadata": {},
+   "metadata": {
+    "collapsed": true
+   },
    "outputs": [],
    "source": [
     "# For Testing....\n",
@@ -295,7 +303,9 @@
   {
    "cell_type": "code",
    "execution_count": null,
-   "metadata": {},
+   "metadata": {
+    "collapsed": true
+   },
    "outputs": [],
    "source": [
     "def deep_q_learning(sess,\n",
@@ -315,7 +325,7 @@
     "                    batch_size=32,\n",
     "                    record_video_every=50):\n",
     "    \"\"\"\n",
-    "    Q-Learning algorithm for fff-policy TD control using Function Approximation.\n",
+    "    Q-Learning algorithm for off-policy TD control using Function Approximation.\n",
     "    Finds the optimal greedy policy while following an epsilon-greedy policy.\n",
     "\n",
     "    Args:\n",
@@ -331,7 +341,7 @@
     "            the reply memory.\n",
     "        update_target_estimator_every: Copy parameters from the Q estimator to the \n",
     "            target estimator every N steps\n",
-    "        discount_factor: Lambda time discount factor\n",
+    "        discount_factor: Gamma discount factor\n",
     "        epsilon_start: Chance to sample a random action when taking an action.\n",
     "            Epsilon is decayed over time and this is the start value\n",
     "        epsilon_end: The final minimum value of epsilon after decaying is done\n",
@@ -494,7 +504,9 @@
   {
    "cell_type": "code",
    "execution_count": null,
-   "metadata": {},
+   "metadata": {
+    "collapsed": true
+   },
    "outputs": [],
    "source": [
     "tf.reset_default_graph()\n",
@@ -569,7 +581,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.6.0"
+   "version": "3.5.2"
   }
  },
  "nbformat": 4,

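Context for the "Lambda time discount factor" → "Gamma discount factor" docstring fix: in Q-learning the discount factor is conventionally written gamma, while lambda usually denotes the eligibility-trace parameter in TD(λ). Below is a minimal, hypothetical sketch of where `discount_factor` enters the off-policy TD target the docstring describes; the function name and array arguments are illustrative assumptions, not code from this notebook.

```python
import numpy as np

def td_targets(rewards, done_flags, q_values_next, discount_factor=0.99):
    """Compute y_i = r_i + gamma * max_a Q_target(s'_i, a) for a replay minibatch.

    The bootstrap term is zeroed for terminal transitions. q_values_next is
    assumed to hold the target network's Q-values for the sampled next states.
    """
    rewards = np.asarray(rewards, dtype=np.float32)
    not_done = 1.0 - np.asarray(done_flags, dtype=np.float32)
    return rewards + not_done * discount_factor * np.max(q_values_next, axis=1)
```
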
DQN/Deep Q Learning.ipynb

Lines changed: 12 additions & 26 deletions
@@ -29,9 +29,7 @@
   {
    "cell_type": "code",
    "execution_count": null,
-   "metadata": {
-    "collapsed": false
-   },
+   "metadata": {},
    "outputs": [],
    "source": [
     "env = gym.envs.make(\"Breakout-v0\")"
@@ -40,9 +38,7 @@
   {
    "cell_type": "code",
    "execution_count": null,
-   "metadata": {
-    "collapsed": false
-   },
+   "metadata": {},
    "outputs": [],
    "source": [
     "# Atari Actions: 0 (noop), 1 (fire), 2 (left) and 3 (right) are valid actions\n",
@@ -59,7 +55,7 @@
    "source": [
     "class StateProcessor():\n",
     "    \"\"\"\n",
-    "    Processes a raw Atari iamges. Resizes it and converts it to grayscale.\n",
+    "    Processes a raw Atari images. Resizes it and converts it to grayscale.\n",
     "    \"\"\"\n",
     "    def __init__(self):\n",
     "        # Build the Tensorflow graph\n",
@@ -86,9 +82,7 @@
   {
    "cell_type": "code",
    "execution_count": null,
-   "metadata": {
-    "collapsed": false
-   },
+   "metadata": {},
    "outputs": [],
    "source": [
     "class Estimator():\n",
@@ -199,9 +193,7 @@
   {
    "cell_type": "code",
    "execution_count": null,
-   "metadata": {
-    "collapsed": false
-   },
+   "metadata": {},
    "outputs": [],
    "source": [
     "# For Testing....\n",
@@ -234,9 +226,7 @@
   {
    "cell_type": "code",
    "execution_count": null,
-   "metadata": {
-    "collapsed": false
-   },
+   "metadata": {},
    "outputs": [],
    "source": [
     "def copy_model_parameters(sess, estimator1, estimator2):\n",
@@ -294,9 +284,7 @@
   {
    "cell_type": "code",
    "execution_count": null,
-   "metadata": {
-    "collapsed": false
-   },
+   "metadata": {},
    "outputs": [],
    "source": [
     "def deep_q_learning(sess,\n",
@@ -316,7 +304,7 @@
     "                    batch_size=32,\n",
     "                    record_video_every=50):\n",
     "    \"\"\"\n",
-    "    Q-Learning algorithm for fff-policy TD control using Function Approximation.\n",
+    "    Q-Learning algorithm for off-policy TD control using Function Approximation.\n",
     "    Finds the optimal greedy policy while following an epsilon-greedy policy.\n",
     "\n",
     "    Args:\n",
@@ -332,7 +320,7 @@
     "            the reply memory.\n",
     "        update_target_estimator_every: Copy parameters from the Q estimator to the \n",
     "            target estimator every N steps\n",
-    "        discount_factor: Lambda time discount factor\n",
+    "        discount_factor: Gamma discount factor\n",
     "        epsilon_start: Chance to sample a random action when taking an action.\n",
     "            Epsilon is decayed over time and this is the start value\n",
     "        epsilon_end: The final minimum value of epsilon after decaying is done\n",
@@ -469,9 +457,7 @@
   {
    "cell_type": "code",
    "execution_count": null,
-   "metadata": {
-    "collapsed": false
-   },
+   "metadata": {},
    "outputs": [],
    "source": [
     "tf.reset_default_graph()\n",
@@ -528,9 +514,9 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.6.0"
+   "version": "3.5.2"
   }
  },
  "nbformat": 4,
- "nbformat_minor": 0
+ "nbformat_minor": 1
 }

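The `update_target_estimator_every` parameter described in the docstring above depends on the `copy_model_parameters(sess, estimator1, estimator2)` helper that appears as context in this diff. A hedged sketch of what such a copy step typically looks like in TF 1.x graph code follows; the assumption that each `Estimator` tags its variables with a `scope` name prefix is mine, not something shown in this commit.

```python
import tensorflow as tf

def copy_model_parameters(sess, estimator1, estimator2):
    """Overwrite estimator2's (target network) variables with estimator1's (Q network)."""
    e1_params = sorted(
        [v for v in tf.trainable_variables() if v.name.startswith(estimator1.scope)],
        key=lambda v: v.name)
    e2_params = sorted(
        [v for v in tf.trainable_variables() if v.name.startswith(estimator2.scope)],
        key=lambda v: v.name)
    # Pair variables by sorted name and assign the Q estimator's values into the target.
    update_ops = [e2_v.assign(e1_v) for e1_v, e2_v in zip(e1_params, e2_params)]
    sess.run(update_ops)
```
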
DQN/Double DQN Solution.ipynb

Lines changed: 12 additions & 26 deletions
@@ -28,9 +28,7 @@
   {
    "cell_type": "code",
    "execution_count": null,
-   "metadata": {
-    "collapsed": false
-   },
+   "metadata": {},
    "outputs": [],
    "source": [
     "env = gym.envs.make(\"Breakout-v0\")"
@@ -39,9 +37,7 @@
   {
    "cell_type": "code",
    "execution_count": null,
-   "metadata": {
-    "collapsed": false
-   },
+   "metadata": {},
    "outputs": [],
    "source": [
     "# Atari Actions: 0 (noop), 1 (fire), 2 (left) and 3 (right) are valid actions\n",
@@ -58,7 +54,7 @@
    "source": [
     "class StateProcessor():\n",
     "    \"\"\"\n",
-    "    Processes a raw Atari iamges. Resizes it and converts it to grayscale.\n",
+    "    Processes a raw Atari images. Resizes it and converts it to grayscale.\n",
     "    \"\"\"\n",
     "    def __init__(self):\n",
     "        # Build the Tensorflow graph\n",
@@ -85,9 +81,7 @@
   {
    "cell_type": "code",
    "execution_count": null,
-   "metadata": {
-    "collapsed": false
-   },
+   "metadata": {},
    "outputs": [],
    "source": [
     "class Estimator():\n",
@@ -175,9 +169,7 @@
   {
    "cell_type": "code",
    "execution_count": null,
-   "metadata": {
-    "collapsed": false
-   },
+   "metadata": {},
    "outputs": [],
    "source": [
     "# For Testing....\n",
@@ -210,9 +202,7 @@
   {
    "cell_type": "code",
    "execution_count": null,
-   "metadata": {
-    "collapsed": false
-   },
+   "metadata": {},
    "outputs": [],
    "source": [
     "def copy_model_parameters(sess, estimator1, estimator2):\n",
@@ -270,9 +260,7 @@
   {
    "cell_type": "code",
    "execution_count": null,
-   "metadata": {
-    "collapsed": false
-   },
+   "metadata": {},
    "outputs": [],
    "source": [
     "def deep_q_learning(sess,\n",
@@ -292,7 +280,7 @@
     "                    batch_size=32,\n",
     "                    record_video_every=50):\n",
     "    \"\"\"\n",
-    "    Q-Learning algorithm for fff-policy TD control using Function Approximation.\n",
+    "    Q-Learning algorithm for off-policy TD control using Function Approximation.\n",
     "    Finds the optimal greedy policy while following an epsilon-greedy policy.\n",
     "\n",
     "    Args:\n",
@@ -308,7 +296,7 @@
     "            the reply memory.\n",
     "        update_target_estimator_every: Copy parameters from the Q estimator to the \n",
     "            target estimator every N steps\n",
-    "        discount_factor: Lambda time discount factor\n",
+    "        discount_factor: Gamma discount factor\n",
     "        epsilon_start: Chance to sample a random action when taking an action.\n",
     "            Epsilon is decayed over time and this is the start value\n",
     "        epsilon_end: The final minimum value of epsilon after decaying is done\n",
@@ -472,9 +460,7 @@
   {
    "cell_type": "code",
    "execution_count": null,
-   "metadata": {
-    "collapsed": false
-   },
+   "metadata": {},
    "outputs": [],
    "source": [
     "tf.reset_default_graph()\n",
@@ -531,9 +517,9 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.5.1"
+   "version": "3.5.2"
   }
  },
  "nbformat": 4,
- "nbformat_minor": 0
+ "nbformat_minor": 1
 }

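Since this notebook is the Double DQN solution, a brief sketch of how its target differs from the vanilla DQN target shown earlier: the online Q-network selects the greedy next action and the target network evaluates it, which reduces the overestimation bias of taking a single max. The variable names below are illustrative assumptions, not the notebook's.

```python
import numpy as np

def double_dqn_targets(rewards, done_flags, q_values_next, q_values_next_target,
                       discount_factor=0.99):
    """y_i = r_i + gamma * Q_target(s'_i, argmax_a Q_online(s'_i, a))."""
    rewards = np.asarray(rewards, dtype=np.float32)
    not_done = 1.0 - np.asarray(done_flags, dtype=np.float32)
    best_actions = np.argmax(q_values_next, axis=1)            # action selection: online net
    batch_idx = np.arange(len(best_actions))
    evaluated = q_values_next_target[batch_idx, best_actions]  # action evaluation: target net
    return rewards + not_done * discount_factor * evaluated
```
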
DQN/README.md

Lines changed: 1 addition & 1 deletion
@@ -39,7 +39,7 @@
 
 ### Exercises
 
-- [OpenAI Gym Atari Environment Playground](Breakout%20Playground.ipynb)
+- Get familiar with the [OpenAI Gym Atari Environment Playground](Breakout%20Playground.ipynb)
 - Deep-Q Learning for Atari Games
   - [Exercise](Deep%20Q%20Learning.ipynb)
   - [Solution](Deep%20Q%20Learning%20Solution.ipynb)

DQN/dqn.py

Lines changed: 3 additions & 3 deletions
@@ -20,7 +20,7 @@
 
 class StateProcessor():
     """
-    Processes a raw Atari iamges. Resizes it and converts it to grayscale.
+    Processes a raw Atari images. Resizes it and converts it to grayscale.
     """
     def __init__(self):
         # Build the Tensorflow graph
@@ -208,7 +208,7 @@ def deep_q_learning(sess,
                     batch_size=32,
                     record_video_every=50):
     """
-    Q-Learning algorithm for fff-policy TD control using Function Approximation.
+    Q-Learning algorithm for off-policy TD control using Function Approximation.
     Finds the optimal greedy policy while following an epsilon-greedy policy.
 
     Args:
@@ -224,7 +224,7 @@ def deep_q_learning(sess,
             the reply memory.
         update_target_estimator_every: Copy parameters from the Q estimator to the
             target estimator every N steps
-        discount_factor: Lambda time discount factor
+        discount_factor: Gamma discount factor
         epsilon_start: Chance to sample a random action when taking an action.
             Epsilon is decayed over time and this is the start value
         epsilon_end: The final minimum value of epsilon after decaying is done

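The `iamges` → `images` typo sits in the `StateProcessor` docstring of `dqn.py`. For orientation, here is a hedged sketch of what a preprocessing class matching that docstring usually looks like in TF 1.x graph style; the 210x160 input shape, the crop offsets, and the 84x84 output size are common Atari-DQN choices assumed for illustration, not values read from this diff.

```python
import tensorflow as tf  # TF 1.x-style graph code, matching the era of this file

class StateProcessor():
    """
    Processes a raw Atari image: resizes it and converts it to grayscale.
    """
    def __init__(self):
        # Build the Tensorflow graph
        with tf.variable_scope("state_processor"):
            self.input_state = tf.placeholder(shape=[210, 160, 3], dtype=tf.uint8)
            self.output = tf.image.rgb_to_grayscale(self.input_state)
            self.output = tf.image.crop_to_bounding_box(self.output, 34, 0, 160, 160)
            self.output = tf.image.resize_images(
                self.output, [84, 84], method=tf.image.ResizeMethod.NEAREST_NEIGHBOR)
            self.output = tf.squeeze(self.output)

    def process(self, sess, state):
        # Returns one processed [84, 84] grayscale frame for one raw RGB frame.
        return sess.run(self.output, {self.input_state: state})
```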