Skip to content

Commit 783c2c3

Browse files
Author Alex committed: "Mod. estimator_value comment in actor-critic"
1 parent 1f2e2eb commit 783c2c3

File tree

2 files changed

+13
-21
lines changed

2 files changed

+13
-21
lines changed

PolicyGradient/CliffWalk Actor Critic Solution.ipynb

Lines changed: 8 additions & 18 deletions
Original file line number | Diff line number | Diff line change
@@ -3,9 +3,7 @@
33
{
44
"cell_type": "code",
55
"execution_count": 16,
6-
"metadata": {
7-
"collapsed": false
8-
},
6+
"metadata": {},
97
"outputs": [],
108
"source": [
119
"%matplotlib inline\n",
@@ -29,9 +27,7 @@
2927
{
3028
"cell_type": "code",
3129
"execution_count": 17,
32-
"metadata": {
33-
"collapsed": false
34-
},
30+
"metadata": {},
3531
"outputs": [],
3632
"source": [
3733
"env = CliffWalkingEnv()"
@@ -88,9 +84,7 @@
8884
{
8985
"cell_type": "code",
9086
"execution_count": 19,
91-
"metadata": {
92-
"collapsed": false
93-
},
87+
"metadata": {},
9488
"outputs": [],
9589
"source": [
9690
"class ValueEstimator():\n",
@@ -145,7 +139,7 @@
145139
" Args:\n",
146140
" env: OpenAI environment.\n",
147141
" estimator_policy: Policy Function to be optimized \n",
148-
" estimator_value: Value function approximator, used as a baseline\n",
142+
" estimator_value: Value function approximator, used as a critic\n",
149143
" num_episodes: Number of episodes to run for\n",
150144
" discount_factor: Time-discount factor\n",
151145
" \n",
@@ -209,9 +203,7 @@
209203
{
210204
"cell_type": "code",
211205
"execution_count": 26,
212-
"metadata": {
213-
"collapsed": false
214-
},
206+
"metadata": {},
215207
"outputs": [
216208
{
217209
"name": "stdout",
@@ -238,9 +230,7 @@
238230
{
239231
"cell_type": "code",
240232
"execution_count": 28,
241-
"metadata": {
242-
"collapsed": false
243-
},
233+
"metadata": {},
244234
"outputs": [
245235
{
246236
"data": {
@@ -306,9 +296,9 @@
306296
"name": "python",
307297
"nbconvert_exporter": "python",
308298
"pygments_lexer": "ipython3",
309-
"version": "3.5.0"
299+
"version": "3.5.2"
310300
}
311301
},
312302
"nbformat": 4,
313-
"nbformat_minor": 0
303+
"nbformat_minor": 1
314304
}

PolicyGradient/Continuous MountainCar Actor Critic Solution.ipynb

Lines changed: 5 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -233,7 +233,7 @@
233233
" Args:\n",
234234
" env: OpenAI environment.\n",
235235
" estimator_policy: Policy Function to be optimized \n",
236-
" estimator_value: Value function approximator, used as a baseline\n",
236+
" estimator_value: Value function approximator, used as a critic\n",
237237
" num_episodes: Number of episodes to run for\n",
238238
" discount_factor: Time-discount factor\n",
239239
" \n",
@@ -343,7 +343,9 @@
343343
{
344344
"cell_type": "code",
345345
"execution_count": null,
346-
"metadata": {},
346+
"metadata": {
347+
"collapsed": true
348+
},
347349
"outputs": [],
348350
"source": [
349351
"plotting.plot_episode_stats(stats, smoothing_window=10)"
@@ -384,7 +386,7 @@
384386
"name": "python",
385387
"nbconvert_exporter": "python",
386388
"pygments_lexer": "ipython3",
387-
"version": "3.6.1"
389+
"version": "3.5.2"
388390
}
389391
},
390392
"nbformat": 4,

0 commit comments

Comments
 (0)