|
2 | 2 | "cells": [ |
3 | 3 | { |
4 | 4 | "cell_type": "code", |
5 | | - "execution_count": 419, |
6 | | - "metadata": { |
7 | | - "collapsed": true |
8 | | - }, |
| 5 | + "execution_count": 1, |
| 6 | + "metadata": {}, |
9 | 7 | "outputs": [], |
10 | 8 | "source": [ |
11 | 9 | "import numpy as np\n", |
|
17 | 15 | }, |
18 | 16 | { |
19 | 17 | "cell_type": "code", |
20 | | - "execution_count": 420, |
| 18 | + "execution_count": 2, |
21 | 19 | "metadata": {}, |
22 | 20 | "outputs": [], |
23 | 21 | "source": [ |
|
26 | 24 | }, |
27 | 25 | { |
28 | 26 | "cell_type": "code", |
29 | | - "execution_count": 422, |
| 27 | + "execution_count": 3, |
30 | 28 | "metadata": {}, |
31 | 29 | "outputs": [ |
32 | 30 | { |
33 | 31 | "name": "stdout", |
34 | 32 | "output_type": "stream", |
35 | 33 | "text": [ |
36 | | - "Player Score: 17 (Usable Ace: False), Dealer Score: 10\n", |
37 | | - "Taking action: Hit\n", |
38 | | - "Player Score: 18 (Usable Ace: False), Dealer Score: 10\n", |
39 | | - "Taking action: Hit\n", |
40 | | - "Player Score: 28 (Usable Ace: False), Dealer Score: 10\n", |
41 | | - "Game end. Reward: -1.0\n", |
42 | | - "\n", |
43 | | - "Player Score: 6 (Usable Ace: False), Dealer Score: 9\n", |
44 | | - "Taking action: Hit\n", |
45 | | - "Player Score: 16 (Usable Ace: False), Dealer Score: 9\n", |
46 | | - "Taking action: Hit\n", |
47 | | - "Player Score: 26 (Usable Ace: False), Dealer Score: 9\n", |
48 | | - "Game end. Reward: -1.0\n", |
49 | | - "\n", |
50 | | - "Player Score: 12 (Usable Ace: False), Dealer Score: 6\n", |
51 | | - "Taking action: Hit\n", |
52 | | - "Player Score: 21 (Usable Ace: False), Dealer Score: 6\n", |
53 | | - "Taking action: Stick\n", |
54 | | - "Player Score: 21 (Usable Ace: False), Dealer Score: 6\n", |
55 | | - "Game end. Reward: 1.0\n", |
56 | | - "\n", |
57 | | - "Player Score: 17 (Usable Ace: True), Dealer Score: 8\n", |
58 | | - "Taking action: Hit\n", |
59 | | - "Player Score: 17 (Usable Ace: False), Dealer Score: 8\n", |
60 | | - "Taking action: Hit\n", |
61 | | - "Player Score: 22 (Usable Ace: False), Dealer Score: 8\n", |
62 | | - "Game end. Reward: -1.0\n", |
63 | | - "\n", |
64 | 34 | "Player Score: 17 (Usable Ace: False), Dealer Score: 8\n", |
65 | | - "Taking action: Hit\n", |
66 | | - "Player Score: 27 (Usable Ace: False), Dealer Score: 8\n", |
67 | | - "Game end. Reward: -1.0\n", |
68 | | - "\n", |
69 | | - "Player Score: 16 (Usable Ace: False), Dealer Score: 10\n", |
70 | | - "Taking action: Hit\n", |
71 | | - "Player Score: 19 (Usable Ace: False), Dealer Score: 10\n", |
72 | | - "Taking action: Hit\n", |
73 | | - "Player Score: 28 (Usable Ace: False), Dealer Score: 10\n", |
74 | | - "Game end. Reward: -1.0\n", |
75 | | - "\n", |
76 | | - "Player Score: 13 (Usable Ace: False), Dealer Score: 7\n", |
77 | | - "Taking action: Hit\n", |
78 | | - "Player Score: 14 (Usable Ace: False), Dealer Score: 7\n", |
79 | | - "Taking action: Hit\n", |
80 | | - "Player Score: 24 (Usable Ace: False), Dealer Score: 7\n", |
81 | | - "Game end. Reward: -1.0\n", |
82 | | - "\n", |
83 | | - "Player Score: 17 (Usable Ace: False), Dealer Score: 5\n", |
84 | | - "Taking action: Hit\n", |
85 | | - "Player Score: 25 (Usable Ace: False), Dealer Score: 5\n", |
86 | | - "Game end. Reward: -1.0\n", |
87 | | - "\n", |
88 | | - "Player Score: 20 (Usable Ace: False), Dealer Score: 5\n", |
89 | | - "Taking action: Stick\n", |
90 | | - "Player Score: 20 (Usable Ace: False), Dealer Score: 5\n", |
91 | | - "Game end. Reward: 1.0\n", |
92 | | - "\n", |
93 | | - "Player Score: 12 (Usable Ace: True), Dealer Score: 10\n", |
94 | | - "Taking action: Hit\n", |
95 | | - "Player Score: 20 (Usable Ace: True), Dealer Score: 10\n", |
96 | | - "Taking action: Stick\n", |
97 | | - "Player Score: 20 (Usable Ace: True), Dealer Score: 10\n", |
98 | | - "Game end. Reward: 0.0\n", |
99 | | - "\n", |
100 | | - "Player Score: 12 (Usable Ace: False), Dealer Score: 10\n", |
101 | | - "Taking action: Hit\n", |
102 | | - "Player Score: 19 (Usable Ace: False), Dealer Score: 10\n", |
103 | | - "Taking action: Hit\n", |
104 | | - "Player Score: 24 (Usable Ace: False), Dealer Score: 10\n", |
105 | | - "Game end. Reward: -1.0\n", |
106 | | - "\n", |
107 | | - "Player Score: 19 (Usable Ace: False), Dealer Score: 4\n", |
108 | | - "Taking action: Hit\n", |
109 | | - "Player Score: 22 (Usable Ace: False), Dealer Score: 4\n", |
110 | | - "Game end. Reward: -1.0\n", |
111 | | - "\n", |
112 | | - "Player Score: 16 (Usable Ace: False), Dealer Score: 10\n", |
113 | | - "Taking action: Hit\n", |
114 | | - "Player Score: 20 (Usable Ace: False), Dealer Score: 10\n", |
115 | | - "Taking action: Stick\n", |
116 | | - "Player Score: 20 (Usable Ace: False), Dealer Score: 10\n", |
117 | | - "Game end. Reward: 0.0\n", |
118 | | - "\n", |
119 | | - "Player Score: 4 (Usable Ace: False), Dealer Score: 3\n", |
120 | | - "Taking action: Hit\n", |
121 | | - "Player Score: 14 (Usable Ace: False), Dealer Score: 3\n", |
122 | | - "Taking action: Hit\n", |
123 | | - "Player Score: 24 (Usable Ace: False), Dealer Score: 3\n", |
124 | | - "Game end. Reward: -1.0\n", |
125 | | - "\n", |
126 | | - "Player Score: 21 (Usable Ace: True), Dealer Score: 10\n", |
127 | | - "Taking action: Stick\n", |
128 | | - "Player Score: 21 (Usable Ace: True), Dealer Score: 10\n", |
129 | | - "Game end. Reward: 1.0\n", |
130 | | - "\n", |
131 | | - "Player Score: 16 (Usable Ace: True), Dealer Score: 10\n", |
132 | | - "Taking action: Hit\n", |
133 | | - "Player Score: 12 (Usable Ace: False), Dealer Score: 10\n", |
134 | | - "Taking action: Hit\n", |
135 | | - "Player Score: 20 (Usable Ace: False), Dealer Score: 10\n", |
136 | | - "Taking action: Stick\n", |
137 | | - "Player Score: 20 (Usable Ace: False), Dealer Score: 10\n", |
138 | | - "Game end. Reward: 1.0\n", |
139 | | - "\n", |
140 | | - "Player Score: 9 (Usable Ace: False), Dealer Score: 10\n", |
141 | | - "Taking action: Hit\n", |
142 | | - "Player Score: 19 (Usable Ace: False), Dealer Score: 10\n", |
143 | | - "Taking action: Hit\n", |
144 | | - "Player Score: 26 (Usable Ace: False), Dealer Score: 10\n", |
145 | | - "Game end. Reward: -1.0\n", |
146 | | - "\n", |
147 | | - "Player Score: 12 (Usable Ace: False), Dealer Score: 5\n", |
148 | | - "Taking action: Hit\n", |
149 | | - "Player Score: 15 (Usable Ace: False), Dealer Score: 5\n", |
150 | | - "Taking action: Hit\n", |
151 | | - "Player Score: 21 (Usable Ace: False), Dealer Score: 5\n", |
152 | | - "Taking action: Stick\n", |
153 | | - "Player Score: 21 (Usable Ace: False), Dealer Score: 5\n", |
154 | | - "Game end. Reward: 1.0\n", |
155 | | - "\n", |
156 | | - "Player Score: 11 (Usable Ace: False), Dealer Score: 9\n", |
157 | | - "Taking action: Hit\n", |
158 | | - "Player Score: 13 (Usable Ace: False), Dealer Score: 9\n", |
159 | | - "Taking action: Hit\n", |
160 | | - "Player Score: 17 (Usable Ace: False), Dealer Score: 9\n", |
161 | | - "Taking action: Hit\n", |
162 | | - "Player Score: 19 (Usable Ace: False), Dealer Score: 9\n", |
163 | | - "Taking action: Hit\n", |
164 | | - "Player Score: 29 (Usable Ace: False), Dealer Score: 9\n", |
165 | | - "Game end. Reward: -1.0\n", |
166 | | - "\n", |
167 | | - "Player Score: 14 (Usable Ace: False), Dealer Score: 7\n", |
168 | | - "Taking action: Hit\n", |
169 | | - "Player Score: 19 (Usable Ace: False), Dealer Score: 7\n", |
170 | | - "Taking action: Hit\n", |
171 | | - "Player Score: 29 (Usable Ace: False), Dealer Score: 7\n", |
172 | | - "Game end. Reward: -1.0\n", |
173 | | - "\n" |
| 35 | + "Taking action: Hit\n" |
| 36 | + ] |
| 37 | + }, |
| 38 | + { |
| 39 | + "ename": "RecursionError", |
| 40 | + "evalue": "maximum recursion depth exceeded", |
| 41 | + "output_type": "error", |
| 42 | + "traceback": [ |
| 43 | + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", |
| 44 | + "\u001b[0;31mRecursionError\u001b[0m Traceback (most recent call last)", |
| 45 | + "\u001b[0;32m<ipython-input-3-e78e3f41e925>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m()\u001b[0m\n\u001b[1;32m 15\u001b[0m \u001b[0maction\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mstrategy\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mobservation\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 16\u001b[0m \u001b[0mprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"Taking action: {}\"\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mformat\u001b[0m\u001b[0;34m(\u001b[0m \u001b[0;34m[\u001b[0m\u001b[0;34m\"Stick\"\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m\"Hit\"\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0maction\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 17\u001b[0;31m \u001b[0mobservation\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mreward\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mdone\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0m_\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0menv\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mstep\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0maction\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 18\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mdone\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 19\u001b[0m \u001b[0mprint_observation\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mobservation\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", |
| 46 | + "\u001b[0;32m~/Workspace/src/github.com/dennybritz/reinforcement-learning/lib/envs/blackjack.py\u001b[0m in \u001b[0;36mstep\u001b[0;34m(self, action)\u001b[0m\n\u001b[1;32m 84\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 85\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0mstep\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0maction\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 86\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mstep\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0maction\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 87\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 88\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0m_seed\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mseed\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mNone\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", |
| 47 | + "... last 1 frames repeated, from the frame below ...\n", |
| 48 | + "\u001b[0;32m~/Workspace/src/github.com/dennybritz/reinforcement-learning/lib/envs/blackjack.py\u001b[0m in \u001b[0;36mstep\u001b[0;34m(self, action)\u001b[0m\n\u001b[1;32m 84\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 85\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0mstep\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0maction\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 86\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mstep\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0maction\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 87\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 88\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0m_seed\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mseed\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mNone\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", |
| 49 | + "\u001b[0;31mRecursionError\u001b[0m: maximum recursion depth exceeded" |
174 | 50 | ] |
175 | 51 | } |
176 | 52 | ], |
|
197 | 73 | " print(\"Game end. Reward: {}\\n\".format(float(reward)))\n", |
198 | 74 | " break" |
199 | 75 | ] |
| 76 | + }, |
| 77 | + { |
| 78 | + "cell_type": "code", |
| 79 | + "execution_count": null, |
| 80 | + "metadata": {}, |
| 81 | + "outputs": [], |
| 82 | + "source": [] |
200 | 83 | } |
201 | 84 | ], |
202 | 85 | "metadata": { |
|
215 | 98 | "name": "python", |
216 | 99 | "nbconvert_exporter": "python", |
217 | 100 | "pygments_lexer": "ipython3", |
218 | | - "version": "3.5.2" |
| 101 | + "version": "3.6.4" |
219 | 102 | } |
220 | 103 | }, |
221 | 104 | "nbformat": 4, |
|
0 commit comments