Skip to content

Commit

Permalink
Update playground output
Browse files Browse the repository at this point in the history
  • Loading branch information
Sanyam Kapoor committed Feb 19, 2018
1 parent edcba6b commit ba12f97
Showing 1 changed file with 128 additions and 16 deletions.
144 changes: 128 additions & 16 deletions MC/Blackjack Playground.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -31,22 +31,134 @@
"name": "stdout",
"output_type": "stream",
"text": [
"Player Score: 17 (Usable Ace: False), Dealer Score: 8\n",
"Taking action: Hit\n"
]
},
{
"ename": "RecursionError",
"evalue": "maximum recursion depth exceeded",
"output_type": "error",
"traceback": [
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[0;31mRecursionError\u001b[0m Traceback (most recent call last)",
"\u001b[0;32m<ipython-input-3-e78e3f41e925>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m()\u001b[0m\n\u001b[1;32m 15\u001b[0m \u001b[0maction\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mstrategy\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mobservation\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 16\u001b[0m \u001b[0mprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"Taking action: {}\"\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mformat\u001b[0m\u001b[0;34m(\u001b[0m \u001b[0;34m[\u001b[0m\u001b[0;34m\"Stick\"\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m\"Hit\"\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0maction\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 17\u001b[0;31m \u001b[0mobservation\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mreward\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mdone\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0m_\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0menv\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mstep\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0maction\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 18\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mdone\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 19\u001b[0m \u001b[0mprint_observation\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mobservation\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m~/Workspace/src/github.com/dennybritz/reinforcement-learning/lib/envs/blackjack.py\u001b[0m in \u001b[0;36mstep\u001b[0;34m(self, action)\u001b[0m\n\u001b[1;32m 84\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 85\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0mstep\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0maction\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 86\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mstep\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0maction\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 87\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 88\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0m_seed\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mseed\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mNone\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"... last 1 frames repeated, from the frame below ...\n",
"\u001b[0;32m~/Workspace/src/github.com/dennybritz/reinforcement-learning/lib/envs/blackjack.py\u001b[0m in \u001b[0;36mstep\u001b[0;34m(self, action)\u001b[0m\n\u001b[1;32m 84\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 85\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0mstep\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0maction\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 86\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mstep\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0maction\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 87\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 88\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0m_seed\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mseed\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mNone\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;31mRecursionError\u001b[0m: maximum recursion depth exceeded"
"Player Score: 19 (Usable Ace: False), Dealer Score: 5\n",
"Taking action: Hit\n",
"Player Score: 27 (Usable Ace: False), Dealer Score: 5\n",
"Game end. Reward: -1.0\n",
"\n",
"Player Score: 21 (Usable Ace: True), Dealer Score: 10\n",
"Taking action: Stick\n",
"Player Score: 21 (Usable Ace: True), Dealer Score: 10\n",
"Game end. Reward: 0.0\n",
"\n",
"Player Score: 21 (Usable Ace: True), Dealer Score: 10\n",
"Taking action: Stick\n",
"Player Score: 21 (Usable Ace: True), Dealer Score: 10\n",
"Game end. Reward: 1.0\n",
"\n",
"Player Score: 14 (Usable Ace: True), Dealer Score: 10\n",
"Taking action: Hit\n",
"Player Score: 19 (Usable Ace: True), Dealer Score: 10\n",
"Taking action: Hit\n",
"Player Score: 15 (Usable Ace: False), Dealer Score: 10\n",
"Taking action: Hit\n",
"Player Score: 20 (Usable Ace: False), Dealer Score: 10\n",
"Taking action: Stick\n",
"Player Score: 20 (Usable Ace: False), Dealer Score: 10\n",
"Game end. Reward: 1.0\n",
"\n",
"Player Score: 20 (Usable Ace: False), Dealer Score: 10\n",
"Taking action: Stick\n",
"Player Score: 20 (Usable Ace: False), Dealer Score: 10\n",
"Game end. Reward: 1.0\n",
"\n",
"Player Score: 18 (Usable Ace: False), Dealer Score: 6\n",
"Taking action: Hit\n",
"Player Score: 27 (Usable Ace: False), Dealer Score: 6\n",
"Game end. Reward: -1.0\n",
"\n",
"Player Score: 16 (Usable Ace: False), Dealer Score: 3\n",
"Taking action: Hit\n",
"Player Score: 18 (Usable Ace: False), Dealer Score: 3\n",
"Taking action: Hit\n",
"Player Score: 23 (Usable Ace: False), Dealer Score: 3\n",
"Game end. Reward: -1.0\n",
"\n",
"Player Score: 19 (Usable Ace: False), Dealer Score: 10\n",
"Taking action: Hit\n",
"Player Score: 23 (Usable Ace: False), Dealer Score: 10\n",
"Game end. Reward: -1.0\n",
"\n",
"Player Score: 19 (Usable Ace: False), Dealer Score: 4\n",
"Taking action: Hit\n",
"Player Score: 21 (Usable Ace: False), Dealer Score: 4\n",
"Taking action: Stick\n",
"Player Score: 21 (Usable Ace: False), Dealer Score: 4\n",
"Game end. Reward: 1.0\n",
"\n",
"Player Score: 21 (Usable Ace: True), Dealer Score: 4\n",
"Taking action: Stick\n",
"Player Score: 21 (Usable Ace: True), Dealer Score: 4\n",
"Game end. Reward: 1.0\n",
"\n",
"Player Score: 16 (Usable Ace: True), Dealer Score: 10\n",
"Taking action: Hit\n",
"Player Score: 16 (Usable Ace: False), Dealer Score: 10\n",
"Taking action: Hit\n",
"Player Score: 26 (Usable Ace: False), Dealer Score: 10\n",
"Game end. Reward: -1.0\n",
"\n",
"Player Score: 14 (Usable Ace: False), Dealer Score: 10\n",
"Taking action: Hit\n",
"Player Score: 23 (Usable Ace: False), Dealer Score: 10\n",
"Game end. Reward: -1.0\n",
"\n",
"Player Score: 12 (Usable Ace: False), Dealer Score: 10\n",
"Taking action: Hit\n",
"Player Score: 15 (Usable Ace: False), Dealer Score: 10\n",
"Taking action: Hit\n",
"Player Score: 16 (Usable Ace: False), Dealer Score: 10\n",
"Taking action: Hit\n",
"Player Score: 26 (Usable Ace: False), Dealer Score: 10\n",
"Game end. Reward: -1.0\n",
"\n",
"Player Score: 16 (Usable Ace: True), Dealer Score: 8\n",
"Taking action: Hit\n",
"Player Score: 18 (Usable Ace: True), Dealer Score: 8\n",
"Taking action: Hit\n",
"Player Score: 18 (Usable Ace: False), Dealer Score: 8\n",
"Taking action: Hit\n",
"Player Score: 20 (Usable Ace: False), Dealer Score: 8\n",
"Taking action: Stick\n",
"Player Score: 20 (Usable Ace: False), Dealer Score: 8\n",
"Game end. Reward: 1.0\n",
"\n",
"Player Score: 20 (Usable Ace: False), Dealer Score: 10\n",
"Taking action: Stick\n",
"Player Score: 20 (Usable Ace: False), Dealer Score: 10\n",
"Game end. Reward: -1.0\n",
"\n",
"Player Score: 15 (Usable Ace: False), Dealer Score: 10\n",
"Taking action: Hit\n",
"Player Score: 16 (Usable Ace: False), Dealer Score: 10\n",
"Taking action: Hit\n",
"Player Score: 23 (Usable Ace: False), Dealer Score: 10\n",
"Game end. Reward: -1.0\n",
"\n",
"Player Score: 12 (Usable Ace: False), Dealer Score: 4\n",
"Taking action: Hit\n",
"Player Score: 16 (Usable Ace: False), Dealer Score: 4\n",
"Taking action: Hit\n",
"Player Score: 24 (Usable Ace: False), Dealer Score: 4\n",
"Game end. Reward: -1.0\n",
"\n",
"Player Score: 20 (Usable Ace: False), Dealer Score: 7\n",
"Taking action: Stick\n",
"Player Score: 20 (Usable Ace: False), Dealer Score: 7\n",
"Game end. Reward: 1.0\n",
"\n",
"Player Score: 15 (Usable Ace: False), Dealer Score: 7\n",
"Taking action: Hit\n",
"Player Score: 21 (Usable Ace: False), Dealer Score: 7\n",
"Taking action: Stick\n",
"Player Score: 21 (Usable Ace: False), Dealer Score: 7\n",
"Game end. Reward: 1.0\n",
"\n",
"Player Score: 15 (Usable Ace: False), Dealer Score: 8\n",
"Taking action: Hit\n",
"Player Score: 23 (Usable Ace: False), Dealer Score: 8\n",
"Game end. Reward: -1.0\n",
"\n"
]
}
],
Expand Down

0 comments on commit ba12f97

Please sign in to comment.