Update playground output

dennybritz · Feb 19, 2018 · ba12f97 · ba12f97
1 parent edcba6b
commit ba12f97
Showing 1 changed file with 128 additions and 16 deletions.
diff --git a/MC/Blackjack Playground.ipynb b/MC/Blackjack Playground.ipynb
@@ -31,22 +31,134 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Player Score: 17 (Usable Ace: False), Dealer Score: 8\n",
-      "Taking action: Hit\n"
-     ]
-    },
-    {
-     "ename": "RecursionError",
-     "evalue": "maximum recursion depth exceeded",
-     "output_type": "error",
-     "traceback": [
-      "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
-      "\u001b[0;31mRecursionError\u001b[0m                            Traceback (most recent call last)",
-      "\u001b[0;32m<ipython-input-3-e78e3f41e925>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m()\u001b[0m\n\u001b[1;32m     15\u001b[0m         \u001b[0maction\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mstrategy\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mobservation\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m     16\u001b[0m         \u001b[0mprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"Taking action: {}\"\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mformat\u001b[0m\u001b[0;34m(\u001b[0m \u001b[0;34m[\u001b[0m\u001b[0;34m\"Stick\"\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m\"Hit\"\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0maction\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 17\u001b[0;31m         \u001b[0mobservation\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mreward\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mdone\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0m_\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0menv\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mstep\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0maction\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m     18\u001b[0m         \u001b[0;32mif\u001b[0m \u001b[0mdone\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m     19\u001b[0m             \u001b[0mprint_observation\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mobservation\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
-      "\u001b[0;32m~/Workspace/src/github.com/dennybritz/reinforcement-learning/lib/envs/blackjack.py\u001b[0m in \u001b[0;36mstep\u001b[0;34m(self, action)\u001b[0m\n\u001b[1;32m     84\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m     85\u001b[0m     \u001b[0;32mdef\u001b[0m \u001b[0mstep\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0maction\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 86\u001b[0;31m         \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mstep\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0maction\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m     87\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m     88\u001b[0m     \u001b[0;32mdef\u001b[0m \u001b[0m_seed\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mseed\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mNone\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
-      "... last 1 frames repeated, from the frame below ...\n",
-      "\u001b[0;32m~/Workspace/src/github.com/dennybritz/reinforcement-learning/lib/envs/blackjack.py\u001b[0m in \u001b[0;36mstep\u001b[0;34m(self, action)\u001b[0m\n\u001b[1;32m     84\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m     85\u001b[0m     \u001b[0;32mdef\u001b[0m \u001b[0mstep\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0maction\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 86\u001b[0;31m         \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mstep\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0maction\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m     87\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m     88\u001b[0m     \u001b[0;32mdef\u001b[0m \u001b[0m_seed\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mseed\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mNone\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
-      "\u001b[0;31mRecursionError\u001b[0m: maximum recursion depth exceeded"
+      "Player Score: 19 (Usable Ace: False), Dealer Score: 5\n",
+      "Taking action: Hit\n",
+      "Player Score: 27 (Usable Ace: False), Dealer Score: 5\n",
+      "Game end. Reward: -1.0\n",
+      "\n",
+      "Player Score: 21 (Usable Ace: True), Dealer Score: 10\n",
+      "Taking action: Stick\n",
+      "Player Score: 21 (Usable Ace: True), Dealer Score: 10\n",
+      "Game end. Reward: 0.0\n",
+      "\n",
+      "Player Score: 21 (Usable Ace: True), Dealer Score: 10\n",
+      "Taking action: Stick\n",
+      "Player Score: 21 (Usable Ace: True), Dealer Score: 10\n",
+      "Game end. Reward: 1.0\n",
+      "\n",
+      "Player Score: 14 (Usable Ace: True), Dealer Score: 10\n",
+      "Taking action: Hit\n",
+      "Player Score: 19 (Usable Ace: True), Dealer Score: 10\n",
+      "Taking action: Hit\n",
+      "Player Score: 15 (Usable Ace: False), Dealer Score: 10\n",
+      "Taking action: Hit\n",
+      "Player Score: 20 (Usable Ace: False), Dealer Score: 10\n",
+      "Taking action: Stick\n",
+      "Player Score: 20 (Usable Ace: False), Dealer Score: 10\n",
+      "Game end. Reward: 1.0\n",
+      "\n",
+      "Player Score: 20 (Usable Ace: False), Dealer Score: 10\n",
+      "Taking action: Stick\n",
+      "Player Score: 20 (Usable Ace: False), Dealer Score: 10\n",
+      "Game end. Reward: 1.0\n",
+      "\n",
+      "Player Score: 18 (Usable Ace: False), Dealer Score: 6\n",
+      "Taking action: Hit\n",
+      "Player Score: 27 (Usable Ace: False), Dealer Score: 6\n",
+      "Game end. Reward: -1.0\n",
+      "\n",
+      "Player Score: 16 (Usable Ace: False), Dealer Score: 3\n",
+      "Taking action: Hit\n",
+      "Player Score: 18 (Usable Ace: False), Dealer Score: 3\n",
+      "Taking action: Hit\n",
+      "Player Score: 23 (Usable Ace: False), Dealer Score: 3\n",
+      "Game end. Reward: -1.0\n",
+      "\n",
+      "Player Score: 19 (Usable Ace: False), Dealer Score: 10\n",
+      "Taking action: Hit\n",
+      "Player Score: 23 (Usable Ace: False), Dealer Score: 10\n",
+      "Game end. Reward: -1.0\n",
+      "\n",
+      "Player Score: 19 (Usable Ace: False), Dealer Score: 4\n",
+      "Taking action: Hit\n",
+      "Player Score: 21 (Usable Ace: False), Dealer Score: 4\n",
+      "Taking action: Stick\n",
+      "Player Score: 21 (Usable Ace: False), Dealer Score: 4\n",
+      "Game end. Reward: 1.0\n",
+      "\n",
+      "Player Score: 21 (Usable Ace: True), Dealer Score: 4\n",
+      "Taking action: Stick\n",
+      "Player Score: 21 (Usable Ace: True), Dealer Score: 4\n",
+      "Game end. Reward: 1.0\n",
+      "\n",
+      "Player Score: 16 (Usable Ace: True), Dealer Score: 10\n",
+      "Taking action: Hit\n",
+      "Player Score: 16 (Usable Ace: False), Dealer Score: 10\n",
+      "Taking action: Hit\n",
+      "Player Score: 26 (Usable Ace: False), Dealer Score: 10\n",
+      "Game end. Reward: -1.0\n",
+      "\n",
+      "Player Score: 14 (Usable Ace: False), Dealer Score: 10\n",
+      "Taking action: Hit\n",
+      "Player Score: 23 (Usable Ace: False), Dealer Score: 10\n",
+      "Game end. Reward: -1.0\n",
+      "\n",
+      "Player Score: 12 (Usable Ace: False), Dealer Score: 10\n",
+      "Taking action: Hit\n",
+      "Player Score: 15 (Usable Ace: False), Dealer Score: 10\n",
+      "Taking action: Hit\n",
+      "Player Score: 16 (Usable Ace: False), Dealer Score: 10\n",
+      "Taking action: Hit\n",
+      "Player Score: 26 (Usable Ace: False), Dealer Score: 10\n",
+      "Game end. Reward: -1.0\n",
+      "\n",
+      "Player Score: 16 (Usable Ace: True), Dealer Score: 8\n",
+      "Taking action: Hit\n",
+      "Player Score: 18 (Usable Ace: True), Dealer Score: 8\n",
+      "Taking action: Hit\n",
+      "Player Score: 18 (Usable Ace: False), Dealer Score: 8\n",
+      "Taking action: Hit\n",
+      "Player Score: 20 (Usable Ace: False), Dealer Score: 8\n",
+      "Taking action: Stick\n",
+      "Player Score: 20 (Usable Ace: False), Dealer Score: 8\n",
+      "Game end. Reward: 1.0\n",
+      "\n",
+      "Player Score: 20 (Usable Ace: False), Dealer Score: 10\n",
+      "Taking action: Stick\n",
+      "Player Score: 20 (Usable Ace: False), Dealer Score: 10\n",
+      "Game end. Reward: -1.0\n",
+      "\n",
+      "Player Score: 15 (Usable Ace: False), Dealer Score: 10\n",
+      "Taking action: Hit\n",
+      "Player Score: 16 (Usable Ace: False), Dealer Score: 10\n",
+      "Taking action: Hit\n",
+      "Player Score: 23 (Usable Ace: False), Dealer Score: 10\n",
+      "Game end. Reward: -1.0\n",
+      "\n",
+      "Player Score: 12 (Usable Ace: False), Dealer Score: 4\n",
+      "Taking action: Hit\n",
+      "Player Score: 16 (Usable Ace: False), Dealer Score: 4\n",
+      "Taking action: Hit\n",
+      "Player Score: 24 (Usable Ace: False), Dealer Score: 4\n",
+      "Game end. Reward: -1.0\n",
+      "\n",
+      "Player Score: 20 (Usable Ace: False), Dealer Score: 7\n",
+      "Taking action: Stick\n",
+      "Player Score: 20 (Usable Ace: False), Dealer Score: 7\n",
+      "Game end. Reward: 1.0\n",
+      "\n",
+      "Player Score: 15 (Usable Ace: False), Dealer Score: 7\n",
+      "Taking action: Hit\n",
+      "Player Score: 21 (Usable Ace: False), Dealer Score: 7\n",
+      "Taking action: Stick\n",
+      "Player Score: 21 (Usable Ace: False), Dealer Score: 7\n",
+      "Game end. Reward: 1.0\n",
+      "\n",
+      "Player Score: 15 (Usable Ace: False), Dealer Score: 8\n",
+      "Taking action: Hit\n",
+      "Player Score: 23 (Usable Ace: False), Dealer Score: 8\n",
+      "Game end. Reward: -1.0\n",
+      "\n"
      ]
     }
    ],