
Commit 77f512b

Migrating Actor Critic Method example to Keras 3 (TF-Only) (#1759)
* Migrated the example to the TF-only backend
* Added the .md and .ipynb files
1 parent ce463ed commit 77f512b

File tree: 3 files changed, +35 -24 lines changed

examples/rl/actor_critic_cartpole.py

Lines changed: 12 additions & 7 deletions
@@ -2,9 +2,10 @@
 Title: Actor Critic Method
 Author: [Apoorv Nandan](https://twitter.com/NandanApoorv)
 Date created: 2020/05/13
-Last modified: 2020/05/13
+Last modified: 2024/02/22
 Description: Implement Actor Critic Method in CartPole environment.
 Accelerator: NONE
+Converted to Keras 3 by: [Sitam Meur](https://github.com/sitamgithub-MSIT)
 """

 """
@@ -40,11 +41,15 @@
 ## Setup
 """

+import os
+
+os.environ["KERAS_BACKEND"] = "tensorflow"
 import gym
 import numpy as np
+import keras
+from keras import ops
+from keras import layers
 import tensorflow as tf
-from tensorflow import keras
-from tensorflow.keras import layers

 # Configuration parameters for the whole setup
 seed = 42
@@ -98,8 +103,8 @@
             # env.render(); Adding this line would show the attempts
             # of the agent in a pop up window.

-            state = tf.convert_to_tensor(state)
-            state = tf.expand_dims(state, 0)
+            state = ops.convert_to_tensor(state)
+            state = ops.expand_dims(state, 0)

             # Predict action probabilities and estimated future rewards
             # from environment state
@@ -108,7 +113,7 @@

             # Sample action from action probability distribution
             action = np.random.choice(num_actions, p=np.squeeze(action_probs))
-            action_probs_history.append(tf.math.log(action_probs[0, action]))
+            action_probs_history.append(ops.log(action_probs[0, action]))

             # Apply the sampled action in our environment
             state, reward, done, _ = env.step(action)
@@ -152,7 +157,7 @@
             # The critic must be updated so that it predicts a better estimate of
             # the future rewards.
             critic_losses.append(
-                huber_loss(tf.expand_dims(value, 0), tf.expand_dims(ret, 0))
+                huber_loss(ops.expand_dims(value, 0), ops.expand_dims(ret, 0))
             )

         # Backpropagation
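The substance of the migration is swapping `tf.*` tensor utilities for their backend-agnostic `keras.ops` counterparts. Below is a minimal sketch (not part of the commit) of the state-preprocessing step under the new imports, with a hard-coded array standing in for a CartPole observation; it assumes Keras 3 and TensorFlow are installed.

```python
import os

os.environ["KERAS_BACKEND"] = "tensorflow"  # must be set before importing keras

import numpy as np
from keras import ops

# Stand-in for a CartPole observation (4 floats); the real example gets this from env.reset().
state = np.array([0.01, -0.02, 0.03, 0.04], dtype="float32")

# Backend-agnostic replacements for tf.convert_to_tensor / tf.expand_dims.
state = ops.convert_to_tensor(state)
state = ops.expand_dims(state, 0)  # add a batch dimension -> shape (1, 4)
print(ops.shape(state))
```

With the TensorFlow backend these calls return `tf.Tensor`s, so the example's `tf.GradientTape` training loop keeps working unchanged.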

examples/rl/ipynb/actor_critic_cartpole.ipynb

Lines changed: 13 additions & 10 deletions
@@ -10,7 +10,7 @@
 "\n",
 "**Author:** [Apoorv Nandan](https://twitter.com/NandanApoorv)<br>\n",
 "**Date created:** 2020/05/13<br>\n",
-"**Last modified:** 2020/05/13<br>\n",
+"**Last modified:** 2024/02/22<br>\n",
 "**Description:** Implement Actor Critic Method in CartPole environment."
 ]
 },
@@ -60,17 +60,20 @@
 },
 {
 "cell_type": "code",
-"execution_count": 0,
+"execution_count": null,
 "metadata": {
 "colab_type": "code"
 },
 "outputs": [],
 "source": [
+"import os\n",
+"os.environ[\"KERAS_BACKEND\"] = \"tensorflow\"\n",
 "import gym\n",
 "import numpy as np\n",
+"import keras\n",
+"from keras import ops\n",
+"from keras import layers\n",
 "import tensorflow as tf\n",
-"from tensorflow import keras\n",
-"from tensorflow.keras import layers\n",
 "\n",
 "# Configuration parameters for the whole setup\n",
 "seed = 42\n",
@@ -101,7 +104,7 @@
 },
 {
 "cell_type": "code",
-"execution_count": 0,
+"execution_count": null,
 "metadata": {
 "colab_type": "code"
 },
@@ -130,7 +133,7 @@
 },
 {
 "cell_type": "code",
-"execution_count": 0,
+"execution_count": null,
 "metadata": {
 "colab_type": "code"
 },
@@ -152,8 +155,8 @@
 " # env.render(); Adding this line would show the attempts\n",
 " # of the agent in a pop up window.\n",
 "\n",
-" state = tf.convert_to_tensor(state)\n",
-" state = tf.expand_dims(state, 0)\n",
+" state = ops.convert_to_tensor(state)\n",
+" state = ops.expand_dims(state, 0)\n",
 "\n",
 " # Predict action probabilities and estimated future rewards\n",
 " # from environment state\n",
@@ -162,7 +165,7 @@
 "\n",
 " # Sample action from action probability distribution\n",
 " action = np.random.choice(num_actions, p=np.squeeze(action_probs))\n",
-" action_probs_history.append(tf.math.log(action_probs[0, action]))\n",
+" action_probs_history.append(ops.log(action_probs[0, action]))\n",
 "\n",
 " # Apply the sampled action in our environment\n",
 " state, reward, done, _ = env.step(action)\n",
@@ -206,7 +209,7 @@
 " # The critic must be updated so that it predicts a better estimate of\n",
 " # the future rewards.\n",
 " critic_losses.append(\n",
-" huber_loss(tf.expand_dims(value, 0), tf.expand_dims(ret, 0))\n",
+" huber_loss(ops.expand_dims(value, 0), ops.expand_dims(ret, 0))\n",
 " )\n",
 "\n",
 " # Backpropagation\n",

examples/rl/md/actor_critic_cartpole.md

Lines changed: 10 additions & 7 deletions
@@ -2,7 +2,7 @@

 **Author:** [Apoorv Nandan](https://twitter.com/NandanApoorv)<br>
 **Date created:** 2020/05/13<br>
-**Last modified:** 2020/05/13<br>
+**Last modified:** 2024/02/22<br>
 **Description:** Implement Actor Critic Method in CartPole environment.


@@ -46,11 +46,14 @@ remains upright. The agent, therefore, must learn to keep the pole from falling


 ```python
+import os
+os.environ["KERAS_BACKEND"] = "tensorflow"
 import gym
 import numpy as np
+import keras
+from keras import ops
+from keras import layers
 import tensorflow as tf
-from tensorflow import keras
-from tensorflow.keras import layers

 # Configuration parameters for the whole setup
 seed = 42
@@ -112,8 +115,8 @@ while True:  # Run until solved
             # env.render(); Adding this line would show the attempts
             # of the agent in a pop up window.

-            state = tf.convert_to_tensor(state)
-            state = tf.expand_dims(state, 0)
+            state = ops.convert_to_tensor(state)
+            state = ops.expand_dims(state, 0)

             # Predict action probabilities and estimated future rewards
             # from environment state
@@ -122,7 +125,7 @@ while True:  # Run until solved

             # Sample action from action probability distribution
             action = np.random.choice(num_actions, p=np.squeeze(action_probs))
-            action_probs_history.append(tf.math.log(action_probs[0, action]))
+            action_probs_history.append(ops.log(action_probs[0, action]))

             # Apply the sampled action in our environment
             state, reward, done, _ = env.step(action)
@@ -166,7 +169,7 @@ while True:  # Run until solved
             # The critic must be updated so that it predicts a better estimate of
             # the future rewards.
             critic_losses.append(
-                huber_loss(tf.expand_dims(value, 0), tf.expand_dims(ret, 0))
+                huber_loss(ops.expand_dims(value, 0), ops.expand_dims(ret, 0))
             )

         # Backpropagation
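In the critic update, `keras.losses.Huber` is already backend-agnostic, so only the `tf.expand_dims` calls change. A minimal sketch (not part of the commit) with hypothetical scalar values for the critic's prediction and the discounted return:

```python
import os

os.environ["KERAS_BACKEND"] = "tensorflow"  # select the backend before importing keras

import keras
from keras import ops

huber_loss = keras.losses.Huber()

# Hypothetical critic prediction and discounted return for a single timestep.
value = ops.convert_to_tensor(0.8)
ret = ops.convert_to_tensor(1.2)

# Both scalars get a leading batch dimension before computing the loss,
# exactly as in the migrated line above.
loss = huber_loss(ops.expand_dims(value, 0), ops.expand_dims(ret, 0))
print(float(loss))  # ~0.08: 0.5 * (1.2 - 0.8) ** 2, since |error| < the default delta of 1.0
```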
