xuyanshi
diff --git a/‎Project4/nim/__pycache__/nim.cpython-39.pyc
5 Bytes b/‎Project4/nim/__pycache__/nim.cpython-39.pyc
5 Bytes
diff --git a/‎Project4/nim/nim.py
+1-1 b/‎Project4/nim/nim.py
+1-1
@@ -118,7 +118,7 @@ def update_q_value(self, state, action, old_q, reward, future_rewards):
         `alpha` is the learning rate, and `new value estimate`
         is the sum of the current reward and estimated future rewards.
         """
-        self.q[state, action] = old_q + self.alpha * (reward + future_rewards - old_q)
+        self.q[(tuple(state), tuple(action))] = old_q + self.alpha * (reward + future_rewards - old_q)
 
     def best_future_reward(self, state):
         """