girafe-ai · klensy · Sep 17, 2025 · Sep 17, 2025
diff --git a/homeworks/hw02_cross_entropy/01_crossentropy_method.ipynb b/homeworks/hw02_cross_entropy/01_crossentropy_method.ipynb
@@ -493,7 +493,7 @@
         "elite_states = [1, 2, 3, 4, 2, 0, 2, 3, 1]\n",
         "elite_actions = [0, 2, 4, 3, 2, 0, 1, 3, 3]\n",
         "\n",
-        "new_policy = update_policy(elite_states, elite_actions)\n",
+        "new_policy = update_policy(elite_states, elite_actions, n_states, n_actions)\n",
         "\n",
         "assert np.isfinite(new_policy).all(\n",
         "), \"Your new policy contains NaNs or +-inf. Make sure you don't divide by zero.\"\n",
@@ -587,13 +587,13 @@
         "\n",
         "for i in range(100):\n",
         "\n",
-        "    %time sessions = [generate_session(policy) for _ in range(n_sessions)]\n",
+        "    %time sessions = [generate_session(env, policy) for _ in range(n_sessions)]\n",
         "\n",
         "    states_batch, actions_batch, rewards_batch = zip(*sessions)\n",
         "\n",
         "    elite_states, elite_actions = select_elites(states_batch, actions_batch, rewards_batch, percentile)\n",
         "\n",
-        "    new_policy = update_policy(elite_states, elite_actions)\n",
+        "    new_policy = update_policy(elite_states, elite_actions, n_states, n_actions)\n",
         "\n",
         "    policy = learning_rate*new_policy + (1-learning_rate)*policy\n",
         "\n",
@@ -622,9 +622,9 @@
         "\n",
         "In this section you will train a neural network policy for continuous state space game\n",
         "\n",
-        "You can find full description of the environment [here](https://www.gymlibrary.dev/environments/classic_control/cart_pole/).\n",
+        "You can find the full description of the environment [here](https://gymnasium.farama.org/environments/classic_control/cart_pole/).\n",
         "\n",
-        "![CartPole-v0\"](https://www.gymlibrary.dev/_images/cart_pole.gif)\n",
+        "![CartPole-v0](https://gymnasium.farama.org/_images/cart_pole.gif)\n",
         "\n",
         "So here's how it works:"
       ]
@@ -684,7 +684,7 @@
       "cell_type": "markdown",
       "metadata": {},
       "source": [
-        "Update the generate_session function and beat the game!"
+        "Update the `generate_session_cartpole` function and beat the game!"
       ]
     },
     {
@@ -772,7 +772,7 @@
         "sessions_to_send = []\n",
         "for session in sessions:\n",
         "  observations = [x.tolist() for x in session[0]]\n",
-        "  actions = [x.item() for x in session[1]]\n",
+        "  actions = [x.item() if hasattr(x, 'item') else int(x) for x in session[1]]\n",
         "  sessions_to_send.append((observations, actions))\n",
         "\n",
         "import json\n",