Skip to content

Commit c8d2255

Browse files
committed
Fix signatures for task1; they are now in sync with the ones in template_crossentropy.py
1 parent 6b55e51 commit c8d2255

File tree

1 file changed

+3
-3
lines changed

1 file changed

+3
-3
lines changed

homeworks/hw02_cross_entropy/01_crossentropy_method.ipynb

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -493,7 +493,7 @@
493493
"elite_states = [1, 2, 3, 4, 2, 0, 2, 3, 1]\n",
494494
"elite_actions = [0, 2, 4, 3, 2, 0, 1, 3, 3]\n",
495495
"\n",
496-
"new_policy = update_policy(elite_states, elite_actions)\n",
496+
"new_policy = update_policy(elite_states, elite_actions, n_states, n_actions)\n",
497497
"\n",
498498
"assert np.isfinite(new_policy).all(\n",
499499
"), \"Your new policy contains NaNs or +-inf. Make sure you don't divide by zero.\"\n",
@@ -587,13 +587,13 @@
587587
"\n",
588588
"for i in range(100):\n",
589589
"\n",
590-
" %time sessions = [generate_session(policy) for _ in range(n_sessions)]\n",
590+
" %time sessions = [generate_session(env, policy) for _ in range(n_sessions)]\n",
591591
"\n",
592592
" states_batch, actions_batch, rewards_batch = zip(*sessions)\n",
593593
"\n",
594594
" elite_states, elite_actions = select_elites(states_batch, actions_batch, rewards_batch, percentile)\n",
595595
"\n",
596-
" new_policy = update_policy(elite_states, elite_actions)\n",
596+
" new_policy = update_policy(elite_states, elite_actions, n_states, n_actions)\n",
597597
"\n",
598598
" policy = learning_rate*new_policy + (1-learning_rate)*policy\n",
599599
"\n",

0 commit comments

Comments
 (0)