diff --git a/FA/Q-Learning with Value Function Approximation Solution.ipynb b/FA/Q-Learning with Value Function Approximation Solution.ipynb index a271d6a63..40d843157 100644 --- a/FA/Q-Learning with Value Function Approximation Solution.ipynb +++ b/FA/Q-Learning with Value Function Approximation Solution.ipynb @@ -171,7 +171,7 @@ " \n", " \"\"\"\n", " def policy_fn(observation):\n", - " A = np.ones(nA, dtype=float) * epsilon / nA\n", + " A = np.ones(nA, dtype=float) * epsilon / (nA - 1)\n", " q_values = estimator.predict(observation)\n", " best_action = np.argmax(q_values)\n", " A[best_action] += (1.0 - epsilon)\n", @@ -369,22 +369,23 @@ } ], "metadata": { + "anaconda-cloud": {}, "kernelspec": { - "display_name": "Python 3", + "display_name": "Python [conda root]", "language": "python", - "name": "python3" + "name": "conda-root-py" }, "language_info": { "codemirror_mode": { "name": "ipython", - "version": 3 + "version": 2 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.5.1" + "pygments_lexer": "ipython2", + "version": "2.7.12" } }, "nbformat": 4,