google-deepmind
diff --git a/‎open_spiel/colabs/rcfr_pytorch.ipynb‎ ‎open_spiel/colabs/rcfr_pytorch_flax.ipynb‎open_spiel/colabs/rcfr_pytorch.ipynb renamed to open_spiel/colabs/rcfr_pytorch_flax.ipynb
Lines changed: 96 additions & 49 deletions b/‎open_spiel/colabs/rcfr_pytorch.ipynb‎ ‎open_spiel/colabs/rcfr_pytorch_flax.ipynb‎open_spiel/colabs/rcfr_pytorch.ipynb renamed to open_spiel/colabs/rcfr_pytorch_flax.ipynb
Lines changed: 96 additions & 49 deletions
@@ -6,80 +6,129 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "import numpy as np\n",
+    "%pip install -U open_spiel"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Optional module pokerkit_wrapper was not importable: No module named 'pokerkit'\n"
+     ]
+    }
+   ],
+   "source": [
     "import pyspiel \n",
-    "import tensorflow.compat.v1 as tf\n",
+    "import functools\n",
     "import torch \n",
     "import torch.nn as nn\n",
     "\n",
-    "import algorithms.rcfr as rcfr_tf\n",
-    "import pytorch.rcfr as rcfr_pt\n",
-    "tf.disable_v2_behavior()\n",
+    "import jax\n",
+    "import flax.nnx as nnx\n",
+    "import optax\n",
     "\n",
-    "tf.enable_eager_execution()\n",
+    "from open_spiel.python.pytorch import rcfr as rcfr_pt\n",
+    "from open_spiel.python.jax import rcfr as rcfr_jax\n",
     "\n",
-    "_GAME = pyspiel.load_game('kuhn_poker')\n",
-    "_BATCH_SIZE = 12"
+    "game = pyspiel.load_game('kuhn_poker')\n",
+    "batch_size = 12"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 2,
    "metadata": {},
    "outputs": [],
    "source": [
-    "def tnsorflow_example(game_name, num_epochs, iterations):\n",
+    "def flax_example(game_name, num_epochs, iterations):\n",
+    "  \n",
+    "  @nnx.vmap(in_axes=(None, 0), out_axes=0)\n",
+    "  def forward(model: nnx.Module, x: jax.Array) -> jax.Array:\n",
+    "    \"\"\"Applies `model` to every row of `x`.\n",
+    "\n",
+    "    `nnx.vmap` maps over axis 0 of `x` while sharing `model` across rows\n",
+    "    (`in_axes=(None, 0)`); per-row outputs are stacked along axis 0.\n",
+    "    \"\"\"\n",
+    "    per_row_output = model(x)\n",
+    "    return per_row_output\n",
+    "  \n",
+    "  @functools.partial(jax.jit, static_argnames=(\"graphdef\",))\n",
+    "  def jax_train_step(\n",
+    "    graphdef: nnx.GraphDef, state: nnx.State, x: jax.Array, y: jax.Array\n",
+    "  ) -> tuple:\n",
+    "    \"\"\"Runs one jitted optimisation step on a single minibatch.\n",
+    "\n",
+    "    Args:\n",
+    "      graphdef: static structure produced by `nnx.split((model, optimizer))`.\n",
+    "      state: the matching `nnx.State` for the model and the optimizer.\n",
+    "      x: minibatch of model inputs, batched along axis 0.\n",
+    "      y: targets for `x`; assumed to line up with the batched predictions.\n",
+    "\n",
+    "    Returns:\n",
+    "      A `(loss, state)` pair: the mean minibatch loss and the updated state.\n",
+    "    \"\"\"\n",
+    "\n",
+    "    model, optimizer = nnx.merge(graphdef, state, copy=True)\n",
+    "\n",
+    "    def loss_fn(model):\n",
+    "      y_pred = forward(model, x)\n",
+    "      # Real-valued targets need a regression loss; hinge loss expects +/-1\n",
+    "      # class labels. Huber with delta=0.01 is what the TensorFlow RCFR\n",
+    "      # example used, and it is the same family as the SmoothL1Loss in the\n",
+    "      # PyTorch example of this notebook.\n",
+    "      return optax.huber_loss(y_pred, y, delta=0.01).mean()\n",
+    "\n",
+    "    loss, grads = nnx.value_and_grad(loss_fn)(model)\n",
+    "    optimizer.update(model, grads)\n",
+    "    state = nnx.state((model, optimizer))\n",
+    "    return loss, state\n",
+    "\n",
     "  game = pyspiel.load_game(game_name)\n",
     "\n",
     "  models = []\n",
     "  for _ in range(game.num_players()):\n",
     "    models.append(\n",
-    "        rcfr_tf.DeepRcfrModel(\n",
+    "        rcfr_jax.DeepRcfrModel(\n",
     "            game,\n",
     "            num_hidden_layers=1,\n",
     "            num_hidden_units=13,\n",
     "            num_hidden_factors=8,\n",
     "            use_skip_connections=True))\n",
     "\n",
+    "  # these parameters are fixed initially\n",
     "  buffer_size = -1\n",
     "  truncate_negative = False\n",
     "  bootstrap = False\n",
+    "\n",
     "  if buffer_size > 0:\n",
-    "    solver = rcfr_tf.ReservoirRcfrSolver(\n",
+    "    solver = rcfr_jax.ReservoirRcfrSolver(\n",
     "        game,\n",
     "        models,\n",
     "        buffer_size,\n",
     "        truncate_negative=truncate_negative)\n",
     "  else:\n",
-    "    solver = rcfr_tf.RcfrSolver(\n",
+    "    solver = rcfr_jax.RcfrSolver(\n",
     "        game,\n",
     "        models,\n",
     "        truncate_negative=truncate_negative,\n",
     "        bootstrap=bootstrap)\n",
+    "    \n",
+    "  batch_size = 12\n",
+    "  step_size = 0.01\n",
     "\n",
-    "  def _train_fn(model, data):\n",
-    "    \"\"\"Train `model` on `data`.\"\"\"\n",
-    "    batch_size = 100\n",
-    "    step_size = 0.01\n",
-    "    data = data.shuffle(batch_size * 10)\n",
-    "    data = data.batch(batch_size)\n",
-    "    data = data.repeat(num_epochs)\n",
+    "  def _train_fn(model: nnx.Module, data: tuple) -> None:\n",
     "\n",
-    "    optimizer = tf.keras.optimizers.Adam(lr=step_size, amsgrad=True)\n",
+    "    \"\"\"Train `model` in place on `data`.\n",
+    "\n",
+    "    Args:\n",
+    "      model: the flax.nnx module to fit; its state is updated in place.\n",
+    "      data: a `(dataset, rng)` pair. `dataset` is unpacked as a pair of\n",
+    "        `(inputs, targets)` arrays indexed by example along axis 0, and\n",
+    "        `rng` is the JAX key used to shuffle them.\n",
+    "    \"\"\"\n",
+    "    data_, rng = data\n",
+    "    num_examples = len(data_[0])\n",
+    "    if num_examples == 0:\n",
+    "      return  # nothing to fit\n",
+    "\n",
+    "    optimizer = nnx.Optimizer(\n",
+    "      model, optax.amsgrad(learning_rate=step_size), wrt=nnx.Param\n",
+    "    )\n",
+    "    graphdef, state = nnx.split((model, optimizer))\n",
     "\n",
-    "    @tf.function\n",
-    "    def _train():\n",
-    "      for x, y in data:\n",
-    "        optimizer.minimize(\n",
-    "            lambda: tf.losses.huber_loss(y, model(x), delta=0.01),  # pylint: disable=cell-var-from-loop\n",
-    "            model.trainable_variables)\n",
+    "    # Keep whole minibatches only: the number of examples need not be a\n",
+    "    # multiple of `batch_size` (and may be smaller than it), in which case\n",
+    "    # reshaping the full array into (num_batches, batch_size, -1) fails.\n",
+    "    minibatch_size = min(batch_size, num_examples)\n",
+    "    num_batches = num_examples // minibatch_size\n",
+    "    num_used = num_batches * minibatch_size\n",
+    "    # The same key is reused for every leaf so that inputs and targets get\n",
+    "    # the same permutation and stay aligned.\n",
+    "    data_ = jax.tree.map(\n",
+    "      lambda x: jax.random.permutation(rng, x, axis=0)[:num_used].reshape(\n",
+    "        num_batches, minibatch_size, -1\n",
+    "      ),\n",
+    "      data_,\n",
+    "    )\n",
     "\n",
-    "    _train()\n",
+    "    for _ in range(num_epochs):\n",
+    "      for x, y in zip(*data_):\n",
+    "        _, state = jax_train_step(graphdef, state, x, y.squeeze(-1))\n",
     "\n",
-    "  # End of _train_fn\n",
+    "    # Write the trained parameters back into the caller's `model`.\n",
+    "    nnx.update((model, optimizer), state)\n",
+    "  \n",
     "  result = []\n",
     "  for i in range(iterations):\n",
-    "    solver.evaluate_and_update_policy(_train_fn)\n",
+    "    solver.evaluate_and_update_policy(_train_fn, jax.random.key(i))\n",
     "    if i % 10 == 0:\n",
     "      conv = pyspiel.exploitability(game, solver.average_policy())\n",
     "      result.append(conv)\n",
@@ -89,7 +138,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 3,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -125,15 +174,14 @@
     "  def _train_fn(model, data):\n",
     "    \"\"\"Train `model` on `data`.\"\"\"\n",
     "    batch_size = 100\n",
-    "    num_epochs = 20\n",
     "    step_size = 0.01\n",
     "    \n",
     "    data = torch.utils.data.DataLoader(data, batch_size=batch_size, shuffle=True)\n",
     "    loss_fn = nn.SmoothL1Loss()\n",
     "    optimizer = torch.optim.Adam(model.parameters(), lr=step_size, amsgrad=True)\n",
     "\n",
     "    def _train(model, data):\n",
-    "      for epoch in range(num_epochs):\n",
+    "      for _ in range(num_epochs):\n",
     "        for x, y in data:\n",
     "          optimizer.zero_grad()\n",
     "          output = model(x)\n",
@@ -150,7 +198,6 @@
     "    if i % 10 == 0:\n",
     "      conv = pyspiel.exploitability(game, solver.average_policy())\n",
     "      result.append(conv)\n",
-    "      # print(\"Iteration {} exploitability {}\".format(i, conv))\n",
     "  return result"
    ]
   },
@@ -160,11 +207,11 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "tensorflow_rcfr = []\n",
+    "flax_rcfr = []\n",
     "pytorch_rcfr = []\n",
     "num_epochs, iterations = 20, 100\n",
     "for _ in range(10):\n",
-    "  tensorflow_rcfr.append(tnsorflow_example('kuhn_poker', num_epochs, iterations))\n",
+    "  flax_rcfr.append(flax_example('kuhn_poker', num_epochs, iterations))\n",
     "  pytorch_rcfr.append(pytorch_example('kuhn_poker', num_epochs, iterations))"
    ]
   },
@@ -177,10 +224,10 @@
     "import matplotlib.pyplot as plt\n",
     "\n",
     "x = [i for i in range(10)]\n",
-    "tf_exploitability = [sum(tfe) for tfe in zip(*tensorflow_rcfr)]\n",
+    "flax_exploitability = [sum(tfe) for tfe in zip(*flax_rcfr)]\n",
     "pt_exploitability = [sum(pte) for pte in zip(*pytorch_rcfr)]\n",
     "\n",
-    "plt.plot(x, tf_exploitability, label=\"tensorflow\")\n",
+    "plt.plot(x, flax_exploitability, label=\"flax_nnx\")\n",
     "plt.plot(x, pt_exploitability, label=\"pytorch\")\n",
     "\n",
     "plt.legend()\n",
@@ -194,11 +241,11 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "tensorflow_rcfr = []\n",
+    "flax_rcfr = []\n",
     "pytorch_rcfr = []\n",
-    "num_epochs, iterations = 200, 100\n",
+    "num_epochs, iterations = 200, 100 \n",
     "for _ in range(10):\n",
-    "  tensorflow_rcfr.append(tnsorflow_example('kuhn_poker', num_epochs, iterations))\n",
+    "  flax_rcfr.append(flax_example('kuhn_poker', num_epochs, iterations))\n",
     "  pytorch_rcfr.append(pytorch_example('kuhn_poker', num_epochs, iterations))"
    ]
   },
@@ -211,10 +258,10 @@
     "import matplotlib.pyplot as plt\n",
     "\n",
     "x = [i for i in range(10)]\n",
-    "tf_exploitability = [sum(tfe) for tfe in zip(*tensorflow_rcfr)]\n",
+    "flax_exploitability = [sum(tfe) for tfe in zip(*flax_rcfr)]\n",
     "pt_exploitability = [sum(pte) for pte in zip(*pytorch_rcfr)]\n",
     "\n",
-    "plt.plot(x, tf_exploitability, label=\"tensorflow\")\n",
+    "plt.plot(x, flax_exploitability, label=\"flax_nnx\")\n",
     "plt.plot(x, pt_exploitability, label=\"pytorch\")\n",
     "\n",
     "plt.legend()\n",
@@ -228,11 +275,11 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "tensorflow_rcfr = []\n",
+    "flax_rcfr = []\n",
     "pytorch_rcfr = []\n",
     "num_epochs, iterations = 20, 100\n",
     "for _ in range(10):\n",
-    "  tensorflow_rcfr.append(tnsorflow_example('leduc_poker', num_epochs, iterations))\n",
+    "  flax_rcfr.append(flax_example('leduc_poker', num_epochs, iterations))\n",
     "  pytorch_rcfr.append(pytorch_example('leduc_poker', num_epochs, iterations))"
    ]
   },
@@ -245,10 +292,10 @@
     "import matplotlib.pyplot as plt\n",
     "\n",
     "x = [i for i in range(10)]\n",
-    "tf_exploitability = [sum(tfe) for tfe in zip(*tensorflow_rcfr)]\n",
+    "flax_exploitability = [sum(tfe) for tfe in zip(*flax_rcfr)]\n",
     "pt_exploitability = [sum(pte) for pte in zip(*pytorch_rcfr)]\n",
     "\n",
-    "plt.plot(x, tf_exploitability, label=\"tensorflow\")\n",
+    "plt.plot(x, flax_exploitability, label=\"flax_nnx\")\n",
     "plt.plot(x, pt_exploitability, label=\"pytorch\")\n",
     "\n",
     "plt.legend()\n",
@@ -266,7 +313,7 @@
  ],
  "metadata": {
   "kernelspec": {
-   "display_name": "Python 3",
+   "display_name": "open_spiel",
    "language": "python",
    "name": "python3"
   },
@@ -280,7 +327,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.7.3"
+   "version": "3.12.11"
   }
  },
  "nbformat": 4,