convert MDN to KerasV3 (#2050)

aaarrti · web-flow · commit 5b13561eadaf · 2025-02-26T12:01:00.000-08:00
diff --git a/.gitignore b/.gitignore
@@ -15,4 +15,5 @@ templates/**/guides/**/*.md
 templates/keras_hub/getting_started.md
 templates/keras_tuner/getting_started.md
 datasets/*
-.vscode/*
+.vscode/*
+.history
diff --git a/examples/keras_recipes/approximating_non_function_mappings.py b/examples/keras_recipes/approximating_non_function_mappings.py
@@ -27,30 +27,29 @@
 "Mixture Density Networks".
 
 I'm going to use the new
-[multibackend Keras Core project](https://github.com/keras-team/keras-core) to
+[multibackend Keras V3](https://github.com/keras-team/keras) to
 build my Mixture Density networks.
 Great job to the Keras team on the project - it's awesome to be able to swap
 frameworks in one line of code.
 
-Some bad news: I use TensorFlow probability in this guide... so it doesn't
-actually work with other backends.
+Some bad news: I use TensorFlow Probability in this guide... so it
+only works with the TensorFlow and JAX backends.
 
 Anyways, let's start by installing dependencies and sorting out imports:
 """
 """shell
-pip install -q --upgrade tensorflow-probability keras-core
+pip install -q --upgrade jax tensorflow-probability[jax] keras
 """
 
+import os
+
+os.environ["KERAS_BACKEND"] = "jax"
+
 import numpy as np
 import matplotlib.pyplot as plt
-import math
-import random
-from keras_core import callbacks
-import keras_core
-import tensorflow as tf
-from keras_core import layers
-from keras_core import optimizers
-from tensorflow_probability import distributions as tfd
+import keras
+from keras import callbacks, layers, ops
+from tensorflow_probability.substrates.jax import distributions as tfd
 
 """
 Next, lets generate a noisy spiral that we're going to attempt to approximate.
@@ -99,7 +98,7 @@ def create_noisy_spiral(n, jitter_std=0.2, revolutions=2):
 
 N_HIDDEN = 128
 
-model = keras_core.Sequential(
+model = keras.Sequential(
     [
         layers.Dense(N_HIDDEN, activation="relu"),
         layers.Dense(N_HIDDEN, activation="relu"),
@@ -179,7 +178,7 @@ def create_noisy_spiral(n, jitter_std=0.2, revolutions=2):
 
 
 def elu_plus_one_plus_epsilon(x):
-    return keras_core.activations.elu(x) + 1 + keras_core.backend.epsilon()
+    return keras.activations.elu(x) + 1 + keras.backend.epsilon()
 
 
 """
@@ -238,7 +237,7 @@ def call(self, x, mask=None):
 OUTPUT_DIMS = 1
 N_MIXES = 20
 
-mdn_network = keras_core.Sequential(
+mdn_network = keras.Sequential(
     [
         layers.Dense(N_HIDDEN, activation="relu"),
         layers.Dense(N_HIDDEN, activation="relu"),
@@ -255,36 +254,22 @@ def call(self, x, mask=None):
 def get_mixture_loss_func(output_dim, num_mixes):
     def mdn_loss_func(y_true, y_pred):
         # Reshape inputs in case this is used in a TimeDistributed layer
-        y_pred = tf.reshape(
-            y_pred,
-            [-1, (2 * num_mixes * output_dim) + num_mixes],
-            name="reshape_ypreds",
-        )
-        y_true = tf.reshape(y_true, [-1, output_dim], name="reshape_ytrue")
+        y_pred = ops.reshape(y_pred, [-1, (2 * num_mixes * output_dim) + num_mixes])
+        y_true = ops.reshape(y_true, [-1, output_dim])
         # Split the inputs into parameters
-        out_mu, out_sigma, out_pi = tf.split(
-            y_pred,
-            num_or_size_splits=[
-                num_mixes * output_dim,
-                num_mixes * output_dim,
-                num_mixes,
-            ],
-            axis=-1,
-            name="mdn_coef_split",
-        )
+        out_mu, out_sigma, out_pi = ops.split(y_pred, [num_mixes * output_dim, 2 * num_mixes * output_dim], axis=-1)
         # Construct the mixture models
         cat = tfd.Categorical(logits=out_pi)
-        component_splits = [output_dim] * num_mixes
-        mus = tf.split(out_mu, num_or_size_splits=component_splits, axis=1)
-        sigs = tf.split(out_sigma, num_or_size_splits=component_splits, axis=1)
+        mus = ops.split(out_mu, num_mixes, axis=1)
+        sigs = ops.split(out_sigma, num_mixes, axis=1)
         coll = [
             tfd.MultivariateNormalDiag(loc=loc, scale_diag=scale)
             for loc, scale in zip(mus, sigs)
         ]
         mixture = tfd.Mixture(cat=cat, components=coll)
         loss = mixture.log_prob(y_true)
-        loss = tf.negative(loss)
-        loss = tf.reduce_mean(loss)
+        loss = ops.negative(loss)
+        loss = ops.mean(loss)
         return loss
 
     return mdn_loss_func
@@ -349,7 +334,7 @@ def sample_from_categorical(dist):
         accumulate += dist[i]
         if accumulate >= r:
             return i
-    tf.logging.info("Error sampling categorical model.")
+    print("Error sampling categorical model.")
     return -1
 
 
diff --git a/examples/keras_recipes/ipynb/approximating_non_function_mappings.ipynb b/examples/keras_recipes/ipynb/approximating_non_function_mappings.ipynb
@@ -35,17 +35,26 @@
     "\"Mixture Density Networks\".\n",
     "\n",
     "I'm going to use the new\n",
-    "[multibackend Keras Core project](https://github.com/keras-team/keras-core) to\n",
+    "[multibackend Keras V3](https://github.com/keras-team/keras) to\n",
     "build my Mixture Density networks.\n",
     "Great job to the Keras team on the project - it's awesome to be able to swap\n",
     "frameworks in one line of code.\n",
     "\n",
-    "Some bad news: I use TensorFlow probability in this guide... so it doesn't\n",
-    "actually work with other backends.\n",
+    "Some bad news: I use TensorFlow Probability in this guide... so it\n",
+    "only works with the TensorFlow and JAX backends.\n",
     "\n",
     "Anyways, let's start by installing dependencies and sorting out imports:"
    ]
   },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "%env KERAS_BACKEND=jax"
+   ]
+  },
   {
    "cell_type": "code",
    "execution_count": null,
@@ -54,7 +63,7 @@
    },
    "outputs": [],
    "source": [
-    "!pip install -q --upgrade tensorflow-probability keras-core"
+    "%pip install -q --upgrade jax tensorflow-probability[jax] keras"
    ]
   },
   {
@@ -67,14 +76,9 @@
    "source": [
     "import numpy as np\n",
     "import matplotlib.pyplot as plt\n",
-    "import math\n",
-    "import random\n",
-    "from keras_core import callbacks\n",
-    "import keras_core\n",
-    "import tensorflow as tf\n",
-    "from keras_core import layers\n",
-    "from keras_core import optimizers\n",
-    "from tensorflow_probability import distributions as tfd"
+    "import keras\n",
+    "from keras import callbacks, layers, ops\n",
+    "from tensorflow_probability.substrates.jax import distributions as tfd"
    ]
   },
   {
@@ -161,7 +165,7 @@
    "source": [
     "N_HIDDEN = 128\n",
     "\n",
-    "model = keras_core.Sequential(\n",
+    "model = keras.Sequential(\n",
     "    [\n",
     "        layers.Dense(N_HIDDEN, activation=\"relu\"),\n",
     "        layers.Dense(N_HIDDEN, activation=\"relu\"),\n",
@@ -308,7 +312,7 @@
    "source": [
     "\n",
     "def elu_plus_one_plus_epsilon(x):\n",
-    "    return keras_core.activations.elu(x) + 1 + keras_core.backend.epsilon()\n"
+    "    return keras.activations.elu(x) + 1 + keras.backend.epsilon()\n"
    ]
   },
   {
@@ -393,7 +397,7 @@
     "OUTPUT_DIMS = 1\n",
     "N_MIXES = 20\n",
     "\n",
-    "mdn_network = keras_core.Sequential(\n",
+    "mdn_network = keras.Sequential(\n",
     "    [\n",
     "        layers.Dense(N_HIDDEN, activation=\"relu\"),\n",
     "        layers.Dense(N_HIDDEN, activation=\"relu\"),\n",
@@ -420,40 +424,25 @@
    },
    "outputs": [],
    "source": [
-    "\n",
     "def get_mixture_loss_func(output_dim, num_mixes):\n",
     "    def mdn_loss_func(y_true, y_pred):\n",
     "        # Reshape inputs in case this is used in a TimeDistributed layer\n",
-    "        y_pred = tf.reshape(\n",
-    "            y_pred,\n",
-    "            [-1, (2 * num_mixes * output_dim) + num_mixes],\n",
-    "            name=\"reshape_ypreds\",\n",
-    "        )\n",
-    "        y_true = tf.reshape(y_true, [-1, output_dim], name=\"reshape_ytrue\")\n",
+    "        y_pred = ops.reshape(y_pred, [-1, (2 * num_mixes * output_dim) + num_mixes])\n",
+    "        y_true = ops.reshape(y_true, [-1, output_dim])\n",
     "        # Split the inputs into parameters\n",
-    "        out_mu, out_sigma, out_pi = tf.split(\n",
-    "            y_pred,\n",
-    "            num_or_size_splits=[\n",
-    "                num_mixes * output_dim,\n",
-    "                num_mixes * output_dim,\n",
-    "                num_mixes,\n",
-    "            ],\n",
-    "            axis=-1,\n",
-    "            name=\"mdn_coef_split\",\n",
-    "        )\n",
+    "        out_mu, out_sigma, out_pi = ops.split(y_pred, [num_mixes * output_dim, 2 * num_mixes * output_dim], axis=-1)\n",
     "        # Construct the mixture models\n",
     "        cat = tfd.Categorical(logits=out_pi)\n",
-    "        component_splits = [output_dim] * num_mixes\n",
-    "        mus = tf.split(out_mu, num_or_size_splits=component_splits, axis=1)\n",
-    "        sigs = tf.split(out_sigma, num_or_size_splits=component_splits, axis=1)\n",
+    "        mus = ops.split(out_mu, num_mixes, axis=1)\n",
+    "        sigs = ops.split(out_sigma, num_mixes, axis=1)\n",
     "        coll = [\n",
     "            tfd.MultivariateNormalDiag(loc=loc, scale_diag=scale)\n",
     "            for loc, scale in zip(mus, sigs)\n",
     "        ]\n",
     "        mixture = tfd.Mixture(cat=cat, components=coll)\n",
     "        loss = mixture.log_prob(y_true)\n",
-    "        loss = tf.negative(loss)\n",
-    "        loss = tf.reduce_mean(loss)\n",
+    "        loss = ops.negative(loss)\n",
+    "        loss = ops.mean(loss)\n",
     "        return loss\n",
     "\n",
     "    return mdn_loss_func\n",
@@ -560,7 +549,7 @@
     "        accumulate += dist[i]\n",
     "        if accumulate >= r:\n",
     "            return i\n",
-    "    tf.logging.info(\"Error sampling categorical model.\")\n",
+    "    print(\"Error sampling categorical model.\")\n",
     "    return -1\n",
     "\n",
     "\n",
diff --git a/examples/keras_recipes/md/approximating_non_function_mappings.md b/examples/keras_recipes/md/approximating_non_function_mappings.md