keras-team · hertschuh · Feb 26, 2025 · Jan 28, 2025 · Jan 28, 2025 · Jan 29, 2025
diff --git a/examples/structured_data/ipynb/movielens_recommendations_transformers.ipynb b/examples/structured_data/ipynb/movielens_recommendations_transformers.ipynb
@@ -10,7 +10,7 @@
     "\n",
     "**Author:** [Khalid Salama](https://www.linkedin.com/in/khalid-salama-24403144/)<br>\n",
     "**Date created:** 2020/12/30<br>\n",
-    "**Last modified:** 2025/01/03<br>\n",
+    "**Last modified:** 2025/01/27<br>\n",
     "**Description:** Rating rate prediction using the Behavior Sequence Transformer (BST) model on the Movielens."
    ]
   },
@@ -82,17 +82,16 @@
    "source": [
     "import os\n",
     "\n",
-    "os.environ[\"KERAS_BACKEND\"] = \"tensorflow\"\n",
+    "os.environ[\"KERAS_BACKEND\"] = \"jax\"  # or torch, or tensorflow\n",
     "\n",
     "import math\n",
     "from zipfile import ZipFile\n",
     "from urllib.request import urlretrieve\n",
-    "\n",
-    "import keras\n",
     "import numpy as np\n",
     "import pandas as pd\n",
-    "import tensorflow as tf\n",
-    "from keras import layers\n",
+    "\n",
+    "import keras\n",
+    "from keras import layers, ops\n",
     "from keras.layers import StringLookup"
    ]
   },
@@ -408,7 +407,8 @@
     "\n",
     "USER_FEATURES = [\"sex\", \"age_group\", \"occupation\"]\n",
     "\n",
-    "MOVIE_FEATURES = [\"genres\"]"
+    "MOVIE_FEATURES = [\"genres\"]\n",
+    ""
    ]
   },
   {
@@ -417,7 +417,30 @@
     "colab_type": "text"
    },
    "source": [
-    "## Create `tf.data.Dataset` for training and evaluation"
+    "## Encode input features\n",
+    "\n",
+    "The `encode_input_features` function works as follows:\n",
+    "\n",
+    "1. Each categorical user feature is encoded using `layers.Embedding`, with embedding\n",
+    "dimension equal to the square root of the vocabulary size of the feature.\n",
+    "The embeddings of these features are concatenated to form a single input tensor.\n",
+    "\n",
+    "2. Each movie in the movie sequence and the target movie is encoded using `layers.Embedding`,\n",
+    "where the dimension size is the square root of the number of movies.\n",
+    "\n",
+    "3. A multi-hot genres vector for each movie is concatenated with its embedding vector,\n",
+    "and processed using a non-linear `layers.Dense` to output a vector of the same movie\n",
+    "embedding dimensions.\n",
+    "\n",
+    "4. A positional embedding is added to each movie embedding in the sequence, and then\n",
+    "multiplied by its rating from the ratings sequence.\n",
+    "\n",
+    "5. The target movie embedding is concatenated to the sequence movie embeddings, producing\n",
+    "a tensor with the shape of `[batch size, sequence length, embedding size]`, as expected\n",
+    "by the attention layer for the transformer architecture.\n",
+    "\n",
+    "6. The function returns a tuple of two elements: `encoded_transformer_features` and\n",
+    "`encoded_other_features`."
    ]
   },
   {
@@ -428,25 +451,60 @@
    },
    "outputs": [],
    "source": [
+    "# Required for tf.data.Dataset\n",
+    "import tensorflow as tf\n",
+    "\n",
     "\n",
     "def get_dataset_from_csv(csv_file_path, batch_size, shuffle=True):\n",
+    "\n",
     "    def process(features):\n",
     "        movie_ids_string = features[\"sequence_movie_ids\"]\n",
     "        sequence_movie_ids = tf.strings.split(movie_ids_string, \",\").to_tensor()\n",
-    "\n",
     "        # The last movie id in the sequence is the target movie.\n",
     "        features[\"target_movie_id\"] = sequence_movie_ids[:, -1]\n",
     "        features[\"sequence_movie_ids\"] = sequence_movie_ids[:, :-1]\n",
-    "\n",
+    "        # Sequence ratings\n",
     "        ratings_string = features[\"sequence_ratings\"]\n",
     "        sequence_ratings = tf.strings.to_number(\n",
     "            tf.strings.split(ratings_string, \",\"), tf.dtypes.float32\n",
     "        ).to_tensor()\n",
-    "\n",
     "        # The last rating in the sequence is the target for the model to predict.\n",
     "        target = sequence_ratings[:, -1]\n",
     "        features[\"sequence_ratings\"] = sequence_ratings[:, :-1]\n",
     "\n",
+    "        def encoding_helper(feature_name):\n",
+    "\n",
+    "            # These are target_movie_id and sequence_movie_ids and they have the same\n",
+    "            # vocabulary as movie_id.\n",
+    "            if feature_name not in CATEGORICAL_FEATURES_WITH_VOCABULARY:\n",
+    "                vocabulary = CATEGORICAL_FEATURES_WITH_VOCABULARY[\"movie_id\"]\n",
+    "                index_lookup = StringLookup(\n",
+    "                    vocabulary=vocabulary, mask_token=None, num_oov_indices=0\n",
+    "                )\n",
+    "                # Convert the string input values into integer indices.\n",
+    "                value_index = index_lookup(features[feature_name])\n",
+    "                features[feature_name] = value_index\n",
+    "            else:\n",
+    "                # movie_id is not part of the features, hence not processed. It was mainly required\n",
+    "                # for its vocabulary above.\n",
+    "                if feature_name == \"movie_id\":\n",
+    "                    pass\n",
+    "                else:\n",
+    "                    vocabulary = CATEGORICAL_FEATURES_WITH_VOCABULARY[feature_name]\n",
+    "                    index_lookup = StringLookup(\n",
+    "                        vocabulary=vocabulary, mask_token=None, num_oov_indices=0\n",
+    "                    )\n",
+    "                    # Convert the string input values into integer indices.\n",
+    "                    value_index = index_lookup(features[feature_name])\n",
+    "                    features[feature_name] = value_index\n",
+    "\n",
+    "        # Encode the user features\n",
+    "        for feature_name in CATEGORICAL_FEATURES_WITH_VOCABULARY:\n",
+    "            encoding_helper(feature_name)\n",
+    "        # Encode target_movie_id; it stays an input feature (the target is the rating).\n",
+    "        encoding_helper(\"target_movie_id\")\n",
+    "        # Encoding sequence movie_ids.\n",
+    "        encoding_helper(\"sequence_movie_ids\")\n",
     "        return dict(features), target\n",
     "\n",
     "    dataset = tf.data.experimental.make_csv_dataset(\n",
@@ -458,94 +516,14 @@
     "        field_delim=\"|\",\n",
     "        shuffle=shuffle,\n",
     "    ).map(process)\n",
-    "\n",
     "    return dataset\n",
-    ""
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {
-    "colab_type": "text"
-   },
-   "source": [
-    "## Create model inputs"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 0,
-   "metadata": {
-    "colab_type": "code"
-   },
-   "outputs": [],
-   "source": [
-    "\n",
-    "def create_model_inputs():\n",
-    "    return {\n",
-    "        \"user_id\": keras.Input(name=\"user_id\", shape=(1,), dtype=\"string\"),\n",
-    "        \"sequence_movie_ids\": keras.Input(\n",
-    "            name=\"sequence_movie_ids\", shape=(sequence_length - 1,), dtype=\"string\"\n",
-    "        ),\n",
-    "        \"target_movie_id\": keras.Input(\n",
-    "            name=\"target_movie_id\", shape=(1,), dtype=\"string\"\n",
-    "        ),\n",
-    "        \"sequence_ratings\": keras.Input(\n",
-    "            name=\"sequence_ratings\", shape=(sequence_length - 1,), dtype=tf.float32\n",
-    "        ),\n",
-    "        \"sex\": keras.Input(name=\"sex\", shape=(1,), dtype=\"string\"),\n",
-    "        \"age_group\": keras.Input(name=\"age_group\", shape=(1,), dtype=\"string\"),\n",
-    "        \"occupation\": keras.Input(name=\"occupation\", shape=(1,), dtype=\"string\"),\n",
-    "    }\n",
-    ""
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {
-    "colab_type": "text"
-   },
-   "source": [
-    "## Encode input features\n",
-    "\n",
-    "The `encode_input_features` method works as follows:\n",
-    "\n",
-    "1. Each categorical user feature is encoded using `layers.Embedding`, with embedding\n",
-    "dimension equals to the square root of the vocabulary size of the feature.\n",
-    "The embeddings of these features are concatenated to form a single input tensor.\n",
     "\n",
-    "2. Each movie in the movie sequence and the target movie is encoded `layers.Embedding`,\n",
-    "where the dimension size is the square root of the number of movies.\n",
-    "\n",
-    "3. A multi-hot genres vector for each movie is concatenated with its embedding vector,\n",
-    "and processed using a non-linear `layers.Dense` to output a vector of the same movie\n",
-    "embedding dimensions.\n",
-    "\n",
-    "4. A positional embedding is added to each movie embedding in the sequence, and then\n",
-    "multiplied by its rating from the ratings sequence.\n",
-    "\n",
-    "5. The target movie embedding is concatenated to the sequence movie embeddings, producing\n",
-    "a tensor with the shape of `[batch size, sequence length, embedding size]`, as expected\n",
-    "by the attention layer for the transformer architecture.\n",
-    "\n",
-    "6. The method returns a tuple of two elements:  `encoded_transformer_features` and\n",
-    "`encoded_other_features`."
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 0,
-   "metadata": {
-    "colab_type": "code"
-   },
-   "outputs": [],
-   "source": [
     "\n",
     "def encode_input_features(\n",
     "    inputs,\n",
-    "    include_user_id=True,\n",
-    "    include_user_features=True,\n",
-    "    include_movie_features=True,\n",
+    "    include_user_id,\n",
+    "    include_user_features,\n",
+    "    include_movie_features,\n",
     "):\n",
     "    encoded_transformer_features = []\n",
     "    encoded_other_features = []\n",
@@ -558,11 +536,7 @@
     "\n",
     "    ## Encode user features\n",
     "    for feature_name in other_feature_names:\n",
-    "        # Convert the string input values into integer indices.\n",
     "        vocabulary = CATEGORICAL_FEATURES_WITH_VOCABULARY[feature_name]\n",
-    "        idx = StringLookup(vocabulary=vocabulary, mask_token=None, num_oov_indices=0)(\n",
-    "            inputs[feature_name]\n",
-    "        )\n",
     "        # Compute embedding dimensions\n",
     "        embedding_dims = int(math.sqrt(len(vocabulary)))\n",
     "        # Create an embedding layer with the specified dimensions.\n",
@@ -572,7 +546,7 @@
     "            name=f\"{feature_name}_embedding\",\n",
     "        )\n",
     "        # Convert the index values to embedding representations.\n",
-    "        encoded_other_features.append(embedding_encoder(idx))\n",
+    "        encoded_other_features.append(embedding_encoder(inputs[feature_name]))\n",
     "\n",
     "    ## Create a single embedding vector for the user features\n",
     "    if len(encoded_other_features) > 1:\n",
@@ -585,13 +559,6 @@
     "    ## Create a movie embedding encoder\n",
     "    movie_vocabulary = CATEGORICAL_FEATURES_WITH_VOCABULARY[\"movie_id\"]\n",
     "    movie_embedding_dims = int(math.sqrt(len(movie_vocabulary)))\n",
-    "    # Create a lookup to convert string values to integer indices.\n",
-    "    movie_index_lookup = StringLookup(\n",
-    "        vocabulary=movie_vocabulary,\n",
-    "        mask_token=None,\n",
-    "        num_oov_indices=0,\n",
-    "        name=\"movie_index_lookup\",\n",
-    "    )\n",
     "    # Create an embedding layer with the specified dimensions.\n",
     "    movie_embedding_encoder = layers.Embedding(\n",
     "        input_dim=len(movie_vocabulary),\n",
@@ -617,11 +584,10 @@
     "    ## Define a function to encode a given movie id.\n",
     "    def encode_movie(movie_id):\n",
     "        # Convert the string input values into integer indices.\n",
-    "        movie_idx = movie_index_lookup(movie_id)\n",
-    "        movie_embedding = movie_embedding_encoder(movie_idx)\n",
+    "        movie_embedding = movie_embedding_encoder(movie_id)\n",
     "        encoded_movie = movie_embedding\n",
     "        if include_movie_features:\n",
-    "            movie_genres_vector = movie_genres_lookup(movie_idx)\n",
+    "            movie_genres_vector = movie_genres_lookup(movie_id)\n",
     "            encoded_movie = movie_embedding_processor(\n",
     "                layers.concatenate([movie_embedding, movie_genres_vector])\n",
     "            )\n",
@@ -640,11 +606,11 @@
     "        output_dim=movie_embedding_dims,\n",
     "        name=\"position_embedding\",\n",
     "    )\n",
-    "    positions = tf.range(start=0, limit=sequence_length - 1, delta=1)\n",
+    "    positions = ops.arange(start=0, stop=sequence_length - 1, step=1)\n",
     "    encodded_positions = position_embedding_encoder(positions)\n",
     "    # Retrieve sequence ratings to incorporate them into the encoding of the movie.\n",
     "    sequence_ratings = inputs[\"sequence_ratings\"]\n",
-    "    sequence_ratings = keras.ops.expand_dims(sequence_ratings, -1)\n",
+    "    sequence_ratings = ops.expand_dims(sequence_ratings, -1)\n",
     "    # Add the positional encoding to the movie encodings and multiply them by rating.\n",
     "    encoded_sequence_movies_with_poistion_and_rating = layers.Multiply()(\n",
     "        [(encoded_sequence_movies + encodded_positions), sequence_ratings]\n",
@@ -653,18 +619,53 @@
     "    # Construct the transformer inputs.\n",
     "    for i in range(sequence_length - 1):\n",
     "        feature = encoded_sequence_movies_with_poistion_and_rating[:, i, ...]\n",
-    "        feature = keras.ops.expand_dims(feature, 1)\n",
+    "        feature = ops.expand_dims(feature, 1)\n",
     "        encoded_transformer_features.append(feature)\n",
     "    encoded_transformer_features.append(encoded_target_movie)\n",
-    "\n",
     "    encoded_transformer_features = layers.concatenate(\n",
     "        encoded_transformer_features, axis=1\n",
     "    )\n",
-    "\n",
     "    return encoded_transformer_features, encoded_other_features\n",
     ""
    ]
   },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text"
+   },
+   "source": [
+    "## Create model inputs"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab_type": "code"
+   },
+   "outputs": [],
+   "source": [
+    "\n",
+    "def create_model_inputs():\n",
+    "    return {\n",
+    "        \"user_id\": keras.Input(name=\"user_id\", shape=(1,), dtype=\"int32\"),\n",
+    "        \"sequence_movie_ids\": keras.Input(\n",
+    "            name=\"sequence_movie_ids\", shape=(sequence_length - 1,), dtype=\"int32\"\n",
+    "        ),\n",
+    "        \"target_movie_id\": keras.Input(\n",
+    "            name=\"target_movie_id\", shape=(1,), dtype=\"int32\"\n",
+    "        ),\n",
+    "        \"sequence_ratings\": keras.Input(\n",
+    "            name=\"sequence_ratings\", shape=(sequence_length - 1,), dtype=\"float32\"\n",
+    "        ),\n",
+    "        \"sex\": keras.Input(name=\"sex\", shape=(1,), dtype=\"int32\"),\n",
+    "        \"age_group\": keras.Input(name=\"age_group\", shape=(1,), dtype=\"int32\"),\n",
+    "        \"occupation\": keras.Input(name=\"occupation\", shape=(1,), dtype=\"int32\"),\n",
+    "    }\n",
+    ""
+   ]
+  },
   {
    "cell_type": "markdown",
    "metadata": {
@@ -692,11 +693,11 @@
     "\n",
     "\n",
     "def create_model():\n",
+    "\n",
     "    inputs = create_model_inputs()\n",
     "    transformer_features, other_features = encode_input_features(\n",
     "        inputs, include_user_id, include_user_features, include_movie_features\n",
     "    )\n",
-    "\n",
     "    # Create a multi-headed attention layer.\n",
     "    attention_output = layers.MultiHeadAttention(\n",
     "        num_heads=num_heads, key_dim=transformer_features.shape[2], dropout=dropout_rate\n",
@@ -713,7 +714,7 @@
     "    transformer_features = layers.LayerNormalization()(transformer_features)\n",
     "    features = layers.Flatten()(transformer_features)\n",
     "\n",
-    "    # Included the other features.\n",
+    "    # Include the other_features.\n",
     "    if other_features is not None:\n",
     "        features = layers.concatenate(\n",
     "            [features, layers.Reshape([other_features.shape[-1]])(other_features)]\n",
@@ -725,7 +726,6 @@
     "        features = layers.BatchNormalization()(features)\n",
     "        features = layers.LeakyReLU()(features)\n",
     "        features = layers.Dropout(dropout_rate)(features)\n",
-    "\n",
     "    outputs = layers.Dense(units=1)(features)\n",
     "    model = keras.Model(inputs=inputs, outputs=outputs)\n",
     "    return model\n",
@@ -759,6 +759,7 @@
     ")\n",
     "\n",
     "# Read the training data.\n",
+    "\n",
     "train_dataset = get_dataset_from_csv(\"train_data.csv\", batch_size=265, shuffle=True)\n",
     "\n",
     "# Fit the model with the training data.\n",