Replace Unicode-escaped characters (\u003e, \u003c) with their literal forms (>, <) in ipynb files

AleksMat · The precondition Authors · commit a1a971e50c5f · 2026-01-09T09:29:02.000-08:00
PiperOrigin-RevId: 854213767
diff --git a/precondition/datamix_gemma/Pretokenization_for_Dolly,_MetaMath,_and_CodeAlpaca,_OpenWebMath.ipynb b/precondition/datamix_gemma/Pretokenization_for_Dolly,_MetaMath,_and_CodeAlpaca,_OpenWebMath.ipynb
@@ -308,15 +308,15 @@
         "    self._spm_processor = spm_processor\n",
         "\n",
         "  @property\n",
-        "  def pad_id(self) -\u003e int:\n",
+        "  def pad_id(self) -> int:\n",
         "    \"\"\"Fast access to the pad id.\"\"\"\n",
         "    return self._spm_processor.pad_id()\n",
         "\n",
         "  def tokenize(self,\n",
         "               example: str | bytes,\n",
         "               prefix: str = '',\n",
         "               suffix: str = '',\n",
-        "               add_eos: bool = True) -\u003e jax.Array:\n",
+        "               add_eos: bool = True) -> jax.Array:\n",
         "    \"\"\"\n",
         "    Tokenization function.\n",
         "\n",
@@ -340,7 +340,7 @@
         "                     str_tensor: tf.Tensor,\n",
         "                     prefix: str = '',\n",
         "                     suffix: str = '',\n",
-        "                     add_eos: bool = True) -\u003e tf.Tensor:\n",
+        "                     add_eos: bool = True) -> tf.Tensor:\n",
         "    \"\"\"Tensforflow operator for the tokenize function.\"\"\"\n",
         "    encoded = tf.numpy_function(\n",
         "        self.tokenize,\n",
@@ -349,7 +349,7 @@
         "    encoded.set_shape([None])\n",
         "    return encoded\n",
         "\n",
-        "  def to_string(self, tokens: jax.Array) -\u003e str:\n",
+        "  def to_string(self, tokens: jax.Array) -> str:\n",
         "    \"\"\"Convert an array of tokens to a string.\"\"\"\n",
         "    return self._spm_processor.EncodeIds(tokens.tolist())"
       ]
@@ -396,7 +396,7 @@
         "\n",
         "  def _pad_up_to_max_len(\n",
         "      self, input_tensor: tf.Tensor, pad_value: int | bool\n",
-        "  ) -\u003e tf.Tensor:\n",
+        "  ) -> tf.Tensor:\n",
         "    \"\"\"Pads the given tensor up to max_seq_len.\"\"\"\n",
         "    seq_len = tf.shape(input_tensor)[0]\n",
         "    to_pad = tf.maximum(0, self._max_seq_len - seq_len)\n",
@@ -518,7 +518,7 @@
         "    )\n",
         "    ds = ds.map(lambda x, y: self._to_training_input(x, y),\n",
         "                num_parallel_calls=tf.data.AUTOTUNE)\n",
-        "    ds = ds.filter(lambda x: tf.shape(x.input_tokens)[0] \u003c= self._max_seq_len)\n",
+        "    ds = ds.filter(lambda x: tf.shape(x.input_tokens)[0] <= self._max_seq_len)\n",
         "    ds = ds.shuffle(buffer_size=self.BUFFER_SIZE_SHUFFLE)\n",
         "    return ds"
       ]
@@ -656,7 +656,7 @@
         "        )\n",
         "    )\n",
         "    ds = ds.map(lambda x, y, z: self._to_training_input(x, y, z))\n",
-        "    ds = ds.filter(lambda x: tf.shape(x.input_tokens)[0] \u003c= self._max_seq_len)\n",
+        "    ds = ds.filter(lambda x: tf.shape(x.input_tokens)[0] <= self._max_seq_len)\n",
         "    ds = ds.shuffle(buffer_size=self.BUFFER_SIZE_SHUFFLE)\n",
         "    return ds"
       ]
@@ -802,7 +802,7 @@
         "        )\n",
         "    )\n",
         "    ds = ds.map(lambda x, y, z: self._to_training_input(x, y, z))\n",
-        "    ds = ds.filter(lambda x: tf.shape(x.input_tokens)[0] \u003c= self._max_seq_len)\n",
+        "    ds = ds.filter(lambda x: tf.shape(x.input_tokens)[0] <= self._max_seq_len)\n",
         "    ds = ds.shuffle(buffer_size=self.BUFFER_SIZE_SHUFFLE)\n",
         "\n",
         "    return ds"
@@ -949,7 +949,7 @@
         "        )\n",
         "    )\n",
         "    ds = ds.map(lambda x, y, z: self._to_training_input(x, y, z))\n",
-        "    ds = ds.filter(lambda x: tf.shape(x.input_tokens)[0] \u003c= self._max_seq_len)\n",
+        "    ds = ds.filter(lambda x: tf.shape(x.input_tokens)[0] <= self._max_seq_len)\n",
         "    ds = ds.shuffle(buffer_size=self.BUFFER_SIZE_SHUFFLE)\n",
         "\n",
         "    return ds"