Skip to content

Commit a1a971e

Browse files
AleksMat and The precondition Authors
authored and committed
Replace unicode escaped characters in ipynb files
PiperOrigin-RevId: 854213767
1 parent 239a8cd commit a1a971e

File tree

1 file changed

+9
-9
lines changed

1 file changed

+9
-9
lines changed

precondition/datamix_gemma/Pretokenization_for_Dolly,_MetaMath,_and_CodeAlpaca,_OpenWebMath.ipynb

Lines changed: 9 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -308,15 +308,15 @@
308308
" self._spm_processor = spm_processor\n",
309309
"\n",
310310
" @property\n",
311-
" def pad_id(self) -\u003e int:\n",
311+
" def pad_id(self) -> int:\n",
312312
" \"\"\"Fast access to the pad id.\"\"\"\n",
313313
" return self._spm_processor.pad_id()\n",
314314
"\n",
315315
" def tokenize(self,\n",
316316
" example: str | bytes,\n",
317317
" prefix: str = '',\n",
318318
" suffix: str = '',\n",
319-
" add_eos: bool = True) -\u003e jax.Array:\n",
319+
" add_eos: bool = True) -> jax.Array:\n",
320320
" \"\"\"\n",
321321
" Tokenization function.\n",
322322
"\n",
@@ -340,7 +340,7 @@
340340
" str_tensor: tf.Tensor,\n",
341341
" prefix: str = '',\n",
342342
" suffix: str = '',\n",
343-
" add_eos: bool = True) -\u003e tf.Tensor:\n",
343+
" add_eos: bool = True) -> tf.Tensor:\n",
344344
" \"\"\"Tensforflow operator for the tokenize function.\"\"\"\n",
345345
" encoded = tf.numpy_function(\n",
346346
" self.tokenize,\n",
@@ -349,7 +349,7 @@
349349
" encoded.set_shape([None])\n",
350350
" return encoded\n",
351351
"\n",
352-
" def to_string(self, tokens: jax.Array) -\u003e str:\n",
352+
" def to_string(self, tokens: jax.Array) -> str:\n",
353353
" \"\"\"Convert an array of tokens to a string.\"\"\"\n",
354354
" return self._spm_processor.EncodeIds(tokens.tolist())"
355355
]
@@ -396,7 +396,7 @@
396396
"\n",
397397
" def _pad_up_to_max_len(\n",
398398
" self, input_tensor: tf.Tensor, pad_value: int | bool\n",
399-
" ) -\u003e tf.Tensor:\n",
399+
" ) -> tf.Tensor:\n",
400400
" \"\"\"Pads the given tensor up to max_seq_len.\"\"\"\n",
401401
" seq_len = tf.shape(input_tensor)[0]\n",
402402
" to_pad = tf.maximum(0, self._max_seq_len - seq_len)\n",
@@ -518,7 +518,7 @@
518518
" )\n",
519519
" ds = ds.map(lambda x, y: self._to_training_input(x, y),\n",
520520
" num_parallel_calls=tf.data.AUTOTUNE)\n",
521-
" ds = ds.filter(lambda x: tf.shape(x.input_tokens)[0] \u003c= self._max_seq_len)\n",
521+
" ds = ds.filter(lambda x: tf.shape(x.input_tokens)[0] <= self._max_seq_len)\n",
522522
" ds = ds.shuffle(buffer_size=self.BUFFER_SIZE_SHUFFLE)\n",
523523
" return ds"
524524
]
@@ -656,7 +656,7 @@
656656
" )\n",
657657
" )\n",
658658
" ds = ds.map(lambda x, y, z: self._to_training_input(x, y, z))\n",
659-
" ds = ds.filter(lambda x: tf.shape(x.input_tokens)[0] \u003c= self._max_seq_len)\n",
659+
" ds = ds.filter(lambda x: tf.shape(x.input_tokens)[0] <= self._max_seq_len)\n",
660660
" ds = ds.shuffle(buffer_size=self.BUFFER_SIZE_SHUFFLE)\n",
661661
" return ds"
662662
]
@@ -802,7 +802,7 @@
802802
" )\n",
803803
" )\n",
804804
" ds = ds.map(lambda x, y, z: self._to_training_input(x, y, z))\n",
805-
" ds = ds.filter(lambda x: tf.shape(x.input_tokens)[0] \u003c= self._max_seq_len)\n",
805+
" ds = ds.filter(lambda x: tf.shape(x.input_tokens)[0] <= self._max_seq_len)\n",
806806
" ds = ds.shuffle(buffer_size=self.BUFFER_SIZE_SHUFFLE)\n",
807807
"\n",
808808
" return ds"
@@ -949,7 +949,7 @@
949949
" )\n",
950950
" )\n",
951951
" ds = ds.map(lambda x, y, z: self._to_training_input(x, y, z))\n",
952-
" ds = ds.filter(lambda x: tf.shape(x.input_tokens)[0] \u003c= self._max_seq_len)\n",
952+
" ds = ds.filter(lambda x: tf.shape(x.input_tokens)[0] <= self._max_seq_len)\n",
953953
" ds = ds.shuffle(buffer_size=self.BUFFER_SIZE_SHUFFLE)\n",
954954
"\n",
955955
" return ds"

0 commit comments

Comments
 (0)