Skip to content

Commit aff1441

Browse files
committed
Fixed minor bug in machine translation tokenization algorithm.
1 parent 4871642 commit aff1441

File tree

5 files changed

+5
-5
lines changed

5 files changed

+5
-5
lines changed

chapter_recurrent-modern/machine-translation-and-dataset.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -160,7 +160,7 @@ and `tgt[i]` is that in the target language (French here).
160160
def _tokenize(self, text, max_examples=None):
161161
src, tgt = [], []
162162
for i, line in enumerate(text.split('\n')):
163-
if max_examples and i > max_examples: break
163+
if max_examples and i >= max_examples: break
164164
parts = line.split('\t')
165165
if len(parts) == 2:
166166
# Skip empty tokens

d2l/jax.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1028,7 +1028,7 @@ def _tokenize(self, text, max_examples=None):
10281028
"""Defined in :numref:`sec_machine_translation`"""
10291029
src, tgt = [], []
10301030
for i, line in enumerate(text.split('\n')):
1031-
if max_examples and i > max_examples: break
1031+
if max_examples and i >= max_examples: break
10321032
parts = line.split('\t')
10331033
if len(parts) == 2:
10341034
# Skip empty tokens

d2l/mxnet.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -871,7 +871,7 @@ def _tokenize(self, text, max_examples=None):
871871
"""Defined in :numref:`sec_machine_translation`"""
872872
src, tgt = [], []
873873
for i, line in enumerate(text.split('\n')):
874-
if max_examples and i > max_examples: break
874+
if max_examples and i >= max_examples: break
875875
parts = line.split('\t')
876876
if len(parts) == 2:
877877
# Skip empty tokens

d2l/tensorflow.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -827,7 +827,7 @@ def _tokenize(self, text, max_examples=None):
827827
"""Defined in :numref:`sec_machine_translation`"""
828828
src, tgt = [], []
829829
for i, line in enumerate(text.split('\n')):
830-
if max_examples and i > max_examples: break
830+
if max_examples and i >= max_examples: break
831831
parts = line.split('\t')
832832
if len(parts) == 2:
833833
# Skip empty tokens

d2l/torch.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -861,7 +861,7 @@ def _tokenize(self, text, max_examples=None):
861861
"""Defined in :numref:`sec_machine_translation`"""
862862
src, tgt = [], []
863863
for i, line in enumerate(text.split('\n')):
864-
if max_examples and i > max_examples: break
864+
if max_examples and i >= max_examples: break
865865
parts = line.split('\t')
866866
if len(parts) == 2:
867867
# Skip empty tokens

0 commit comments

Comments (0)