@@ -153,15 +153,15 @@ def __next__(self) -> int:
153153 # then rank 0 will ONLY sample from [0, 2, 4, 6, 8], and rank 1 from [1, 3, 5, 7, 9].
154154 # In each iteration we first produce `in_rank_sample` which is the sample index in the rank,
155155 # based on the size of the subset which that rank can sample from.
156- # Then we computer `sample_idx_in_dataset` for the indx of the sample in the whole dataset.
156+ # Then we compute `sample_idx_in_dataset` for the index of the sample in the whole dataset.
157157 # For the above example if we are sampling for rank 1, we have `self.rng.integers(5)`.
158158 # Let's assume the result is 2, then `in_rank_sample` is 2 (number "5" in the subset),
159159 # so the sample index in the whole dataset is
160160 # `in_rank_sample * self.num_replicas + self.rank`: 2 * 2 + 1 = 5.
161161
162162 selected_dataset_size = self .dataset_sizes [selected_dataset_idx ]
163163 # 1) Getting sample index in the rank.
164- # NOTE: this may effectively drops the last batch,
164+ # NOTE: this may effectively drop the last batch,
165165 # but given the sample sizes that we use this sampler with, it should not be an issue.
166166 num_samples_in_rank = selected_dataset_size // self .num_replicas
167167 in_rank_sample = self .rng .integers (num_samples_in_rank )
@@ -260,7 +260,7 @@ def __init__(
260260 self .individual_dataset_sampler = []
261261 for ids , ds in enumerate (self .dataset_sizes ):
262262
263- # NOTE: this may effectively drops the last batch,
263+ # NOTE: this may effectively drop the last batch,
264264 # but given the sample sizes that we use this sampler with, it should not be an issue.
265265 num_samples_in_rank = ds // self .num_replicas
266266 self .individual_dataset_sampler .append (self ._new_sampler (num_samples_in_rank ))
0 commit comments