Fix miscellaneous typos across several files (#77)

didier-durand · web-flow · commit 15c7c00facd3 · 2025-08-19T12:47:04.000-07:00
diff --git a/src/datasets/utils/video/transforms.py b/src/datasets/utils/video/transforms.py
@@ -831,7 +831,7 @@ class RandomResize(object):
     Args:
     interpolation (str): Can be one of 'nearest', 'bilinear'
     defaults to nearest
-    size (tuple): (widht, height)
+    size (tuple): (width, height)
     """
 
     def __init__(self, ratio=(3.0 / 4.0, 4.0 / 3.0), interpolation="nearest"):
@@ -860,7 +860,7 @@ class Resize(object):
     Args:
     interpolation (str): Can be one of 'nearest', 'bilinear'
     defaults to nearest
-    size (tuple): (widht, height)
+    size (tuple): (width, height)
     """
 
     def __init__(self, size, interpolation="nearest"):
@@ -1138,7 +1138,7 @@ class Normalize(object):
     will normalize each channel of the input ``torch.*Tensor`` i.e.
     ``input[channel] = (input[channel] - mean[channel]) / std[channel]``
     .. note::
-        This transform acts out of place, i.e., it does not mutates the input tensor.
+        This transform acts out of place, i.e., it does not mutate the input tensor.
     Args:
         mean (sequence): Sequence of means for each channel.
         std (sequence): Sequence of standard deviations for each channel.
diff --git a/src/datasets/utils/weighted_sampler.py b/src/datasets/utils/weighted_sampler.py
@@ -153,15 +153,15 @@ def __next__(self) -> int:
             # then rank 0 will ONLY sample from [0, 2, 4, 6, 8], and rank 1 from [1, 3, 5, 7, 9].
             # In each iteration we first produce `in_rank_sample` which is the sample index in the rank,
             # based on the size of the subset which that rank can sample from.
-            # Then we computer `sample_idx_in_dataset` for the indx of the sample in the whole dataset.
+            # Then we compute `sample_idx_in_dataset` for the index of the sample in the whole dataset.
             # For the above example if we are sampling for rank 1, we have `self.rng.integers(5)`.
             # Let's assume the result is 2, then `in_rank_sample` is 2 (number "5" in the subset),
             # so the sample index in the whole dataset is
             # `in_rank_sample * self.num_replicas + self.rank`: 2 * 2 + 1 = 5.
 
             selected_dataset_size = self.dataset_sizes[selected_dataset_idx]
             # 1) Getting sample index in the rank.
-            # NOTE: this may effectively drops the last batch,
+            # NOTE: this may effectively drop the last batch,
             # but given the sample sizes that we use this sampler with, it should not be an issue.
             num_samples_in_rank = selected_dataset_size // self.num_replicas
             in_rank_sample = self.rng.integers(num_samples_in_rank)
@@ -260,7 +260,7 @@ def __init__(
             self.individual_dataset_sampler = []
             for ids, ds in enumerate(self.dataset_sizes):
 
-                # NOTE: this may effectively drops the last batch,
+                # NOTE: this may effectively drop the last batch,
                 # but given the sample sizes that we use this sampler with, it should not be an issue.
                 num_samples_in_rank = ds // self.num_replicas
                 self.individual_dataset_sampler.append(self._new_sampler(num_samples_in_rank))
diff --git a/tests/datasets/test_vjepa_transforms.py b/tests/datasets/test_vjepa_transforms.py
@@ -19,7 +19,7 @@ def setUp(self):
         self.g = torch.Generator()
         self.g.manual_seed(42)
 
-    def test_approximation_equivalance(self):
+    def test_approximation_equivalence(self):
         T, H, W, C = 16, 224, 224, 3
         shape = (T, H, W, C)
         mean = torch.tensor([0.485, 0.456, 0.406])