Merge pull request #98 from NVIDIA/develop

philipp-fischer · web-flow · commit 77b3e7650f6c · 2025-03-17T13:01:57.000+01:00
Fix optional AV imports
diff --git a/src/megatron/energon/av/__init__.py b/src/megatron/energon/av/__init__.py
@@ -5,12 +5,10 @@
     AVData,
     AVDecoder,
 )
-from megatron.energon.av.fastseek import Fastseek
 from megatron.energon.av.video_data import VideoData
 
 __all__ = [
     "AVData",
     "AVDecoder",
-    "Fastseek",
     "VideoData",
 ]
diff --git a/src/megatron/energon/av/decode_av_frames.py b/src/megatron/energon/av/decode_av_frames.py
@@ -6,20 +6,22 @@
 from fractions import Fraction
 from typing import Literal, Optional, Sequence, Union, overload
 
+import numpy as np
+import torch
+
 # Try importing optional dependencies
 try:
     import av
     import filetype
-    import numpy as np
     import soundfile as sf
-    import torch
+
+    from megatron.energon.av.fastseek import Fastseek
 
     AV_DECODE_AVAILABLE = True
 except ImportError as e:
     AV_DECODE_AVAILABLE = False
     MISSING_DEPENDENCY = str(e)
 
-from megatron.energon.av.fastseek import Fastseek
 from megatron.energon.av.video_data import VideoData
 
 
@@ -100,7 +102,7 @@ def get_frame_batch(
         self,
         frame_indices: Sequence[int],
         out_frame_size: Optional[tuple[int, int]] = None,
-        seeker: Optional[Fastseek] = None,
+        seeker: Optional["Fastseek"] = None,
     ) -> tuple[torch.Tensor, dict]:
         """Gets a batch of frames at the given indices from a video file.
 
@@ -252,7 +254,7 @@ def decode_video_frames(
             The method uses the Fastseek class to optimize frame seeking, which determines
             whether to use frame numbers or timestamps based on the container format.
         """
-        seeker: Fastseek = Fastseek(self.stream)
+        seeker: "Fastseek" = Fastseek(self.stream)
         self.stream.seek(0)
 
         # --- First, decode video frames ---
@@ -512,6 +514,13 @@ def __init__(
         video_out_frame_size: tuple[int, int],
         video_decode: Literal["torch", "AVData"] = "torch",
     ) -> None:
+        if not AV_DECODE_AVAILABLE:
+            raise ImportError(
+                f"AV decoding is not available. Please install the required dependencies with:\n"
+                f"pip install megatron-energon[av_decode]\n"
+                f"Missing dependency: {MISSING_DEPENDENCY}"
+            )
+
         self.audio_clip_duration = audio_clip_duration
         self.audio_num_clips = audio_num_clips
         self.video_decode_audio = video_decode_audio

Original file line number	Diff line number	Diff line change
`@@ -5,12 +5,10 @@`
`5`	`5`	`AVData,`
`6`	`6`	`AVDecoder,`
`7`	`7`	`)`
`8`		`-from megatron.energon.av.fastseek import Fastseek`
`9`	`8`	`from megatron.energon.av.video_data import VideoData`
`10`	`9`
`11`	`10`	`__all__ = [`
`12`	`11`	`"AVData",`
`13`	`12`	`"AVDecoder",`
`14`		`- "Fastseek",`
`15`	`13`	`"VideoData",`
`16`	`14`	`]`