|
6 | 6 | from fractions import Fraction
|
7 | 7 | from typing import Literal, Optional, Sequence, Union, overload
|
8 | 8 |
|
| 9 | +import numpy as np |
| 10 | +import torch |
| 11 | + |
9 | 12 | # Try importing optional dependencies
|
10 | 13 | try:
|
11 | 14 | import av
|
12 | 15 | import filetype
|
13 |
| - import numpy as np |
14 | 16 | import soundfile as sf
|
15 |
| - import torch |
| 17 | + |
| 18 | + from megatron.energon.av.fastseek import Fastseek |
16 | 19 |
|
17 | 20 | AV_DECODE_AVAILABLE = True
|
18 | 21 | except ImportError as e:
|
19 | 22 | AV_DECODE_AVAILABLE = False
|
20 | 23 | MISSING_DEPENDENCY = str(e)
|
21 | 24 |
|
22 |
| -from megatron.energon.av.fastseek import Fastseek |
23 | 25 | from megatron.energon.av.video_data import VideoData
|
24 | 26 |
|
25 | 27 |
|
@@ -100,7 +102,7 @@ def get_frame_batch(
|
100 | 102 | self,
|
101 | 103 | frame_indices: Sequence[int],
|
102 | 104 | out_frame_size: Optional[tuple[int, int]] = None,
|
103 |
| - seeker: Optional[Fastseek] = None, |
| 105 | + seeker: Optional["Fastseek"] = None, |
104 | 106 | ) -> tuple[torch.Tensor, dict]:
|
105 | 107 | """Gets a batch of frames at the given indices from a video file.
|
106 | 108 |
|
@@ -252,7 +254,7 @@ def decode_video_frames(
|
252 | 254 | The method uses the Fastseek class to optimize frame seeking, which determines
|
253 | 255 | whether to use frame numbers or timestamps based on the container format.
|
254 | 256 | """
|
255 |
| - seeker: Fastseek = Fastseek(self.stream) |
| 257 | + seeker: "Fastseek" = Fastseek(self.stream) |
256 | 258 | self.stream.seek(0)
|
257 | 259 |
|
258 | 260 | # --- First, decode video frames ---
|
@@ -512,6 +514,13 @@ def __init__(
|
512 | 514 | video_out_frame_size: tuple[int, int],
|
513 | 515 | video_decode: Literal["torch", "AVData"] = "torch",
|
514 | 516 | ) -> None:
|
| 517 | + if not AV_DECODE_AVAILABLE: |
| 518 | + raise ImportError( |
| 519 | + f"AV decoding is not available. Please install the required dependencies with:\n" |
| 520 | + f"pip install megatron-energon[av_decode]\n" |
| 521 | + f"Missing dependency: {MISSING_DEPENDENCY}" |
| 522 | + ) |
| 523 | + |
515 | 524 | self.audio_clip_duration = audio_clip_duration
|
516 | 525 | self.audio_num_clips = audio_num_clips
|
517 | 526 | self.video_decode_audio = video_decode_audio
|
|
0 commit comments