22# SPDX-License-Identifier: Apache-2.0
33
44from collections .abc import Iterable
5+ from urllib .parse import urlparse
56
7+ from aiperf .common .enums .dataset_enums import AudioFormat
8+ from aiperf .common .enums .media_enums import MediaType
69from aiperf .common .models import Media
7- from aiperf .common .types import MediaT
10+ from aiperf .common .types import MediaT , MediaTypeT
11+ from aiperf .dataset import utils
812from aiperf .dataset .loader .models import CustomDatasetT
913
1014
@@ -51,8 +55,8 @@ def _convert_to_media_objects(
5155
5256 Args:
5357 data: The custom dataset to construct media objects from.
54- media_class: The target media class (Text, Image, or Audio ).
55- field: The name of the field (e.g., 'text', 'image', 'audio').
58+ media_class: The target media class (Text, Image, Audio, or Video ).
59+ field: The name of the field (e.g., 'text', 'image', 'audio', 'video' ).
5660 name: The name of the media field.
5761
5862 Returns:
@@ -61,6 +65,9 @@ def _convert_to_media_objects(
6165 # Check singular field first
6266 value = getattr (data , field , None )
6367 if value is not None :
68+ # Handle media content (encode local files to base64)
69+ if field in [MediaType .IMAGE , MediaType .VIDEO , MediaType .AUDIO ]:
70+ value = self ._handle_media_content (value , media_type = MediaType (field ))
6471 return [media_class (name = name , contents = [value ])]
6572
6673 # Check plural field
@@ -72,4 +79,124 @@ def _convert_to_media_objects(
7279 if all (isinstance (v , media_class ) for v in values ):
7380 return values
7481
82+ # Handle media content (encode local files to base64)
83+ if field in [MediaType .IMAGE , MediaType .VIDEO , MediaType .AUDIO ]:
84+ values = [
85+ self ._handle_media_content (v , media_type = MediaType (field ))
86+ for v in values
87+ ]
88+
7589 return [media_class (name = name , contents = values )]
90+
91+ def _is_url (self , content : str ) -> bool :
92+ """Check if content is a valid URL with scheme and netloc.
93+
94+ Args:
95+ content: The content to check.
96+
97+ Returns:
98+ True if content is a URL, False otherwise.
99+
100+ Raises:
101+ ValueError: If URL has only scheme or only netloc (invalid).
102+ """
103+ url = urlparse (content )
104+
105+ # Valid URL with both scheme and netloc
106+ if url .scheme and url .netloc :
107+ return True
108+
109+ # Invalid URL - has one but not both
110+ if url .scheme or url .netloc :
111+ raise ValueError (f"Valid URL must have both a scheme and netloc: { content } " )
112+
113+ # Not a URL
114+ return False
115+
def _is_already_encoded(self, content: str, media_type: MediaTypeT) -> bool:
    """Tell whether content already carries its encoded representation.

    Image and video content counts as encoded when it parses as a ``data``
    URL. Audio content counts as encoded when it has no URL scheme and the
    text before its first comma is, case-insensitively, one of the WAV or
    MP3 audio formats.

    Args:
        content: The content to check.
        media_type: The media type (MediaType.IMAGE, MediaType.AUDIO, MediaType.VIDEO).

    Returns:
        True if content is already encoded, False otherwise (including for
        any other media type).
    """
    scheme = urlparse(content).scheme

    if media_type in [MediaType.IMAGE, MediaType.VIDEO]:
        # Data URL format
        return scheme == "data"

    if media_type != MediaType.AUDIO:
        return False

    # "format,base64" format: needs a comma and must not look like a URL
    if "," not in content or scheme:
        return False

    format_prefix, _, _ = content.partition(",")
    return format_prefix.lower() in [AudioFormat.WAV, AudioFormat.MP3]
143+
def _encode_media_file(self, content: str, media_type: MediaTypeT) -> str:
    """Load a local media file and return it in encoded form.

    Images are opened and encoded through ``utils.open_image`` and
    ``utils.encode_image`` and wrapped in a ``data:image/<format>;base64,``
    URL. Audio and video are delegated to the matching ``utils.open_*`` /
    ``utils.encode_*`` pair and that helper's result is returned unchanged.

    Args:
        content: The file path to encode.
        media_type: The media type (MediaType.IMAGE, MediaType.AUDIO, MediaType.VIDEO).

    Returns:
        The encoded content in the format used for the given media type.

    Raises:
        ValueError: If media_type is not one of the three handled types.

    Note:
        Errors raised by the ``utils`` helpers (presumably for missing files
        or unsupported formats - confirm against ``aiperf.dataset.utils``)
        propagate unchanged.
    """
    if media_type == MediaType.IMAGE:
        image = utils.open_image(content)
        encoded_image = utils.encode_image(image, image.format)
        subtype = image.format.lower()
        return f"data:image/{subtype};base64,{encoded_image}"

    if media_type == MediaType.AUDIO:
        raw_audio, audio_fmt = utils.open_audio(content)
        return utils.encode_audio(raw_audio, audio_fmt)

    if media_type == MediaType.VIDEO:
        raw_video, video_fmt = utils.open_video(content)
        return utils.encode_video(raw_video, video_fmt)

    raise ValueError(f"Unsupported media type: {media_type}")
172+
def _handle_media_content(self, content: str, media_type: MediaTypeT) -> str:
    """Normalize one media value into something that can be sent as-is.

    Content that is already encoded, or that is a URL, is returned
    untouched. Anything else is treated as a local file path and handed to
    ``_encode_media_file``.

    Args:
        content: The media content - URL, encoded string, or local file path.
        media_type: The media type (MediaType.IMAGE, MediaType.AUDIO, MediaType.VIDEO).

    Returns:
        The processed media content.

    Raises:
        ValueError: If content has a URL scheme without a netloc (or the
            reverse) and is not recognized as already encoded.

    Note:
        Errors raised while loading or encoding a local file propagate
        unchanged from ``_encode_media_file``.
    """
    # The encoded check must run before the URL check: data URLs have a
    # scheme but no netloc, which the URL check would reject as invalid.
    passes_through = self._is_already_encoded(content, media_type) or self._is_url(
        content
    )
    if passes_through:
        return content

    # Neither encoded nor a URL: treat it as a local file path and encode it
    return self._encode_media_file(content, media_type)
0 commit comments