-
Notifications
You must be signed in to change notification settings - Fork 25
feat: add nim image retrieval endpoint support #394
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -2,9 +2,13 @@ | |
| # SPDX-License-Identifier: Apache-2.0 | ||
|
|
||
| from collections.abc import Iterable | ||
| from urllib.parse import urlparse | ||
|
|
||
| from aiperf.common.enums.dataset_enums import AudioFormat | ||
| from aiperf.common.enums.media_enums import MediaType | ||
| from aiperf.common.models import Media | ||
| from aiperf.common.types import MediaT | ||
| from aiperf.common.types import MediaT, MediaTypeT | ||
| from aiperf.dataset import utils | ||
| from aiperf.dataset.loader.models import CustomDatasetT | ||
|
|
||
|
|
||
|
|
@@ -51,8 +55,8 @@ def _convert_to_media_objects( | |
|
|
||
| Args: | ||
| data: The custom dataset to construct media objects from. | ||
| media_class: The target media class (Text, Image, or Audio). | ||
| field: The name of the field (e.g., 'text', 'image', 'audio'). | ||
| media_class: The target media class (Text, Image, Audio, or Video). | ||
| field: The name of the field (e.g., 'text', 'image', 'audio', 'video'). | ||
| name: The name of the media field. | ||
|
|
||
| Returns: | ||
|
|
@@ -61,6 +65,9 @@ def _convert_to_media_objects( | |
| # Check singular field first | ||
| value = getattr(data, field, None) | ||
| if value is not None: | ||
| # Handle media content (encode local files to base64) | ||
| if field in [MediaType.IMAGE, MediaType.VIDEO, MediaType.AUDIO]: | ||
| value = self._handle_media_content(value, media_type=MediaType(field)) | ||
| return [media_class(name=name, contents=[value])] | ||
|
|
||
| # Check plural field | ||
|
|
@@ -72,4 +79,124 @@ def _convert_to_media_objects( | |
| if all(isinstance(v, media_class) for v in values): | ||
| return values | ||
|
|
||
| # Handle media content (encode local files to base64) | ||
| if field in [MediaType.IMAGE, MediaType.VIDEO, MediaType.AUDIO]: | ||
| values = [ | ||
| self._handle_media_content(v, media_type=MediaType(field)) | ||
| for v in values | ||
| ] | ||
|
|
||
| return [media_class(name=name, contents=values)] | ||
|
|
||
| def _is_url(self, content: str) -> bool: | ||
| """Check if content is a valid URL with scheme and netloc. | ||
|
|
||
| Args: | ||
| content: The content to check. | ||
|
|
||
| Returns: | ||
| True if content is a URL, False otherwise. | ||
|
|
||
| Raises: | ||
| ValueError: If URL has only scheme or only netloc (invalid). | ||
| """ | ||
| url = urlparse(content) | ||
|
|
||
| # Valid URL with both scheme and netloc | ||
| if url.scheme and url.netloc: | ||
| return True | ||
|
|
||
| # Invalid URL - has one but not both | ||
| if url.scheme or url.netloc: | ||
| raise ValueError(f"Valid URL must have both a scheme and netloc: {content}") | ||
|
|
||
| # Not a URL | ||
| return False | ||
|
|
||
def _is_already_encoded(self, content: str, media_type: MediaTypeT) -> bool:
    """Report whether content is already in its encoded wire format.

    Images and videos count as encoded when the content parses with the
    ``data`` URL scheme. Audio counts as encoded when it has no URL scheme
    and the text before the first comma, lower-cased, matches
    ``AudioFormat.WAV`` or ``AudioFormat.MP3`` (the "format,base64" form).

    Args:
        content: The content to check.
        media_type: The media type (MediaType.IMAGE, MediaType.AUDIO, MediaType.VIDEO).

    Returns:
        True if content is already encoded, False otherwise (including for
        any other media type).
    """
    parsed = urlparse(content)

    # Data URL format is the encoded form for images and videos.
    if media_type in [MediaType.IMAGE, MediaType.VIDEO]:
        return parsed.scheme == "data"

    if media_type == MediaType.AUDIO:
        # "format,base64" needs a comma and must not look like a URL.
        if parsed.scheme or "," not in content:
            return False
        format_prefix, _, _ = content.partition(",")
        return format_prefix.lower() in [AudioFormat.WAV, AudioFormat.MP3]

    return False
|
|
||
| def _encode_media_file(self, content: str, media_type: MediaTypeT) -> str: | ||
| """Encode a local media file to base64. | ||
|
|
||
| Args: | ||
| content: The file path to encode. | ||
| media_type: The media type (MediaType.IMAGE, MediaType.AUDIO, MediaType.VIDEO). | ||
|
|
||
| Returns: | ||
| The base64-encoded content in the appropriate format. | ||
|
|
||
| Raises: | ||
| FileNotFoundError: If the file doesn't exist. | ||
| RuntimeError: If the format is unsupported. | ||
| """ | ||
| if media_type == MediaType.IMAGE: | ||
| img = utils.open_image(content) | ||
| img_base64 = utils.encode_image(img, img.format) | ||
| return f"data:image/{img.format.lower()};base64,{img_base64}" | ||
|
|
||
| elif media_type == MediaType.AUDIO: | ||
| audio_bytes, audio_format = utils.open_audio(content) | ||
| return utils.encode_audio(audio_bytes, audio_format) | ||
|
|
||
| elif media_type == MediaType.VIDEO: | ||
| video_bytes, video_format = utils.open_video(content) | ||
| return utils.encode_video(video_bytes, video_format) | ||
|
Comment on lines
+163
to
+169
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
🧩 Analysis chain
Verify type compatibility between open and encode functions. Lines 164-165 and 168-169 appear to have type mismatches:
This could cause type checking errors or runtime issues if the encode functions don't handle string inputs gracefully. Run the following script to check the actual return types and usage:
🏁 Script executed:
#!/bin/bash
# Check the return statements and type annotations for open_* functions
echo "=== open_audio return type ==="
rg -n "def open_audio" --type=py -A 35 | grep -E "(def open_audio|return |-> )"
echo -e "\n=== open_video return type ==="
rg -n "def open_video" --type=py -A 35 | grep -E "(def open_video|return |-> )"
echo -e "\n=== encode_audio signature ==="
rg -n "def encode_audio" --type=py -A 5
echo -e "\n=== encode_video signature ==="
rg -n "def encode_video" --type=py -A 5
Length of output: 1526
Length of output: 2232
Fix type annotation mismatches in utils.py. The functions are annotated inconsistently with their actual behavior:
While the code works at runtime (strings have 🤖 Prompt for AI Agents |
||
|
|
||
| raise ValueError(f"Unsupported media type: {media_type}") | ||
|
|
||
def _handle_media_content(self, content: str, media_type: MediaTypeT) -> str:
    """Normalize a media value into the form that is sent on.

    Content that is already encoded, or that is a URL, is passed through
    untouched. Anything else is treated as a local file path and is loaded
    and encoded to base64.

    Args:
        content: The media content - URL, encoded string, or local file path.
        media_type: The media type (MediaType.IMAGE, MediaType.AUDIO, MediaType.VIDEO).

    Returns:
        The processed media content.

    Raises:
        FileNotFoundError: If the local file doesn't exist.
        RuntimeError: If the media format is unsupported.
        ValueError: If URL format is invalid.
    """
    # Order matters: the already-encoded check runs before the URL check
    # because data URLs have a scheme but no netloc, which the URL check
    # rejects with ValueError.
    passthrough = self._is_already_encoded(content, media_type) or self._is_url(
        content
    )
    if passthrough:
        return content

    # Neither encoded nor a URL: treat it as a local file path.
    return self._encode_media_file(content, media_type)
Uh oh!
There was an error while loading. Please reload this page.