Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
# Generated by Django 4.2.25 on 2025-10-22 12:51

from django.db import migrations, models


class Migration(migrations.Migration):
    """Add the nullable ``exec_duration`` and ``invoke_duration`` fields to ``job``.

    Both columns are non-editable ``DurationField``s that default to
    ``None``, so existing rows need no backfill.
    """

    dependencies = [
        ("algorithms", "0083_alter_job_signing_key"),
    ]

    operations = [
        # How long the execution took, when it was measured.
        migrations.AddField(
            model_name="job",
            name="exec_duration",
            field=models.DurationField(
                null=True,
                default=None,
                editable=False,
                # Adjacent literals concatenate to exactly the help text
                # declared on the model; keep them in sync.
                help_text=(
                    "The duration of the execution, if measured. "
                    "Excludes data validation, container pulling, model downloading, "
                    "data downloading and data uploading times. "
                    "Includes model loading time, input data loading time, "
                    "processing time, output data writing time and "
                    "any delays from shared hardware issues."
                ),
            ),
        ),
        # How long the invocation took, when it was measured.
        migrations.AddField(
            model_name="job",
            name="invoke_duration",
            field=models.DurationField(
                null=True,
                default=None,
                editable=False,
                help_text=(
                    "The duration of the invocation, if measured. "
                    "Excludes data validation, container pulling, model downloading, "
                    "data downloading and data uploading times. "
                    "Potentially excludes model loading time, depending on the "
                    "users implementation. "
                    "Includes input data loading time, "
                    "processing time, output data writing time and "
                    "any delays from shared hardware issues."
                ),
            ),
        ),
    ]
Original file line number Diff line number Diff line change
Expand Up @@ -105,7 +105,7 @@ def _create_job_boto(self):
},
StoppingCondition={
# https://docs.aws.amazon.com/sagemaker/latest/APIReference/API_StoppingCondition.html
"MaxRuntimeInSeconds": self._time_limit,
"MaxRuntimeInSeconds": int(self._time_limit.total_seconds()),
},
Environment={
**self.invocation_environment,
Expand Down
53 changes: 38 additions & 15 deletions app/grandchallenge/components/backends/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
import os
import secrets
from abc import ABC, abstractmethod
from datetime import timedelta
from json import JSONDecodeError
from math import ceil
from pathlib import Path
Expand All @@ -30,6 +31,8 @@
from django.utils._os import safe_join
from django.utils.functional import cached_property
from panimg.image_builders import image_builder_mhd, image_builder_tiff
from pydantic import BaseModel, ConfigDict
from pydantic_core import to_json

from grandchallenge.cases.tasks import import_images
from grandchallenge.components.backends.exceptions import (
Expand Down Expand Up @@ -266,6 +269,25 @@ async def s3_stream_response(
raise


class InferenceIO(BaseModel):
    """Immutable description of one input file for an inference invocation.

    One instance is created per provisioned input and embedded in the
    ``inputs`` list of an ``InferenceTask``.
    """

    # Instances are frozen: fields cannot be reassigned after creation.
    model_config = ConfigDict(frozen=True)

    # Location of the file expressed relative to the job's IO prefix.
    relative_path: str
    # Bucket that holds the provisioned input.
    bucket_name: str
    # Full object key of the provisioned input inside that bucket.
    bucket_key: str
    # Copied from the component interface value's ``decompress`` flag;
    # presumably tells the consumer to unpack the object — confirm with
    # the shim that reads the invocation JSON.
    decompress: bool


class InferenceTask(BaseModel):
    """Immutable description of a single inference invocation.

    A list of these is serialised to JSON and uploaded as the invocation
    file for the executor.
    """

    # Instances are frozen: fields cannot be reassigned after creation.
    model_config = ConfigDict(frozen=True)

    # Identifier of the job this task belongs to.
    pk: str
    # Input files to make available to the invocation.
    inputs: list[InferenceIO]
    # Bucket that receives the outputs.
    output_bucket_name: str
    # Key prefix under which the outputs are written.
    output_prefix: str
    # Time limit for the task; serialised as an ISO 8601 duration
    # (for example ``"PT1M"`` for sixty seconds).
    timeout: timedelta


class Executor(ABC):
def __init__(
self,
Expand All @@ -285,7 +307,7 @@ def __init__(
self._job_id = job_id
self._exec_image_repo_tag = exec_image_repo_tag
self._memory_limit = memory_limit
self._time_limit = time_limit
self._time_limit = timedelta(seconds=time_limit)
Comment thread
koopmant marked this conversation as resolved.
self._requires_gpu_type = requires_gpu_type
self._use_warm_pool = (
use_warm_pool and settings.COMPONENTS_USE_WARM_POOL
Expand Down Expand Up @@ -540,16 +562,16 @@ def _get_provisioning_tasks(self, *, input_civs, input_prefixes):
):
provisioning_tasks.append(civ_provisioning_task.task)
invocation_inputs.append(
{
"relative_path": str(
InferenceIO(
relative_path=str(
os.path.relpath(
civ_provisioning_task.key, self._io_prefix
)
),
"bucket_name": settings.COMPONENTS_INPUT_BUCKET_NAME,
"bucket_key": civ_provisioning_task.key,
"decompress": civ.decompress,
}
bucket_name=settings.COMPONENTS_INPUT_BUCKET_NAME,
bucket_key=civ_provisioning_task.key,
decompress=civ.decompress,
)
)

provisioning_tasks.append(
Expand Down Expand Up @@ -666,16 +688,17 @@ def _get_civ_provisioning_tasks(self, *, civ, input_prefixes):

def _get_create_invocation_json_task(self, *, invocation_inputs):
return self._get_upload_input_content_task(
content=json.dumps(
content=to_json(
[
{
"pk": self._job_id,
"inputs": invocation_inputs,
"output_bucket_name": settings.COMPONENTS_OUTPUT_BUCKET_NAME,
"output_prefix": self._io_prefix,
}
InferenceTask(
pk=self._job_id,
inputs=invocation_inputs,
output_bucket_name=settings.COMPONENTS_OUTPUT_BUCKET_NAME,
output_prefix=self._io_prefix,
timeout=self._time_limit,
)
]
).encode("utf-8"),
),
key=self._invocation_key,
)

Expand Down
28 changes: 28 additions & 0 deletions app/grandchallenge/components/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -1619,6 +1619,34 @@ class ComponentJob(FieldChangeMixin, UUIDModel):
attempt = models.PositiveSmallIntegerField(editable=False, default=0)
stdout = models.TextField()
stderr = models.TextField(default="")
exec_duration = models.DurationField(
null=True,
default=None,
editable=False,
help_text=(
"The duration of the execution, if measured. "
"Excludes data validation, container pulling, model downloading, "
"data downloading and data uploading times. "
"Includes model loading time, input data loading time, "
"processing time, output data writing time and "
"any delays from shared hardware issues."
),
)
invoke_duration = models.DurationField(
null=True,
default=None,
editable=False,
help_text=(
"The duration of the invocation, if measured. "
"Excludes data validation, container pulling, model downloading, "
"data downloading and data uploading times. "
"Potentially excludes model loading time, depending on the "
"users implementation. "
"Includes input data loading time, "
"processing time, output data writing time and "
"any delays from shared hardware issues."
),
)
runtime_metrics = models.JSONField(default=dict, editable=False)
error_message = models.CharField(max_length=1024, default="")
detailed_error_message = models.JSONField(blank=True, default=dict)
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
# Generated by Django 4.2.25 on 2025-10-22 12:51

from django.db import migrations, models


class Migration(migrations.Migration):
    """Add the nullable ``exec_duration`` and ``invoke_duration`` fields to ``evaluation``.

    Both columns are non-editable ``DurationField``s that default to
    ``None``, so existing rows need no backfill.
    """

    dependencies = [
        ("evaluation", "0100_alter_evaluation_signing_key"),
    ]

    operations = [
        # How long the execution took, when it was measured.
        migrations.AddField(
            model_name="evaluation",
            name="exec_duration",
            field=models.DurationField(
                null=True,
                default=None,
                editable=False,
                # Adjacent literals concatenate to exactly the help text
                # declared on the model; keep them in sync.
                help_text=(
                    "The duration of the execution, if measured. "
                    "Excludes data validation, container pulling, model downloading, "
                    "data downloading and data uploading times. "
                    "Includes model loading time, input data loading time, "
                    "processing time, output data writing time and "
                    "any delays from shared hardware issues."
                ),
            ),
        ),
        # How long the invocation took, when it was measured.
        migrations.AddField(
            model_name="evaluation",
            name="invoke_duration",
            field=models.DurationField(
                null=True,
                default=None,
                editable=False,
                help_text=(
                    "The duration of the invocation, if measured. "
                    "Excludes data validation, container pulling, model downloading, "
                    "data downloading and data uploading times. "
                    "Potentially excludes model loading time, depending on the "
                    "users implementation. "
                    "Includes input data loading time, "
                    "processing time, output data writing time and "
                    "any delays from shared hardware issues."
                ),
            ),
        ),
    ]
Original file line number Diff line number Diff line change
Expand Up @@ -133,7 +133,7 @@ def test_invocation_prefix():
),
)
def test_transform_job_name(model, container, container_model, key):
j = model(pk=uuid4())
j = model(pk=uuid4(), time_limit=60)
setattr(j, container, container_model(pk=uuid4()))
executor = AmazonSageMakerTrainingExecutor(**j.executor_kwargs)

Expand Down Expand Up @@ -169,7 +169,7 @@ def test_invocation_json(settings):
time_limit=60,
requires_gpu_type=GPUTypeChoices.NO_GPU,
use_warm_pool=False,
signing_key=b"",
signing_key=b"totallysecret",
)

with Stubber(executor._sagemaker_client) as s:
Expand Down Expand Up @@ -205,6 +205,7 @@ def test_invocation_json(settings):
"GRAND_CHALLENGE_COMPONENT_WRITABLE_DIRECTORIES": "/opt/ml/output/data:/opt/ml/model:/opt/ml/input/data/ground_truth:/opt/ml/checkpoints:/tmp",
"GRAND_CHALLENGE_COMPONENT_POST_CLEAN_DIRECTORIES": "/opt/ml/output/data:/opt/ml/model:/opt/ml/input/data/ground_truth",
"GRAND_CHALLENGE_COMPONENT_MAX_MEMORY_MB": "7168",
"GRAND_CHALLENGE_COMPONENT_SIGNING_KEY_HEX": "746f74616c6c79736563726574",
},
"VpcConfig": {
"SecurityGroupIds": [
Expand All @@ -216,6 +217,7 @@ def test_invocation_json(settings):
},
)
executor.provision(input_civs=[], input_prefixes={})
executor.execute() # Required to validate expected_params in the stubber

with io.BytesIO() as fileobj:
executor._s3_client.download_fileobj(
Expand All @@ -239,6 +241,7 @@ def test_invocation_json(settings):
"output_bucket_name": "grand-challenge-components-outputs",
"output_prefix": f"/io/algorithms/job/{pk}",
"pk": f"algorithms-job-{pk}",
"timeout": "PT1M",
}
]

Expand Down
2 changes: 2 additions & 0 deletions app/tests/components_tests/test_backends.py
Original file line number Diff line number Diff line change
Expand Up @@ -337,6 +337,7 @@ def test_invocation_json(settings):
"output_bucket_name": "grand-challenge-components-outputs",
"output_prefix": f"/io/test/test/{job_pk}",
"pk": f"test-test-{job_pk}",
"timeout": "PT1M40S",
},
]

Expand Down Expand Up @@ -667,6 +668,7 @@ def test_dicom_get_provisioning_tasks():
"output_bucket_name": "grand-challenge-components-outputs",
"output_prefix": f"/io/test/test/{job_pk}",
"pk": f"test-test-{job_pk}",
"timeout": "PT1M40S",
},
]

Expand Down
4 changes: 2 additions & 2 deletions dockerfiles/web-base/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -78,7 +78,7 @@ RUN mkdir -p /opt/docker \

ENV PYTHONUNBUFFERED=1\
AWS_XRAY_SDK_ENABLED=false\
COMPONENTS_SAGEMAKER_SHIM_VERSION=0.5.0\
COMPONENTS_SAGEMAKER_SHIM_VERSION=0.6.0\
PATH="/opt/grand-challenge/.venv/bin:/home/django/.local/bin:${PATH}"

RUN mkdir -p /opt/grand-challenge /app /static /opt/sagemaker-shim /opt/virtualenvs \
Expand All @@ -90,7 +90,7 @@ USER django:django
# Fetch and install sagemaker shim for shimming containers
RUN mkdir -p /opt/sagemaker-shim \
&& wget "https://github.com/DIAGNijmegen/rse-sagemaker-shim/releases/download/v${COMPONENTS_SAGEMAKER_SHIM_VERSION}/sagemaker-shim-${COMPONENTS_SAGEMAKER_SHIM_VERSION}-Linux-x86_64.tar.gz" -P /opt/sagemaker-shim/ \
&& echo "e0fe6715342c706a14feb0be9a4343c9cf66efa83a14131b9bf25a8b189d50c6 /opt/sagemaker-shim/sagemaker-shim-${COMPONENTS_SAGEMAKER_SHIM_VERSION}-Linux-x86_64.tar.gz" | shasum -c - || exit 1 \
&& echo "a3c33f65ee72e039dd90c9d7a3460de33abdd020f530164b8666975a1ae5e192 /opt/sagemaker-shim/sagemaker-shim-${COMPONENTS_SAGEMAKER_SHIM_VERSION}-Linux-x86_64.tar.gz" | shasum -c - || exit 1 \
&& tar -C /opt/sagemaker-shim/ -xzvf "/opt/sagemaker-shim/sagemaker-shim-${COMPONENTS_SAGEMAKER_SHIM_VERSION}-Linux-x86_64.tar.gz" \
&& rm "/opt/sagemaker-shim/sagemaker-shim-${COMPONENTS_SAGEMAKER_SHIM_VERSION}-Linux-x86_64.tar.gz"

Expand Down
1 change: 1 addition & 0 deletions uv.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading