Filter structured logs on task (#4726)

koopmant · web-flow · commit 9836085aa0ab · 2026-05-28T08:46:38.000+02:00
Structured logs from algorithm endpoints can contain messages from multiple invocations. Filtering on `task` separates them: when a task is provided, only messages tagged with that task are kept. If no task is provided, all logs are included, i.e. messages that belong to a specific task are included as well. Related to DIAGNijmegen/rse-roadmap#481
diff --git a/app/grandchallenge/components/backends/utils.py b/app/grandchallenge/components/backends/utils.py
@@ -55,21 +55,25 @@ class ParsedLog(NamedTuple):
     inference_result_skipped: bool | None
 
 
-def parse_structured_log(*, log: str) -> ParsedLog | None:
+def parse_structured_log(*, log: str, task=None) -> ParsedLog | None:
     """Parse the structured logs from SageMaker Shim"""
     structured_log = json.loads(log.strip())
 
     message = structured_log["log"]
     source = SourceChoices(structured_log["source"])
     inference_result_skipped = structured_log.get("inference_result_skipped")
 
-    if structured_log["internal"] is False:
-        # Defensive, in case the type of structured_log["internal"] is str
+    # Defensive, in case the type of structured_log["internal"] is str
+    if structured_log["internal"] is False and (
+        task is None or structured_log["task"] == task
+    ):
         return ParsedLog(
             message=message,
             source=source,
             inference_result_skipped=inference_result_skipped,
         )
+    else:
+        return None
 
 
 def safe_extract(*, src: File, dest: Path):
diff --git a/app/tests/components_tests/test_backends.py b/app/tests/components_tests/test_backends.py
@@ -24,6 +24,7 @@
 from grandchallenge.components.backends.exceptions import ComponentException
 from grandchallenge.components.backends.utils import (
     _filter_members,
+    parse_structured_log,
     user_error,
 )
 from grandchallenge.components.models import (
@@ -867,3 +868,39 @@ def test_invocation_results_signature_verified(settings):
     )
 
     assert executor._get_inference_result() == inference_result
+
+
+def test_parse_structured_logs_filters_task():
+    log_with_task = json.dumps(
+        {
+            "log": "message with task",
+            "source": "stdout",
+            "internal": False,
+            "task": "1",
+        }
+    )
+    log_without_task = json.dumps(
+        {
+            "log": "message without task",
+            "source": "stdout",
+            "internal": False,
+            "task": None,
+        }
+    )
+
+    parsed_log = parse_structured_log(log=log_with_task, task="1")
+
+    assert parsed_log.message == "message with task"
+
+    parsed_log = parse_structured_log(log=log_without_task, task="1")
+
+    assert parsed_log is None
+
+    parsed_log = parse_structured_log(log=log_with_task, task=None)
+
+    # when `task` is None, messages with a task are __included__
+    assert parsed_log.message == "message with task"
+
+    parsed_log = parse_structured_log(log=log_without_task, task=None)
+
+    assert parsed_log.message == "message without task"