Skip to content

Commit 0943ac0

Browse files
committed
feat(async-job): write fatal error message to k8s termination message path
Signed-off-by: Jon Burdo <jon@jonburdo.com>
1 parent bde2a61 commit 0943ac0

1 file changed

Lines changed: 27 additions & 12 deletions

File tree

jobs/async-upload/job/entrypoint.py

Lines changed: 27 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,6 @@
11
import asyncio
22
import logging
33
import os
4-
import sys
54

65
from job.upload import perform_upload
76
from .config import get_config
@@ -13,16 +12,32 @@
1312
from .download import perform_download
1413

1514
# Configure logging
16-
log_level = os.getenv('LOGLEVEL', logging.INFO)
15+
log_level = os.getenv("LOGLEVEL", logging.INFO)
1716
logging.basicConfig(
18-
level=log_level, format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
19-
force=True
17+
level=log_level,
18+
format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
19+
force=True,
2020
)
2121
logger = logging.getLogger(__name__)
2222

2323
# Test logging configuration immediately
2424
logger.info("📝 Logging configuration initialized successfully")
2525

26+
# File that receives the job's fatal error message. Defaults to the
# conventional Kubernetes termination-log location and can be overridden
# through the TERMINATION_MESSAGE_PATH environment variable.
termination_message_path = os.environ.get(
    "TERMINATION_MESSAGE_PATH", "/dev/termination-log"
)


def write_to_termination_message_path(message: str) -> None:
    """Overwrite the termination message file with *message*.

    Args:
        message: Text to store at ``termination_message_path``; any previous
            content of the file is replaced.

    Raises:
        OSError: If the file cannot be opened or written.
    """
    # Pin the encoding: the locale default is not guaranteed to be UTF-8 in
    # minimal container images, and error messages may contain non-ASCII text.
    with open(termination_message_path, "w", encoding="utf-8") as f:
        f.write(message)
34+
35+
36+
def record_error(exc):
    """Report a fatal error to the job log and the termination message file.

    The error is logged first so it always reaches the job logs. Writing the
    termination message is best-effort: this function is called from an
    ``except`` block that re-raises, so a failure to write the file must not
    replace the original exception.

    Args:
        exc: The exception that is terminating the job.
    """
    message = f"Unexpected error: {exc}"
    logger.error(message)
    try:
        write_to_termination_message_path(message)
    except (OSError, UnicodeError) as write_err:
        # E.g. the termination-log path does not exist or is read-only when
        # the job runs outside a Kubernetes pod.
        logger.warning("Could not write termination message: %s", write_err)
40+
2641

2742
async def main() -> None:
2843
"""
@@ -32,7 +47,10 @@ async def main() -> None:
3247
logger.info("🚀 Starting async upload job...")
3348
try:
3449
# Get complete configuration
35-
config = get_config()
50+
try:
51+
config = get_config()
52+
except Exception as e:
53+
raise RuntimeError("Failed to get config") from e
3654

3755
client = validate_and_get_model_registry_client(config)
3856

@@ -42,19 +60,16 @@ async def main() -> None:
4260
# Download the model from the defined source
4361
perform_download(client, config)
4462

45-
4663
# Upload the model to the destination
4764
uri = perform_upload(config)
4865

4966
await update_model_artifact_uri(uri, client, config)
5067

51-
except ValueError as e:
52-
logger.error(f"Configuration error: {str(e)}")
53-
raise
54-
except Exception as e:
55-
logger.error(f"Unexpected error: {str(e)}")
68+
except BaseException as e:
69+
record_error(e)
5670
raise
57-
logger.info("🏁 Job completed successfully")
71+
else:
72+
logger.info("🏁 Job completed successfully")
5873

5974

6075
if __name__ == "__main__": # pragma: no cover

0 commit comments

Comments
 (0)