3838
3939logger = logging .getLogger (__name__ )
4040
# Exit code the post-process script returns when the S3 upload succeeded but
# srtlog could not be installed or `srtlog parse` failed; the caller still
# reports the S3 URL in that case.
41+ POSTPROCESS_PARSE_FAILED_EXIT = 20
# Exit code the post-process script returns when uv/awscli could not be
# installed or `aws s3 sync` failed, i.e. the raw logs were not uploaded.
42+ POSTPROCESS_UPLOAD_FAILED_EXIT = 11
43+
4144
4245class PostProcessStageMixin :
4346 """Mixin for post-process stage after benchmark completion.
@@ -254,34 +257,7 @@ def _run_postprocess_container(self) -> tuple[Path | None, str | None]:
254257 endpoint_flag = f"--endpoint-url { s3_config .endpoint_url } " if s3_config .endpoint_url else ""
255258
256259 # Build the post-processing script
257- script = f"""
258- set -e
259-
260- # Install uv, srtlog, and awscli
261- echo "Installing uv..."
262- pip install uv
263-
264- echo "Installing srtlog and awscli..."
265- cd /tmp
266- git clone --depth 1 https://github.com/ishandhanani/srtlog.git
267- uv pip install --system ./srtlog awscli
268-
269- # Run srtlog to generate parquet
270- echo "Running srtlog parse..."
271- cd /logs
272- srtlog parse .
273-
274- # Upload entire log directory to S3
275- echo "Uploading entire log directory to S3..."
276- aws s3 sync /logs { s3_url } { endpoint_flag }
277- echo "Upload complete: { s3_url } "
278-
279- # Report what was uploaded
280- echo ""
281- echo "Uploaded files:"
282- find /logs -type f | wc -l
283- echo "files total"
284- """
260+ script = self ._build_postprocess_script (s3_url , endpoint_flag )
285261
286262 # Build env for AWS credentials
287263 env : dict [str , str ] = {}
@@ -301,7 +277,7 @@ def _run_postprocess_container(self) -> tuple[Path | None, str | None]:
301277 nodelist = [self .runtime .nodes .head ],
302278 output = str (self .runtime .log_dir / "postprocess.log" ),
303279 container_image = "python:3.11" ,
304- container_mounts = {str ( self .runtime .log_dir ): "/logs" },
280+ container_mounts = {self .runtime .log_dir : Path ( "/logs" ) },
305281 env_to_set = env ,
306282 )
307283 proc .wait (timeout = 600 ) # 10 min timeout for install + parse + full sync
@@ -311,6 +287,9 @@ def _run_postprocess_container(self) -> tuple[Path | None, str | None]:
311287 if proc .returncode == 0 :
312288 logger .info ("Post-processing complete: %s" , s3_url )
313289 return parquet_path if parquet_path .exists () else None , s3_url
290+ if proc .returncode == POSTPROCESS_PARSE_FAILED_EXIT :
291+ logger .warning ("srtlog parsing failed, but raw logs were still uploaded to %s" , s3_url )
292+ return parquet_path if parquet_path .exists () else None , s3_url
314293 else :
315294 logger .warning ("Post-processing failed (exit code: %s)" , proc .returncode )
316295 return parquet_path if parquet_path .exists () else None , None
@@ -323,6 +302,58 @@ def _run_postprocess_container(self) -> tuple[Path | None, str | None]:
323302 logger .warning ("Post-processing container failed: %s" , e )
324303 return None , None
325304
def _build_postprocess_script(self, s3_url: str, endpoint_flag: str) -> str:
    """Build the shell script that parses and uploads the log directory.

    Upload is always attempted if awscli installs successfully. Parsing is
    best-effort so raw logs survive parser/tooling failures.

    The generated script exits with:

    * ``POSTPROCESS_UPLOAD_FAILED_EXIT`` when ``pip install uv awscli`` or
      ``aws s3 sync`` fails;
    * ``POSTPROCESS_PARSE_FAILED_EXIT`` when the upload succeeded but srtlog
      could not be installed or ``srtlog parse`` returned non-zero;
    * the status of its last command (0) otherwise.

    Args:
        s3_url: Destination passed to ``aws s3 sync``; it is also recorded in
            ``/logs/postprocess-status.json``.
        endpoint_flag: Either an empty string or a complete
            ``--endpoint-url <url>`` fragment. It is inserted verbatim
            because it must expand to zero or two shell words.

    Returns:
        The script text.
    """
    # Function-scope imports keep this method self-contained; the module's
    # import block is not touched.
    import json
    import shlex

    # Quote the URL once for use as a shell word, and once as a JSON string
    # literal (then shell-quoted) for the status file. Ordinary s3:// URLs
    # pass through shlex.quote unchanged.
    s3_url_arg = shlex.quote(s3_url)
    status_url_arg = shlex.quote(json.dumps(s3_url))

    # The status file is written with printf rather than a here-document: a
    # here-document terminator must start at column 0, which silently breaks
    # (and swallows the upload step) whenever this string is indented.
    return f"""
    set -u
    set -o pipefail

    PARSE_STATUS=0
    UPLOAD_STATUS=0

    echo "Installing uv and awscli..."
    if ! pip install uv awscli; then
        echo "Failed to install uv/awscli"
        exit {POSTPROCESS_UPLOAD_FAILED_EXIT}
    fi

    echo "Installing srtlog..."
    if cd /tmp && git clone --depth 1 https://github.com/ishandhanani/srtlog.git && uv pip install --system ./srtlog; then
        echo "Running srtlog parse..."
        cd /logs && srtlog parse . || PARSE_STATUS=$?
    else
        echo "Failed to install srtlog; continuing with raw log upload"
        PARSE_STATUS=1
    fi

    printf '{{"parse_status": %s, "s3_url": %s}}\\n' "$PARSE_STATUS" {status_url_arg} > /logs/postprocess-status.json

    echo "Uploading entire log directory to S3..."
    aws s3 sync /logs {s3_url_arg} {endpoint_flag} || UPLOAD_STATUS=$?

    if [ "$UPLOAD_STATUS" -ne 0 ]; then
        echo "Upload failed with status $UPLOAD_STATUS"
        exit {POSTPROCESS_UPLOAD_FAILED_EXIT}
    fi

    printf 'Upload complete: %s\\n' {s3_url_arg}
    echo ""
    echo "Uploaded files:"
    find /logs -type f | wc -l
    echo "files total"

    if [ "$PARSE_STATUS" -ne 0 ]; then
        exit {POSTPROCESS_PARSE_FAILED_EXIT}
    fi
    """
356+
326357 def _report_metrics (self , benchmark_results : dict [str , Any ] | None , s3_url : str | None , exit_code : int ) -> None :
327358 """Report metrics to dashboard via status API.
328359
@@ -443,7 +474,7 @@ def _run_ai_analysis(self, config: AIAnalysisConfig) -> None:
443474 nodelist = [self .runtime .nodes .head ],
444475 output = str (analysis_log ),
445476 container_image = "python:3.11" ,
446- container_mounts = {str ( self .runtime .log_dir ): "/logs" },
477+ container_mounts = {self .runtime .log_dir : Path ( "/logs" ) },
447478 env_to_set = env_to_set ,
448479 )
449480
0 commit comments