Skip to content

Commit f4b0d99

Browse files
Merge branch 'main' into dev/primus_patch_moe_overlap
2 parents d7fa4a2 + 7f74177 commit f4b0d99

File tree

6 files changed

+1124
-1
lines changed

6 files changed

+1124
-1
lines changed

runner/helpers/execute_hooks.sh

Lines changed: 110 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,110 @@
1+
#!/bin/bash
2+
###############################################################################
3+
# Copyright (c) 2025, Advanced Micro Devices, Inc. All rights reserved.
4+
#
5+
# See LICENSE for license information.
6+
###############################################################################
7+
#
8+
# Execute hooks based on command group and name
9+
# Usage: execute_hooks <hook_group> <hook_name> [args...]
10+
#
11+
12+
# Fallback loggers, defined only when common.sh has not been sourced
# (the guard variable __PRIMUS_COMMON_SOURCED is unset or empty).
#
# Each logger prints only when NODE_RANK is "0" (an unset/empty NODE_RANK is
# treated as "0"). The rank is compared as a string, matching the check used
# by LOG_ERROR_RANK0 in common.sh, so a non-numeric NODE_RANK is simply
# "not rank 0" instead of triggering an "integer expression expected" error.
if [[ -z "${__PRIMUS_COMMON_SOURCED:-}" ]]; then
    # Informational message on stdout.
    LOG_INFO_RANK0() {
        if [[ "${NODE_RANK:-0}" == "0" ]]; then
            echo "[INFO] $*"
        fi
    }

    # Error message on stderr.
    LOG_ERROR_RANK0() {
        if [[ "${NODE_RANK:-0}" == "0" ]]; then
            echo "[ERROR] $*" >&2
        fi
    }

    # Warning message on stderr (this fallback is also limited to rank 0).
    LOG_WARN() {
        if [[ "${NODE_RANK:-0}" == "0" ]]; then
            echo "[WARN] $*" >&2
        fi
    }
fi
31+
32+
# Execute hooks for a given command.
#
# Looks for hook files directly inside
#   <directory of this script>/hooks/<hook_group>/<hook_name>
# and runs every *.sh file with bash and every *.py file with python3, in
# sorted order, forwarding the additional arguments to each hook.
#
# Args:
#   $1:   hook_group (e.g., "train", "benchmark")
#   $2:   hook_name (e.g., "pretrain", "gemm")
#   $3..: Additional arguments to pass to hooks
# Returns:
#   0 if there is nothing to run or every hook succeeded;
#   1 as soon as one hook exits non-zero (remaining hooks are not run).
execute_hooks() {
    if [[ $# -lt 2 ]]; then
        LOG_INFO_RANK0 "[Hooks] No hook target specified (need group and name)"
        return 0
    fi

    local hook_group="$1"
    local hook_name="$2"
    # After the shift, "$@" holds exactly the arguments to forward to hooks.
    # Using "$@" directly (rather than a copied array) stays safe under
    # `set -u` even when no extra arguments were given.
    shift 2

    # Determine script directory
    local script_dir
    if ! script_dir="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"; then
        LOG_ERROR_RANK0 "[Hooks] Failed to resolve script directory"
        return 1
    fi

    local hook_dir="${script_dir}/hooks/${hook_group}/${hook_name}"

    if [[ ! -d "$hook_dir" ]]; then
        LOG_INFO_RANK0 "[Hooks] No hook directory for [$hook_group/$hook_name]"
        return 0
    fi

    LOG_INFO_RANK0 "[Hooks] Detected hooks directory: $hook_dir"

    # Find all hook files (*.sh and *.py); sorting fixes the execution order.
    local hook_files=()
    mapfile -t hook_files < <(find "$hook_dir" -maxdepth 1 -type f \( -name "*.sh" -o -name "*.py" \) | sort)

    if [[ ${#hook_files[@]} -eq 0 ]]; then
        LOG_INFO_RANK0 "[Hooks] No hook files found in $hook_dir"
        return 0
    fi

    # Keep loop state local so it does not leak into the sourcing shell.
    local hook_file interpreter rc start_time duration

    # Execute each hook file
    for hook_file in "${hook_files[@]}"; do
        LOG_INFO_RANK0 "[Hooks] Executing hook: $hook_file $*"

        case "$hook_file" in
            *.sh) interpreter="bash" ;;
            *.py) interpreter="python3" ;;
            *)
                # Not reachable with the find filter above; kept as a guard.
                LOG_WARN "[Hooks] Skipping unknown hook type: $hook_file"
                continue
                ;;
        esac

        start_time=$(date +%s)

        # Capture the real exit status. Reading $? inside `if ! cmd; then`
        # would yield the negated status (always 0) and hide the real code.
        rc=0
        "$interpreter" "$hook_file" "$@" || rc=$?
        if [[ $rc -ne 0 ]]; then
            LOG_ERROR_RANK0 "[Hooks] Hook failed: $hook_file (exit code: $rc)"
            return 1
        fi

        duration=$(( $(date +%s) - start_time ))
        LOG_INFO_RANK0 "[Hooks] Hook $hook_file finished in ${duration}s"
    done

    LOG_INFO_RANK0 "[Hooks] All hooks executed successfully"
    return 0
}
97+
98+
# Direct-execution entry point: this branch is taken only when the file is run
# as a program (BASH_SOURCE[0] equals $0), not when it is sourced.
if [[ "${0}" == "${BASH_SOURCE[0]}" ]]; then
    # A directly executed script is a new shell instance, so clear the guard
    # variable to force common.sh to be sourced again here.
    unset __PRIMUS_COMMON_SOURCED
    SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
    # shellcheck disable=SC1091
    if ! source "$SCRIPT_DIR/../lib/common.sh"; then
        echo "[ERROR] Failed to load common library" >&2
        exit 1
    fi
    # Forward all command-line arguments unchanged.
    execute_hooks "$@"
fi

runner/helpers/execute_patches.sh

Lines changed: 105 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,105 @@
1+
#!/bin/bash
2+
###############################################################################
3+
# Copyright (c) 2025, Advanced Micro Devices, Inc. All rights reserved.
4+
#
5+
# See LICENSE for license information.
6+
###############################################################################
7+
#
8+
# Execute patch scripts
9+
# Usage: execute_patches <patch_script1> [patch_script2] ...
10+
#
11+
# Exit codes from patch scripts:
12+
# 0 - Success, continue to next patch
13+
# 2 - Skip this patch (not an error)
14+
# other - Failure, stop execution
15+
#
16+
# Example patch script with conditional skip:
17+
# #!/bin/bash
18+
# # Check if patch is needed
19+
# if [[ -f /tmp/already_patched ]]; then
20+
# echo "Patch already applied, skipping"
21+
# exit 2 # Skip this patch
22+
# fi
23+
#
24+
# # Apply patch
25+
# echo "Applying patch..."
26+
# # ... patch logic ...
27+
# exit 0 # Success
28+
#
29+
30+
# Fallback loggers, defined only when common.sh has not been sourced
# (the guard variable __PRIMUS_COMMON_SOURCED is unset or empty).
#
# Each logger prints only when NODE_RANK is "0" (an unset/empty NODE_RANK is
# treated as "0"). The rank is compared as a string, matching the check used
# by LOG_ERROR_RANK0 in common.sh, so a non-numeric NODE_RANK is simply
# "not rank 0" instead of triggering an "integer expression expected" error.
if [[ -z "${__PRIMUS_COMMON_SOURCED:-}" ]]; then
    # Informational message; this fallback writes it to stderr.
    LOG_INFO_RANK0() {
        if [[ "${NODE_RANK:-0}" == "0" ]]; then
            echo "[INFO] $*" >&2
        fi
    }

    # Error message on stderr.
    LOG_ERROR_RANK0() {
        if [[ "${NODE_RANK:-0}" == "0" ]]; then
            echo "[ERROR] $*" >&2
        fi
    }

    # Success message on stdout.
    LOG_SUCCESS_RANK0() {
        if [[ "${NODE_RANK:-0}" == "0" ]]; then
            echo "[SUCCESS] $*"
        fi
    }
fi
49+
50+
# Execute multiple patch scripts, in the order given.
#
# Each script is run with `bash <script>` (no arguments). Its exit code is
# interpreted as:
#   0     - success, continue with the next patch
#   2     - patch skipped (not an error), continue with the next patch
#   other - failure, stop immediately
#
# Args:
#   $@: Patch script paths
# Returns:
#   0 if no scripts were given or all scripts succeeded/skipped;
#   1 if a script is missing, unreadable, or fails.
execute_patches() {
    if [[ $# -eq 0 ]]; then
        LOG_INFO_RANK0 "[Execute Patches] No patch scripts specified"
        return 0
    fi

    local patch_scripts=("$@")

    LOG_INFO_RANK0 "[Execute Patches] Detected patch scripts: ${patch_scripts[*]}"

    # Keep loop state local so it does not leak into the sourcing shell.
    local patch exit_code

    for patch in "${patch_scripts[@]}"; do
        if [[ ! -f "$patch" ]]; then
            LOG_ERROR_RANK0 "[Execute Patches] Patch script not found: $patch"
            return 1
        fi

        if [[ ! -r "$patch" ]]; then
            LOG_ERROR_RANK0 "[Execute Patches] Patch script not readable: $patch"
            return 1
        fi

        LOG_INFO_RANK0 "[Execute Patches] Running patch: bash $patch"

        # Capture the status via `||` so a non-zero exit (including the
        # "skip" code 2) does not abort the caller when `set -e` is active.
        exit_code=0
        bash "$patch" || exit_code=$?

        if [[ $exit_code -eq 0 ]]; then
            LOG_INFO_RANK0 "[Execute Patches] Patch completed successfully: $patch"
        elif [[ $exit_code -eq 2 ]]; then
            LOG_INFO_RANK0 "[Execute Patches] Patch skipped (exit code 2): $patch"
        else
            LOG_ERROR_RANK0 "[Execute Patches] Patch script failed: $patch (exit code: $exit_code)"
            return 1
        fi
    done

    LOG_SUCCESS_RANK0 "[Execute Patches] All patch scripts executed successfully"
    return 0
}
92+
93+
# Direct-execution entry point: this branch is taken only when the file is run
# as a program (BASH_SOURCE[0] equals $0), not when it is sourced.
if [[ "${0}" == "${BASH_SOURCE[0]}" ]]; then
    # A directly executed script is a new shell instance, so clear the guard
    # variable to force common.sh to be sourced again here.
    unset __PRIMUS_COMMON_SOURCED
    SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
    # shellcheck disable=SC1091
    if ! source "$SCRIPT_DIR/../lib/common.sh"; then
        echo "[ERROR] Failed to load common library" >&2
        exit 1
    fi
    # Forward all command-line arguments (the patch script paths) unchanged.
    execute_patches "$@"
fi

runner/lib/common.sh

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -142,6 +142,12 @@ LOG_SUCCESS_RANK0() {
142142
fi
143143
}
144144

145+
# Forward all arguments to LOG_ERROR, but only when NODE_RANK is exactly "0"
# (an unset or empty NODE_RANK counts as "0"). Any other rank is a silent no-op.
LOG_ERROR_RANK0() {
    case "${NODE_RANK:-0}" in
        0) LOG_ERROR "$@" ;;
    esac
}
150+
145151
# ---------------------------------------------------------------------------
146152
# Simple Print Functions (without timestamps and prefixes)
147153
# ---------------------------------------------------------------------------
@@ -534,7 +540,7 @@ trap run_cleanup_hooks EXIT INT TERM
534540
# ---------------------------------------------------------------------------
535541
export -f _get_timestamp _get_node_id _log
536542
export -f LOG_DEBUG LOG_INFO LOG_WARN LOG_ERROR LOG_SUCCESS
537-
export -f LOG_INFO_RANK0 LOG_DEBUG_RANK0 LOG_SUCCESS_RANK0
543+
export -f LOG_INFO_RANK0 LOG_DEBUG_RANK0 LOG_SUCCESS_RANK0 LOG_ERROR_RANK0
538544
export -f PRINT_DEBUG PRINT_INFO PRINT_WARN PRINT_ERROR PRINT_SUCCESS
539545
export -f PRINT_INFO_RANK0 PRINT_DEBUG_RANK0 PRINT_SUCCESS_RANK0
540546
export -f log_exported_vars print_section

0 commit comments

Comments
 (0)