Skip to content

Commit b19ff67

Browse files
committed
Add Copyright, Usage guide and dump model manifests
Signed-off-by: Shiva Krishna, Merla <smerla@nvidia.com>
1 parent 4a07e36 commit b19ff67

1 file changed

Lines changed: 47 additions & 0 deletions

File tree

hack/must-gather.sh

Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,41 @@
11
#!/usr/bin/env bash
22

3+
# Copyright 2025 NVIDIA CORPORATION
4+
#
5+
# Licensed under the Apache License, Version 2.0 (the "License");
6+
# you may not use this file except in compliance with the License.
7+
# You may obtain a copy of the License at
8+
#
9+
# http://www.apache.org/licenses/LICENSE-2.0
10+
#
11+
# Unless required by applicable law or agreed to in writing, software
12+
# distributed under the License is distributed on an "AS IS" BASIS,
13+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14+
# See the License for the specific language governing permissions and
15+
# limitations under the License.
16+
17+
###############################################################################
18+
# NVIDIA NIM & NeMo Must-Gather Script
19+
#
20+
# This script collects logs and specs from:
21+
# - GPU node status and descriptions
22+
# - Kubernetes version info
23+
# - NIM Operator
24+
# - NIMPipeline/NIMService/NIMCache CRs and pods
25+
# - NIM Model Manifest ConfigMaps
26+
# - NeMo microservices CRs and pods (optional)
27+
#
28+
# Usage:
29+
# export OPERATOR_NAMESPACE=<namespace where NIM Operator is installed>
30+
# export NIM_NAMESPACE=<namespace where NIMService/NIMCache are deployed>
31+
# export NEMO_NAMESPACE=<namespace where NeMo microservices are deployed> # Optional
32+
#
33+
# ./must-gather.sh
34+
#
35+
# Output will be saved to:
36+
# ${ARTIFACT_DIR:-/tmp/nim-nemo-must-gather_<timestamp>}
37+
###############################################################################
38+
339
set -o nounset
440
set -o errexit
541
set -x
@@ -65,6 +101,17 @@ $K get nimcaches.apps.nvidia.com -n "$NIM_NAMESPACE" -oyaml > "$ARTIFACT_DIR/nim
65101
$K get nimpipelines.apps.nvidia.com -n "$NIM_NAMESPACE" -oyaml > "$ARTIFACT_DIR/nim/nimpipelines.yaml" || true
66102
$K get nimservices.apps.nvidia.com -n "$NIM_NAMESPACE" -oyaml > "$ARTIFACT_DIR/nim/nimservices.yaml" || true
67103

104+
echo "Gathering ConfigMaps in $NIM_NAMESPACE owned by NIMCache"
105+
mkdir -p "$ARTIFACT_DIR/nim/configmaps"
106+
107+
#######################################
# Succeeds when the given object has an ownerReference of kind NIMCache.
# The owner kinds are read with JSONPath instead of grepping the YAML dump, so
# a NIMCache that is not the first owner is still detected and look-alike
# kinds (e.g. "NIMCacheFoo") are not accepted.
# Globals:   K (read), NIM_NAMESPACE (read)
# Arguments: $1 - object in "<resource>/<name>" form, e.g. configmap/foo
# Returns:   0 if an owner of kind NIMCache is listed, non-zero otherwise
#            (including when the lookup itself fails)
#######################################
nim_owned_by_nimcache() {
  local owner_kinds
  owner_kinds=$($K get "$1" -n "$NIM_NAMESPACE" \
    -o jsonpath='{.metadata.ownerReferences[*].kind}') || return 1
  # JSONPath prints the kinds space-separated; pad both sides so only a whole
  # word can match.
  [[ " ${owner_kinds} " == *" NIMCache "* ]]
}

#######################################
# Dumps every ConfigMap in $NIM_NAMESPACE that is owned by a NIMCache to
# $ARTIFACT_DIR/nim/configmaps/<name>.yaml (the directory must already exist).
# Best-effort: a failed listing yields no iterations and a failed dump is
# ignored, so collection continues with the remaining objects.
# Globals:   K (read), NIM_NAMESPACE (read), ARTIFACT_DIR (read)
# Arguments: none
#######################################
gather_nimcache_configmaps() {
  local cm cm_name
  # Word-splitting the "-o name" output is intentional: each entry is a
  # whitespace-free "configmap/<name>" token.
  for cm in $($K get configmaps -n "$NIM_NAMESPACE" -o name); do
    if nim_owned_by_nimcache "$cm"; then
      cm_name=$(basename "$cm")
      $K get "$cm" -n "$NIM_NAMESPACE" -oyaml > "$ARTIFACT_DIR/nim/configmaps/${cm_name}.yaml" || true
    fi
  done
}

gather_nimcache_configmaps
114+
68115
echo "Gathering NIMService pods from $NIM_NAMESPACE"
69116
for pod in $($K get pods -n "$NIM_NAMESPACE" -l "app.kubernetes.io/part-of=nim-service,app.kubernetes.io/managed-by=k8s-nim-operator" -oname); do
70117
pod_name=$(basename "$pod")

0 commit comments

Comments
 (0)