Skip to content

Commit 95d3519

Browse files
Claude and ilaflott authored
Enhance compliance checking with centralized output gathering
- Add reusable gather_test_outputs.sh script for QA pipelines
- Run comprehensive test suite (CMIP6 + CMIP7 examples)
- Categorize outputs by CMIP version (cmip6/cmip7/other)
- Generate manifest and summary files for traceability
- Update workflow to use centralized gathering approach
- Upload both test outputs and compliance reports as artifacts
- Document reusable approach for other QA tools (ESGF-QA, etc.)
- Focus on CMIP7 examples per user request

Agent-Logs-Url: https://github.com/ilaflott/fremorizer/sessions/80a31504-3038-4dea-9af9-6f5ceb1e3641
Co-authored-by: ilaflott <6273252+ilaflott@users.noreply.github.com>
1 parent db9317e commit 95d3519

File tree

3 files changed

+508
-64
lines changed

3 files changed

+508
-64
lines changed
Lines changed: 246 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,246 @@
#!/bin/bash
# Gather all NetCDF test outputs into a centralized location.
# This script is designed to be reusable by other QA pipelines
# (IOOS compliance-checker, ESGF-QA, etc.)
#
# Usage: gather_test_outputs.sh [output_directory]
#   output_directory  Destination root (default: /tmp/qa_test_outputs)
#
# The script:
# 1. Searches the test output directories for NetCDF (*.nc) files
# 2. Copies them into per-CMIP-version sub-directories (cmip6/, cmip7/,
#    other/) under hash-prefixed names so equal basenames cannot collide
# 3. Creates a manifest file mapping every source file to its copy
# 4. Writes a SUMMARY.md with per-category statistics
#
# Exit status: 0 when at least one file was gathered, 1 otherwise.

# Abort on the first unhandled command failure.
set -e

# Default output directory if not specified
OUTPUT_DIR="${1:-/tmp/qa_test_outputs}"

echo "=========================================="
echo "Gathering Test Outputs for QA Validation"
echo "=========================================="
echo "Output directory: ${OUTPUT_DIR}"
echo ""

# Create the centralized output directory and one sub-directory per category
# (-p also creates ${OUTPUT_DIR} itself and tolerates pre-existing directories).
mkdir -p \
  "${OUTPUT_DIR}/cmip6" \
  "${OUTPUT_DIR}/cmip7" \
  "${OUTPUT_DIR}/other"

# Initialize counters (updated by gather_files, reported at the end)
total_files=0
cmip6_files=0
cmip7_files=0
other_files=0

# Create manifest file; the header is written with a single truncating
# redirect so a re-run never keeps entries from a previous manifest.
MANIFEST="${OUTPUT_DIR}/manifest.txt"
{
  echo "# Test Output Manifest"
  echo "# Generated: $(date -u +%Y-%m-%dT%H:%M:%SZ)"
  echo "# "
} > "${MANIFEST}"

# Find all NetCDF files in test output directories
# Primary locations:
# - fremorizer/tests/test_files/outdir (from test_cmor_run_subtool.py)
# - fremorizer/tests/test_files/outdir_ppan_only (from test_cmor_run_subtool_further_examples.py)

echo "Searching for NetCDF files in test directories..."
echo ""
#######################################
# Categorize and copy every NetCDF file found under one directory.
#
# Each *.nc file is classified as cmip6, cmip7 or other, copied to
# ${OUTPUT_DIR}/<category>/<hash>_<basename>, and recorded in the manifest.
# Classification order: "CMIP6" in the path, then "CMIP7" in the path, then
# (only when ncdump is installed) the mip_era attribute in the file header.
#
# Globals:
#   OUTPUT_DIR (read), MANIFEST (appended to),
#   total_files, cmip6_files, cmip7_files, other_files (incremented)
# Arguments:
#   $1 - directory to search recursively
#   $2 - human-readable label, used only in the progress message
# Outputs:
#   Progress lines on stdout; a warning on stdout when $1 does not exist.
# Returns:
#   0, including when the directory is missing (that case is only skipped).
#######################################
gather_files() {
  local search_dir=$1
  local label=$2
  # Keep per-file scratch variables out of the global namespace.
  local ncfile cmip_version header base_name path_hash unique_name dest_file

  if [[ ! -d "${search_dir}" ]]; then
    echo "Warning: Directory ${search_dir} not found, skipping"
    return 0
  fi

  echo "Gathering from: ${search_dir} (${label})"

  # NUL-delimited find output keeps paths with spaces or newlines intact;
  # process substitution keeps the loop in this shell so counters persist.
  while IFS= read -r -d '' ncfile; do
    [[ -f "${ncfile}" ]] || continue

    # Determine CMIP version from file path or content
    cmip_version="other"
    if [[ "${ncfile}" == *"CMIP6"* ]]; then
      cmip_version="cmip6"
    elif [[ "${ncfile}" == *"CMIP7"* ]]; then
      cmip_version="cmip7"
    elif command -v ncdump &> /dev/null; then
      # Read the header once; an ncdump failure just leaves the file "other".
      header=$(ncdump -h "${ncfile}" 2> /dev/null || true)
      if [[ "${header}" == *'mip_era = "CMIP7"'* ]]; then
        cmip_version="cmip7"
      elif [[ "${header}" == *'mip_era = "CMIP6"'* ]]; then
        cmip_version="cmip6"
      fi
    fi

    # Plain assignments are used instead of ((n++)): the latter returns
    # status 1 when n is 0, which would abort the script under `set -e`.
    case "${cmip_version}" in
      cmip6) cmip6_files=$((cmip6_files + 1)) ;;
      cmip7) cmip7_files=$((cmip7_files + 1)) ;;
      *) other_files=$((other_files + 1)) ;;
    esac

    # Create unique filename to avoid collisions: prefix the basename with
    # the first 8 hex digits of the MD5 of the full source path.
    base_name=$(basename "${ncfile}")
    path_hash=$(printf '%s\n' "${ncfile}" | md5sum | cut -c1-8)
    unique_name="${path_hash}_${base_name}"

    # Copy to categorized directory
    dest_file="${OUTPUT_DIR}/${cmip_version}/${unique_name}"
    cp -- "${ncfile}" "${dest_file}"

    # Add to manifest
    echo "${ncfile} -> ${cmip_version}/${unique_name}" >> "${MANIFEST}"

    total_files=$((total_files + 1))

    echo "${ncfile}"
    echo "${cmip_version}/${unique_name}"
  done < <(find "${search_dir}" -name "*.nc" -type f -print0 2> /dev/null)
}

# Gather files from different test output directories
gather_files "fremorizer/tests/test_files/outdir" "CMIP6 basic tests"
gather_files "fremorizer/tests/test_files/outdir_ppan_only" "Extended test examples"

# Add summary to manifest (one grouped append keeps the footer contiguous)
{
  echo ""
  echo "# Summary"
  echo "# Total files: ${total_files}"
  echo "# CMIP6 files: ${cmip6_files}"
  echo "# CMIP7 files: ${cmip7_files}"
  echo "# Other files: ${other_files}"
} >> "${MANIFEST}"

echo ""
echo "=========================================="
echo "Gathering Complete"
echo "=========================================="
echo "Total files gathered: ${total_files}"
echo " - CMIP6: ${cmip6_files}"
echo " - CMIP7: ${cmip7_files}"
echo " - Other: ${other_files}"
echo ""
echo "Output directory: ${OUTPUT_DIR}"
echo "Manifest file: ${MANIFEST}"
echo ""

#######################################
# Append a Markdown bullet list of the NetCDF files in one category.
# Globals:
#   OUTPUT_DIR (read), SUMMARY (appended to)
# Arguments:
#   $1 - category sub-directory name (cmip6, cmip7 or other)
#   $2 - number of files counted for that category
# Outputs:
#   One "- `<file>`" bullet per *.nc file in sorted order, or "- None"
#   when the count is zero; everything goes to ${SUMMARY}.
#######################################
append_category_listing() {
  local category=$1
  local count=$2
  local listed_path

  if ((count > 0)); then
    # IFS= keeps leading/trailing whitespace in file names intact.
    while IFS= read -r listed_path; do
      echo "- \`$(basename "${listed_path}")\`"
    done < <(find "${OUTPUT_DIR}/${category}" -name "*.nc" -type f | sort) \
      >> "${SUMMARY}"
  else
    echo "- None" >> "${SUMMARY}"
  fi
}

# Create a summary file for easy reference.
# The heredocs below are unquoted on purpose: ${...} and $(...) expand now,
# while \` and \$ stay literal in the generated Markdown.
SUMMARY="${OUTPUT_DIR}/SUMMARY.md"
cat > "${SUMMARY}" << EOF
# Test Output Summary

**Generated:** $(date -u +%Y-%m-%dT%H:%M:%SZ)

## Statistics

- **Total NetCDF files:** ${total_files}
- **CMIP6 files:** ${cmip6_files} (in \`cmip6/\` directory)
- **CMIP7 files:** ${cmip7_files} (in \`cmip7/\` directory)
- **Other files:** ${other_files} (in \`other/\` directory)

## Directory Structure

\`\`\`
${OUTPUT_DIR}/
├── cmip6/ # CMIP6-specific outputs
├── cmip7/ # CMIP7-specific outputs
├── other/ # Unclassified outputs
├── manifest.txt # Detailed file mapping
└── SUMMARY.md # This file
\`\`\`

## Files by Category

### CMIP6 Files (${cmip6_files})
EOF

# List CMIP6 files
append_category_listing "cmip6" "${cmip6_files}"

cat >> "${SUMMARY}" << EOF

### CMIP7 Files (${cmip7_files})
EOF

# List CMIP7 files
append_category_listing "cmip7" "${cmip7_files}"

cat >> "${SUMMARY}" << EOF

### Other Files (${other_files})
EOF

# List other files
append_category_listing "other" "${other_files}"

cat >> "${SUMMARY}" << EOF

## Usage for QA Pipelines

This centralized output directory can be used by various QA tools:

### IOOS Compliance Checker
\`\`\`bash
for ncfile in ${OUTPUT_DIR}/*/*.nc; do
compliance-checker --test=cf --criteria=normal "\${ncfile}"
done
\`\`\`

### ESGF-QA
\`\`\`bash
for ncfile in ${OUTPUT_DIR}/*/*.nc; do
esgf-qa --project CMIP6 "\${ncfile}"
done
\`\`\`

### Custom Validation
\`\`\`bash
# Process all files
find ${OUTPUT_DIR} -name "*.nc" -type f -exec your-validator {} \;

# Process only CMIP7 files
find ${OUTPUT_DIR}/cmip7 -name "*.nc" -type f -exec your-validator {} \;
\`\`\`

## See Also

- \`manifest.txt\` - Full mapping of source files to gathered files
- Individual QA pipeline documentation for specific usage instructions
EOF

cat "${SUMMARY}"

# Exit successfully if we found any files; otherwise emit a
# "::warning::"-prefixed message and fail.
if ((total_files > 0)); then
  exit 0
else
  echo "::warning::No NetCDF files were found in test output directories"
  exit 1
fi

0 commit comments

Comments
 (0)