|
| 1 | +#!/bin/bash |
| 2 | +# Script to gather all NetCDF test outputs into a centralized location |
| 3 | +# This script is designed to be reusable by other QA pipelines (IOOS compliance-checker, ESGF-QA, etc.) |
| 4 | +# |
| 5 | +# Usage: gather_test_outputs.sh [output_directory] (defaults to /tmp/qa_test_outputs) |
| 6 | +# |
| 7 | +# The script: |
| 8 | +# 1. Searches for all NetCDF files generated by tests |
| 9 | +# 2. Copies them into cmip6/, cmip7/ or other/ under a centralized directory, flattened and prefixed with a hash of the source path |
| 10 | +# 3. Creates a manifest file listing all gathered files |
| 11 | +# 4. Categorizes files by CMIP version (CMIP6 vs CMIP7) |
| 12 | + |
# Stop at the first command that exits with a non-zero status.
set -e

# Destination root: the first argument, or a fixed default when it is absent.
OUTPUT_DIR="${1:-/tmp/qa_test_outputs}"

cat << EOF
==========================================
Gathering Test Outputs for QA Validation
==========================================
Output directory: ${OUTPUT_DIR}

EOF

# Make sure the root and one subdirectory per category exist.
for category_dir in "" "/cmip6" "/cmip7" "/other"; do
  mkdir -p "${OUTPUT_DIR}${category_dir}"
done

# Running totals, updated while files are gathered.
total_files=0
cmip6_files=0
cmip7_files=0
other_files=0

# Start a fresh manifest containing only its header lines.
MANIFEST="${OUTPUT_DIR}/manifest.txt"
printf '%s\n' \
  "# Test Output Manifest" \
  "# Generated: $(date -u +%Y-%m-%dT%H:%M:%SZ)" \
  "# " > "${MANIFEST}"

# NetCDF test outputs are expected in:
# - fremorizer/tests/test_files/outdir (from test_cmor_run_subtool.py)
# - fremorizer/tests/test_files/outdir_ppan_only (from test_cmor_run_subtool_further_examples.py)

printf '%s\n' "Searching for NetCDF files in test directories..." ""
#######################################
# Copy every NetCDF file below a test output directory into the categorized
# QA output tree and record each copy in the manifest.
#
# Classification, in order:
#   1. path contains "CMIP6"  -> cmip6
#   2. path contains "CMIP7"  -> cmip7
#   3. ncdump is installed and the header contains mip_era = "CMIP7" / "CMIP6"
#   4. anything else          -> other
#
# Globals:
#   OUTPUT_DIR (read)  - root that already contains cmip6/, cmip7/ and other/
#   MANIFEST   (read)  - file that "source -> destination" lines are appended to
#   total_files, cmip6_files, cmip7_files, other_files (incremented)
# Arguments:
#   $1 - directory searched recursively for *.nc files
#   $2 - human-readable label, used only in the progress message
# Outputs:
#   Progress messages on stdout.
# Returns:
#   0 when the directory does not exist (a warning is printed) or after the
#   last file has been processed. A failing cp is not handled here.
#######################################
gather_files() {
  local search_dir=$1
  local label=$2
  # Keep all scratch variables out of the global namespace.
  local ncfile cmip_version header file_name path_hash unique_name dest_file

  if [[ ! -d "${search_dir}" ]]; then
    echo "Warning: Directory ${search_dir} not found, skipping"
    return 0
  fi

  echo "Gathering from: ${search_dir} (${label})"

  # NUL-delimited so that paths with spaces or newlines survive intact.
  while IFS= read -r -d '' ncfile; do
    [[ -f "${ncfile}" ]] || continue

    cmip_version="other"
    if [[ "${ncfile}" == *"CMIP6"* ]]; then
      cmip_version="cmip6"
    elif [[ "${ncfile}" == *"CMIP7"* ]]; then
      cmip_version="cmip7"
    elif command -v ncdump &> /dev/null; then
      # Read the header once; whatever ncdump printed is kept even if it
      # exits non-zero, and an unreadable file simply stays in "other".
      header=$(ncdump -h "${ncfile}" 2>/dev/null) || true
      if [[ "${header}" == *'mip_era = "CMIP7"'* ]]; then
        cmip_version="cmip7"
      elif [[ "${header}" == *'mip_era = "CMIP6"'* ]]; then
        cmip_version="cmip6"
      fi
    fi

    # Prefix the name with a short hash of the full source path so that
    # equally named files from different directories do not collide.
    file_name=${ncfile##*/}
    path_hash=$(printf '%s\n' "${ncfile}" | md5sum | cut -c1-8)
    unique_name="${path_hash}_${file_name}"

    dest_file="${OUTPUT_DIR}/${cmip_version}/${unique_name}"
    cp -- "${ncfile}" "${dest_file}"

    echo "${ncfile} -> ${cmip_version}/${unique_name}" >> "${MANIFEST}"

    # Use assignments rather than ((counter++)): the latter returns status 1
    # when the counter is 0, which terminates a script running under `set -e`.
    case "${cmip_version}" in
      cmip6) cmip6_files=$((cmip6_files + 1)) ;;
      cmip7) cmip7_files=$((cmip7_files + 1)) ;;
      *) other_files=$((other_files + 1)) ;;
    esac
    total_files=$((total_files + 1))

    echo " ✓ ${ncfile}"
    echo " → ${cmip_version}/${unique_name}"
  done < <(find "${search_dir}" -name "*.nc" -type f -print0 2>/dev/null)
}
| 110 | + |
# Collect from each directory the test suite writes NetCDF output to.
gather_files "fremorizer/tests/test_files/outdir" "CMIP6 basic tests"
gather_files "fremorizer/tests/test_files/outdir_ppan_only" "Extended test examples"

# Close the manifest with the per-category totals.
cat >> "${MANIFEST}" << EOF

# Summary
# Total files: ${total_files}
# CMIP6 files: ${cmip6_files}
# CMIP7 files: ${cmip7_files}
# Other files: ${other_files}
EOF

# Repeat the totals on the console, followed by where the results were written.
printf '%s\n' \
  "" \
  "==========================================" \
  "Gathering Complete" \
  "==========================================" \
  "Total files gathered: ${total_files}" \
  " - CMIP6: ${cmip6_files}" \
  " - CMIP7: ${cmip7_files}" \
  " - Other: ${other_files}" \
  "" \
  "Output directory: ${OUTPUT_DIR}" \
  "Manifest file: ${MANIFEST}" \
  ""
| 135 | + |
# Write SUMMARY.md: statistics, directory layout, one file list per category
# and example commands for downstream QA tools; then print it to stdout.
SUMMARY="${OUTPUT_DIR}/SUMMARY.md"

#######################################
# Append a markdown bullet list of the *.nc files in one category directory
# to the summary, or "- None" when the category count is zero.
#
# The list is taken from what is on disk in the directory, so it also shows
# any *.nc files that were already present there before this run.
#
# Globals:
#   SUMMARY (read) - file the list is appended to
# Arguments:
#   $1 - category directory to list
#   $2 - number of files counted for that category
#######################################
_append_file_list() {
  local category_dir=$1
  local file_count=$2
  local gathered

  if [[ "${file_count}" -gt 0 ]]; then
    # IFS= keeps leading/trailing whitespace in file names; the whole list is
    # appended through a single redirection.
    find "${category_dir}" -name "*.nc" -type f | sort \
      | while IFS= read -r gathered; do
        printf -- '- `%s`\n' "${gathered##*/}"
      done >> "${SUMMARY}"
  else
    echo "- None" >> "${SUMMARY}"
  fi
}

# The heredocs are unquoted on purpose: ${...} and $(...) are expanded now,
# while \` and \$ produce literal backticks and dollar signs in the output.
cat > "${SUMMARY}" << EOF
# Test Output Summary

**Generated:** $(date -u +%Y-%m-%dT%H:%M:%SZ)

## Statistics

- **Total NetCDF files:** ${total_files}
- **CMIP6 files:** ${cmip6_files} (in \`cmip6/\` directory)
- **CMIP7 files:** ${cmip7_files} (in \`cmip7/\` directory)
- **Other files:** ${other_files} (in \`other/\` directory)

## Directory Structure

\`\`\`
${OUTPUT_DIR}/
├── cmip6/ # CMIP6-specific outputs
├── cmip7/ # CMIP7-specific outputs
├── other/ # Unclassified outputs
├── manifest.txt # Detailed file mapping
└── SUMMARY.md # This file
\`\`\`

## Files by Category

### CMIP6 Files (${cmip6_files})
EOF

_append_file_list "${OUTPUT_DIR}/cmip6" "${cmip6_files}"

cat >> "${SUMMARY}" << EOF

### CMIP7 Files (${cmip7_files})
EOF

_append_file_list "${OUTPUT_DIR}/cmip7" "${cmip7_files}"

cat >> "${SUMMARY}" << EOF

### Other Files (${other_files})
EOF

_append_file_list "${OUTPUT_DIR}/other" "${other_files}"

cat >> "${SUMMARY}" << EOF

## Usage for QA Pipelines

This centralized output directory can be used by various QA tools:

### IOOS Compliance Checker
\`\`\`bash
for ncfile in ${OUTPUT_DIR}/*/*.nc; do
 compliance-checker --test=cf --criteria=normal "\${ncfile}"
done
\`\`\`

### ESGF-QA
\`\`\`bash
for ncfile in ${OUTPUT_DIR}/*/*.nc; do
 esgf-qa --project CMIP6 "\${ncfile}"
done
\`\`\`

### Custom Validation
\`\`\`bash
# Process all files
find ${OUTPUT_DIR} -name "*.nc" -type f -exec your-validator {} \;

# Process only CMIP7 files
find ${OUTPUT_DIR}/cmip7 -name "*.nc" -type f -exec your-validator {} \;
\`\`\`

## See Also

- \`manifest.txt\` - Full mapping of source files to gathered files
- Individual QA pipeline documentation for specific usage instructions
EOF

cat "${SUMMARY}"
| 239 | + |
# Exit status: 0 when at least one file was gathered. Otherwise print a
# "::warning::" line (presumably a GitHub Actions annotation; confirm against
# the CI setup) and exit 1.
if [[ "${total_files}" -le 0 ]]; then
  echo "::warning::No NetCDF files were found in test output directories"
  exit 1
fi
exit 0
0 commit comments