feat: click image to replace without resetting settings #75
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Runs the blueprint notebooks on a GPU self-hosted runner and validates results.
name: Run notebooks and validate the results

on:
  push:
    branches:
      - main
  pull_request:
    branches:
      - main
  workflow_dispatch:

# Cancel superseded runs for the same branch/PR to free the GPU runner.
concurrency:
  group: ${{ github.workflow }}-${{ github.ref }}
  cancel-in-progress: true

jobs:
  run-notebooks:
    runs-on: arc-runners-org-nvidia-ai-bp-4-gpu
    env:
      # Quoted so YAML does not parse the version as a float.
      PYTHON_VERSION: "3.12"
      NVIDIA_API_KEY: ${{ secrets.NVIDIA_API_KEY }}
      NGC_API_KEY: ${{ secrets.NGC_API_KEY }}
      HF_TOKEN: ${{ secrets.HF_TOKEN }}
    steps:
      - name: Checkout repository
        # v3 runs on the deprecated node16 runtime; v4 is the supported release.
        uses: actions/checkout@v4
      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: ${{ env.PYTHON_VERSION }}
| - name: Check Container Status | |
| run: | | |
| echo "===================== Container Status =====================" | |
| docker ps -a | |
| - name: Install dependencies | |
| run: | | |
| echo "Installing dependencies ..." | |
| python -m pip install --upgrade pip | |
| pip install ipykernel nbformat | |
| echo "Installing Python kernel..." | |
| python -m ipykernel install --user --name python3 --display-name "Python 3" | |
| echo "Verifying kernel installation..." | |
| jupyter kernelspec list | |
      # Executes the blueprint notebooks with papermill after patching them
      # for the CI environment (see the shell functions defined below).
      - name: Run Notebooks
        run: |
          # Function to skip specified cells in a notebook
          # Supports individual cells and ranges: skip_cells notebook.ipynb 47-48 78-79
# Expand cell specs ("7", "47-49") into individual indices, printed as a
# comma-terminated list (e.g. "47,48,49,"). Pure shell; no side effects.
expand_cell_ranges() {
  local expanded=()
  local item i
  for item in "$@"; do
    if [[ "$item" =~ ^([0-9]+)-([0-9]+)$ ]]; then
      # Range format: start-end (inclusive)
      for ((i=BASH_REMATCH[1]; i<=BASH_REMATCH[2]; i++)); do
        expanded+=("$i")
      done
    else
      # Single cell number
      expanded+=("$item")
    fi
  done
  printf '%s,' "${expanded[@]}"
}

# Blank out the given cells of a notebook in place.
# $1   - notebook path; $2.. - cell indices or ranges (e.g. 7 47-49)
# Returns 0 immediately when no cells are requested.
# Data is handed to Python via environment variables instead of being
# interpolated into the source, so paths containing quotes are safe.
skip_cells() {
  local notebook_path="$1"
  shift
  if [ "$#" -eq 0 ]; then
    return 0
  fi
  echo "⏭️ Skipping cells: $*"
  local cells_csv
  cells_csv=$(expand_cell_ranges "$@")
  cells_csv="${cells_csv%,}"
  SKIP_NOTEBOOK="$notebook_path" SKIP_CELLS="$cells_csv" python3 - <<'PYEOF'
import os
import nbformat

path = os.environ["SKIP_NOTEBOOK"]
cells = [int(c) for c in os.environ["SKIP_CELLS"].split(",")]
nb = nbformat.read(path, as_version=4)
for i in cells:
    if i < len(nb.cells):
        if nb.cells[i].cell_type == "code":
            nb.cells[i].source = "# Skipped cell " + str(i)
        else:
            nb.cells[i].source = "<!-- Skipped cell " + str(i) + " -->"
nbformat.write(nb, path)
print("✅ Skipped " + str(len(cells)) + " cell(s)")
PYEOF
}
# Run a single notebook with papermill, writing <name>_result.ipynb next to it.
# $1 - notebook path; $2 - optional legacy flag (currently unused, kept for
#      call compatibility).
# Returns 0 on success, 1 when execution fails or no output file appears.
run_notebook() {
  local notebook_path="$1"
  local fix_trtllm_path="${2:-}"  # unused — retained so existing call sites keep working
  # Declarations split from command substitutions so failures are not masked.
  local notebook_dir notebook_name output_notebook temp_notebook
  notebook_dir=$(dirname "$notebook_path")
  notebook_name=$(basename "$notebook_path" .ipynb)
  output_notebook="${notebook_dir}/${notebook_name}_result.ipynb"
  # Work on a copy so CI-only modifications never touch the tracked notebook.
  temp_notebook="${notebook_dir}/${notebook_name}_temp.ipynb"
  cp "$notebook_path" "$temp_notebook"
  echo "================================"
  echo "Running: $notebook_name"
  echo "================================"
  # Skip cloud NIM config and container teardown cells for 1_Deploy_Catalog_Enrichment.ipynb
  if [[ "$notebook_name" == "1_Deploy_Catalog_Enrichment" ]]; then
    skip_cells "$temp_notebook" 7 48
    echo "Modifying notebook for CI environment..."
    # Modification 1: rewrite the NIM cache dir to a workspace-local path so
    # the run does not depend on (or pollute) the runner user's home dir.
    echo " - Changing cache directory to local path..."
    sed -i 's|"local_nim_cache = os.path.expanduser(\\"~/.cache/nim\\")\\n"|"local_nim_cache = os.path.join(os.getcwd(), \\".cache\\", \\"nim\\")\\n"|g' "$temp_notebook"
    echo " ✅ Modified cache directory path"
    echo "✅ All notebook modifications complete"
  fi
  # Run notebook with papermill; capture the status without relying on $? of
  # a separate statement (robust even under 'set -e' shells).
  local exit_code=0
  papermill "$temp_notebook" "$output_notebook" -k python3 --log-output --log-level DEBUG || exit_code=$?
  if [ "$exit_code" -ne 0 ]; then
    echo "❌ Notebook execution failed"
    rm -f "$temp_notebook"
    return 1
  fi
  if [ ! -f "$output_notebook" ]; then
    echo "❌ Output notebook not created"
    rm -f "$temp_notebook"
    return 1
  fi
  # Clean up temporary notebook
  rm -f "$temp_notebook"
  echo "✅ Completed: $notebook_name"
  echo ""
  return 0
}
# Run all notebooks; abort the step on the first failure so the job fails fast.
run_notebook "deploy/1_Deploy_Catalog_Enrichment.ipynb" || exit 1
| - name: Convert results to HTML format | |
| if: always() | |
| run: | | |
| echo "Converting notebooks to HTML..." | |
| for notebook in deploy/*_result.ipynb; do | |
| if [ -f "$notebook" ]; then | |
| jupyter nbconvert --to html "$notebook" | |
| echo "✅ Converted $(basename $notebook)" | |
| fi | |
| done | |
      # Polls the three NIM services started by the notebook until all report
      # ready (or a 30-minute timeout), then waits for the web application.
      - name: Check NIM Services Status
        if: always()
        run: |
          # Check if the HTML files exist before running tests
          if [ ! -f "./deploy/1_Deploy_Catalog_Enrichment_result.html" ]; then
            echo "Warning: 1_Deploy_Catalog_Enrichment_result.html not found"
          fi
          echo "📋 Container status:"
          docker ps -a
          # Parallel NIM service readiness check
          echo "🔍 Starting parallel check for all NIM services..."
          # Service configuration (avoiding associative arrays for compatibility).
          # The four arrays are parallel: index 0 = LLM, 1 = VLM, 2 = FLUX.
          SERVICE_NAMES=("LLM-NIM" "VLM-NIM" "FLUX-NIM")
          SERVICE_PORTS=("8002" "8001" "8003")
          SERVICE_CONTAINERS=("nim-llm" "nim-vlm" "nim-flux")
          SERVICE_STATUS=("⏳ Waiting" "⏳ Waiting" "⏳ Waiting")
          # Health check endpoints: LLM/VLM use /v1/models, FLUX uses /v1/health/ready
          SERVICE_HEALTH_ENDPOINTS=("/v1/models" "/v1/models" "/v1/health/ready")
          max_wait_time=1800 # 30 minutes in seconds
          start_time=$(date +%s)
          check_interval=30
# Print a readiness table for all services.
# $1 - seconds elapsed since polling began (rendered as "Xm Ys").
# Reads the parallel globals SERVICE_NAMES / SERVICE_STATUS.
print_status_summary() {
  local total_sec="$1"
  local mins secs idx
  mins=$(( total_sec / 60 ))
  secs=$(( total_sec % 60 ))
  printf '\n'
  printf '📊 NIM Services Status Summary (%sm %ss elapsed):\n' "$mins" "$secs"
  echo "─────────────────────────────────────────"
  for idx in "${!SERVICE_NAMES[@]}"; do
    printf " %-10s : %s\n" "${SERVICE_NAMES[$idx]}" "${SERVICE_STATUS[$idx]}"
  done
  echo "─────────────────────────────────────────"
}
# Initial status display
print_status_summary 0
# Poll every $check_interval seconds; a service already marked Ready is not
# re-checked. The loop exits via 'break' (all ready) or 'exit 1' (timeout).
while true; do
  current_time=$(date +%s)
  elapsed=$((current_time - start_time))
  # Check each service
  all_ready=true
  for i in 0 1 2; do
    # Skip if already ready
    if [ "${SERVICE_STATUS[$i]}" = "✅ Ready" ]; then
      continue
    fi
    port="${SERVICE_PORTS[$i]}"
    health_endpoint="${SERVICE_HEALTH_ENDPOINTS[$i]}"
    # Check if API is responding (curl -f fails on HTTP >= 400)
    if curl -sf http://127.0.0.1:$port$health_endpoint >/dev/null 2>&1; then
      # Second request fetches the body for content validation.
      response=$(curl -s http://127.0.0.1:$port$health_endpoint 2>/dev/null || echo "")
      # Different validation for different endpoints
      if [ "$health_endpoint" = "/v1/models" ]; then
        # For LLM/VLM: a models list containing "object" means models loaded
        if [ -n "$response" ] && echo "$response" | grep -q "object"; then
          SERVICE_STATUS[$i]="✅ Ready"
        else
          SERVICE_STATUS[$i]="⏳ API responding, loading models..."
          all_ready=false
        fi
      elif [ "$health_endpoint" = "/v1/health/ready" ]; then
        # For FLUX: check if health endpoint returns a non-empty body.
        # NOTE(review): a service returning HTTP 200 with an empty body would
        # never be marked Ready here even though curl -f succeeded — confirm
        # the FLUX NIM always returns a body on /v1/health/ready.
        if [ -n "$response" ]; then
          SERVICE_STATUS[$i]="✅ Ready"
        else
          SERVICE_STATUS[$i]="⏳ Initializing..."
          all_ready=false
        fi
      fi
    else
      SERVICE_STATUS[$i]="⏳ Starting up..."
      all_ready=false
    fi
  done
  # Exit if all ready
  if [ "$all_ready" = true ]; then
    print_status_summary $elapsed
    echo ""
    echo "✅ All NIM services are ready!"
    break
  fi
  # Sleep before next check
  sleep $check_interval
  # Check timeout after sleep (recompute elapsed so the sleep is counted)
  current_time=$(date +%s)
  elapsed=$((current_time - start_time))
  if [ $elapsed -ge $max_wait_time ]; then
    print_status_summary $elapsed
    echo ""
    echo "❌ Timeout: Services failed to become ready after 30 minutes"
    echo ""
    echo "📋 Container status:"
    docker ps -a
    echo ""
    # Dump recent logs only for the services that never became ready.
    echo "📋 Service logs:"
    for i in 0 1 2; do
      if [ "${SERVICE_STATUS[$i]}" != "✅ Ready" ]; then
        echo "===== ${SERVICE_NAMES[$i]} (${SERVICE_CONTAINERS[$i]}) ====="
        docker logs --tail 30 "${SERVICE_CONTAINERS[$i]}" 2>&1 || echo "No logs available"
      fi
    done
    exit 1
  fi
  # Print status summary every 30 seconds
  print_status_summary $elapsed
done
# Wait for the application to be ready (web UI on port 3000, up to ~5 min)
echo "Waiting for application to be ready..."
max_retries=30
retry_count=0
until curl -f http://127.0.0.1:3000 > /dev/null 2>&1; do
  retry_count=$((retry_count + 1))
  if [ $retry_count -ge $max_retries ]; then
    echo "❌ Application failed to start after $max_retries attempts"
    exit 1
  fi
  echo "Waiting for application... attempt $retry_count/$max_retries"
  sleep 10
done
echo "✅ Application is ready"
echo "Remaining containers after application is ready:"
docker ps -a
      # Runs the QA pytest suite from a prebuilt test image against the
      # services the notebook deployed on the host.
      - name: Run Test Code
        run: |
          # Use --net=host to allow test container to access services on host
          docker run --rm \
            --net=host \
            -v "$(pwd):/workspace" \
            nvcr.io/rw983xdqtcdp/auto_test_team/blueprint-github-test-image:latest \
            pytest -m retail_catalog_enrichment \
              --disable-warnings \
              --html=/workspace/retail-catalog-enrichment_test.html \
              --self-contained-html
          echo "✅ Test code executed"
      # Keep the notebook HTML and the pytest report even when earlier steps fail.
      - name: Upload notebook and test results as artifacts
        if: always()
        uses: actions/upload-artifact@v4
        with:
          name: nim-notebooks-results
          path: |
            deploy/1_Deploy_Catalog_Enrichment_result.html
            retail-catalog-enrichment_test.html
          retention-days: 14
      # Best-effort teardown: every removal is '|| true' so cleanup never
      # fails the job on a self-hosted runner shared between workflows.
      - name: Cleanup Docker resources
        if: always()
        run: |
          echo "🧹 Cleaning up workflow resources..."
          # Stop and remove Docker Compose services and their images
          echo "Stopping Docker Compose services and removing images..."
          # cd Retail-Catalog-Enrichment
          docker compose -f docker-compose.yaml down --rmi all 2>/dev/null || true
          # cd ..
          echo "✅ Docker Compose services and images removed"
          sleep 120 # Wait for 2 minutes to ensure all containers are stopped
          # Check remaining containers
          echo "Remaining containers:"
          docker ps -a
          # Remove test image
          echo "Removing test image..."
          docker rmi nvcr.io/rw983xdqtcdp/auto_test_team/blueprint-github-test-image:latest 2>/dev/null || true
          # Remove any dangling images that might have been created during notebook execution
          echo "Removing dangling images..."
          docker image prune -f
          # Show remaining images
          echo "Remaining images:"
          docker images
          echo "✅ Workflow cleanup completed"
| - name: Set result output | |
| id: set_result | |
| if: always() | |
| run: | | |
| echo "RESULT=$(if [ ${{ job.status }} == 'success' ]; then echo 'PASS'; else echo 'FAIL'; fi)" >> $GITHUB_OUTPUT | |
      # Email the outcome to the QA alias; pinned to a commit SHA of the
      # third-party action for supply-chain safety.
      - name: Send mail
        uses: dawidd6/action-send-mail@6e71c855c9a091d80a519621b9fd3e8d252ca40c
        if: always()
        with:
          server_address: smtp.gmail.com
          server_port: 587
          username: ${{ secrets.SMTP_USERNAME }}
          password: ${{ secrets.SMTP_PASSWORD }}
          # Email details
          subject: "QA Test Workflow Result for ${{ github.repository }}"
          to: Github-Action-Blueprint-QA@nvidia.com
          from: github-workflow-notification@gmail.com
          html_body: |
            <p>Hello,</p>
            <p>The workflow for repository: <strong>${{ github.repository }}</strong> has completed.<br>
            <strong>Result:</strong> ${{ steps.set_result.outputs.RESULT }}</p>
            <p>You can review the details on GitHub:<br>
            <a href="${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}">${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}</a></p>
            <p>Thanks!</p>