NVIDIA-AI-Blueprints
diff --git a/‎.env.example‎
Lines changed: 175 additions & 26 deletions b/‎.env.example‎
Lines changed: 175 additions & 26 deletions
diff --git a/‎README.md‎
Lines changed: 51 additions & 5 deletions b/‎README.md‎
Lines changed: 51 additions & 5 deletions
diff --git a/‎docs/SOFTWARE_INVENTORY.md‎
Lines changed: 9 additions & 9 deletions b/‎docs/SOFTWARE_INVENTORY.md‎
Lines changed: 9 additions & 9 deletions
@@ -1,41 +1,190 @@
+# =============================================================================
+# Warehouse Operational Assistant - Environment Configuration
+# =============================================================================
+# 
+# Copy this file to .env and update with your actual values:
+#   cp .env.example .env
+#   nano .env  # or your preferred editor
+#
+# For Docker Compose deployments, place .env in the deploy/compose/ directory
+# =============================================================================
+
+# =============================================================================
+# ENVIRONMENT
+# =============================================================================
+# Set to 'production' for production deployments, 'development' for local dev
+ENVIRONMENT=development
+
+# =============================================================================
+# DATABASE CONFIGURATION (PostgreSQL/TimescaleDB)
+# =============================================================================
+# Database connection settings
 POSTGRES_USER=warehouse
-POSTGRES_PASSWORD=warehousepw
+# ⚠️ CHANGE IN PRODUCTION! (kept on its own line: some .env parsers treat inline comments as part of the value)
+POSTGRES_PASSWORD=changeme
 POSTGRES_DB=warehouse
+DB_HOST=localhost
+DB_PORT=5435
 
-# Database Configuration
-PGHOST=127.0.0.1
-PGPORT=5435
-
-# Redis Configuration
-REDIS_HOST=127.0.0.1
-REDIS_PORT=6379
+# Alternative database URL format (overrides individual settings above)
+# DATABASE_URL=postgresql://warehouse:changeme@localhost:5435/warehouse
 
-# Kafka Configuration
-KAFKA_BROKER=kafka:9092
+# =============================================================================
+# SECURITY
+# =============================================================================
+# JWT Secret Key - REQUIRED for production, optional for development
+# Generate a strong random key: openssl rand -hex 32
+# Minimum 32 characters recommended
+JWT_SECRET_KEY=your-strong-random-secret-minimum-32-characters-change-this-in-production
 
-# Milvus Configuration
-MILVUS_HOST=127.0.0.1
-MILVUS_PORT=19530
+# Admin user default password (change in production!)
+DEFAULT_ADMIN_PASSWORD=changeme
 
-# NVIDIA NIM Configuration
-NVIDIA_API_KEY=your_nvidia_ngc_api_key_here
-LLM_NIM_URL=https://integrate.api.nvidia.com/v1
-EMBEDDING_NIM_URL=https://integrate.api.nvidia.com/v1
+# =============================================================================
+# REDIS CONFIGURATION
+# =============================================================================
+REDIS_HOST=localhost
+REDIS_PORT=6379
+# Leave REDIS_PASSWORD empty for development
+REDIS_PASSWORD=
+REDIS_DB=0
 
-# Optional: NeMo Guardrails Configuration
-RAIL_API_KEY=your_nvidia_ngc_api_key_here
-DATABASE_URL=postgresql://warehouse:warehousepw@localhost:5435/warehouse
+# =============================================================================
+# VECTOR DATABASE (Milvus)
+# =============================================================================
+MILVUS_HOST=localhost
+MILVUS_PORT=19530
+MILVUS_USER=root
+MILVUS_PASSWORD=Milvus
 
-# GPU Acceleration Configuration
+# GPU Acceleration for Milvus
 MILVUS_USE_GPU=true
 MILVUS_GPU_DEVICE_ID=0
 CUDA_VISIBLE_DEVICES=0
 MILVUS_INDEX_TYPE=GPU_CAGRA
 MILVUS_COLLECTION_NAME=warehouse_docs_gpu
 
+# =============================================================================
+# MESSAGE QUEUE (Kafka)
+# =============================================================================
+KAFKA_BOOTSTRAP_SERVERS=localhost:9092
+# Alternative: KAFKA_BROKER=kafka:9092
+
+# =============================================================================
+# NVIDIA NIM LLM CONFIGURATION
+# =============================================================================
+# 
+# IMPORTANT: Different models use different endpoints!
+# 
+# For the 49B model (llama-3.3-nemotron-super-49b-v1):
+#   - Use: https://api.brev.dev/v1
+#   - This is the correct endpoint for the 49B model
+#
+# For other NVIDIA NIM models:
+#   - Use: https://integrate.api.nvidia.com/v1
+#   - This is the standard NVIDIA NIM endpoint
+#
+# For self-hosted NIM instances:
+#   - Use your own endpoint URL (e.g., http://localhost:8000/v1 or https://your-nim-instance.com/v1)
+#   - Ensure your NIM instance is accessible and properly configured
+#
+# Your NVIDIA API key (same key works for both endpoints)
+NVIDIA_API_KEY=your-nvidia-api-key-here
+
+# LLM Service Endpoint
+# For 49B model: https://api.brev.dev/v1
+# For other NIMs: https://integrate.api.nvidia.com/v1
+# For self-hosted: http://your-nim-host:port/v1
+LLM_NIM_URL=https://api.brev.dev/v1
+
+# LLM Model Identifier
+# Example for 49B model:
+LLM_MODEL=nvcf:nvidia/llama-3.3-nemotron-super-49b-v1:dep-36ZiLbQIG2ZzK7gIIC5yh1E6lGk
+
+# LLM Generation Parameters
+LLM_TEMPERATURE=0.1
+LLM_MAX_TOKENS=2000
+LLM_TOP_P=1.0
+LLM_FREQUENCY_PENALTY=0.0
+LLM_PRESENCE_PENALTY=0.0
+# LLM client timeout in seconds
+LLM_CLIENT_TIMEOUT=120
+
+# LLM Caching
+LLM_CACHE_ENABLED=true
+# Cache TTL in seconds (300 = 5 minutes)
+LLM_CACHE_TTL_SECONDS=300
+
+# =============================================================================
+# EMBEDDING SERVICE CONFIGURATION
+# =============================================================================
+# Embedding service endpoint (typically uses NVIDIA endpoint)
+EMBEDDING_NIM_URL=https://integrate.api.nvidia.com/v1
+# Embedding API key (usually same as NVIDIA_API_KEY)
+# EMBEDDING_API_KEY=your-embedding-api-key  # Defaults to NVIDIA_API_KEY if not set
+
+# =============================================================================
+# CORS CONFIGURATION
+# =============================================================================
+# Allowed origins for CORS (comma-separated)
+# Add your frontend URLs here
+CORS_ORIGINS=http://localhost:3001,http://localhost:3000,http://127.0.0.1:3001,http://127.0.0.1:3000
+
+# =============================================================================
+# UPLOAD & REQUEST LIMITS
+# =============================================================================
+# Maximum request size in bytes (default: 10MB)
+MAX_REQUEST_SIZE=10485760
+
+# Maximum upload size in bytes (default: 50MB)
+MAX_UPLOAD_SIZE=52428800
+
+# =============================================================================
+# NeMo Guardrails Configuration
+# =============================================================================
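+# Guardrails-specific API key (falls back to NVIDIA_API_KEY if not set)
+# RAIL_API_KEY=your_nvidia_ngc_api_key_here
+# Guardrails API endpoint (defaults to NVIDIA's cloud endpoint)
+# RAIL_API_URL=https://integrate.api.nvidia.com/v1
+# Timeout for guardrails API calls in seconds (default: 10)
+# GUARDRAILS_TIMEOUT=10
+# Set to false to use pattern-based matching only (default: true)
+# GUARDRAILS_USE_API=true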
+
+# =============================================================================
 # Document Extraction Agent - NVIDIA NeMo API Keys
-NEMO_RETRIEVER_API_KEY=your_nvidia_ngc_api_key_here
-NEMO_OCR_API_KEY=your_nvidia_ngc_api_key_here
-NEMO_PARSE_API_KEY=your_nvidia_ngc_api_key_here
-LLAMA_NANO_VL_API_KEY=your_nvidia_ngc_api_key_here
-LLAMA_70B_API_KEY=your_nvidia_ngc_api_key_here
+# =============================================================================
+# NEMO_RETRIEVER_API_KEY=your_nvidia_ngc_api_key_here
+# NEMO_OCR_API_KEY=your_nvidia_ngc_api_key_here
+# NEMO_PARSE_API_KEY=your_nvidia_ngc_api_key_here
+# LLAMA_NANO_VL_API_KEY=your_nvidia_ngc_api_key_here
+# LLAMA_70B_API_KEY=your_nvidia_ngc_api_key_here
+
+# =============================================================================
+# EXTERNAL SERVICE INTEGRATIONS
+# =============================================================================
+# WMS_API_KEY=your-wms-api-key
+# ERP_API_KEY=your-erp-api-key
+
+# =============================================================================
+# NOTES FOR DEVELOPERS
+# =============================================================================
+#
+# 1. LLM Endpoint Configuration:
+#    - The 49B model REQUIRES https://api.brev.dev/v1
+#    - Other NIM models use https://integrate.api.nvidia.com/v1
+#    - Both endpoints use the same NVIDIA_API_KEY
+#    - You can deploy NIMs on your own instances and consume them via endpoint
+#      (e.g., http://localhost:8000/v1 or https://your-nim-instance.com/v1)
+#    - For self-hosted NIMs, ensure the endpoint is accessible and properly configured
+#
+# 2. Security:
+#    - NEVER commit .env files to version control
+#    - Change all default passwords in production
+#    - Use strong, unique JWT_SECRET_KEY in production
+#    - JWT_SECRET_KEY is REQUIRED in production (app will fail to start without it)
+#
+# 3. Database:
+#    - Default port 5435 is used to avoid conflicts with standard PostgreSQL (5432)
+#    - Ensure Docker containers are running before starting the backend
+#
+# 4. Testing:
+#    - View logs in real-time: ./scripts/view_logs.sh
+#    - Restart backend: ./restart_backend.sh
+#    - Check health: curl http://localhost:8001/api/v1/health
+#
+# 5. Getting NVIDIA API Keys:
+#    - Sign up at: https://build.nvidia.com/
+#    - Get your API key from the NVIDIA dashboard
+#    - The same key works for both brev.dev and integrate.api.nvidia.com endpoints
+#
+# =============================================================================
@@ -498,9 +498,10 @@ The system implements **NVIDIA NeMo Guardrails** for content safety, security, a
 
 NeMo Guardrails provides multi-layer protection for the warehouse operational assistant:
 
+- **API Integration** - Uses NVIDIA NeMo Guardrails API for intelligent safety validation
 - **Input Safety Validation** - Checks user queries before processing
 - **Output Safety Validation** - Validates AI responses before returning to users
-- **Pattern-Based Detection** - Identifies violations using keyword and phrase matching
+- **Pattern-Based Fallback** - Falls back to keyword/phrase matching if API is unavailable
 - **Timeout Protection** - Prevents hanging requests with configurable timeouts
 - **Graceful Degradation** - Continues operation even if guardrails fail
 
@@ -546,7 +547,31 @@ Redirects non-warehouse related queries:
 
 ### Configuration
 
-Guardrails configuration is defined in `data/config/guardrails/rails.yaml`:
+#### Environment Variables
+
+The guardrails service can be configured via environment variables:
+
+```bash
+# NeMo Guardrails API Configuration
+# Set RAIL_API_KEY to use a guardrails-specific key; if unset, the service falls back to NVIDIA_API_KEY
+RAIL_API_KEY=your-nvidia-api-key-here
+
+# Guardrails API endpoint (defaults to NVIDIA's cloud endpoint)
+RAIL_API_URL=https://integrate.api.nvidia.com/v1
+
+# Timeout for guardrails API calls in seconds (default: 10)
+GUARDRAILS_TIMEOUT=10
+
+# Enable/disable API usage (default: true)
+# If false, will only use pattern-based matching
+GUARDRAILS_USE_API=true
+```
+
+**Note:** If `RAIL_API_KEY` is not set, the service will use `NVIDIA_API_KEY` as a fallback. If neither is set, the service will use pattern-based matching only.
+
+#### YAML Configuration
+
+Guardrails configuration is also defined in `data/config/guardrails/rails.yaml`:
 
 ```yaml
 # Safety and compliance rules
@@ -623,10 +648,12 @@ python tests/unit/test_guardrails.py
 The guardrails service (`src/api/services/guardrails/guardrails_service.py`) provides:
 
 - **GuardrailsService** class with async methods
-- **Pattern matching** for violation detection
+- **API Integration** - Calls NVIDIA NeMo Guardrails API for intelligent validation
+- **Pattern-based Fallback** - Falls back to keyword/phrase matching if API unavailable
 - **Safety response generation** based on violation types
 - **Configuration loading** from YAML files
 - **Error handling** with graceful degradation
+- **Automatic fallback** - Seamlessly switches to pattern matching on API failures
 
 ### Response Format
 
@@ -666,13 +693,32 @@ Guardrails activity is logged and monitored:
 4. **Customization**: Adjust timeout values based on your infrastructure
 5. **Response Messages**: Keep safety responses professional and helpful
 
+### API Integration Details
+
+The guardrails service now integrates with the NVIDIA NeMo Guardrails API:
+
+1. **Primary Method**: API-based validation using NVIDIA's guardrails endpoint
+   - Uses `/chat/completions` endpoint with safety-focused prompts
+   - Leverages LLM-based violation detection for more intelligent analysis
+   - Returns structured JSON with violation details and confidence scores
+
+2. **Fallback Method**: Pattern-based matching
+   - Automatically used if API is unavailable or times out
+   - Uses keyword/phrase matching for common violation patterns
+   - Ensures system continues to function even without API access
+
+3. **Hybrid Approach**: Best of both worlds
+   - API provides intelligent, context-aware validation
+   - Pattern matching provides a reliable, low-latency fallback
+   - Seamless switching between methods based on availability
+
 ### Future Enhancements
 
 Planned improvements:
-- Integration with full NeMo Guardrails SDK
-- LLM-based violation detection (beyond pattern matching)
+- Enhanced API integration with dedicated guardrails endpoints
 - Machine learning for adaptive threat detection
 - Enhanced monitoring dashboards
+- Custom guardrails rules via API configuration
 
 **Related Documentation:**
 - Configuration file: `data/config/guardrails/rails.yaml`
 
@@ -2,8 +2,8 @@
 
 This document lists all third-party software packages used in this project, including their versions, licenses, authors, and sources.
 
-**Generated:** Automatically from dependency files  
-**Last Updated:** 2025-01-XX  
+**Generated:** Automatically from dependency files\
+**Last Updated:** 2025-12-08\
 **Generation Script:** `scripts/tools/generate_software_inventory.py`
 
 ## How to Regenerate
@@ -37,15 +37,16 @@ The script automatically:
 | Faker | 19.0.0 | MIT License | https://github.com/joke2k/faker | joke2k <joke2k@gmail.com> | PyPI | pip |
 | fastapi | 0.119.0 | MIT License | https://pypi.org/project/fastapi/ | Sebastián Ramírez <tiangolo@gmail.com> | PyPI | pip |
 | httpx | 0.27.0 | BSD License | https://pypi.org/project/httpx/ | Tom Christie <tom@tomchristie.com> | PyPI | pip |
-| langchain-core | 0.1.0 | MIT | https://github.com/langchain-ai/langchain | N/A | PyPI | pip |
+| langchain-core | 0.3.80 | MIT | https://pypi.org/project/langchain-core/ | N/A | PyPI | pip |
 | langgraph | 0.2.30 | MIT | https://www.github.com/langchain-ai/langgraph | N/A | PyPI | pip |
 | loguru | 0.7.0 | MIT license | https://github.com/Delgan/loguru | Delgan <delgan.py@gmail.com> | PyPI | pip |
 | numpy | 1.24.0 | BSD-3-Clause | https://www.numpy.org | Travis E. Oliphant et al. | PyPI | pip |
 | paho-mqtt | 1.6.0 | Eclipse Public License v2.0 / Eclipse Distribution License v1.0 | http://eclipse.org/paho | Roger Light <roger@atchoo.org> | PyPI | pip |
 | pandas | 1.2.4 | BSD | https://pandas.pydata.org | N/A | PyPI | pip |
 | passlib | 1.7.4 | BSD | https://passlib.readthedocs.io | Eli Collins <elic@assurancetechnologies.com> | PyPI | pip |
-| pillow | 10.0.0 | HPND | https://python-pillow.org | Jeffrey A. Clark (Alex) <aclark@aclark.net> | PyPI | pip |
+| pillow | 10.3.0 | HPND | https://pypi.org/project/Pillow/ | "Jeffrey A. Clark" <aclark@aclark.net> | PyPI | pip |
 | prometheus-client | 0.19.0 | Apache Software License 2.0 | https://github.com/prometheus/client_python | Brian Brazil <brian.brazil@robustperception.io> | PyPI | pip |
+| psutil | 5.9.0 | BSD | https://github.com/giampaolo/psutil | Giampaolo Rodola <g.rodola@gmail.com> | PyPI | pip |
 | psycopg | 3.0 | GNU Lesser General Public License v3 (LGPLv3) | https://psycopg.org/psycopg3/ | Daniele Varrazzo <daniele.varrazzo@gmail.com> | PyPI | pip |
 | pydantic | 2.7.0 | MIT License | https://pypi.org/project/pydantic/ | Samuel Colvin <s@muelcolvin.com>, Eric Jolibois <em.jolibois@gmail.com>, Hasan Ramezani <hasan.r67@gmail.com>, Adrian Garcia Badaracco <1755071+adr... | PyPI | pip |
 | PyJWT | 2.8.0 | MIT | https://github.com/jpadilla/pyjwt | Jose Padilla <hello@jpadilla.com> | PyPI | pip |
@@ -57,8 +58,8 @@ The script automatically:
 | python-multipart | 0.0.20 | Apache Software License | https://pypi.org/project/python-multipart/ | Andrew Dunham <andrew@du.nham.ca>, Marcelo Trylesinski <marcelotryle@gmail.com> | PyPI | pip |
 | PyYAML | 6.0 | MIT | https://pyyaml.org/ | Kirill Simonov <xi@resolvent.net> | PyPI | pip |
 | redis | 5.0.0 | MIT | https://github.com/redis/redis-py | Redis Inc. <oss@redis.com> | PyPI | pip |
-| requests | 2.31.0 | Apache 2.0 | https://requests.readthedocs.io | Kenneth Reitz <me@kennethreitz.org> | PyPI | pip |
-| scikit-learn | 1.0 | new BSD | http://scikit-learn.org | N/A | PyPI | pip |
+| requests | 2.32.4 | Apache-2.0 | https://requests.readthedocs.io | Kenneth Reitz <me@kennethreitz.org> | PyPI | pip |
+| scikit-learn | 1.5.0 | new BSD | https://scikit-learn.org | N/A | PyPI | pip |
 | tiktoken | 0.12.0 | MIT License | https://pypi.org/project/tiktoken/ | Shantanu Jain <shantanu@openai.com> | PyPI | pip |
 | uvicorn | 0.30.1 | BSD License | https://pypi.org/project/uvicorn/ | Tom Christie <tom@tomchristie.com> | PyPI | pip |
 | websockets | 11.0 | BSD-3-Clause | https://pypi.org/project/websockets/ | Aymeric Augustin <aymeric.augustin@m4x.org> | PyPI | pip |
@@ -93,20 +94,19 @@ The script automatically:
 | MIT | 14 |
 | BSD-3-Clause | 5 |
 | MIT License | 4 |
-| BSD | 3 |
+| BSD | 4 |
 | BSD License | 2 |
 | Apache License, Version 2.0 | 2 |
 | Apache Software License | 2 |
+| Apache-2.0 | 2 |
 | MIT license | 1 |
 | Apache 2 | 1 |
 | CC0 (copyright waived) | 1 |
 | Apache Software License 2.0 | 1 |
 | GNU Lesser General Public License v3 (LGPLv3) | 1 |
 | Eclipse Public License v2.0 / Eclipse Distribution License v1.0 | 1 |
 | N/A | 1 |
-| Apache 2.0 | 1 |
 | new BSD | 1 |
-| Apache-2.0 | 1 |
 | HPND | 1 |
 | GNU AFFERO GPL 3.0 | 1 |
 | ISC | 1 |