awslabs
diff --git a/.github/workflows/code.e2e-full-test.weekly.yml b/.github/workflows/code.e2e-full-test.weekly.yml
Lines changed: 9 additions & 3 deletions
diff --git a/.github/workflows/code.end-to-end-test.nightly.yml b/.github/workflows/code.end-to-end-test.nightly.yml
Lines changed: 16 additions & 4 deletions
diff --git a/.github/workflows/code.publish.yml b/.github/workflows/code.publish.yml
Lines changed: 9 additions & 1 deletion
diff --git a/.github/workflows/code.smoke-test.yml b/.github/workflows/code.smoke-test.yml
Lines changed: 2 additions & 1 deletion
diff --git a/.gitignore b/.gitignore
Lines changed: 5 additions & 0 deletions
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
Lines changed: 12 additions & 12 deletions
diff --git a/CHANGELOG.md b/CHANGELOG.md
Lines changed: 46 additions & 0 deletions
diff --git a/README.md b/README.md
Lines changed: 27 additions & 7 deletions
diff --git a/VERSION b/VERSION
Lines changed: 1 addition & 1 deletion
diff --git a/bin/build-images b/bin/build-images
Lines changed: 22 additions & 17 deletions
@@ -34,13 +34,22 @@ jobs:
         env:
           SLACK_TITLE: 'Full E2E Tests Starting'
           MSG_MINIMAL: true
-          SLACK_MESSAGE: 'Full E2E test suite has started on ref `${{ inputs.ref || github.ref_name }}`...'
+          SLACK_MESSAGE: "Full E2E test suite has started on ref `${{ inputs.ref || 'develop' }}`..."
 
   full-e2e:
     name: Run Full E2E Tests
     runs-on: ubuntu-latest
     timeout-minutes: 60
+    outputs:
+      tested_ref: ${{ steps.set-ref.outputs.ref }}
     steps:
+      - name: Set tested ref
+        id: set-ref
+        # Hand the ref to the script through an environment variable instead of
+        # expanding it inside the script text, so shell metacharacters in a ref name stay literal.
+        env:
+          TESTED_REF: ${{ inputs.ref || 'develop' }}
+        run: echo "ref=${TESTED_REF}" >> "$GITHUB_OUTPUT"
       - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v4
         with:
           ref: ${{ inputs.ref || 'develop' }}
@@ -52,12 +57,13 @@ jobs:
       - name: Install base dependencies
         run: npm ci
       - name: Run Cypress Full E2E Suite
+        working-directory: cypress
         env:
           ADMIN_USER_NAME: ${{ secrets.ADMIN_USER_NAME }}
           ADMIN_PASSWORD: ${{ secrets.ADMIN_PASSWORD }}
           USER_NAME: ${{ secrets.USER_NAME }}
           USER_PASSWORD: ${{ secrets.USER_PASSWORD }}
-        run: npx cypress run --config-file cypress/cypress.e2e.config.ts
+        run: npx cypress run --config-file cypress.e2e.config.ts
       - name: Archive Cypress videos & screenshots
         if: always()
         uses: actions/upload-artifact@bbbca2ddaa5d8feaa63e36b76fdaad77386f024f # v4
@@ -79,4 +85,4 @@ jobs:
           SLACK_COLOR: ${{ needs.full-e2e.result == 'success' && 'good' || 'danger' }}
           SLACK_TITLE: 'Full E2E Tests Finished'
           MSG_MINIMAL: false
-          SLACK_MESSAGE: ${{ needs.full-e2e.result == 'success' && format('Full E2E test suite passed on ref `{0}`.', inputs.ref || github.ref_name) || format('<!here> Full E2E test suite {0} on ref `{1}`.', needs.full-e2e.result, inputs.ref || github.ref_name) }}
+          SLACK_MESSAGE: ${{ needs.full-e2e.result == 'success' && format('Full E2E test suite passed on ref `{0}`.', needs.full-e2e.outputs.tested_ref) || format('<!here> Full E2E test suite {0} on ref `{1}`.', needs.full-e2e.result, needs.full-e2e.outputs.tested_ref) }}
@@ -4,6 +4,12 @@ on:
   schedule:
     - cron: '0 0 * * *'
   workflow_dispatch:
+    inputs:
+      ref:
+        description: 'Branch or tag to test against'
+        required: false
+        default: 'develop'
+        type: string
 
 permissions:
   contents: read
@@ -27,10 +33,19 @@ jobs:
     name: 🏃‍♀️ Run E2E Tests
     runs-on: ubuntu-latest
     timeout-minutes: 15
+    outputs:
+      tested_ref: ${{ steps.set-ref.outputs.ref }}
     steps:
+      - name: Set tested ref
+        id: set-ref
+        # Hand the ref to the script through an environment variable instead of
+        # expanding it inside the script text, so shell metacharacters in a ref name stay literal.
+        env:
+          TESTED_REF: ${{ inputs.ref || 'develop' }}
+        run: echo "ref=${TESTED_REF}" >> "$GITHUB_OUTPUT"
       - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v4
         with:
-          ref: develop
+          ref: ${{ inputs.ref || 'develop' }}
       - name: Setup Node.js
         uses: actions/setup-node@53b83947a5a98c8d113130e565377fae1a50d02f # v4
         with:
@@ -39,6 +50,7 @@ jobs:
       - name: Install base dependencies
         run: npm ci
       - name: Run Cypress E2E Suite
+        working-directory: cypress
         env:
           ADMIN_USER_NAME: ${{ secrets.ADMIN_USER_NAME }}
           ADMIN_PASSWORD: ${{ secrets.ADMIN_PASSWORD }}
@@ -48,8 +60,8 @@ jobs:
         # Update this list when adding new quick E2E specs.
         run: >-
           npx cypress run
-          --config-file cypress/cypress.e2e.config.ts
-          --spec "cypress/src/e2e/specs/admin.e2e.spec.ts,cypress/src/e2e/specs/user.e2e.spec.ts,cypress/src/e2e/specs/chat.e2e.spec.ts,cypress/src/e2e/specs/bedrock-quick.e2e.spec.ts"
+          --config-file cypress.e2e.config.ts
+          --spec "src/e2e/specs/admin.e2e.spec.ts,src/e2e/specs/user.e2e.spec.ts,src/e2e/specs/chat.e2e.spec.ts,src/e2e/specs/bedrock-quick.e2e.spec.ts"
       - name: Archive Cypress videos & screenshots
         if: always()
         uses: actions/upload-artifact@bbbca2ddaa5d8feaa63e36b76fdaad77386f024f # v4
@@ -71,4 +83,4 @@ jobs:
           SLACK_COLOR: ${{ needs.e2e.result == 'success' && 'good' || 'danger' }}
           SLACK_TITLE: 'Nightly E2E Health Check Finished'
           MSG_MINIMAL: false
-          SLACK_MESSAGE: ${{ needs.e2e.result == 'success' && format('Nightly E2E health check passed on branch `{0}`.', github.ref_name) || format('<!here> Nightly E2E health check {0} on branch `{1}`.', needs.e2e.result, github.ref_name) }}
+          SLACK_MESSAGE: ${{ needs.e2e.result == 'success' && format('Nightly E2E health check passed on branch `{0}`.', needs.e2e.outputs.tested_ref) || format('<!here> Nightly E2E health check {0} on branch `{1}`.', needs.e2e.result, needs.e2e.outputs.tested_ref) }}
@@ -63,7 +63,19 @@ jobs:
         run: npm version "${{ inputs.version }}" --no-git-tag-version --allow-same-version
       - name: Publish NPM Package
         if: "!(github.event_name == 'workflow_dispatch' && inputs.test_mode == true)"
-        run: npm publish
+        run: |
+          # Without pipefail the pipeline's exit status is tee's, so a failed
+          # `npm publish` would never reach the `||` handler below.
+          set -o pipefail
+          npm publish 2>&1 | tee /tmp/npm-publish.log || {
+            # Only an "already published" conflict is tolerated; match loosely because the wording may vary by registry.
+            if grep -qiE "cannot publish over (the )?(previously published|existing) versions?" /tmp/npm-publish.log; then
+              echo "::warning::NPM package version already exists, skipping publish"
+            else
+              echo "::error::NPM publish failed"
+              exit 1
+            fi
+          }
       - name: Publish NPM Package (Dry Run)
         if: github.event_name == 'workflow_dispatch' && inputs.test_mode == true
         run: npm publish --dry-run
 
@@ -38,7 +38,8 @@ jobs:
       # Execute Cypress tests
       # ────────────────────────────────────────────────────
       - name: Run Cypress Smoke Suite
-        run: npx cypress run --config-file cypress/cypress.smoke.config.ts
+        working-directory: cypress
+        run: npx cypress run --config-file cypress.smoke.config.ts
 
       - name: Archive Cypress videos & screenshots
         if: failure() || always()
 
@@ -42,6 +42,7 @@ lib/rag/ingestion/ingestion-image/build
 .kiro/
 .amazonq/
 memory-bank/
+tracking/
 
 # Coverage Statistic Folders
 coverage
@@ -62,3 +63,7 @@ config-generated.yaml
 # Cypress local environment
 /cypress/.env.local
 .npmrc
+
+# RAG Evaluation configs
+test/integration/rag/eval_datasets/eval_config.yaml
+test/integration/rag/eval_datasets/golden-dataset.jsonl
@@ -13,7 +13,7 @@ repos:
       files: config-base.yaml
 
 - repo: https://github.com/PyCQA/bandit
-  rev: '1.9.2'
+  rev: '1.9.4'
   hooks:
     - id: bandit
       args: [--recursive, -c=pyproject.toml]
@@ -48,28 +48,28 @@ repos:
       exclude: ^test/cdk/stacks/__baselines__/
 
 - repo: https://github.com/codespell-project/codespell
-  rev: v2.4.1
+  rev: v2.4.2
   hooks:
     - id: codespell
       entry: codespell
-      args: ['--skip=*.git*,*cdk.out*,*venv*,*mypy_cache*,*package-lock*,*node_modules*,*dist/*,*/public/*,*poetry.lock*,*coverage*,*models/*,*htmlcov*,*TIKTOKEN_CACHE/*,*test/cdk/stacks/__baselines__/*', "-L=xdescribe,assertIn,afterAll"]
+      args: ['--skip=*.git*,*cdk.out*,*venv*,*mypy_cache*,*package-lock*,*node_modules*,*dist/*,*/public/*,*poetry.lock*,*coverage*,*models/*,*htmlcov*,*TIKTOKEN_CACHE/*,*test/cdk/stacks/__baselines__/*,*.jsonl', "-L=xdescribe,assertIn,afterAll"]
       pass_filenames: false
 
 - repo: https://github.com/pycqa/isort
-  rev: 7.0.0
+  rev: 8.0.1
   hooks:
     - id: isort
       name: isort (python)
       exclude: ^test/cdk/stacks/__baselines__/
 
 - repo: https://github.com/psf/black
-  rev: '25.12.0'
+  rev: '26.3.1'
   hooks:
     - id: black
       exclude: ^test/cdk/stacks/__baselines__/
 
 - repo: https://github.com/astral-sh/ruff-pre-commit
-  rev: 'v0.14.9'
+  rev: 'v0.15.11'
   hooks:
     - id: ruff-check
       args:
@@ -95,7 +95,7 @@ repos:
 
 
 - repo: https://github.com/pre-commit/mirrors-mypy
-  rev: 'v1.19.0'
+  rev: 'v1.20.1'
   hooks:
     - id: mypy
       verbose: true
@@ -110,13 +110,13 @@ repos:
       exclude: ^test/
 
 - repo: https://github.com/pre-commit/mirrors-eslint
-  rev: 'v9.39.1'
+  rev: 'v10.2.1'
   hooks:
     - id: eslint
       files: \.[jt]sx?$
       types: [file]
       args:
-        - --max-warnings=20
+        - --max-warnings=40
         - --no-warn-ignored
         - --fix
 
@@ -126,7 +126,7 @@ repos:
 #     - id: python-safety-dependencies-check
 
 - repo: https://github.com/asottile/pyupgrade
-  rev: v3.19.0
+  rev: v3.21.2
   hooks:
     - id: pyupgrade
       args: [--py313-plus]
@@ -139,7 +139,7 @@ repos:
 #       files: \.(json|yaml|yml|template)$
 
 - repo: https://github.com/Lucas-C/pre-commit-hooks
-  rev: v1.5.5
+  rev: v1.5.6
   hooks:
     - id: insert-license
       files: \.[jt]sx?$  # *.js, *.jsx, *.ts and *.tsx
@@ -150,7 +150,7 @@ repos:
         - /**| |*/        # defaults to:  #
 
 - repo: https://github.com/Lucas-C/pre-commit-hooks
-  rev: v1.5.5
+  rev: v1.5.6
   hooks:
     - id: insert-license
       files: \.py$
 
@@ -1,3 +1,49 @@
+# v6.6.0
+
+## Key Features
+
+### Token Usage and Context Window Visibility
+
+LISA now provides improved observability for model usage and configuration:
+
+- View cumulative token usage for each user session.
+- Display a context window field in model cards across Model Management and Model Library.
+- Support overriding inferred context windows for LISA-hosted models through environment configuration.
+
+### Bedrock Agent Integration
+
+LISA now includes native Bedrock Agent integration, giving administrators a streamlined way to publish Bedrock Agents in the platform catalog and make them available to end users.
+
+Users can opt in to these agents directly from the Agent Management UI, which makes it easier to adopt Bedrock-powered workflows without separate integration steps.
+
+### LISA Serve Throttling
+
+LISA Serve now includes throttling controls to better protect service stability under bursty or high-volume traffic patterns.
+
+These controls help prevent noisy-neighbor behavior, improve predictability during traffic spikes, and provide a stronger baseline for multi-tenant reliability.
+
+### Security Hardening
+
+CORS origins are now configurable via a new `corsAllowedOrigins` allowlist that is threaded through all API Gateways, Lambdas, FastAPI services, and MCP server components via a new CDK aspect, replacing permissive defaults. Additionally, client-side OAuth callback validation, safe error rendering in the UI, and stricter Pydantic request parsing for MCP Server and Workbench Lambdas reduce injection and untrusted-input risks.
+
+## Other Key Changes
+
+- Dependency and security maintenance updates across Python and npm packages.
+- Minor reliability fixes discovered during routine update work.
+- Small MCP Workbench lifecycle improvements for tool synchronization and routing.
+- Cypress CI workflow fixes for branch reporting and manual nightly test support.
+- Incremental SDK improvements, including RAG evaluation support.
+
+
+## Acknowledgements
+* @bedanley
+* @drduhe
+* @estohlmann
+* @gingerknight
+* @jmharold
+
+**Full Changelog**: https://github.com/awslabs/LISA/compare/v6.5.0..v6.6.0
+
 # v6.5.0
 
 ## Key Features
 
@@ -5,16 +5,16 @@
 
 ## What is LISA?
 
-Our large language model (LLM) inference solution for the Amazon Dedicated Cloud (ADC), LISA, is open source infrastructure-as-code. Customers deploy it directly into an Amazon Web Services (AWS) account in any region. LISA is scalable and ready to support production use cases.
+Our large language model (LLM) inference solution for the Amazon Dedicated Cloud (ADC), LISA, is open source infrastructure-as-code. Customers deploy it directly into an Amazon Web Services (AWS) account in any region. LISA is modular, scalable, and ready to support production use cases.
 
-LISA accelerates GenAI adoption by offering built-in configurability with Amazon Bedrock models, Knowledge Bases, and Guardrails. Also by offering advanced capabilities like an optional enterprise-ready chat user interface (UI) with configurable features, authentication, resource access control, centralized model orchestration via LiteLLM, model self-hosting via Amazon ECS, retrieval augmented generation (RAG), APIs, and broad model context protocol (MCP) support and features. LISA is also compatible with OpenAI’s API specification making it easily configurable with supporting solutions. For example, the Continue plugin for VSCode and JetBrains integrated development environments (IDE).
+LISA accelerates GenAI adoption by offering built-in configurability with Amazon Bedrock models, Knowledge Bases, and Guardrails. LISA also offers advanced capabilities like an optional enterprise-ready chat user interface (UI) with configurable features, authentication, resource access control, centralized model orchestration via LiteLLM, model self-hosting via Amazon ECS, retrieval augmented generation (RAG), APIs, and broad model context protocol (MCP) support. LISA is also compatible with OpenAI’s API specification, making it easily configurable with supporting solutions. For example, use LISA as the model provider via the Continue plugin for VSCode and JetBrains integrated development environments (IDEs).
 
-LISA's roadmap is customer-driven, with new capabilities launching monthly. Reach out to the product team to ask questions, provide feedback, and send feature requests via the "Contact Us" button above.
+Reach out to the team to ask questions and provide feedback via the "Contact Us" button above.
 
 ## Key Features
 
 * **Open Source**: No subscription or licensing fees. LISA costs are based on service usage.
-* **Ongoing Releases**: The product roadmap is customer-driven with releases typically every 2-4 weeks. LISA is backed by a software development team that builds production grade solutions to accelerate customers' GenAI adoption.
+* **Support**: LISA is backed by a software development team that builds and maintains production-ready solutions to accelerate customers' GenAI adoption.
 * **Model Flexibility**: Bring your own models for self-hosting, or quickly configure LISA with 100+ models supported by third-party model providers, including Amazon Bedrock and Jumpstart.
 * **Model Orchestration**: Centralize and standardize unique API calls to third-party model providers automatically with LISA via LiteLLM. LISA standardizes the unique API calls into the OpenAI format automatically. All that is required is an API key, model name, and API endpoint.
 * **Modular Components**: Accelerate GenAI adoption with secure, scalable software. LISA supports various use cases through configurable components: model serving and orchestration, chat user interface with advanced capabilities, authentication, retrieval augmented generation (RAG), Anthropic’s Model Context Protocol (MCP), and APIs.
@@ -23,7 +23,28 @@ LISA's roadmap is customer-driven, with new capabilities launching monthly. Reac
 
 ## Major Components
 
-LISA’s four major components include Serve, a Chat UI, RAG, and MCP. LISA Serve and LISA MCP are standalone, foundational core solutions with APIs for customers not leveraging LISA’s Chat UI. Both LISA’s Chat UI and RAG are optional components, but must be used with Serve.
+LISA’s four major components include Serve, a Chat UI, RAG, and MCP. LISA Serve and LISA MCP are standalone, foundational core solutions with APIs for customers not leveraging LISA’s Chat UI. Both LISA’s Chat UI and RAG are optional components, but both must be used with Serve.
+
+### Chat UI Screenshots
+
+<table>
+  <tr>
+    <td align="center"><strong>Chat UI</strong></td>
+    <td align="center"><strong>Feature Configuration</strong></td>
+  </tr>
+  <tr>
+    <td><img src="lib/docs/assets/UI_Screenshots/LISA_Chat.png" alt="LISA Chat UI screenshot" width="100%"></td>
+    <td><img src="lib/docs/assets/UI_Screenshots/LISA_Feature_Config_Page.png" alt="LISA Feature Configuration page screenshot" width="100%"></td>
+  </tr>
+  <tr>
+    <td align="center"><strong>Model Management Wizard</strong></td>
+    <td align="center"><strong>RAG Management Wizard</strong></td>
+  </tr>
+  <tr>
+    <td><img src="lib/docs/assets/UI_Screenshots/LISA_Model_Mgmt_Wizard.png" alt="LISA Model Management wizard screenshot" width="100%"></td>
+    <td><img src="lib/docs/assets/UI_Screenshots/LISA_RAG_Mgmt_Wizard.png" alt="LISA RAG Management wizard screenshot" width="100%"></td>
+  </tr>
+</table>
 
 Read more in the Architecture Overview section of LISA's documentation site linked above.
 
@@ -57,6 +78,5 @@ installation and usage.
 ## License
 
 Although this repository is released under the Apache 2.0 license, when configured to use PGVector as a RAG store it
-uses
-the third party `psycopg2-binary` library. The `psycopg2-binary` project's licensing includes
+uses the third-party `psycopg2-binary` library. The `psycopg2-binary` project's licensing includes
 the [LGPL with exceptions](https://github.com/psycopg/psycopg2/blob/master/LICENSE) license.
@@ -1 +1 @@
-6.5.0
+6.6.0
@@ -141,23 +141,28 @@ build_all_images() {
         echo ""
     fi
 
-    # lisa-tei
-    build_image "Dockerfile" "lisa-tei" "latest" "./lib/serve/ecs-model/embedding/tei" \
-        "NODE_ENV=production" \
-        "BASE_IMAGE=ghcr.io/huggingface/text-embeddings-inference:latest" \
-        "MOUNTS3_DEB_URL=https://s3.amazonaws.com/mountpoint-s3-release/latest/x86_64/mount-s3.deb"
-
-    # lisa-tgi
-    build_image "Dockerfile" "lisa-tgi" "latest" "./lib/serve/ecs-model/textgen/tgi" \
-        "NODE_ENV=production" \
-        "BASE_IMAGE=ghcr.io/huggingface/text-generation-inference:latest" \
-        "MOUNTS3_DEB_URL=https://s3.amazonaws.com/mountpoint-s3-release/latest/x86_64/mount-s3.deb"
-
-    # lisa-vllm
-    build_image "Dockerfile" "lisa-vllm" "latest" "./lib/serve/ecs-model/vllm" \
-        "NODE_ENV=production" \
-        "BASE_IMAGE=public.ecr.aws/deep-learning-containers/vllm:0.15-gpu-py312-ec2" \
-        "MOUNTS3_DEB_URL=https://s3.amazonaws.com/mountpoint-s3-release/latest/x86_64/mount-s3.deb"
+    ##################################################################################
+    # Note: Model-hosting images (lisa-tei, lisa-tgi, lisa-vllm) are not exported here
+    # as they exceed GitHub Release asset size limits (~2GB). These images are built
+    # at deploy time from their public base images. Uncomment to build.
+    ##################################################################################
+    # # lisa-tei
+    # build_image "embedding/tei/Dockerfile" "lisa-tei" "latest" "./lib/serve/ecs-model" \
+    #     "NODE_ENV=production" \
+    #     "BASE_IMAGE=ghcr.io/huggingface/text-embeddings-inference:latest" \
+    #     "MOUNTS3_DEB_URL=https://s3.amazonaws.com/mountpoint-s3-release/latest/x86_64/mount-s3.deb"
+
+    # # lisa-tgi
+    # build_image "textgen/tgi/Dockerfile" "lisa-tgi" "latest" "./lib/serve/ecs-model" \
+    #     "NODE_ENV=production" \
+    #     "BASE_IMAGE=ghcr.io/huggingface/text-generation-inference:latest" \
+    #     "MOUNTS3_DEB_URL=https://s3.amazonaws.com/mountpoint-s3-release/latest/x86_64/mount-s3.deb"
+
+    # # lisa-vllm
+    # build_image "vllm/Dockerfile" "lisa-vllm" "latest" "./lib/serve/ecs-model" \
+    #     "NODE_ENV=production" \
+    #     "BASE_IMAGE=public.ecr.aws/deep-learning-containers/vllm:0.17-gpu-py312-ec2" \
+    #     "MOUNTS3_DEB_URL=https://s3.amazonaws.com/mountpoint-s3-release/latest/x86_64/mount-s3.deb"
 
     echo "All images built successfully!"
 }