Merge pull request #94 from trustyai-explainability/main

sheltoncyril · web-flow · commit 61d3e103401b · 2026-03-31T16:11:44.000+01:00
[pull] main from trustyai-explainability:main
diff --git a/.gitignore b/.gitignore
@@ -10,7 +10,13 @@ __pycache__/
 dist/
 .env
 _providers.d/
-
+meta/
+scan_out/
+**.env
+*.csv
+*.ipynb
+*.json
+*.cpu
 # Hermeto outputs (generated during testing)
 hermeto-output*/
 hermeto*.env
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
@@ -13,3 +13,11 @@ repos:
       - id: ruff
         args: [--fix, --exit-non-zero-on-fix]
       - id: ruff-format
+  - repo: local
+    hooks:
+      - id: mypy
+        name: mypy type check
+        entry: mypy src/
+        language: system
+        pass_filenames: false
+        types: [python]
diff --git a/src/llama_stack_provider_trustyai_garak/constants.py b/src/llama_stack_provider_trustyai_garak/constants.py
@@ -21,3 +21,8 @@
 
 # SDG variables
 DEFAULT_SDG_FLOW_ID = "major-sage-742"
+DEFAULT_SDG_MAX_CONCURRENCY = 10
+DEFAULT_SDG_NUM_SAMPLES = 0
+DEFAULT_SDG_NUM_SAMPLES_BLOCK_NAME = "replicate_rows"
+DEFAULT_SDG_MAX_TOKENS = 0
+DEFAULT_SDG_MAX_TOKENS_BLOCK_NAME = "generate_adversarial_prompt"
diff --git a/src/llama_stack_provider_trustyai_garak/core/pipeline_steps.py b/src/llama_stack_provider_trustyai_garak/core/pipeline_steps.py
@@ -269,6 +269,9 @@ def run_sdg_generation(
     sdg_model: str,
     sdg_api_base: str,
     sdg_flow_id: str = "",
+    sdg_max_concurrency: int = 0,
+    sdg_num_samples: int = 0,
+    sdg_max_tokens: int = 0,
 ) -> pd.DataFrame:
     """Run Synthetic Data Generation on a taxonomy.  Returns the raw DataFrame.
 
@@ -299,6 +302,9 @@ def run_sdg_generation(
         flow_id=effective_flow_id,
         api_key=effective_key,
         taxonomy=taxonomy_df,
+        max_concurrency=sdg_max_concurrency,
+        num_samples=sdg_num_samples,
+        max_tokens=sdg_max_tokens,
     )
     logger.info(
         "SDG produced %d raw rows across %d categories",
diff --git a/src/llama_stack_provider_trustyai_garak/evalhub/garak_adapter.py b/src/llama_stack_provider_trustyai_garak/evalhub/garak_adapter.py
@@ -59,14 +59,17 @@
     parse_digest_from_report_content,
     parse_generations_from_report_content,
 )
-from ..utils import get_scan_base_dir, as_bool
+from ..utils import get_scan_base_dir, as_bool, safe_int
 from ..constants import (
     DEFAULT_TIMEOUT,
     DEFAULT_MODEL_TYPE,
     DEFAULT_EVAL_THRESHOLD,
     EXECUTION_MODE_SIMPLE,
     EXECUTION_MODE_KFP,
     DEFAULT_SDG_FLOW_ID,
+    DEFAULT_SDG_MAX_CONCURRENCY,
+    DEFAULT_SDG_NUM_SAMPLES,
+    DEFAULT_SDG_MAX_TOKENS,
 )
 
 logger = logging.getLogger(__name__)
@@ -546,6 +549,9 @@ def _run_via_kfp(
             "sdg_model": ip.get("sdg_model", ""),
             "sdg_api_base": ip.get("sdg_api_base", ""),
             "sdg_flow_id": ip.get("sdg_flow_id", DEFAULT_SDG_FLOW_ID),
+            "sdg_max_concurrency": ip.get("sdg_max_concurrency", DEFAULT_SDG_MAX_CONCURRENCY),
+            "sdg_num_samples": ip.get("sdg_num_samples", DEFAULT_SDG_NUM_SAMPLES),
+            "sdg_max_tokens": ip.get("sdg_max_tokens", DEFAULT_SDG_MAX_TOKENS),
         }
         if model_auth_secret:
             pipeline_args["model_auth_secret_name"] = model_auth_secret
@@ -933,6 +939,20 @@ def _build_config_from_spec(
             "intents_s3_key": benchmark_config.get("intents_s3_key", profile.get("intents_s3_key", "")),
             "intents_format": benchmark_config.get("intents_format", profile.get("intents_format", "csv")),
             "sdg_flow_id": benchmark_config.get("sdg_flow_id", profile.get("sdg_flow_id", DEFAULT_SDG_FLOW_ID)),
+            "sdg_max_concurrency": safe_int(
+                benchmark_config.get(
+                    "sdg_max_concurrency", profile.get("sdg_max_concurrency", DEFAULT_SDG_MAX_CONCURRENCY)
+                ),
+                DEFAULT_SDG_MAX_CONCURRENCY,
+            ),
+            "sdg_num_samples": safe_int(
+                benchmark_config.get("sdg_num_samples", profile.get("sdg_num_samples", DEFAULT_SDG_NUM_SAMPLES)),
+                DEFAULT_SDG_NUM_SAMPLES,
+            ),
+            "sdg_max_tokens": safe_int(
+                benchmark_config.get("sdg_max_tokens", profile.get("sdg_max_tokens", DEFAULT_SDG_MAX_TOKENS)),
+                DEFAULT_SDG_MAX_TOKENS,
+            ),
             "disable_cache": as_bool(benchmark_config.get("disable_cache", False)),
         }
 
diff --git a/src/llama_stack_provider_trustyai_garak/evalhub/kfp_pipeline.py b/src/llama_stack_provider_trustyai_garak/evalhub/kfp_pipeline.py
@@ -31,7 +31,12 @@
 
 from kfp import dsl, kubernetes
 
-from ..constants import DEFAULT_SDG_FLOW_ID
+from ..constants import (
+    DEFAULT_SDG_FLOW_ID,
+    DEFAULT_SDG_MAX_CONCURRENCY,
+    DEFAULT_SDG_NUM_SAMPLES,
+    DEFAULT_SDG_MAX_TOKENS,
+)
 from ..core.pipeline_steps import MODEL_AUTH_MOUNT_PATH
 
 logger = logging.getLogger(__name__)
@@ -254,6 +259,9 @@ def sdg_generate(
     sdg_model: str,
     sdg_api_base: str,
     sdg_flow_id: str,
+    sdg_max_concurrency: int,
+    sdg_num_samples: int,
+    sdg_max_tokens: int,
     taxonomy_dataset: dsl.Input[dsl.Dataset],
     sdg_dataset: dsl.Output[dsl.Dataset],
 ):
@@ -306,6 +314,9 @@ def sdg_generate(
         sdg_model=sdg_model,
         sdg_api_base=sdg_api_base,
         sdg_flow_id=sdg_flow_id,
+        sdg_max_concurrency=sdg_max_concurrency,
+        sdg_num_samples=sdg_num_samples,
+        sdg_max_tokens=sdg_max_tokens,
     )
     raw_df.to_csv(sdg_dataset.path, index=False)
     log.info("Wrote %d raw SDG rows to artifact", len(raw_df))
@@ -625,6 +636,9 @@ def evalhub_garak_pipeline(
     sdg_model: str = "",
     sdg_api_base: str = "",
     sdg_flow_id: str = DEFAULT_SDG_FLOW_ID,
+    sdg_max_concurrency: int = DEFAULT_SDG_MAX_CONCURRENCY,
+    sdg_num_samples: int = DEFAULT_SDG_NUM_SAMPLES,
+    sdg_max_tokens: int = DEFAULT_SDG_MAX_TOKENS,
 ):
     """Six-step pipeline: validate, resolve taxonomy, SDG, prepare prompts, scan, write outputs.
 
@@ -682,6 +696,9 @@ def evalhub_garak_pipeline(
         sdg_model=sdg_model,
         sdg_api_base=sdg_api_base,
         sdg_flow_id=sdg_flow_id,
+        sdg_max_concurrency=sdg_max_concurrency,
+        sdg_num_samples=sdg_num_samples,
+        sdg_max_tokens=sdg_max_tokens,
         taxonomy_dataset=taxonomy_task.outputs["taxonomy_dataset"],
     )
     sdg_task.set_caching_options(True)
diff --git a/src/llama_stack_provider_trustyai_garak/remote/garak_remote_eval.py b/src/llama_stack_provider_trustyai_garak/remote/garak_remote_eval.py
@@ -21,7 +21,8 @@
 from ..base_eval import GarakEvalBase
 from llama_stack_provider_trustyai_garak import shield_scan
 from ..errors import GarakError, GarakConfigError, GarakValidationError
-from ..utils import as_bool
+from ..constants import DEFAULT_SDG_MAX_CONCURRENCY, DEFAULT_SDG_NUM_SAMPLES, DEFAULT_SDG_MAX_TOKENS
+from ..utils import as_bool, safe_int
 from dotenv import load_dotenv
 
 load_dotenv()
@@ -198,6 +199,18 @@ async def run_eval(self, request: RunEvalRequest) -> Job:
                     "sdg_model": provider_params.get("sdg_model", ""),
                     "sdg_api_base": provider_params.get("sdg_api_base", ""),
                     "sdg_flow_id": provider_params.get("sdg_flow_id", ""),
+                    "sdg_max_concurrency": safe_int(
+                        provider_params.get("sdg_max_concurrency", DEFAULT_SDG_MAX_CONCURRENCY),
+                        DEFAULT_SDG_MAX_CONCURRENCY,
+                    ),
+                    "sdg_num_samples": safe_int(
+                        provider_params.get("sdg_num_samples", DEFAULT_SDG_NUM_SAMPLES),
+                        DEFAULT_SDG_NUM_SAMPLES,
+                    ),
+                    "sdg_max_tokens": safe_int(
+                        provider_params.get("sdg_max_tokens", DEFAULT_SDG_MAX_TOKENS),
+                        DEFAULT_SDG_MAX_TOKENS,
+                    ),
                 },
                 run_name=f"garak-{benchmark_id.split('::')[-1]}-{job_id.removeprefix(JOB_ID_PREFIX)}",
                 namespace=self._config.kubeflow_config.namespace,
diff --git a/src/llama_stack_provider_trustyai_garak/remote/kfp_utils/components.py b/src/llama_stack_provider_trustyai_garak/remote/kfp_utils/components.py
@@ -228,6 +228,9 @@ def sdg_generate(
     sdg_model: str,
     sdg_api_base: str,
     sdg_flow_id: str,
+    sdg_max_concurrency: int,
+    sdg_num_samples: int,
+    sdg_max_tokens: int,
     taxonomy_dataset: dsl.Input[dsl.Dataset],
     sdg_dataset: dsl.Output[dsl.Dataset],
 ):
@@ -269,6 +272,9 @@ def sdg_generate(
         sdg_model=sdg_model,
         sdg_api_base=sdg_api_base,
         sdg_flow_id=sdg_flow_id,
+        sdg_max_concurrency=sdg_max_concurrency,
+        sdg_num_samples=sdg_num_samples,
+        sdg_max_tokens=sdg_max_tokens,
     )
     raw_df.to_csv(sdg_dataset.path, index=False)
     log.info("Wrote %d raw SDG rows to artifact", len(raw_df))
diff --git a/src/llama_stack_provider_trustyai_garak/remote/kfp_utils/pipeline.py b/src/llama_stack_provider_trustyai_garak/remote/kfp_utils/pipeline.py
@@ -13,7 +13,12 @@
 from kfp import dsl
 from dotenv import load_dotenv
 import logging
-from ...constants import DEFAULT_SDG_FLOW_ID
+from ...constants import (
+    DEFAULT_SDG_FLOW_ID,
+    DEFAULT_SDG_MAX_CONCURRENCY,
+    DEFAULT_SDG_NUM_SAMPLES,
+    DEFAULT_SDG_MAX_TOKENS,
+)
 
 from .components import (
     validate,
@@ -45,6 +50,9 @@ def garak_scan_pipeline(
     sdg_model: str = "",
     sdg_api_base: str = "",
     sdg_flow_id: str = DEFAULT_SDG_FLOW_ID,
+    sdg_max_concurrency: int = DEFAULT_SDG_MAX_CONCURRENCY,
+    sdg_num_samples: int = DEFAULT_SDG_NUM_SAMPLES,
+    sdg_max_tokens: int = DEFAULT_SDG_MAX_TOKENS,
 ):
     """Six-step pipeline: validate, resolve taxonomy, SDG, prepare prompts, scan, parse.
 
@@ -86,6 +94,9 @@ def garak_scan_pipeline(
         sdg_model=sdg_model,
         sdg_api_base=sdg_api_base,
         sdg_flow_id=sdg_flow_id,
+        sdg_max_concurrency=sdg_max_concurrency,
+        sdg_num_samples=sdg_num_samples,
+        sdg_max_tokens=sdg_max_tokens,
         taxonomy_dataset=taxonomy_task.outputs["taxonomy_dataset"],
     )
     sdg_task.set_caching_options(True)
diff --git a/src/llama_stack_provider_trustyai_garak/resources/art_report.jinja2 b/src/llama_stack_provider_trustyai_garak/resources/art_report.jinja2
@@ -42,11 +42,8 @@
 <body>
 <div class="pf-v6-c-page" id="masthead-basic-example">
     <header class="pf-v6-c-masthead" id="masthead-basic-example-masthead">
-        <div class="pf-v6-c-masthead__main">
-            <div class="pf-v6-c-masthead__brand"></div>
-            <div class="pf-v6-c-masthead__content">
-                <h1 class="pf-v6-c-title pf-m-xl">Automated Red Teaming Report</h1>
-            </div>
+        <div class="pf-v6-c-masthead__content">
+            <h1 class="pf-v6-c-title pf-m-xl">Automated Red Teaming Report</h1>
         </div>
     </header>
     <div class="pf-v6-c-page__sidebar">
diff --git a/src/llama_stack_provider_trustyai_garak/sdg.py b/src/llama_stack_provider_trustyai_garak/sdg.py
@@ -10,7 +10,12 @@
 import re
 import logging
 from typing import List, Dict, Any, NamedTuple, Optional
-from .constants import DEFAULT_SDG_FLOW_ID
+from .constants import (
+    DEFAULT_SDG_FLOW_ID,
+    DEFAULT_SDG_MAX_CONCURRENCY,
+    DEFAULT_SDG_NUM_SAMPLES_BLOCK_NAME,
+    DEFAULT_SDG_MAX_TOKENS_BLOCK_NAME,
+)
 
 import pandas
 
@@ -332,26 +337,28 @@
 ]
 
 
-_DEFAULT_MAX_CONCURRENCY = 10
+def _resolve_max_concurrency(value: int = 0) -> int:
+    """Resolve effective max_concurrency.
 
-
-def _resolve_max_concurrency() -> int:
-    """Read ``SDG_MAX_CONCURRENCY`` from the environment, with validation."""
+    Precedence: explicit *value* (if >= 1) > ``SDG_MAX_CONCURRENCY`` env var > constant default.
+    """
+    if value >= 1:
+        return value
     raw = os.environ.get("SDG_MAX_CONCURRENCY")
     if raw is None:
-        return _DEFAULT_MAX_CONCURRENCY
+        return DEFAULT_SDG_MAX_CONCURRENCY
     try:
-        value = int(raw)
-        if value < 1:
+        env_val = int(raw)
+        if env_val < 1:
             raise ValueError("must be >= 1")
-        return value
+        return env_val
     except ValueError:
         logger.warning(
             "Invalid SDG_MAX_CONCURRENCY=%r, falling back to %d",
             raw,
-            _DEFAULT_MAX_CONCURRENCY,
+            DEFAULT_SDG_MAX_CONCURRENCY,
         )
-        return _DEFAULT_MAX_CONCURRENCY
+        return DEFAULT_SDG_MAX_CONCURRENCY
 
 
 class SDGResult(NamedTuple):
@@ -361,12 +368,31 @@ class SDGResult(NamedTuple):
     normalized: pandas.DataFrame
 
 
+def _override_flow_block(flow, block_name: str, overrides: dict) -> None:
+    """Find a block by ``block_name`` and patch its config.
+
+    Matches on the ``block_name`` config value rather than position, so upstream
+    block reordering is harmless.  Only the first match is rebuilt; no match logs a warning.
+    """
+    for i, block in enumerate(flow.blocks):
+        cfg = block.get_config()
+        if cfg.get("block_name") == block_name:
+            cfg.update(overrides)
+            flow.blocks[i] = block.from_config(cfg)
+            logger.info("Overrode block %r at index %d: %s", block_name, i, overrides)
+            return
+    logger.warning("Block %r not found in flow — override skipped", block_name)
+
+
 def generate_sdg_dataset(
     model: str,
     api_base: str,
     flow_id: str = DEFAULT_SDG_FLOW_ID,
     api_key: str = "dummy",
     taxonomy: Optional[pandas.DataFrame] = None,
+    max_concurrency: int = 0,
+    num_samples: int = 0,
+    max_tokens: int = 0,
 ) -> SDGResult:
     """Generate a red-team prompt dataset using sdg_hub.
 
@@ -379,11 +405,19 @@ def generate_sdg_dataset(
     :func:`~.intents.load_taxonomy_dataset`) to override the default
     harm categories.
 
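+    Args:
+        max_concurrency: Concurrency passed to ``flow.generate``.  Values below ``1`` fall back to
+            the ``SDG_MAX_CONCURRENCY`` env var, then to ``DEFAULT_SDG_MAX_CONCURRENCY``.
+        num_samples: Override ``RowMultiplierBlock.num_samples`` (rows per input row).  ``0`` keeps the flow default.
+        max_tokens: Override ``LLMChatBlock.max_tokens`` (token limit per request).  ``0`` keeps the flow default.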
+
     Returns:
         :class:`SDGResult` with ``raw`` (all columns from SDG including
         pools) and ``normalized`` (``category``, ``prompt``,
         ``description`` only).
     """
+    max_concurrency = _resolve_max_concurrency(max_concurrency)
+
     import nest_asyncio
     from sdg_hub import FlowRegistry, Flow
 
@@ -411,7 +445,11 @@ def generate_sdg_dataset(
     flow = Flow.from_yaml(flow_path)
     flow.set_model_config(model=model, api_base=api_base, api_key=api_key)
 
-    max_concurrency = _resolve_max_concurrency()
+    if num_samples >= 1:
+        _override_flow_block(flow, DEFAULT_SDG_NUM_SAMPLES_BLOCK_NAME, {"num_samples": num_samples})
+    if max_tokens >= 1:
+        _override_flow_block(flow, DEFAULT_SDG_MAX_TOKENS_BLOCK_NAME, {"max_tokens": max_tokens})
+
     logger.info("SDG generation: max_concurrency=%d", max_concurrency)
     result = flow.generate(df, max_concurrency=max_concurrency)
 
diff --git a/src/llama_stack_provider_trustyai_garak/utils.py b/src/llama_stack_provider_trustyai_garak/utils.py
diff --git a/tests/test_evalhub_adapter.py b/tests/test_evalhub_adapter.py
diff --git a/tests/test_sdg_params.py b/tests/test_sdg_params.py