NVIDIA-NeMo
diff --git a/src/nemo_safe_synthesizer/evaluation/assets/text/multi_modal_tooltips.py b/src/nemo_safe_synthesizer/evaluation/assets/text/multi_modal_tooltips.py
Lines changed: 2 additions & 0 deletions
diff --git a/src/nemo_safe_synthesizer/evaluation/components/attribute_inference_protection.py b/src/nemo_safe_synthesizer/evaluation/components/attribute_inference_protection.py
Lines changed: 42 additions & 15 deletions
diff --git a/src/nemo_safe_synthesizer/evaluation/components/column_distribution.py b/src/nemo_safe_synthesizer/evaluation/components/column_distribution.py
Lines changed: 19 additions & 11 deletions
diff --git a/src/nemo_safe_synthesizer/evaluation/components/component.py b/src/nemo_safe_synthesizer/evaluation/components/component.py
Lines changed: 28 additions & 5 deletions
diff --git a/src/nemo_safe_synthesizer/evaluation/components/composite_score.py b/src/nemo_safe_synthesizer/evaluation/components/composite_score.py
Lines changed: 9 additions & 2 deletions
@@ -3,6 +3,8 @@
 
 # ruff: noqa
 
+"""Tooltip text displayed in the multi-modal HTML evaluation report."""
+
 tooltips = {
     "dataset_statistics_info": """
         The dataset statistics provide a summary of the datasets. The table includes the number of rows and columns,
 
@@ -41,13 +41,27 @@
 
 
 class AttributeInferenceProtection(Component):
+    """Attribute Inference Protection privacy metric.
+
+    Simulates an attribute inference attack: given quasi-identifier columns,
+    can an adversary use synthetic nearest-neighbors to predict the remaining
+    attributes of a training record?  A higher score indicates better
+    protection (lower prediction accuracy).
+
+    See Also:
+        https://arxiv.org/abs/2501.03941 -- Synthetic Data Privacy Metrics.
+    """
+
     name: str = Field(default="Attribute Inference Protection")
-    col_accuracy_df: pd.DataFrame | None = Field(default=None)
+    col_accuracy_df: pd.DataFrame | None = Field(
+        default=None, description="Per-column prediction risk scores and grades."
+    )
 
     model_config = ConfigDict(arbitrary_types_allowed=True)
 
     @cached_property
     def jinja_context(self) -> dict[str, str]:
+        """Template context with the attribute-inference bar chart figure."""
         d = super().jinja_context
         d["anchor_link"] = "#aia"
         if self.col_accuracy_df is not None and not self.col_accuracy_df.empty:
@@ -62,6 +76,7 @@ def jinja_context(self) -> dict[str, str]:
     def from_evaluation_dataset(
         evaluation_dataset: EvaluationDataset, config: SafeSynthesizerParameters | None = None
     ) -> AttributeInferenceProtection:
+        """Run the attribute inference attack and return the component (default score if FAISS is missing)."""
         if not faiss_available:
             logger.info("FAISS is not available, skipping Attribute Inference Attack.")
             return AttributeInferenceProtection(score=EvaluationScore())
@@ -180,7 +195,7 @@ def _is_really_categorical(column: str) -> bool:
 
     @staticmethod
     def _divide_tabular_text(df: pd.DataFrame, text_fields: list) -> tuple[pd.DataFrame, pd.DataFrame]:
-        """Takes a dataframe and divides it into two dataframes, one with the text fields and one with the tabular fields"""
+        """Split a dataframe into tabular-only and text-only subsets."""
         tabular_fields = []
         for col in df.columns:
             if col not in text_fields:
@@ -192,9 +207,7 @@ def _divide_tabular_text(df: pd.DataFrame, text_fields: list) -> tuple[pd.DataFr
 
     @staticmethod
     def _embed_text(df: pd.DataFrame, embedder) -> pd.DataFrame:
-        """Takes a dataframe of text fields, finds the embeddings for each
-        and then averages the embeddings into one embedding and returns a dataframe with just that
-        """
+        """Embed each text column and average into a single embedding per row."""
         embeddings = {}
         for col in df.columns:
             data = df[col].to_list()
@@ -267,7 +280,7 @@ def _get_synth_nn(
         if len(text_columns) == 0:
             # Create the faiss index on the synthetic data
             dim = df_synth_norm.shape[1]
-            index = faiss.IndexFlatL2(dim)  # ty: ignore[unresolved-attribute, possibly-unbound-attribute]
+            index = faiss.IndexFlatL2(dim)  # ty: ignore[possibly-unbound-attribute]
 
             # This usage matches documentation. Specifying n= and x= parameters as
             # the type annotation for IndexFlatL2.add suggests seems unnecessary, possibly related
@@ -288,15 +301,15 @@ def _get_synth_nn(
             df_train_embeddings = AttributeInferenceProtection._embed_text(df_train_text, embedder)
             df_synth_embeddings = AttributeInferenceProtection._embed_text(df_synth_text, embedder)
             hits = util.semantic_search(
-                np.array(list(df_train_embeddings["embedding"])),
-                np.array(list(df_synth_embeddings["embedding"])),
+                np.array(list(df_train_embeddings["embedding"])),  # ty: ignore[invalid-argument-type]
+                np.array(list(df_synth_embeddings["embedding"])),  # ty: ignore[invalid-argument-type]
                 top_k=k,
             )
             synth_rows = pd.DataFrame()
             for i in range(k):
                 corpus_id = hits[0][i]["corpus_id"]
                 synth_rows = pd.concat(
-                    [synth_rows, pd.DataFrame([df_synth.iloc[corpus_id]])],
+                    [synth_rows, pd.DataFrame([df_synth.iloc[int(corpus_id)]])],
                     ignore_index=True,
                 )
 
@@ -310,8 +323,8 @@ def _get_synth_nn(
         df_synth_embeddings = AttributeInferenceProtection._embed_text(df_synth_text, embedder)
         search_synth_k = min(1000, len(df_synth_embeddings))
         hits = util.semantic_search(
-            np.array(list(df_train_embeddings["embedding"])),
-            np.array(list(df_synth_embeddings["embedding"])),
+            np.array(list(df_train_embeddings["embedding"])),  # ty: ignore[invalid-argument-type]
+            np.array(list(df_synth_embeddings["embedding"])),  # ty: ignore[invalid-argument-type]
             top_k=search_synth_k,
         )
         synth_NN = pd.DataFrame()
@@ -324,12 +337,12 @@ def _get_synth_nn(
             dist = 1 - sim
             text_dist[i] = dist
             corpus_ids.append(corpus_id)
-            synth_NN = pd.concat([synth_NN, pd.DataFrame([df_synth_norm.iloc[corpus_id]])], ignore_index=True)
+            synth_NN = pd.concat([synth_NN, pd.DataFrame([df_synth_norm.iloc[int(corpus_id)]])], ignore_index=True)
 
         # Now get the tabular similarity for these 1000 NN
 
         dim = synth_NN.shape[1]
-        index = faiss.IndexFlatL2(dim)  # ty: ignore[unresolved-attribute, possibly-unbound-attribute]
+        index = faiss.IndexFlatL2(dim)  # ty: ignore[possibly-unbound-attribute]
         index.add(np.float32(np.ascontiguousarray(np.array(synth_NN))))  # ty: ignore[missing-argument]
         dists, indexes = index.search(np.float32(np.ascontiguousarray(np.array(df_train_norm))), search_synth_k)  # ty: ignore[missing-argument]
         # Scale the Euclidean distance to [0,1]
@@ -372,6 +385,20 @@ def _aia(
         df_synth: pd.DataFrame,
         quasi_identifier_count: int,
     ) -> tuple[EvaluationScore, pd.DataFrame | None]:
+        """Core attribute inference attack implementation.
+
+        Iterates over random quasi-identifier subsets, finds nearest
+        synthetic neighbors, and measures attribute prediction accuracy
+        weighted by column entropy.
+
+        Args:
+            df_train: Training dataframe.
+            df_synth: Synthetic dataframe.
+            quasi_identifier_count: Number of columns to use as quasi-identifiers.
+
+        Returns:
+            Tuple of (overall protection score, per-column accuracy dataframe or ``None``).
+        """
         ias = EvaluationScore(grade=PrivacyGrade.UNAVAILABLE)
         col_accuracy_df = None
         if quasi_identifier_count is None:
@@ -408,7 +435,7 @@ def _aia(
             nominal_columns = list(df_train.select_dtypes(include=["object", "category", "bool"]).columns)
             numeric_columns = [column for column in df_train.columns if column not in nominal_columns]
 
-            # Now seperate out the text columns from the nominal
+            # Now separate out the text columns from the nominal
 
             text_columns = []
             for col in nominal_columns:
@@ -531,7 +558,7 @@ def _aia(
                 # Lat/lon values inspired this. Text must be dist .35 or less
                 for column in predict_columns:
                     synth_val = synth_values[column]
-                    train_val = train_row_all.iloc[0][column]
+                    train_val = train_row_all.iloc[0][column]  # ty: ignore[invalid-argument-type]
 
                     if pd.isna(train_val):
                         continue
 
@@ -23,9 +23,11 @@
 
 
 class ColumnDistributionPlotRow(BaseModel):
-    name1: str = Field()
-    name2: str | None = Field()
-    figure: str = Field()
+    """A pair of side-by-side column distribution plots for the HTML report."""
+
+    name1: str = Field(description="Name of the first column in the plot row.")
+    name2: str | None = Field(description="Name of the second column in the plot row, if present.")
+    figure: str = Field(description="Rendered HTML of the side-by-side distribution plot.")
 
     @staticmethod
     def _get_figure_for_field(f: EvaluationField | None, reference: pd.Series, output) -> Figure | None:
@@ -88,21 +90,26 @@ def from_evaluation_dataset(evaluation_dataset: EvaluationDataset) -> list[dict[
 
 
 class ColumnDistribution(Component):
-    """
-    This class wears a few hats, not ideal but saves some duplication:
-    * Rendering of each EvaluationFields histogram
-    * Rendering of Reference Columns table
-    * Computation/rendering of Column Distribution Stability score
-    * Field Distribution Stability functions are used for text metrics and (iirc) PCA as well
+    """Column Distribution Stability metric.
+
+    Computes per-column Jensen-Shannon divergence between reference and
+    output distributions, averages across all tabular columns, and maps
+    the result to a 0--10 score.  Also carries data for the per-column
+    histogram figures and the Reference Columns table in the HTML report.
     """
 
     name: str = Field(default="Column Distribution Stability")
     # Keep a copy to simplify rendering
-    column_statistics: dict[str, ColumnStatistics] | None = Field(default=None)
-    evaluation_fields: list[EvaluationField] = Field(default=list())
+    column_statistics: dict[str, ColumnStatistics] | None = Field(
+        default=None, description="Per-column PII entity and transform metadata."
+    )
+    evaluation_fields: list[EvaluationField] = Field(
+        default=list(), description="Per-column evaluation metadata and distribution scores."
+    )
 
     @cached_property
     def jinja_context(self):
+        """Template context with evaluation fields and column statistics for the report."""
         d = super().jinja_context
         d["anchor_link"] = "#distribution-stability"
         if self.evaluation_fields:
@@ -117,6 +124,7 @@ def jinja_context(self):
     def from_evaluation_dataset(
         evaluation_dataset: EvaluationDataset, config: SafeSynthesizerParameters | None = None
     ) -> ColumnDistribution:
+        """Compute column distribution stability from the evaluation dataset."""
         tabular_columns = set(evaluation_dataset.get_tabular_columns())
         tabular_fields = [f for f in evaluation_dataset.evaluation_fields if f.name in tabular_columns]
         if tabular_fields:
 
@@ -17,22 +17,45 @@
 
 
 class Component(ABC, BaseModel):
-    name: str = Field(
-        description="Override this with the fancy display name of your component. It is used for json summaries and rendering scores."
+    """Abstract base for all evaluation components.
+
+    Each component computes one quality or privacy metric from an
+    ``EvaluationDataset`` and exposes a ``jinja_context`` property
+    for HTML report rendering.
+
+    Subclasses should override ``from_evaluation_dataset`` to perform
+    their metric-specific computation.
+    """
+
+    name: str = Field(description="Display name used in JSON summaries and the HTML report.")
+    score: EvaluationScore = Field(
+        default=EvaluationScore(), description="The computed EvaluationScore for this component."
     )
-    score: EvaluationScore = Field(default=EvaluationScore())
 
     @staticmethod
     def from_evaluation_dataset(
         evaluation_dataset: EvaluationDataset, config: SafeSynthesizerParameters | None = None
     ) -> Component:
-        return Component()
+        """Create a component from an ``EvaluationDataset``.
+
+        Subclasses override this to compute their specific metric.
+
+        Args:
+            evaluation_dataset: Paired reference/output data.
+            config: Optional pipeline configuration parameters.
+
+        Returns:
+            A new component instance with computed scores.
+        """
+        return Component()  # ty: ignore[missing-argument]
 
     def get_json(self) -> str:
+        """Serialize the component score to a JSON string."""
         return self.score.model_dump_json()
 
     @cached_property
     def jinja_context(self) -> dict[str, Any]:
+        """Template context dict for Jinja2 rendering, keyed by name, score, and figure HTML."""
         # Dict values are typed as "Any" but err on the side of primitives (html strings, not plotly.Figure e.g.).
         # Prepping up front saves formatting logic inlined in templates.
         d = dict()
@@ -45,7 +68,7 @@ def jinja_context(self) -> dict[str, Any]:
 
     @staticmethod
     def is_nonempty(dfs: None | pd.DataFrame | list[pd.DataFrame | None]) -> bool:
-        """Util for components that need to check dataframes before attempting to render (correlation and PCA)"""
+        """Return ``True`` if all provided DataFrames are non-``None`` and non-empty."""
         if dfs is None:
             return False
         if isinstance(dfs, pd.DataFrame):
 
@@ -11,8 +11,14 @@
 
 
 class CompositeScore(Component):
+    """A component whose score is the mean of its child component scores.
+
+    Used as the base for aggregate metrics like SQS and Data Privacy Score.
+    """
+
     @cached_property
     def jinja_context(self):
+        """Template context with duplicate gauge figures for overview and detail sections."""
         d = super().jinja_context
         # This is some "plotly magic."  The figure is a div with an id and an inlined script.
         # If you attempt to reuse the figure (we do), it won't render for the second one.
@@ -22,14 +28,15 @@ def jinja_context(self):
 
     @staticmethod
     def from_components(components: list[Component] | Component, name: str) -> CompositeScore:
+        """Compute a composite score as the mean of child component scores."""
         if isinstance(components, Component):
-            return CompositeScore(score=components.score)
+            return CompositeScore(score=components.score, name=name)
         if (
             components is None
             or len(components) == 0
             or all([True for c in components if c.score is None or c.score.score is None])
         ):
-            return CompositeScore(score=EvaluationScore())
+            return CompositeScore(score=EvaluationScore(), name=name)
 
         # Take the mean
         total = 0.0