google
diff --git a/‎requirements.txt‎
Lines changed: 2 additions & 1 deletion b/‎requirements.txt‎
Lines changed: 2 additions & 1 deletion
diff --git a/‎src/metrax/__init__.py‎
Lines changed: 3 additions & 0 deletions b/‎src/metrax/__init__.py‎
Lines changed: 3 additions & 0 deletions
diff --git a/‎src/metrax/image_metrics.py‎
Lines changed: 362 additions & 0 deletions b/‎src/metrax/image_metrics.py‎
Lines changed: 362 additions & 0 deletions
@@ -6,4 +6,5 @@ keras-nlp
 keras-rs
 pytest
 rouge-score
-scikit-learn
+scikit-learn
+tensorflow
@@ -14,6 +14,7 @@
 
 from metrax import base
 from metrax import classification_metrics
+from metrax import image_metrics
 from metrax import nlp_metrics
 from metrax import ranking_metrics
 from metrax import regression_metrics
@@ -38,6 +39,7 @@
 RecallAtK = ranking_metrics.RecallAtK
 RougeL = nlp_metrics.RougeL
 RougeN = nlp_metrics.RougeN
+SSIM = image_metrics.SSIM
 WER = nlp_metrics.WER
 
 
@@ -62,5 +64,6 @@
     "RecallAtK",
     "RougeL",
     "RougeN",
+    "SSIM",
     "WER",
 ]
@@ -0,0 +1,362 @@
+# Copyright 2025 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""A collection of different metrics for image models."""
+
+import flax
+import jax
+from jax import lax
+import jax.numpy as jnp
+from metrax import base
+
+
+def _gaussian_kernel1d(sigma, radius):
+  r"""Generates a 1D normalized Gaussian kernel.
+
+  This function creates a 1D Gaussian kernel, which can be used for smoothing
+  operations. The kernel is centered at zero and its values are determined by
+  the Gaussian function:
+
+  .. math::
+    \phi(x) = e^{-\frac{x^2}{2\sigma^2}}
+
+  The resulting kernel :math:`\phi(x)` is then normalized by dividing each
+  element by the sum of all elements, so that the sum of the kernel's elements
+  is 1. This function assumes an order of 0 for the Gaussian derivative (i.e.,
+  a standard smoothing kernel).
+
+  Args:
+    sigma (float): The standard deviation (:math:`\sigma`) of the Gaussian
+      distribution. This controls the "width" or "spread" of the kernel.
+    radius (int): The radius of the kernel. The kernel will include points from
+      :math:`-radius` to :math:`+radius`. The total size of the kernel will be
+      :math:`2 \times radius + 1`.
+
+  Returns:
+    jnp.ndarray: A 1D JAX array representing the normalized Gaussian kernel.
+  """
+  sigma2 = sigma * sigma
+  x = jnp.arange(-radius, radius + 1)
+  phi_x = jnp.exp(-0.5 / sigma2 * x**2)
+  phi_x = phi_x / phi_x.sum()
+  return phi_x
+
+
+@flax.struct.dataclass
+class SSIM(base.Average):
+  r"""SSIM (Structural Similarity Index Measure) Metric.
+
+  This class calculates the structural similarity between predicted and target
+  images and averages it over a dataset. SSIM is a perception-based model that
+  considers changes in structural information, luminance, and contrast.
+
+  The general SSIM formula considers three components: luminance (l),
+      contrast (c), and structure (s):
+
+      .. math::
+        SSIM(x, y) = [l(x, y)]^\alpha \cdot [c(x, y)]^\beta \cdot [s(x,
+        y)]^\gamma
+
+      Where:
+        - Luminance comparison:
+          :math:`l(x, y) = \frac{2\mu_x\mu_y + c_1}{\mu_x^2 + \mu_y^2 + c_1}`
+        - Contrast comparison:
+          :math:`c(x, y) = \frac{2\sigma_x\sigma_y + c_2}{\sigma_x^2 +
+          \sigma_y^2 + c_2}`
+        - Structure comparison:
+          :math:`s(x, y) = \frac{\sigma_{xy} + c_3}{\sigma_x\sigma_y + c_3}`
+
+      This implementation uses a common simplified form where :math:`\alpha =
+      \beta = \gamma = 1` and :math:`c_3 = c_2 / 2`.
+
+      This leads to the combined formula:
+
+      .. math::
+        SSIM(x, y) = \frac{(2\mu_x\mu_y + c_1)(2\sigma_{xy} + c_2)}{(\mu_x^2 +
+        \mu_y^2 + c_1)(\sigma_x^2 + \sigma_y^2 + c_2)}
+
+      In these formulas:
+        - :math:`\mu_x` and :math:`\mu_y` are the local means of :math:`x` and
+        :math:`y`.
+        - :math:`\sigma_x^2` and :math:`\sigma_y^2` are the local variances of
+        :math:`x` and :math:`y`.
+        - :math:`\sigma_{xy}` is the local covariance of :math:`x` and
+        :math:`y`.
+        - :math:`c_1 = (K_1 L)^2` and :math:`c_2 = (K_2 L)^2` are stabilization
+        constants,
+          where :math:`L` is the dynamic range of pixel values, and :math:`K_1,
+          K_2` are small constants (e.g., 0.01 and 0.03).
+  """
+
+  @staticmethod
+  def _calculate_ssim(
+      img1: jnp.ndarray,
+      img2: jnp.ndarray,
+      max_val: float,
+      filter_size: int = 11,
+      filter_sigma: float = 1.5,
+      k1: float = 0.01,
+      k2: float = 0.03,
+  ) -> jnp.ndarray:
+    """Computes SSIM (Structural Similarity Index Measure) values for a batch of images.
+
+    This function calculates the SSIM between two batches of images (`img1` and
+    `img2`). If the images have multiple channels, SSIM is calculated for each
+    channel independently, and then the mean SSIM across channels is returned.
+
+    Args:
+      img1: The first batch of images, expected shape ``(batch, height, width,
+        channels)``.
+      img2: The second batch of images, expected shape ``(batch, height, width,
+        channels)``.
+      max_val: The dynamic range of the pixel values (e.g., 1.0 for images
+        normalized to [0,1] or 255 for uint8 images).
+      filter_size: The size of the Gaussian filter window used for calculating
+        local statistics. Must be an odd integer.
+      filter_sigma: The standard deviation of the Gaussian filter.
+      k1: A small constant used in the SSIM formula to stabilize the luminance
+        comparison.
+      k2: A small constant used in the SSIM formula to stabilize the
+        contrast/structure comparison.
+
+    Returns:
+      A 1D JAX array of shape ``(batch,)`` containing the SSIM value for each
+      image pair in the batch.
+    """
+    if img1.shape != img2.shape:
+      raise ValueError(
+          f'Input images must have the same shape, but got {img1.shape} and'
+          f' {img2.shape}'
+      )
+    if img1.ndim != 4:  # (batch, H, W, C)
+      raise ValueError(
+          'Input images must be 4D tensors (batch, height, width, channels),'
+          f' but got {img1.ndim}D'
+      )
+    if img1.shape[-3] < filter_size or img1.shape[-2] < filter_size:
+      raise ValueError(
+          f'Image dimensions ({img1.shape[-3]}x{img1.shape[-2]}) must be at'
+          f' least filter_size x filter_size ({filter_size}x{filter_size}).'
+      )
+
+    num_channels = img1.shape[-1]
+    img1 = img1.astype(jnp.float32)
+    img2 = img2.astype(jnp.float32)
+
+    gaussian_kernal_1d = _gaussian_kernel1d(
+        filter_sigma, (filter_size - 1) // 2
+    )
+    gaussian_kernel_2d = jnp.outer(gaussian_kernal_1d, gaussian_kernal_1d)
+    # Kernel for convolution: (H_k, W_k, C_in=1, C_out=1)
+    kernel_conv = gaussian_kernel_2d[:, :, jnp.newaxis, jnp.newaxis]
+
+    c1 = (k1 * max_val) ** 2
+    c2 = (k2 * max_val) ** 2
+
+    def _calculate_ssim_for_channel(x_ch, y_ch, conv_kernel, c1, c2):
+      r"""Calculates the Structural Similarity Index (SSIM) for a single channel.
+
+      This function computes the SSIM between two single-channel image arrays
+      (:math:`x_{ch}` and :math:`y_{ch}`) using a precomputed Gaussian kernel
+      for local statistics. The SSIM metric quantifies image quality
+      degradation based on perceived changes in structural information, also
+      incorporating important perceptual phenomena like luminance and contrast
+      masking.
+
+      The general SSIM formula considers three components: luminance (l),
+      contrast (c), and structure (s):
+
+      .. math::
+        SSIM(x, y) = [l(x, y)]^\alpha \cdot [c(x, y)]^\beta \cdot [s(x,
+        y)]^\gamma
+
+      Where:
+        - Luminance comparison:
+          :math:`l(x, y) = \frac{2\mu_x\mu_y + c_1}{\mu_x^2 + \mu_y^2 + c_1}`
+        - Contrast comparison:
+          :math:`c(x, y) = \frac{2\sigma_x\sigma_y + c_2}{\sigma_x^2 +
+          \sigma_y^2 + c_2}`
+        - Structure comparison:
+          :math:`s(x, y) = \frac{\sigma_{xy} + c_3}{\sigma_x\sigma_y + c_3}`
+
+      This implementation uses a common simplified form where :math:`\alpha =
+      \beta = \gamma = 1` and :math:`c_3 = c_2 / 2`.
+
+      This leads to the combined formula:
+
+      .. math::
+        SSIM(x, y) = \frac{(2\mu_x\mu_y + c_1)(2\sigma_{xy} + c_2)}{(\mu_x^2 +
+        \mu_y^2 + c_1)(\sigma_x^2 + \sigma_y^2 + c_2)}
+
+      In these formulas:
+        - :math:`\mu_x` and :math:`\mu_y` are the local means of :math:`x` and
+        :math:`y`.
+        - :math:`\sigma_x^2` and :math:`\sigma_y^2` are the local variances of
+        :math:`x` and :math:`y`.
+        - :math:`\sigma_{xy}` is the local covariance of :math:`x` and
+        :math:`y`.
+        - :math:`c_1 = (K_1 L)^2` and :math:`c_2 = (K_2 L)^2` are stabilization
+        constants,
+          where :math:`L` is the dynamic range of pixel values, and :math:`K_1,
+          K_2` are small constants (e.g., 0.01 and 0.03).
+
+      Args:
+        x_ch (jnp.ndarray): The first input image channel. Expected shape is
+          ``(batch, Height, Width, 1)``.
+        y_ch (jnp.ndarray): The second input image channel. Expected shape is
+          ``(batch, Height, Width, 1)``.
+        conv_kernel (jnp.ndarray): The 2D Gaussian kernel, reshaped to 4D, used
+          for calculating local windowed statistics (mean, variance,
+          covariance). Expected shape is ``(Kernel_H, Kernel_W, 1, 1)``.
+        c1 (float): Stabilization constant for the luminance and mean component,
+          :math:`(K_1 L)^2`.
+        c2 (float): Stabilization constant for the variance and covariance
+          component, :math:`(K_2 L)^2`.
+
+      Returns:
+        jnp.ndarray: A scalar JAX array (or an array of scalars if batch size >
+        1)
+        representing the mean SSIM value(s) for the input channel(s).
+      """
+      # x_ch, y_ch are (batch, H, W, 1)
+      dn = lax.conv_dimension_numbers(
+          x_ch.shape, conv_kernel.shape, ('NHWC', 'HWIO', 'NHWC')
+      )
+
+      mu_x = lax.conv_general_dilated(
+          x_ch,
+          conv_kernel,
+          window_strides=(1, 1),
+          padding='VALID',
+          dimension_numbers=dn,
+      )
+      mu_y = lax.conv_general_dilated(
+          y_ch,
+          conv_kernel,
+          window_strides=(1, 1),
+          padding='VALID',
+          dimension_numbers=dn,
+      )
+
+      mu_x_sq = mu_x**2
+      mu_y_sq = mu_y**2
+      mu_x_mu_y = mu_x * mu_y
+
+      sigma_x_sq = (
+          lax.conv_general_dilated(
+              x_ch**2,
+              conv_kernel,
+              window_strides=(1, 1),
+              padding='VALID',
+              dimension_numbers=dn,
+          )
+          - mu_x_sq
+      )
+      sigma_y_sq = (
+          lax.conv_general_dilated(
+              y_ch**2,
+              conv_kernel,
+              window_strides=(1, 1),
+              padding='VALID',
+              dimension_numbers=dn,
+          )
+          - mu_y_sq
+      )
+      sigma_xy = (
+          lax.conv_general_dilated(
+              x_ch * y_ch,
+              conv_kernel,
+              window_strides=(1, 1),
+              padding='VALID',
+              dimension_numbers=dn,
+          )
+          - mu_x_mu_y
+      )
+
+      numerator1 = 2 * mu_x_mu_y + c1
+      numerator2 = 2 * sigma_xy + c2
+      denominator1 = mu_x_sq + mu_y_sq + c1
+      denominator2 = sigma_x_sq + sigma_y_sq + c2
+
+      ssim_map = (numerator1 * numerator2) / (denominator1 * denominator2)
+      return jnp.mean(
+          ssim_map, axis=(1, 2, 3)
+      )  # Mean over H, W, C (which is 1 here for the map)
+
+    ssim_per_channel_list = []
+    for i in range(num_channels):
+      img1_c = lax.dynamic_slice_in_dim(
+          img1, i * 1, 1, axis=3
+      )  # (batch, H, W, 1)
+      img2_c = lax.dynamic_slice_in_dim(
+          img2, i * 1, 1, axis=3
+      )  # (batch, H, W, 1)
+
+      ssim_for_channel = _calculate_ssim_for_channel(
+          img1_c, img2_c, kernel_conv, c1, c2
+      )
+      ssim_per_channel_list.append(ssim_for_channel)
+
+    ssim_scores_stacked = jnp.stack(
+        ssim_per_channel_list, axis=-1
+    )  # (batch, num_channels)
+    return jnp.mean(ssim_scores_stacked, axis=-1)  # (batch,)
+
+  @classmethod
+  def from_model_output(  # type: ignore[override]
+      cls,
+      predictions: jax.Array,  # Represents predicted images (y_pred)
+      targets: jax.Array,  # Represents ground truth images (y_true)
+      max_val: float,  # Dynamic range of pixel values
+      filter_size: int = 11,
+      filter_sigma: float = 1.5,
+      k1: float = 0.01,
+      k2: float = 0.03,
+  ) -> 'SSIM':
+    """Computes SSIM for a batch of images and creates an SSIM metric instance.
+
+    This method takes batches of predicted and target images, calculates their
+    SSIM values, and then initializes an SSIM metric object suitable for
+    aggregation across multiple batches.
+
+    Args:
+        predictions: A JAX array of predicted images, with shape ``(batch,
+          height, width, channels)``.
+        targets: A JAX array of ground truth images, with shape ``(batch,
+          height, width, channels)``.
+        max_val: The maximum possible pixel value (dynamic range) of the images
+          (e.g., 1.0 for float images in [0,1], 255 for uint8 images).
+        filter_size: The size of the Gaussian filter window used in SSIM
+          calculation (default is 11).
+        filter_sigma: The standard deviation of the Gaussian filter (default is
+          1.5).
+        k1: SSIM stability constant for the luminance term (default is 0.01).
+        k2: SSIM stability constant for the contrast/structure term (default is
+          0.03).
+
+    Returns:
+        An SSIM instance containing the SSIM values for the current batch,
+        ready for averaging.
+    """
+    # shape (batch_size,)
+    batch_ssim_values = cls._calculate_ssim(
+        predictions,
+        targets,
+        max_val=max_val,
+        filter_size=filter_size,
+        filter_sigma=filter_sigma,
+        k1=k1,
+        k2=k2,
+    )
+    return super().from_model_output(values=batch_ssim_values)