|
5 | 5 | from matplotlib.axes import Axes |
6 | 6 | from matplotlib.figure import Figure |
7 | 7 |
|
8 | | -from autoemulate.core.types import NumpyLike, TensorLike |
| 8 | +from autoemulate.core.types import DistributionLike, GaussianLike, NumpyLike, TensorLike |
9 | 9 | from autoemulate.emulators.base import Emulator |
10 | 10 |
|
11 | 11 |
|
@@ -236,7 +236,6 @@ def mean_and_var_surface( |
236 | 236 | The predicted variance on the grid. |
237 | 237 | grid: list[TensorLike] |
238 | 238 | The grid of parameter values used for predictions. |
239 | | -
|
240 | 239 | """ |
241 | 240 | # Determine which parameters to vary and which to fix |
242 | 241 | grid_params = {} |
@@ -412,8 +411,162 @@ def create_and_plot_slice( |
412 | 411 | param_pair_names, |
413 | 412 | vmin, |
414 | 413 | vmax, |
415 | | - fixed_params_info=f"{', '.join(fixed_params)} at {quantile:.1f} quantile" |
416 | | - if len(fixed_params) > 0 |
417 | | - else "None", |
| 414 | + fixed_params_info=( |
| 415 | + f"{', '.join(fixed_params)} at {quantile:.1f} quantile" |
| 416 | + if len(fixed_params) > 0 |
| 417 | + else "None" |
| 418 | + ), |
418 | 419 | ) |
419 | 420 | return fig, ax |
| 421 | + |
| 422 | + |
def coverage_from_distributions(
    y_pred: DistributionLike,
    y_true: TensorLike,
    levels: list[float] | NumpyLike | TensorLike | None = None,
    n_samples: int = 2000,
    joint: bool = False,
) -> tuple[NumpyLike, NumpyLike]:
    """Compute empirical coverage for a set of nominal confidence levels.

    For each nominal level ``p`` a central ``p``-interval is formed (equal
    tail mass on each side) and the fraction of true values falling inside
    it is recorded.

    Parameters
    ----------
    y_pred: DistributionLike
        The emulator predicted distribution.
    y_true: TensorLike
        The true values.
    levels: array-like, optional
        Nominal coverage levels (between 0 and 1). If None, a default grid is
        used. Defaults to None.
    n_samples: int
        Number of Monte-Carlo samples to draw from the predictive
        distribution to compute empirical intervals if analytical quantiles
        are not available.
    joint: bool
        If True and the predictive outputs are multivariate, compute joint
        coverage (i.e., the true vector must lie inside the interval for all
        dimensions). If False (default), compute marginal coverage per output
        dimension and return the mean across data points.

    Returns
    -------
    levels: np.ndarray
        Nominal coverage levels.
    empirical: np.ndarray
        Empirical coverages. Shape is (len(levels), output_dim) when
        `joint=False` and output_dim>1, or (len(levels),) when joint=True or
        output_dim==1.
    """
    if levels is None:
        # Default: 51 evenly spaced levels spanning [0, 1] inclusive.
        levels = np.linspace(0.0, 1.0, 51)
    levels = np.asarray(levels)

    # Prefer analytical quantiles (icdf) for Gaussian-like predictives;
    # otherwise fall back to Monte-Carlo sample quantiles.
    samples = None
    y_dist = None
    if isinstance(y_pred, GaussianLike):
        y_dist = y_pred
    elif isinstance(y_pred, torch.distributions.Independent) and isinstance(
        y_pred.base_dist, GaussianLike
    ):
        y_dist = y_pred.base_dist
    else:
        samples = y_pred.sample((n_samples,))

    empirical_list = []
    for p in levels:
        # Central interval: split the excluded mass equally between tails.
        lower_q = (1.0 - p) / 2.0
        upper_q = 1.0 - lower_q

        if y_dist is not None:
            try:
                # icdf requires a tensor-valued quantile: passing the raw
                # numpy scalar from `levels` raises a TypeError inside
                # torch.erfinv for Normal distributions.
                lower = y_dist.icdf(torch.tensor(float(lower_q)))
                upper = y_dist.icdf(torch.tensor(float(upper_q)))
            except NotImplementedError:
                # Some Gaussian-like distributions (e.g. MultivariateNormal)
                # do not implement icdf; switch to the sampling path for this
                # and all remaining levels.
                y_dist = None
                samples = y_pred.sample((n_samples,))
                lower = torch.quantile(samples, float(lower_q), dim=0)
                upper = torch.quantile(samples, float(upper_q), dim=0)
        else:
            assert samples is not None
            lower = torch.quantile(samples, float(lower_q), dim=0)
            upper = torch.quantile(samples, float(upper_q), dim=0)

        inside = (y_true >= lower) & (y_true <= upper)
        if joint:
            # True vector must be covered in every output dimension at once.
            inside_all = inside.all(dim=-1)
            empirical = inside_all.float().mean().item()
        else:
            # Marginal per-dimension coverage, averaged over data points.
            empirical = inside.float().mean(dim=0).cpu().numpy()
        empirical_list.append(empirical)

    empirical_arr = np.asarray(empirical_list)

    return levels, empirical_arr
| 501 | + |
| 502 | + |
def plot_calibration_from_distributions(
    y_pred: DistributionLike,
    y_true: TensorLike,
    levels: np.ndarray | None = None,
    n_samples: int = 2000,
    joint: bool = False,
    title: str | None = None,
    legend: bool = True,
    figsize: tuple[int, int] | None = None,
):
    """Plot calibration curve(s) given predictive distributions and true values.

    Draws empirical coverage (y-axis) against nominal coverage (x-axis).
    Curves above the diagonal indicate overestimated uncertainty; curves
    below it indicate underestimated uncertainty.

    Parameters
    ----------
    y_pred: DistributionLike
        The emulator predicted distribution.
    y_true: TensorLike
        The true values.
    levels: array-like, optional
        Nominal coverage levels (between 0 and 1). If None, a default grid is
        used.
    n_samples: int
        Number of Monte-Carlo samples to draw from the predictive
        distribution to compute empirical intervals.
    joint: bool
        If True and the predictive outputs are multivariate, compute joint
        coverage (i.e., the true vector must lie inside the interval for all
        dimensions). If False (default), compute marginal coverage per output
        dimension and return the mean across data points.
    title: str | None
        An optional title for the plot. Defaults to None (no title).
    legend: bool
        Whether to display a legend. Defaults to True.
    figsize: tuple[int, int] | None
        The size of the figure to create. If None, a default size is used.
    """
    nominal, observed = coverage_from_distributions(
        y_pred, y_true, levels=levels, n_samples=n_samples, joint=joint
    )

    if figsize is None:
        figsize = (6, 6)
    fig, ax = plt.subplots(figsize=figsize)

    is_multi_output = observed.ndim > 1 and observed.shape[1] > 1
    if is_multi_output:
        # One calibration curve per output dimension.
        for dim, curve in enumerate(observed.T):
            ax.plot(nominal, curve, marker="o", label=f"$y_{dim}$")
    else:
        ax.plot(nominal, observed, marker="o", label="empirical")

    # Perfect calibration lies on the diagonal.
    ax.plot([0, 1], [0, 1], linestyle="--", color="gray", label="ideal")
    ax.set_xlim(0, 1)
    ax.set_ylim(0, 1)
    ax.set_xlabel("Expected coverage")
    ax.set_ylabel("Observed coverage")

    if title:
        ax.set_title(title)
    ax.grid(alpha=0.3)
    if legend:
        ax.legend()

    return fig, ax
0 commit comments