Skip to content

Commit aedc290

Browse files
committed
mypy docstyle etc
1 parent 6de0e48 commit aedc290

4 files changed

Lines changed: 102 additions & 49 deletions

File tree

molpipeline/experimental/uncertainty/__init__.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,8 @@
1+
"""Experimental uncertainty wrappers for conformal prediction in MolPipeline.
2+
3+
Provides CrossConformalCV and UnifiedConformalCV for robust uncertainty quantification.
4+
"""
5+
16
from molpipeline.experimental.uncertainty.conformal import (
27
CrossConformalCV,
38
UnifiedConformalCV,

molpipeline/experimental/uncertainty/conformal.py

Lines changed: 49 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -37,8 +37,7 @@ def bin_targets(y: np.ndarray, n_bins: int = 10) -> np.ndarray:
3737

3838

3939
class UnifiedConformalCV(BaseEstimator):
40-
"""One wrapper to rule them all: conformal prediction for both classifiers and
41-
regressors.
40+
"""One wrapper to rule them all: conformal prediction for both classifiers and regressors.
4241
4342
Uses the crepes library under the hood.
4443
@@ -78,7 +77,29 @@ def __init__(
7877
n_jobs: int = 1,
7978
**kwargs: Any,
8079
) -> None:
81-
"""Initialize UnifiedConformalCV."""
80+
"""Initialize UnifiedConformalCV.
81+
82+
Parameters
83+
----------
84+
estimator : Any
85+
The base estimator or pipeline to wrap.
86+
mondrian : Any, optional
87+
Mondrian calibration/grouping (default: False).
88+
confidence_level : float, optional
89+
Confidence level for prediction sets/intervals (default: 0.9).
90+
estimator_type : str, optional
91+
Type of estimator: 'classifier' or 'regressor' (default: 'classifier').
92+
nonconformity : Any, optional
93+
Nonconformity function for classification.
94+
difficulty_estimator : Any, optional
95+
Difficulty estimator for normalized conformal prediction (regression).
96+
binning : Any, optional
97+
Number of bins or binning function for Mondrian calibration (regression).
98+
n_jobs : int, optional
99+
Number of parallel jobs (default: 1).
100+
**kwargs : Any
101+
Additional keyword arguments for crepes.
102+
"""
82103
self.estimator = estimator
83104
self.mondrian = mondrian
84105
self.confidence_level = confidence_level
@@ -284,8 +305,7 @@ def predict_int(self, x: np.ndarray, confidence: float | None = None) -> Any:
284305

285306

286307
class CrossConformalCV(BaseEstimator):
287-
"""Cross-conformal prediction for both classifiers and regressors using
288-
WrapClassifier/WrapRegressor.
308+
"""Cross-conformal prediction for both classifiers and regressors using WrapClassifier/WrapRegressor.
289309
290310
Handles Mondrian (class_cond) logic as described.
291311
@@ -315,7 +335,7 @@ class CrossConformalCV(BaseEstimator):
315335
Additional keyword arguments for crepes.
316336
317337
"""
318-
338+
319339
def __init__(
320340
self,
321341
estimator: Any,
@@ -328,7 +348,29 @@ def __init__(
328348
n_bins: int = 10,
329349
**kwargs: Any,
330350
) -> None:
331-
"""Initialize CrossConformalCV."""
351+
"""Initialize CrossConformalCV.
352+
353+
Parameters
354+
----------
355+
estimator : Any
356+
The base estimator or pipeline to wrap.
357+
n_folds : int, optional
358+
Number of cross-validation folds (default: 5).
359+
confidence_level : float, optional
360+
Confidence level for prediction sets/intervals (default: 0.9).
361+
mondrian : Any, optional
362+
Mondrian calibration/grouping (default: False).
363+
nonconformity : Any, optional
364+
Nonconformity function for classification.
365+
binning : Any, optional
366+
Number of bins or binning function for Mondrian calibration (regression).
367+
estimator_type : str, optional
368+
Type of estimator: 'classifier' or 'regressor' (default: 'classifier').
369+
n_bins : int, optional
370+
Number of bins for stratified splitting in regression (default: 10).
371+
**kwargs : Any
372+
Additional keyword arguments for crepes.
373+
"""
332374
self.estimator = estimator
333375
self.n_folds = n_folds
334376
self.confidence_level = confidence_level
Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1 +1,4 @@
1+
"""Unit tests for conformal prediction wrappers in molpipeline.experimental.uncertainty.conformal.
2+
"""
3+
14
"Uncertainty test module"

tests/test_pipeline.py

Lines changed: 45 additions & 42 deletions
Original file line numberDiff line numberDiff line change
@@ -376,48 +376,51 @@ def test_calibrated_classifier(self) -> None:
376376
self.assertEqual(predicted_value_array.shape, (len(TEST_SMILES),))
377377
self.assertEqual(predicted_proba_array.shape, (len(TEST_SMILES), 2))
378378

379-
def test_conformal_pipeline_classifier(self):
380-
"""Test conformal prediction with a pipeline on SMILES data."""
381-
from molpipeline.experimental.uncertainty.conformal import UnifiedConformalCV, CrossConformalCV
382-
383-
# Use the global test data
384-
smiles = TEST_SMILES
385-
y = np.array(CONTAINS_OX)
386-
387-
# Build a pipeline: SMILES -> Mol -> MorganFP -> RF
388-
smi2mol = SmilesToMol()
389-
mol2morgan = MolToMorganFP(radius=2, n_bits=128)
390-
rf = RandomForestClassifier(n_estimators=10, random_state=42)
391-
pipeline = Pipeline([
392-
("smi2mol", smi2mol),
393-
("morgan", mol2morgan),
394-
("rf", rf)
395-
])
396-
397-
# Split data
398-
from sklearn.model_selection import train_test_split
399-
X_train, X_calib, y_train, y_calib = train_test_split(smiles, y, test_size=0.3, random_state=42)
400-
401-
# UnifiedConformalCV
402-
cp = UnifiedConformalCV(pipeline, estimator_type="classifier")
403-
cp.fit(X_train, y_train)
404-
cp.calibrate(X_calib, y_calib)
405-
preds = cp.predict(X_calib)
406-
probs = cp.predict_proba(X_calib)
407-
sets = cp.predict_conformal_set(X_calib)
408-
self.assertEqual(len(preds), len(y_calib))
409-
self.assertEqual(probs.shape[0], len(y_calib))
410-
self.assertEqual(len(sets), len(y_calib))
411-
412-
# CrossConformalCV
413-
ccp = CrossConformalCV(pipeline, estimator_type="classifier", n_folds=3)
414-
ccp.fit(smiles, y)
415-
preds_ccp = ccp.predict(smiles)
416-
probs_ccp = ccp.predict_proba(smiles)
417-
sets_ccp = ccp.predict_conformal_set(smiles)
418-
self.assertEqual(len(preds_ccp), len(y))
419-
self.assertEqual(probs_ccp.shape[0], len(y))
420-
self.assertEqual(len(sets_ccp), len(y))
379+
def test_conformal_pipeline_classifier(self) -> None:
380+
"""Test conformal prediction with a pipeline on SMILES data.
381+
382+
Fits UnifiedConformalCV and CrossConformalCV on a SMILES pipeline and checks that each output has one entry per input sample.
383+
"""
384+
from molpipeline.experimental.uncertainty.conformal import UnifiedConformalCV, CrossConformalCV
385+
386+
# Use the global test data
387+
smiles = TEST_SMILES
388+
y = np.array(CONTAINS_OX)
389+
390+
# Build a pipeline: SMILES -> Mol -> MorganFP -> RF
391+
smi2mol = SmilesToMol()
392+
mol2morgan = MolToMorganFP(radius=2, n_bits=128)
393+
rf = RandomForestClassifier(n_estimators=10, random_state=42)
394+
pipeline = Pipeline([
395+
("smi2mol", smi2mol),
396+
("morgan", mol2morgan),
397+
("rf", rf)
398+
])
399+
400+
# Split data
401+
from sklearn.model_selection import train_test_split
402+
X_train, X_calib, y_train, y_calib = train_test_split(smiles, y, test_size=0.3, random_state=42)
403+
404+
# UnifiedConformalCV
405+
cp = UnifiedConformalCV(pipeline, estimator_type="classifier")
406+
cp.fit(X_train, y_train)
407+
cp.calibrate(X_calib, y_calib)
408+
preds = cp.predict(X_calib)
409+
probs = cp.predict_proba(X_calib)
410+
sets = cp.predict_conformal_set(X_calib)
411+
self.assertEqual(len(preds), len(y_calib))
412+
self.assertEqual(probs.shape[0], len(y_calib))
413+
self.assertEqual(len(sets), len(y_calib))
414+
415+
# CrossConformalCV
416+
ccp = CrossConformalCV(pipeline, estimator_type="classifier", n_folds=3)
417+
ccp.fit(smiles, y)
418+
preds_ccp = ccp.predict(smiles)
419+
probs_ccp = ccp.predict_proba(smiles)
420+
sets_ccp = ccp.predict_conformal_set(smiles)
421+
self.assertEqual(len(preds_ccp), len(y))
422+
self.assertEqual(probs_ccp.shape[0], len(y))
423+
self.assertEqual(len(sets_ccp), len(y))
421424

422425

423426
if __name__ == "__main__":

0 commit comments

Comments
 (0)