Commit 1c733f3

Merge branch 'devel' of github.com:HLT-ISTI/QuaPy into devel

2 parents b59d8cb + 2034392

13 files changed: +595 -21 lines

CHANGE_LOG.txt

Lines changed: 9 additions & 0 deletions
@@ -1,3 +1,12 @@
+Change Log 0.1.10
+-----------------
+
+- Added (aggregative) bootstrap for deriving confidence regions (confidence intervals, ellipses in the simplex,
+  or ellipses in the CLR space). This method is efficient because it leverages the two phases of aggregative
+  quantifiers: resampling is applied only to the aggregation phase, thus avoiding training many quantifiers or
+  classifying the instances of a sample multiple times. See the new example no. 15.
+
+
 Change Log 0.1.9
 ----------------
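
For quick orientation, a minimal sketch of the new API (the names below are taken from example 15, included further down in this commit):

    from quapy.method.confidence import AggregativeBootstrap
    from quapy.method.aggregative import PACC

    # wrap any aggregative quantifier; bootstrap resampling happens in the aggregation phase only
    pacc = AggregativeBootstrap(PACC(), confidence_level=0.95)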

TODO.txt

Lines changed: 3 additions & 0 deletions
@@ -1,3 +1,6 @@
+- [TODO] adapt BayesianCC to WithConfidence interface
+- [TODO] test the return_type="index" in protocols and finish the "distributing_samples.py" example
+- [TODO] add EDy (an implementation is available at quantificationlib)
 - [TODO] add ensemble methods SC-MQ, MC-SQ, MC-MQ
 - [TODO] add HistNetQ
 - [TODO] add CDE-iteration and Bayes-CDE methods

examples/0.basics.py

Lines changed: 2 additions & 4 deletions
@@ -33,10 +33,8 @@
 print(f'training prevalence = {F.strprev(train.prevalence())}')

 # let us train one quantifier, for example, PACC using a sklearn's Logistic Regressor as the underlying classifier
-# classifier = LogisticRegression()
-
-# pacc = qp.method.aggregative.PACC(classifier)
-pacc = qp.method.aggregative.PACC()
+classifier = LogisticRegression()
+pacc = qp.method.aggregative.PACC(classifier)

 print(f'training {pacc}')
 pacc.fit(train)

examples/15.confidence_regions.py

Lines changed: 78 additions & 0 deletions
@@ -0,0 +1,78 @@
+from quapy.method.confidence import AggregativeBootstrap
+from quapy.method.aggregative import PACC
+import quapy.functional as F
+import quapy as qp
+
+"""
+Just like any other type of estimator, quantifier predictions are affected by error. It is therefore useful to
+provide, along with the point estimate (the class prevalence values), a measure of uncertainty. This typically
+comes in the form of credible regions around the point estimate.
+
+QuaPy implements a method for deriving confidence regions around point estimates of class prevalence based on
+bootstrap.
+
+The bootstrap method comes down to resampling the population several times, thus generating a series of point
+estimates. QuaPy provides a variant of bootstrap for aggregative quantifiers that applies resampling only to the
+pre-classified instances.
+
+Let us see one example:
+"""
+
+# load some data
+data = qp.datasets.fetch_UCIMulticlassDataset('molecular')
+train, test = data.train_test
+
+# by simply wrapping an aggregative quantifier within the AggregativeBootstrap class, we can obtain confidence
+# intervals around the point estimate, in this case, at a 95% confidence level
+pacc = AggregativeBootstrap(PACC(), confidence_level=0.95)
+
+with qp.util.temp_seed(0):
+    # we train the quantifier the usual way
+    pacc.fit(train)
+
+    # let us simulate some shift in the test data
+    random_prevalence = F.uniform_prevalence_sampling(n_classes=test.n_classes)
+    shifted_test = test.sampling(200, *random_prevalence)
+    true_prev = shifted_test.prevalence()
+
+    # by calling "quantify_conf", we obtain the point estimate and the confidence intervals around it
+    pred_prev, conf_intervals = pacc.quantify_conf(shifted_test.X)
+
+    # conf_intervals is an instance of ConfidenceRegionABC, which provides some useful utilities like:
+    # - coverage: a function that computes the fraction of true values that belong to the confidence region
+    # - simplex_portion: estimates the proportion of the simplex covered by the confidence region (amplitude)
+    # ideally, we are interested in obtaining confidence regions with high coverage and small amplitude
+
+    # the point estimate is computed as the mean of all bootstrap predictions; let us see the prediction error
+    error = qp.error.ae(true_prev, pred_prev)
+
+    # some useful outputs
+    print(f'train prevalence: {F.strprev(train.prevalence())}')
+    print(f'test prevalence: {F.strprev(true_prev)}')
+    print(f'point-estimate: {F.strprev(pred_prev)}')
+    print(f'absolute error: {error:.3f}')
+    print(f'Is the true value in the confidence region?: {conf_intervals.coverage(true_prev)==1}')
+    print(f'Proportion of simplex covered at {pacc.confidence_level*100:.1f}%: {conf_intervals.simplex_portion()*100:.2f}%')
+
+"""
+Final remarks:
+There are various ways of performing bootstrap:
+- the population-based approach (default): performs resampling of the test instances;
+  e.g., use AggregativeBootstrap(PACC(), n_train_samples=1, n_test_samples=100, confidence_level=0.95)
+- the model-based approach: performs resampling of the training instances, thus training several quantifiers;
+  e.g., use AggregativeBootstrap(PACC(), n_train_samples=100, n_test_samples=1, confidence_level=0.95)
+  this implementation avoids retraining the classifier, and applies resampling only to train different
+  aggregation functions
+- the combined approach: a combination of the above;
+  e.g., use AggregativeBootstrap(PACC(), n_train_samples=100, n_test_samples=100, confidence_level=0.95)
+  this example will generate 100 x 100 predictions
+
+There are different ways of constructing confidence regions implemented in QuaPy:
+- confidence intervals: the simplest way, and one that typically works well in practice;
+  use: AggregativeBootstrap(PACC(), confidence_level=0.95, method='intervals')
+- confidence ellipse in the simplex: creates an ellipse, lying on the probability simplex, around the point estimate;
+  use: AggregativeBootstrap(PACC(), confidence_level=0.95, method='ellipse')
+- confidence ellipse in the Centered-Log Ratio (CLR) space: creates an ellipse in the CLR space (this should be
+  convenient for taking into account the inner structure of the probability simplex);
+  use: AggregativeBootstrap(PACC(), confidence_level=0.95, method='ellipse-clr')
+"""

examples/distributing_samples.py

Lines changed: 38 additions & 0 deletions
@@ -0,0 +1,38 @@
+"""
+Imagine we want to generate many samples out of a collection, which we want to distribute so that others can run
+their own experiments on the very same test samples. One naive solution would come down to applying a given
+protocol to our collection (say, the artificial prevalence protocol on the 'academic-success' UCI dataset),
+storing all those samples on disk, and making them available online; distributing that many samples, however, is
+undesirable. In this example, we instead generate the indexes that allow anyone to regenerate the samples out of
+the original collection.
+"""
+
+import quapy as qp
+from quapy.method.aggregative import PACC
+from quapy.protocol import UPP
+
+data = qp.datasets.fetch_UCIMulticlassDataset('academic-success')
+train, test = data.train_test
+
+# let us train a quantifier to check whether we can actually replicate the results
+quantifier = PACC()
+quantifier.fit(train)
+
+# let us simulate our experimental results
+protocol = UPP(test, sample_size=100, repeats=100, random_state=0)
+our_mae = qp.evaluation.evaluate(quantifier, protocol=protocol, error_metric='mae')
+
+print(f'We have obtained a MAE={our_mae:.3f}')
+
+# let us generate the indexes for distribution; we specify that we want the indexes, not the samples
+protocol = UPP(test, sample_size=100, repeats=100, random_state=0, return_type='index')
+indexes = protocol.samples_parameters()
+
+# Imagine we have distributed the indexes; we now show how to replicate our experiments.
+from quapy.protocol import ProtocolFromIndex
+data = qp.datasets.fetch_UCIMulticlassDataset('academic-success')
+train, test = data.train_test
+protocol = ProtocolFromIndex(data=test, indexes=indexes)
+their_mae = qp.evaluation.evaluate(quantifier, protocol=protocol, error_metric='mae')
+
+print(f'Another lab obtains a MAE={their_mae:.3f}')
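
A side note on the distribution step itself: the example stops once `indexes` is computed, and persisting them is left to the user. A minimal sketch, assuming the indexes returned by `samples_parameters()` are a picklable list of NumPy index arrays (the file name is illustrative):

    import pickle

    # persist the sampling indexes so they can be shared online
    with open('academic-success_UPP_indexes.pkl', 'wb') as f:
        pickle.dump(indexes, f)

    # ...on the receiving end, reload them before building ProtocolFromIndex
    with open('academic-success_UPP_indexes.pkl', 'rb') as f:
        indexes = pickle.load(f)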

examples/ensembles.py

Lines changed: 36 additions & 0 deletions
@@ -0,0 +1,36 @@
+from sklearn.linear_model import LogisticRegression
+
+import quapy as qp
+from quapy.protocol import UPP
+from quapy.method.aggregative import PACC, DMy, EMQ, KDEyML
+from quapy.method.meta import SCMQ
+
+qp.environ["SAMPLE_SIZE"] = 100
+
+
+def train_and_test_model(quantifier, train, test):
+    quantifier.fit(train)
+    report = qp.evaluation.evaluation_report(quantifier, UPP(test), error_metrics=['mae', 'mrae'])
+    print(quantifier.__class__.__name__)
+    print(report.mean(numeric_only=True))
+
+
+quantifiers = [
+    PACC(),
+    DMy(),
+    EMQ(),
+    KDEyML()
+]
+
+classifier = LogisticRegression()
+
+dataset_name = qp.datasets.UCI_MULTICLASS_DATASETS[0]
+data = qp.datasets.fetch_UCIMulticlassDataset(dataset_name)
+train, test = data.train_test
+
+scmq = SCMQ(classifier, quantifiers)
+
+train_and_test_model(scmq, train, test)
+
+for quantifier in quantifiers:
+    train_and_test_model(quantifier, train, test)

quapy/__init__.py

Lines changed: 1 addition & 1 deletion
@@ -14,7 +14,7 @@
 from . import classification
 import os

-__version__ = '0.1.9'
+__version__ = '0.1.10'

 environ = {
     'SAMPLE_SIZE': None,

quapy/error.py

Lines changed: 27 additions & 2 deletions
@@ -298,6 +298,31 @@ def nmd(prevs, prevs_hat):
     return (1./(n-1))*np.mean(match_distance(prevs, prevs_hat))


+def bias_binary(prevs, prevs_hat):
+    """
+    Computes the (positive) bias in a binary problem. The bias is simply the difference between the
+    predicted positive prevalence and the true positive prevalence, so that a positive value indicates
+    that the prediction tends to overestimate the true value, and a negative value that it tends to
+    underestimate it: :math:`bias(p,\\hat{p})=\\hat{p}_1-p_1`
+
+    :param prevs: array-like of shape `(n_samples, n_classes,)` with the true prevalence values
+    :param prevs_hat: array-like of shape `(n_samples, n_classes,)` with the predicted prevalence values
+    :return: binary bias
+    """
+    assert prevs.shape[-1] == 2 and prevs_hat.shape[-1] == 2, 'bias_binary can only be applied to binary problems'
+    return prevs_hat[..., 1] - prevs[..., 1]
+
+
+def mean_bias_binary(prevs, prevs_hat):
+    """
+    Computes the mean of the (positive) bias in a binary problem.
+
+    :param prevs: array-like of shape `(n_samples, n_classes,)` with the true prevalence values
+    :param prevs_hat: array-like of shape `(n_samples, n_classes,)` with the predicted prevalence values
+    :return: mean binary bias
+    """
+    return np.mean(bias_binary(prevs, prevs_hat))
+
+
 def md(prevs, prevs_hat, ERROR_TOL=1E-3):
     """
     Computes the Match Distance, under the assumption that the cost in mistaking class i with class i+1 is 1 in

@@ -338,8 +363,8 @@ def __check_eps(eps=None):


 CLASSIFICATION_ERROR = {f1e, acce}
-QUANTIFICATION_ERROR = {mae, mnae, mrae, mnrae, mse, mkld, mnkld}
-QUANTIFICATION_ERROR_SINGLE = {ae, nae, rae, nrae, se, kld, nkld}
+QUANTIFICATION_ERROR = {mae, mnae, mrae, mnrae, mse, mkld, mnkld, mean_bias_binary}
+QUANTIFICATION_ERROR_SINGLE = {ae, nae, rae, nrae, se, kld, nkld, bias_binary}
 QUANTIFICATION_ERROR_SMOOTH = {kld, nkld, rae, nrae, mkld, mnkld, mrae}
 CLASSIFICATION_ERROR_NAMES = {func.__name__ for func in CLASSIFICATION_ERROR}
 QUANTIFICATION_ERROR_NAMES = {func.__name__ for func in QUANTIFICATION_ERROR}
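
To illustrate the two new measures, a minimal sketch with made-up toy prevalence values (not part of the commit):

    import numpy as np
    import quapy as qp

    # two binary prevalence vectors per sample, in the format [negative, positive]
    true_prevs = np.asarray([[0.7, 0.3], [0.4, 0.6]])
    pred_prevs = np.asarray([[0.6, 0.4], [0.5, 0.5]])

    # signed per-sample bias on the positive class: hat{p}_1 - p_1
    print(qp.error.bias_binary(true_prevs, pred_prevs))       # [ 0.1 -0.1]

    # mean signed bias across samples
    print(qp.error.mean_bias_binary(true_prevs, pred_prevs))  # 0.0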

quapy/method/aggregative.py

Lines changed: 2 additions & 3 deletions
@@ -591,7 +591,6 @@ def _check_init_parameters(self):
         if self.norm not in ACC.NORMALIZATIONS:
             raise ValueError(f"unknown normalization; valid ones are {ACC.NORMALIZATIONS}")

-
     def aggregation_fit(self, classif_predictions: LabelledCollection, data: LabelledCollection):
         """
         Estimates the misclassification rates

@@ -870,13 +869,13 @@ def aggregation_fit(self, classif_predictions: LabelledCollection, data: Labelle
         :param data: a :class:`quapy.data.base.LabelledCollection` consisting of the training data
         """
         pred_labels, true_labels = classif_predictions.Xy
-        self._n_and_c_labeled = confusion_matrix(y_true=true_labels, y_pred=pred_labels, labels=self.classifier.classes_)
+        self._n_and_c_labeled = confusion_matrix(y_true=true_labels, y_pred=pred_labels, labels=self.classifier.classes_).astype(float)

     def sample_from_posterior(self, classif_predictions):
         if self._n_and_c_labeled is None:
             raise ValueError("aggregation_fit must be called before sample_from_posterior")

-        n_c_unlabeled = F.counts_from_labels(classif_predictions, self.classifier.classes_)
+        n_c_unlabeled = F.counts_from_labels(classif_predictions, self.classifier.classes_).astype(float)

         self._samples = _bayesian.sample_posterior(
             n_c_unlabeled=n_c_unlabeled,
