Change cumulative curve like roc_curve reiinakano#98

celik-muhammed · celik-muhammed · commit bdf111650feb · 2024-08-16T17:36:53.000Z
diff --git a/scikitplot/metrics.py b/scikitplot/metrics.py
@@ -336,11 +336,13 @@ def plot_roc_curve(y_true, y_probas, title='ROC Curves',
     return ax
 
 
-def plot_roc(y_true, y_probas, title='ROC Curves',
-             plot_micro=True, plot_macro=True, classes_to_plot=None,
-             ax=None, figsize=None, cmap='nipy_spectral',
-             title_fontsize="large", text_fontsize="medium",
-             show_labels=True,):
+def plot_roc(
+    y_true, y_probas, title='ROC Curves',
+    plot_micro=True, plot_macro=True, classes_to_plot=None,
+    ax=None, figsize=None, cmap='nipy_spectral',
+    title_fontsize="large", text_fontsize="medium",
+    show_labels=True, digits=3,
+):
     """Generates the ROC curves from labels and predicted scores/probabilities
 
     Args:
@@ -386,6 +388,9 @@ def plot_roc(y_true, y_probas, title='ROC Curves',
         show_labels (boolean, optional): Shows the labels in the plot.
             Defaults to ``True``.
 
+        digits (int, optional): Number of decimal places shown for the values in the
+            legend labels. Use e.g. 2 or 4. Defaults to 3.
+
     Returns:
         ax (:class:`matplotlib.axes.Axes`): The axes on which the plot was
             drawn.
@@ -428,8 +433,8 @@ def plot_roc(y_true, y_probas, title='ROC Curves',
             roc_auc = auc(fpr_dict[i], tpr_dict[i])
             color = plt.cm.get_cmap(cmap)(float(i) / len(classes))
             ax.plot(fpr_dict[i], tpr_dict[i], lw=2, color=color,
-                    label='ROC curve of class {0} (area = {1:0.2f})'
-                          ''.format(classes[i], roc_auc))
+                    label='ROC curve of class {0} (area = {1:.{digits}f})'
+                          ''.format(classes[i], roc_auc, digits=digits))
 
     if plot_micro:
         binarized_y_true = label_binarize(y_true, classes=classes)
@@ -440,7 +445,7 @@ def plot_roc(y_true, y_probas, title='ROC Curves',
         roc_auc = auc(fpr, tpr)
         ax.plot(fpr, tpr,
                 label='micro-average ROC curve '
-                      '(area = {0:0.2f})'.format(roc_auc),
+                      '(area = {0:.{digits}f})'.format(roc_auc, digits=digits),
                 color='deeppink', linestyle=':', linewidth=4)
 
     if plot_macro:
@@ -459,7 +464,7 @@ def plot_roc(y_true, y_probas, title='ROC Curves',
 
         ax.plot(all_fpr, mean_tpr,
                 label='macro-average ROC curve '
-                      '(area = {0:0.2f})'.format(roc_auc),
+                      '(area = {0:.{digits}f})'.format(roc_auc, digits=digits),
                 color='navy', linestyle=':', linewidth=4)
 
     ax.plot([0, 1], [0, 1], 'k--', lw=2)
@@ -475,7 +480,7 @@ def plot_roc(y_true, y_probas, title='ROC Curves',
 
 def plot_ks_statistic(y_true, y_probas, title='KS Statistic Plot',
                       ax=None, figsize=None, title_fontsize="large",
-                      text_fontsize="medium"):
+                      text_fontsize="medium", digits=3):
     """Generates the KS Statistic plot from labels and scores/probabilities
 
     Args:
@@ -503,6 +508,9 @@ def plot_ks_statistic(y_true, y_probas, title='KS Statistic Plot',
             Use e.g. "small", "medium", "large" or integer-values. Defaults to
             "medium".
 
+        digits (int, optional): Number of decimal places shown for the values in the
+            legend label. Use e.g. 2 or 4. Defaults to 3.
+
     Returns:
         ax (:class:`matplotlib.axes.Axes`): The axes on which the plot was
             drawn.
@@ -543,9 +551,10 @@ def plot_ks_statistic(y_true, y_probas, title='KS Statistic Plot',
     ax.plot(thresholds, pct2, lw=3, label='Class {}'.format(classes[1]))
     idx = np.where(thresholds == max_distance_at)[0][0]
     ax.axvline(max_distance_at, *sorted([pct1[idx], pct2[idx]]),
-               label='KS Statistic: {:.3f} at {:.3f}'.format(ks_statistic,
-                                                             max_distance_at),
-               linestyle=':', lw=3, color='black')
+               label = 'KS Statistic: {:.{digits}f} at {:.{digits}f}'.format(
+                   ks_statistic, max_distance_at, digits=digits
+                ),
+               linestyle = ':', lw=3, color='black')
 
     ax.set_xlim([0.0, 1.0])
     ax.set_ylim([0.0, 1.0])
@@ -685,13 +694,16 @@ def plot_precision_recall_curve(y_true, y_probas,
     return ax
 
 
-def plot_precision_recall(y_true, y_probas,
-                          title='Precision-Recall Curve',
-                          plot_micro=True,
-                          classes_to_plot=None, ax=None,
-                          figsize=None, cmap='nipy_spectral',
-                          title_fontsize="large",
-                          text_fontsize="medium"):
+def plot_precision_recall(
+    y_true, y_probas,
+    title='Precision-Recall Curve',
+    plot_micro=True,
+    classes_to_plot=None, ax=None,
+    figsize=None, cmap='nipy_spectral',
+    title_fontsize="large",
+    text_fontsize="medium",
+    digits=3,
+):
     """Generates the Precision Recall Curve from labels and probabilities
 
     Args:
@@ -731,6 +743,9 @@ def plot_precision_recall(y_true, y_probas,
             Use e.g. "small", "medium", "large" or integer-values. Defaults to
             "medium".
 
+        digits (int, optional): Number of decimal places shown for the values in the
+            legend labels. Use e.g. 2 or 4. Defaults to 3.
+
     Returns:
         ax (:class:`matplotlib.axes.Axes`): The axes on which the plot was
             drawn.
@@ -778,8 +793,9 @@ def plot_precision_recall(y_true, y_probas,
             color = plt.cm.get_cmap(cmap)(float(i) / len(classes))
             ax.plot(recall, precision, lw=2,
                     label='Precision-recall curve of class {0} '
-                          '(area = {1:0.3f})'.format(classes[i],
-                                                     average_precision),
+                          '(area = {1:.{digits}f})'.format(classes[i],
+                                                     average_precision,
+                                                     digits=digits),
                     color=color)
 
     if plot_micro:
@@ -790,7 +806,7 @@ def plot_precision_recall(y_true, y_probas,
                                                     average='micro')
         ax.plot(recall, precision,
                 label='micro-average Precision-recall curve '
-                      '(area = {0:0.3f})'.format(average_precision),
+                      '(area = {0:.{digits}f})'.format(average_precision, digits=digits),
                 color='navy', linestyle=':', linewidth=4)
 
     ax.set_xlim([0.0, 1.0])
@@ -802,10 +818,12 @@ def plot_precision_recall(y_true, y_probas,
     return ax
 
 
-def plot_silhouette(X, cluster_labels, title='Silhouette Analysis',
-                    metric='euclidean', copy=True, ax=None, figsize=None,
-                    cmap='nipy_spectral', title_fontsize="large",
-                    text_fontsize="medium"):
+def plot_silhouette(
+    X, cluster_labels, title='Silhouette Analysis',
+    metric='euclidean', copy=True, ax=None, figsize=None,
+    cmap='nipy_spectral', title_fontsize="large",
+    text_fontsize="medium", digits=3,
+):
     """Plots silhouette analysis of clusters provided.
 
     Args:
@@ -847,6 +865,9 @@ def plot_silhouette(X, cluster_labels, title='Silhouette Analysis',
             Use e.g. "small", "medium", "large" or integer-values. Defaults to
             "medium".
 
+        digits (int, optional): Number of digits for formatting output floating point values.
+            Use e.g. 2 or 4. Defaults to 3.
+
     Returns:
         ax (:class:`matplotlib.axes.Axes`): The axes on which the plot was
             drawn.
@@ -908,8 +929,10 @@ def plot_silhouette(X, cluster_labels, title='Silhouette Analysis',
 
         y_lower = y_upper + 10
 
-    ax.axvline(x=silhouette_avg, color="red", linestyle="--",
-               label='Silhouette score: {0:0.3f}'.format(silhouette_avg))
+    ax.axvline(
+        x=silhouette_avg, color="red", linestyle="--",
+        label='Silhouette score: {0:.{digits}f}'.format(silhouette_avg, digits=2)
+    )
 
     ax.set_yticks([])  # Clear the y-axis labels / ticks
     ax.set_xticks(np.arange(-0.1, 1.0, 0.2))
@@ -920,11 +943,13 @@ def plot_silhouette(X, cluster_labels, title='Silhouette Analysis',
     return ax
 
 
-def plot_calibration_curve(y_true, probas_list, clf_names=None, n_bins=10,                           
-                           title='Calibration plots (Reliability Curves)',
-                           ax=None, figsize=None, cmap='nipy_spectral',
-                           title_fontsize="large", text_fontsize="medium",
-                           pos_label=None, strategy="uniform",):
+def plot_calibration_curve(
+    y_true, probas_list, clf_names=None, n_bins=10,
+    title='Calibration plots (Reliability Curves)',
+    ax=None, figsize=None, cmap='nipy_spectral',
+    title_fontsize="large", text_fontsize="medium",
+    pos_label=None, strategy="uniform",
+):
     """Plots calibration curves for a set of classifier probability estimates.
 
     Plotting the calibration curves of a classifier is useful for determining
@@ -1073,9 +1098,13 @@ def plot_calibration_curve(y_true, probas_list, clf_names=None, n_bins=10,
     return ax
 
 
-def plot_cumulative_gain(y_true, y_probas, title='Cumulative Gains Curve',
-                         ax=None, figsize=None, title_fontsize="large",
-                         text_fontsize="medium", class_names = None):
+def plot_cumulative_gain(
+    y_true, y_probas, title='Cumulative Gains Curve',
+    classes_to_plot=None, plot_micro=True, plot_macro=True,
+    ax=None, figsize=None, title_fontsize="large",
+    text_fontsize="medium", cmap='nipy_spectral',
+    class_names = None,
+):
     """Generates the Cumulative Gains Plot from labels and scores/probabilities
 
     The cumulative gains chart is used to determine the effectiveness of a
@@ -1093,6 +1122,17 @@ def plot_cumulative_gain(y_true, y_probas, title='Cumulative Gains Curve',
         title (string, optional): Title of the generated plot. Defaults to
             "Cumulative Gains Curve".
 
+        classes_to_plot (list-like, optional): Classes for which the Cumulative Gain
+            curve should be plotted, e.g. [0, 'cold']. If a given class does not exist,
+            it will be ignored. If ``None``, all classes will be plotted. Defaults to
+            ``None``.
+
+        plot_micro (boolean, optional): Plot the micro-average Cumulative Gain
+            curve. Defaults to ``True``.
+
+        plot_macro (boolean, optional): Plot the macro-average Cumulative Gain
+            curve. Defaults to ``True``.
+
         ax (:class:`matplotlib.axes.Axes`, optional): The axes upon which to
             plot the learning curve. If None, the plot is drawn on a new set of
             axes.
@@ -1107,6 +1147,11 @@ def plot_cumulative_gain(y_true, y_probas, title='Cumulative Gains Curve',
         text_fontsize (string or int, optional): Matplotlib-style fontsizes.
             Use e.g. "small", "medium", "large" or integer-values. Defaults to
             "medium".
+        
+        cmap (string or :class:`matplotlib.colors.Colormap` instance, optional):
+            Colormap used to color the per-class curves. View Matplotlib Colormap
+            documentation for available options.
+            https://matplotlib.org/users/colormaps.html
             
         class_names (list of strings, optional): List of class names. Used for
             the legend. Order should be synchronized with the order of classes
@@ -1129,28 +1174,58 @@ def plot_cumulative_gain(y_true, y_probas, title='Cumulative Gains Curve',
            :align: center
            :alt: Cumulative Gains Plot
     """
+    if ax is None:
+        fig, ax = plt.subplots(1, 1, figsize=figsize)
+    ax.set_title(title, fontsize=title_fontsize)
+
     y_true = np.array(y_true)
     y_probas = np.array(y_probas)
-
     classes = np.unique(y_true)
-    if class_names is None: class_names = classes
-    if len(classes) != 2:
+
+    if classes_to_plot is None:
+        classes_to_plot = classes
+    if class_names is None: class_names = classes_to_plot
+
+    if len(classes_to_plot) != 2:
         raise ValueError('Cannot calculate Cumulative Gains for data with '
                          '{} category/ies'.format(len(classes)))
 
-    # Compute Cumulative Gain Curves
-    percentages, gains1 = cumulative_gain_curve(y_true, y_probas[:, 0],
-                                                classes[0])
-    percentages, gains2 = cumulative_gain_curve(y_true, y_probas[:, 1],
-                                                classes[1])
+    perc_dict = dict()
+    gain_dict = dict()
 
-    if ax is None:
-        fig, ax = plt.subplots(1, 1, figsize=figsize)
+    indices_to_plot = np.isin(classes, classes_to_plot)
+    # Compute the gain curve of every class; only the selected ones are drawn
+    for i, to_plot in enumerate(indices_to_plot):
+        perc_dict[i], gain_dict[i] = cumulative_gain_curve(y_true, y_probas[:, i], pos_label=classes[i])
 
-    ax.set_title(title, fontsize=title_fontsize)
+        if to_plot:
+            color = plt.cm.get_cmap(cmap)(float(i) / len(classes))
+            ax.plot(perc_dict[i], gain_dict[i], lw=2, color=color,
+                    label='Class {} Cumulative Gain curve'.format(class_names[i]))
 
-    ax.plot(percentages, gains1, lw=3, label='Class {}'.format(class_names[0]))
-    ax.plot(percentages, gains2, lw=3, label='Class {}'.format(class_names[1]))
+    # Optionally add the micro- and macro-average curves
+    if plot_micro:
+        binarized_y_true = label_binarize(y_true, classes=classes)
+        if len(classes) == 2:
+            binarized_y_true = np.hstack((1 - binarized_y_true, binarized_y_true))
+
+        perc, gain = cumulative_gain_curve(binarized_y_true.ravel(), y_probas.ravel())
+        ax.plot(perc, gain, label='micro-average Cumulative Gain curve',
+                color='deeppink', linestyle=':', linewidth=4)
+
+    if plot_macro:
+        # First aggregate all percentages
+        all_perc = np.unique(np.concatenate([perc_dict[x] for x in range(len(classes))]))
+
+        # Then interpolate all cumulative gain
+        mean_gain = np.zeros_like(all_perc)
+        for i in range(len(classes)):
+            mean_gain += np.interp(all_perc, perc_dict[i], gain_dict[i])
+
+        mean_gain /= len(classes)
+
+        ax.plot(all_perc, mean_gain, label='macro-average Cumulative Gain curve',
+                color='navy', linestyle=':', linewidth=4)
 
     ax.set_xlim([0.0, 1.0])
     ax.set_ylim([0.0, 1.0])
@@ -1159,16 +1234,19 @@ def plot_cumulative_gain(y_true, y_probas, title='Cumulative Gains Curve',
 
     ax.set_xlabel('Percentage of sample', fontsize=text_fontsize)
     ax.set_ylabel('Gain', fontsize=text_fontsize)
+
     ax.tick_params(labelsize=text_fontsize)
     ax.grid('on')
     ax.legend(loc='lower right', fontsize=text_fontsize)
 
     return ax
 
 
-def plot_lift_curve(y_true, y_probas, title='Lift Curve',
-                    ax=None, figsize=None, title_fontsize="large",
-                    text_fontsize="medium", class_names = None):
+def plot_lift_curve(
+    y_true, y_probas, title='Lift Curve',
+    ax=None, figsize=None, title_fontsize="large",
+    text_fontsize="medium", class_names = None
+):
     """Generates the Lift Curve from labels and scores/probabilities
 
     The lift curve is used to determine the effectiveness of a