evidentlyai
diff --git a/examples/cookbook/metrics.ipynb b/examples/cookbook/metrics.ipynb
Lines changed: 197 additions & 123 deletions
diff --git a/examples/future_examples/upload_snapshots.ipynb b/examples/future_examples/upload_snapshots.ipynb
Lines changed: 166 additions & 0 deletions
diff --git a/requirements.min.txt b/requirements.min.txt
Lines changed: 1 addition & 1 deletion
diff --git a/setup.py b/setup.py
Lines changed: 1 addition & 1 deletion
diff --git a/src/evidently/calculations/classification_performance.py b/src/evidently/calculations/classification_performance.py
Lines changed: 5 additions & 4 deletions
diff --git a/src/evidently/core.py b/src/evidently/core.py
Lines changed: 3 additions & 0 deletions
diff --git a/src/evidently/features/contains_link_feature.py b/src/evidently/features/contains_link_feature.py
Lines changed: 1 addition & 3 deletions
diff --git a/src/evidently/features/is_valid_json_feature.py b/src/evidently/features/is_valid_json_feature.py
Lines changed: 1 addition & 3 deletions
diff --git a/src/evidently/features/is_valid_python_feature.py b/src/evidently/features/is_valid_python_feature.py
Lines changed: 1 addition & 3 deletions
diff --git a/src/evidently/features/is_valid_sql_feature.py b/src/evidently/features/is_valid_sql_feature.py
Lines changed: 1 addition & 3 deletions
@@ -0,0 +1,166 @@
+{
+ "cells": [
+  {
+   "metadata": {},
+   "cell_type": "code",
+   "source": [
+    "import pandas as pd\n",
+    "import numpy as np"
+   ],
+   "id": "874e0a0fe3d5bb40",
+   "outputs": [],
+   "execution_count": null
+  },
+  {
+   "metadata": {},
+   "cell_type": "code",
+   "source": [
+    "from evidently import ColumnType\n",
+    "from evidently.future.report import Report\n",
+    "from evidently.future.datasets import BinaryClassification, Regression\n",
+    "from evidently.future.datasets import ColumnInfo\n",
+    "from evidently.future.datasets import DataDefinition\n",
+    "from evidently.future.descriptors import TextLength\n",
+    "import pandas as pd\n",
+    "from evidently.future.datasets import Dataset\n",
+    "from evidently.future.presets.classification import ClassificationQuality\n",
+    "from evidently.future.tests import lt"
+   ],
+   "id": "6d19a877eacd9045",
+   "outputs": [],
+   "execution_count": null
+  },
+  {
+   "cell_type": "code",
+   "id": "initial_id",
+   "metadata": {
+    "collapsed": true
+   },
+   "source": [
+    "from evidently.future.presets import DataSummaryPreset\n",
+    "\n",
+    "# Build a small synthetic dataset with numerical, categorical, text and\n",
+    "# datetime columns, each with one missing value, then run a report on it.\n",
+    "num_rows = 20\n",
+    "np.random.seed(42)\n",
+    "\n",
+    "# Generate numerical data with some missing values\n",
+    "num_col1 = np.random.randint(1, 100, num_rows).astype(float)\n",
+    "num_col2 = np.random.uniform(10, 500, num_rows)\n",
+    "num_col1[5] = np.nan\n",
+    "num_col2[12] = np.nan\n",
+    "\n",
+    "# Generate categorical data with some missing values.\n",
+    "# np.random.choice over strings returns a fixed-width string array; assigning\n",
+    "# np.nan into it would store the text 'nan' (truncated to the item width)\n",
+    "# rather than a missing value, so cast to object before inserting NaN.\n",
+    "cat_col1 = np.random.choice(['A', 'B', 'C'], num_rows).astype(object)\n",
+    "cat_col2 = np.random.choice(['X', 'Y', 'Z'], num_rows).astype(object)\n",
+    "cat_col1[3] = np.nan\n",
+    "cat_col2[8] = np.nan\n",
+    "\n",
+    "# Generate text data with some missing values (object dtype for the same reason)\n",
+    "text_col = np.random.choice(\n",
+    "    ['Hello world', 'Test string', 'Sample text', 'Random text'], num_rows\n",
+    ").astype(object)\n",
+    "text_col[6] = np.nan\n",
+    "\n",
+    "# Generate datetime data with some missing values\n",
+    "date_col = pd.date_range(start='2025-01-01', periods=num_rows, freq='D')\n",
+    "date_col = date_col.to_series().astype(\"object\")  # Convert to object to allow NaNs\n",
+    "date_col.iloc[10] = np.nan\n",
+    "\n",
+    "# Create DataFrame\n",
+    "df = pd.DataFrame({\n",
+    "    'Numerical_1': num_col1,\n",
+    "    'Numerical_2': num_col2,\n",
+    "    'Categorical_1': cat_col1,\n",
+    "    'Categorical_2': cat_col2,\n",
+    "    'Text': text_col,\n",
+    "    'Datetime': date_col.values,\n",
+    "    'Datetime2': date_col.values,\n",
+    "    'Datetime3': date_col.values,\n",
+    "})\n",
+    "\n",
+    "report = Report(\n",
+    "    [\n",
+    "        DataSummaryPreset(row_count_tests=[lt(1)])\n",
+    "    ],\n",
+    "    tags=[\"t2\"],\n",
+    ")\n",
+    "\n",
+    "report.set_model_id(\"m2\")\n",
+    "\n",
+    "snapshot = report.run(df, None, metadata={\"metadata_item\": \"meta_value\"}, tags=[\"t3\"])"
+   ],
+   "outputs": [],
+   "execution_count": null
+  },
+  {
+   "metadata": {},
+   "cell_type": "code",
+   "source": "snapshot",
+   "id": "4f09cb9ac4f36265",
+   "outputs": [],
+   "execution_count": null
+  },
+  {
+   "metadata": {},
+   "cell_type": "code",
+   "source": [
+    "import uuid\n",
+    "from evidently.future.workspace import CloudWorkspace\n",
+    "\n",
+    "client = CloudWorkspace(token=\"\", url=\"http://localhost:8003\")\n",
+    "client.add_run(uuid.UUID(\"01956698-b6d3-7ab0-9add-776f1a77ba78\"), snapshot)"
+   ],
+   "id": "aa73787e3b5151c7",
+   "outputs": [],
+   "execution_count": null
+  },
+  {
+   "metadata": {},
+   "cell_type": "code",
+   "source": [
+    "from evidently.ui.workspace import CloudWorkspace\n",
+    "import uuid\n",
+    "\n",
+    "client = CloudWorkspace(token=\"\", url=\"http://localhost:8003\")\n",
+    "client.add_run(uuid.UUID(\"0195d6d0-ee9e-7b79-be49-a790c3a0692e\"), snapshot, include_data=True)"
+   ],
+   "id": "514d62511c317e01",
+   "outputs": [],
+   "execution_count": null
+  },
+  {
+   "metadata": {},
+   "cell_type": "code",
+   "source": "",
+   "id": "2c13c4f2a9ffea3c",
+   "outputs": [],
+   "execution_count": null
+  },
+  {
+   "metadata": {},
+   "cell_type": "code",
+   "source": "",
+   "id": "1a74a64406ca5910",
+   "outputs": [],
+   "execution_count": null
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 2
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython2",
+   "version": "2.7.6"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
@@ -8,7 +8,7 @@ nltk==3.6.7
 scipy==1.10.0
 requests==2.32.0
 PyYAML==5.4
-pydantic==1.10.13
+pydantic==1.10.16
 litestar==2.8.3
 typing-inspect==0.9.0
 uvicorn==0.22.0
 
@@ -60,7 +60,7 @@
         "scipy>=1.10.0",
         "requests>=2.32.0",
         "PyYAML>=5.4",
-        "pydantic>=1.10.13",
+        "pydantic>=1.10.16",
         "litestar>=2.8.3",
         "typing-inspect>=0.9.0",
         "uvicorn[standard]>=0.22.0",
 
@@ -14,6 +14,7 @@
 from pandas.core.dtypes.common import is_string_dtype
 from sklearn import metrics
 
+from evidently.core import Label
 from evidently.metric_results import Boxes
 from evidently.metric_results import ConfusionMatrix
 from evidently.metric_results import DatasetClassificationQuality
@@ -27,8 +28,8 @@
 
 
 def calculate_confusion_by_classes(
-    confusion_matrix: np.ndarray, class_names: Sequence[Union[str, int]]
-) -> Dict[Union[str, int], Dict[str, int]]:
+    confusion_matrix: np.ndarray, class_names: Sequence[Union[str, int, None]]
+) -> Dict[Label, Dict[str, int]]:
     """Calculate metrics:
     - TP (true positive)
     - TN (true negative)
@@ -319,8 +320,8 @@ def calculate_lift_table(binded):
     return result
 
 
-def calculate_matrix(target: pd.Series, prediction: pd.Series, labels: List[Union[str, int]]) -> ConfusionMatrix:
-    sorted_labels = sorted(labels)
+def calculate_matrix(target: pd.Series, prediction: pd.Series, labels: List[Label]) -> ConfusionMatrix:
+    sorted_labels = sorted(labels)  # type: ignore[type-var]
     matrix = metrics.confusion_matrix(target, prediction, labels=sorted_labels)
     return ConfusionMatrix(labels=sorted_labels, values=[row.tolist() for row in matrix])
 
 
@@ -36,6 +36,9 @@
 IncludeOptions = Union["AbstractSetIntStr", "MappingIntStrAny"]
 
 
+Label = Union[int, str, None]
+
+
 class ColumnType(Enum):
     Numerical = "num"
     Categorical = "cat"
 
@@ -15,12 +15,10 @@ class Config:
 
     __feature_type__: ClassVar = ColumnType.Categorical
     display_name_template: ClassVar = "{column_name} contains link"
-    column_name: str
 
     def __init__(self, column_name: str, display_name: Optional[str] = None):
-        self.column_name = column_name
         self.display_name = display_name
-        super().__init__()
+        super().__init__(column_name=column_name)
 
     def apply(self, value: Any):
         if value is None or (isinstance(value, float) and np.isnan(value)):
 
@@ -13,12 +13,10 @@ class Config:
 
     __feature_type__: ClassVar = ColumnType.Categorical
     display_name_template: ClassVar = "JSON valid for {column_name}"
-    column_name: str
 
     def __init__(self, column_name: str, display_name: Optional[str] = None):
-        self.column_name = column_name
         self.display_name = display_name
-        super().__init__()
+        super().__init__(column_name=column_name)
 
     def apply(self, value: Any):
         try:
 
@@ -13,12 +13,10 @@ class Config:
 
     __feature_type__: ClassVar = ColumnType.Categorical
     display_name_template: ClassVar = "Valid Python for {column_name}"
-    column_name: str
 
     def __init__(self, column_name: str, display_name: Optional[str] = None):
-        self.column_name = column_name
         self.display_name = display_name
-        super().__init__()
+        super().__init__(column_name=column_name)
 
     def apply(self, value: Any) -> bool:
         try:
 
@@ -12,12 +12,10 @@ class Config:
 
     __feature_type__: ClassVar = ColumnType.Categorical
     display_name_template: ClassVar = "SQL Validity Check for {column_name}"
-    column_name: str
 
     def __init__(self, column_name: str, display_name: Optional[str] = None):
-        self.column_name = column_name
         self.display_name = display_name
-        super().__init__()
+        super().__init__(column_name=column_name)
 
     def apply(self, value: Any):
         if value is None or not isinstance(value, str):