Skip to content

Commit 89d40f4

Browse files
authored
V2: Snapshot serialization and cloud SDK (#1525)
1 parent f0ef364 commit 89d40f4

File tree

67 files changed

+1525
-858
lines changed

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

67 files changed

+1525
-858
lines changed

examples/cookbook/metrics.ipynb

+197-123
Large diffs are not rendered by default.
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,166 @@
1+
{
2+
"cells": [
3+
{
4+
"metadata": {},
5+
"cell_type": "code",
6+
"source": [
7+
"import pandas as pd\n",
8+
"import numpy as np"
9+
],
10+
"id": "874e0a0fe3d5bb40",
11+
"outputs": [],
12+
"execution_count": null
13+
},
14+
{
15+
"metadata": {},
16+
"cell_type": "code",
17+
"source": [
18+
"from evidently import ColumnType\n",
19+
"from evidently.future.report import Report\n",
20+
"from evidently.future.datasets import BinaryClassification, Regression\n",
21+
"from evidently.future.datasets import ColumnInfo\n",
22+
"from evidently.future.datasets import DataDefinition\n",
23+
"from evidently.future.descriptors import TextLength\n",
24+
"import pandas as pd\n",
25+
"from evidently.future.datasets import Dataset\n",
26+
"from evidently.future.presets.classification import ClassificationQuality\n",
27+
"from evidently.future.tests import lt"
28+
],
29+
"id": "6d19a877eacd9045",
30+
"outputs": [],
31+
"execution_count": null
32+
},
33+
{
34+
"cell_type": "code",
35+
"id": "initial_id",
36+
"metadata": {
37+
"collapsed": true
38+
},
39+
"source": [
40+
"from evidently.future.presets import DataSummaryPreset\n",
41+
"\n",
42+
"num_rows = 20\n",
43+
"np.random.seed(42)\n",
44+
"\n",
45+
"# Generate numerical data with some missing values\n",
46+
"num_col1 = np.random.randint(1, 100, num_rows).astype(float)\n",
47+
"num_col2 = np.random.uniform(10, 500, num_rows)\n",
48+
"num_col1[5] = np.nan \n",
49+
"num_col2[12] = np.nan \n",
50+
"\n",
51+
"# Generate categorical data with some missing values\n",
52+
"cat_col1 = np.random.choice(['A', 'B', 'C'], num_rows)\n",
53+
"cat_col2 = np.random.choice(['X', 'Y', 'Z'], num_rows)\n",
54+
"cat_col1[3] = np.nan \n",
55+
"cat_col2[8] = np.nan \n",
56+
"\n",
57+
"# Generate text data with some missing values\n",
58+
"text_col = np.random.choice(['Hello world', 'Test string', 'Sample text', 'Random text'], num_rows)\n",
59+
"text_col[6] = np.nan \n",
60+
"\n",
61+
"# Generate datetime data with some missing values\n",
62+
"date_col = pd.date_range(start='2025-01-01', periods=num_rows, freq='D')\n",
63+
"date_col = date_col.to_series().astype(\"object\") # Convert to object to allow NaNs\n",
64+
"date_col.iloc[10] = np.nan \n",
65+
"\n",
66+
"# Create DataFrame\n",
67+
"df = pd.DataFrame({\n",
68+
" 'Numerical_1': num_col1,\n",
69+
" 'Numerical_2': num_col2,\n",
70+
" 'Categorical_1': cat_col1,\n",
71+
" 'Categorical_2': cat_col2,\n",
72+
" 'Text': text_col,\n",
73+
" 'Datetime': date_col.values, \n",
74+
" 'Datetime2': date_col.values,\n",
75+
" 'Datetime3': date_col.values,\n",
76+
"})\n",
77+
"\n",
78+
"report = Report(\n",
79+
" [\n",
80+
" DataSummaryPreset(row_count_tests=[lt(1)])\n",
81+
" ],\n",
82+
" tags=[\"t2\"],\n",
83+
")\n",
84+
"\n",
85+
"report.set_model_id(\"m2\")\n",
86+
"\n",
87+
"snapshot = report.run(df, None, metadata={\"metadata_item\": \"meta_value\"}, tags=[\"t3\"])"
88+
],
89+
"outputs": [],
90+
"execution_count": null
91+
},
92+
{
93+
"metadata": {},
94+
"cell_type": "code",
95+
"source": "snapshot",
96+
"id": "4f09cb9ac4f36265",
97+
"outputs": [],
98+
"execution_count": null
99+
},
100+
{
101+
"metadata": {},
102+
"cell_type": "code",
103+
"source": [
104+
"import uuid\n",
105+
"from evidently.future.workspace import CloudWorkspace\n",
106+
"\n",
107+
"client = CloudWorkspace(token=\"\", url=\"http://localhost:8003\")\n",
108+
"client.add_run(uuid.UUID(\"01956698-b6d3-7ab0-9add-776f1a77ba78\"), snapshot)"
109+
],
110+
"id": "aa73787e3b5151c7",
111+
"outputs": [],
112+
"execution_count": null
113+
},
114+
{
115+
"metadata": {},
116+
"cell_type": "code",
117+
"source": [
118+
"from evidently.ui.workspace import CloudWorkspace\n",
119+
"import uuid\n",
120+
"\n",
121+
"client = CloudWorkspace(token=\"\", url=\"http://localhost:8003\")\n",
122+
"client.add_run(uuid.UUID(\"0195d6d0-ee9e-7b79-be49-a790c3a0692e\"), snapshot, include_data=True)"
123+
],
124+
"id": "514d62511c317e01",
125+
"outputs": [],
126+
"execution_count": null
127+
},
128+
{
129+
"metadata": {},
130+
"cell_type": "code",
131+
"source": "",
132+
"id": "2c13c4f2a9ffea3c",
133+
"outputs": [],
134+
"execution_count": null
135+
},
136+
{
137+
"metadata": {},
138+
"cell_type": "code",
139+
"source": "",
140+
"id": "1a74a64406ca5910",
141+
"outputs": [],
142+
"execution_count": null
143+
}
144+
],
145+
"metadata": {
146+
"kernelspec": {
147+
"display_name": "Python 3",
148+
"language": "python",
149+
"name": "python3"
150+
},
151+
"language_info": {
152+
"codemirror_mode": {
153+
"name": "ipython",
154+
"version": 2
155+
},
156+
"file_extension": ".py",
157+
"mimetype": "text/x-python",
158+
"name": "python",
159+
"nbconvert_exporter": "python",
160+
"pygments_lexer": "ipython2",
161+
"version": "2.7.6"
162+
}
163+
},
164+
"nbformat": 4,
165+
"nbformat_minor": 5
166+
}

requirements.min.txt

+1-1
Original file line number | Diff line number | Diff line change
@@ -8,7 +8,7 @@ nltk==3.6.7
88
scipy==1.10.0
99
requests==2.32.0
1010
PyYAML==5.4
11-
pydantic==1.10.13
11+
pydantic==1.10.16
1212
litestar==2.8.3
1313
typing-inspect==0.9.0
1414
uvicorn==0.22.0

setup.py

+1-1
Original file line number | Diff line number | Diff line change
@@ -60,7 +60,7 @@
6060
"scipy>=1.10.0",
6161
"requests>=2.32.0",
6262
"PyYAML>=5.4",
63-
"pydantic>=1.10.13",
63+
"pydantic>=1.10.16",
6464
"litestar>=2.8.3",
6565
"typing-inspect>=0.9.0",
6666
"uvicorn[standard]>=0.22.0",

src/evidently/calculations/classification_performance.py

+5-4
Original file line number | Diff line number | Diff line change
@@ -14,6 +14,7 @@
1414
from pandas.core.dtypes.common import is_string_dtype
1515
from sklearn import metrics
1616

17+
from evidently.core import Label
1718
from evidently.metric_results import Boxes
1819
from evidently.metric_results import ConfusionMatrix
1920
from evidently.metric_results import DatasetClassificationQuality
@@ -27,8 +28,8 @@
2728

2829

2930
def calculate_confusion_by_classes(
30-
confusion_matrix: np.ndarray, class_names: Sequence[Union[str, int]]
31-
) -> Dict[Union[str, int], Dict[str, int]]:
31+
confusion_matrix: np.ndarray, class_names: Sequence[Union[str, int, None]]
32+
) -> Dict[Label, Dict[str, int]]:
3233
"""Calculate metrics:
3334
- TP (true positive)
3435
- TN (true negative)
@@ -319,8 +320,8 @@ def calculate_lift_table(binded):
319320
return result
320321

321322

322-
def calculate_matrix(target: pd.Series, prediction: pd.Series, labels: List[Union[str, int]]) -> ConfusionMatrix:
323-
sorted_labels = sorted(labels)
323+
def calculate_matrix(target: pd.Series, prediction: pd.Series, labels: List[Label]) -> ConfusionMatrix:
324+
sorted_labels = sorted(labels) # type: ignore[type-var]
324325
matrix = metrics.confusion_matrix(target, prediction, labels=sorted_labels)
325326
return ConfusionMatrix(labels=sorted_labels, values=[row.tolist() for row in matrix])
326327

src/evidently/core.py

+3
Original file line number | Diff line number | Diff line change
@@ -36,6 +36,9 @@
3636
IncludeOptions = Union["AbstractSetIntStr", "MappingIntStrAny"]
3737

3838

39+
Label = Union[int, str, None]
40+
41+
3942
class ColumnType(Enum):
4043
Numerical = "num"
4144
Categorical = "cat"

src/evidently/features/contains_link_feature.py

+1-3
Original file line number | Diff line number | Diff line change
@@ -15,12 +15,10 @@ class Config:
1515

1616
__feature_type__: ClassVar = ColumnType.Categorical
1717
display_name_template: ClassVar = "{column_name} contains link"
18-
column_name: str
1918

2019
def __init__(self, column_name: str, display_name: Optional[str] = None):
21-
self.column_name = column_name
2220
self.display_name = display_name
23-
super().__init__()
21+
super().__init__(column_name=column_name)
2422

2523
def apply(self, value: Any):
2624
if value is None or (isinstance(value, float) and np.isnan(value)):

src/evidently/features/is_valid_json_feature.py

+1-3
Original file line number | Diff line number | Diff line change
@@ -13,12 +13,10 @@ class Config:
1313

1414
__feature_type__: ClassVar = ColumnType.Categorical
1515
display_name_template: ClassVar = "JSON valid for {column_name}"
16-
column_name: str
1716

1817
def __init__(self, column_name: str, display_name: Optional[str] = None):
19-
self.column_name = column_name
2018
self.display_name = display_name
21-
super().__init__()
19+
super().__init__(column_name=column_name)
2220

2321
def apply(self, value: Any):
2422
try:

src/evidently/features/is_valid_python_feature.py

+1-3
Original file line number | Diff line number | Diff line change
@@ -13,12 +13,10 @@ class Config:
1313

1414
__feature_type__: ClassVar = ColumnType.Categorical
1515
display_name_template: ClassVar = "Valid Python for {column_name}"
16-
column_name: str
1716

1817
def __init__(self, column_name: str, display_name: Optional[str] = None):
19-
self.column_name = column_name
2018
self.display_name = display_name
21-
super().__init__()
19+
super().__init__(column_name=column_name)
2220

2321
def apply(self, value: Any) -> bool:
2422
try:

src/evidently/features/is_valid_sql_feature.py

+1-3
Original file line number | Diff line number | Diff line change
@@ -12,12 +12,10 @@ class Config:
1212

1313
__feature_type__: ClassVar = ColumnType.Categorical
1414
display_name_template: ClassVar = "SQL Validity Check for {column_name}"
15-
column_name: str
1615

1716
def __init__(self, column_name: str, display_name: Optional[str] = None):
18-
self.column_name = column_name
1917
self.display_name = display_name
20-
super().__init__()
18+
super().__init__(column_name=column_name)
2119

2220
def apply(self, value: Any):
2321
if value is None or not isinstance(value, str):

src/evidently/features/json_schema_match_feature.py

+3-2
Original file line number | Diff line number | Diff line change
@@ -2,6 +2,7 @@
22
from typing import ClassVar
33
from typing import Dict
44
from typing import Optional
5+
from typing import Type
56

67
import pandas as pd
78

@@ -17,14 +18,14 @@ class Config:
1718

1819
__feature_type__: ClassVar = ColumnType.Categorical
1920
column_name: str
20-
expected_schema: Dict[str, type]
21+
expected_schema: Dict[str, Type]
2122
validate_types: bool
2223
exact_match: bool
2324

2425
def __init__(
2526
self,
2627
column_name: str,
27-
expected_schema: Dict[str, type],
28+
expected_schema: Dict[str, Type],
2829
validate_types: bool = False,
2930
exact_match: bool = False,
3031
display_name: Optional[str] = None,

src/evidently/future/_registry.py

+7
Original file line number | Diff line number | Diff line change
@@ -2,6 +2,7 @@
22
# fmt: off
33
from evidently.future.metric_types import BoundTest
44
from evidently.future.metric_types import Metric
5+
from evidently.future.metric_types import MetricResult
56
from evidently.future.metric_types import MetricTest
67
from evidently.pydantic_utils import register_type_alias
78

@@ -118,3 +119,9 @@
118119
register_type_alias(Metric, "evidently.future.metric_types.ByLabelCountMetric", "evidently:metric_v2:ByLabelCountMetric")
119120

120121
register_type_alias(Metric, "evidently.future.metric_types.ColumnMetric", "evidently:metric_v2:ColumnMetric")
122+
123+
register_type_alias(MetricResult, "evidently.future.metric_types.ByLabelCountValue", "evidently:metric_result_v2:ByLabelCountValue")
124+
register_type_alias(MetricResult, "evidently.future.metric_types.ByLabelValue", "evidently:metric_result_v2:ByLabelValue")
125+
register_type_alias(MetricResult, "evidently.future.metric_types.CountValue", "evidently:metric_result_v2:CountValue")
126+
register_type_alias(MetricResult, "evidently.future.metric_types.MeanStdValue", "evidently:metric_result_v2:MeanStdValue")
127+
register_type_alias(MetricResult, "evidently.future.metric_types.SingleValue", "evidently:metric_result_v2:SingleValue")

0 commit comments

Comments
 (0)