@@ -57,8 +57,9 @@ def __init__(
57
57
q50_tests : SingleValueMetricTests = None ,
58
58
q75_tests : SingleValueMetricTests = None ,
59
59
unique_values_count_tests : ByLabelMetricTests = None ,
60
+ include_tests : bool = True ,
60
61
):
61
- super ().__init__ (column = column )
62
+ super ().__init__ (column = column , include_tests = include_tests )
62
63
self ._row_count_tests = row_count_tests
63
64
self ._missing_values_count_tests = missing_values_count_tests
64
65
self ._min_tests = min_tests
@@ -72,23 +73,23 @@ def __init__(
72
73
73
74
def generate_metrics (self , context : Context ) -> Sequence [MetricOrContainer ]:
74
75
metrics : List [Metric ] = [
75
- RowCount (tests = self ._row_count_tests ),
76
- MissingValueCount (column = self ._column , tests = self ._missing_values_count_tests ),
76
+ RowCount (tests = self ._get_tests ( self . _row_count_tests ) ),
77
+ MissingValueCount (column = self ._column , tests = self ._get_tests ( self . _missing_values_count_tests ) ),
77
78
]
78
79
column_type = context .column (self ._column ).column_type
79
80
if column_type == ColumnType .Numerical :
80
81
metrics += [
81
- MinValue (column = self ._column , tests = self ._min_tests ),
82
- MaxValue (column = self ._column , tests = self ._max_tests ),
83
- MeanValue (column = self ._column , tests = self ._mean_tests ),
84
- StdValue (column = self ._column , tests = self ._std_tests ),
85
- QuantileValue (column = self ._column , quantile = 0.25 , tests = self ._q25_tests ),
86
- QuantileValue (column = self ._column , quantile = 0.5 , tests = self ._q50_tests ),
87
- QuantileValue (column = self ._column , quantile = 0.75 , tests = self ._q75_tests ),
82
+ MinValue (column = self ._column , tests = self ._get_tests ( self . _min_tests ) ),
83
+ MaxValue (column = self ._column , tests = self ._get_tests ( self . _max_tests ) ),
84
+ MeanValue (column = self ._column , tests = self ._get_tests ( self . _mean_tests ) ),
85
+ StdValue (column = self ._column , tests = self ._get_tests ( self . _std_tests ) ),
86
+ QuantileValue (column = self ._column , quantile = 0.25 , tests = self ._get_tests ( self . _q25_tests ) ),
87
+ QuantileValue (column = self ._column , quantile = 0.5 , tests = self ._get_tests ( self . _q50_tests ) ),
88
+ QuantileValue (column = self ._column , quantile = 0.75 , tests = self ._get_tests ( self . _q75_tests ) ),
88
89
]
89
90
if column_type == ColumnType .Categorical :
90
91
metrics += [
91
- UniqueValueCount (column = self ._column , tests = self ._unique_values_count_tests ),
92
+ UniqueValueCount (column = self ._column , tests = self ._get_tests ( self . _unique_values_count_tests ) ),
92
93
]
93
94
if column_type == ColumnType .Datetime :
94
95
metrics += [
@@ -313,6 +314,7 @@ def __init__(
313
314
empty_column_count_tests : SingleValueMetricTests = None ,
314
315
constant_columns_count_tests : SingleValueMetricTests = None ,
315
316
dataset_missing_value_count_tests : SingleValueMetricTests = None ,
317
+ include_tests : bool = True ,
316
318
):
317
319
self .duplicated_row_count_tests = duplicated_row_count_tests
318
320
self .duplicated_column_count_tests = duplicated_column_count_tests
@@ -324,23 +326,24 @@ def __init__(
324
326
self .dataset_missing_value_count_tests = dataset_missing_value_count_tests
325
327
self .column_count_tests = column_count_tests
326
328
self .row_count_tests = row_count_tests
329
+ super ().__init__ (include_tests = include_tests )
327
330
328
331
def generate_metrics(self, context: Context) -> Sequence[MetricOrContainer]:
    """Build the dataset-level metrics emitted by this container.

    Each user-configurable test list stored on the instance is passed
    through ``self._get_tests`` before being handed to its metric.
    NOTE(review): ``_get_tests`` is not defined in the visible code;
    presumably it is provided by the base class and honours the
    ``include_tests`` flag given to ``super().__init__`` -- confirm.

    Args:
        context: Calculation context. Not read by this method.

    Returns:
        Row count, overall column count, one column count per column type
        (numerical, categorical, datetime, text), then the duplicate,
        almost-duplicate/almost-constant, empty, constant and missing-value
        counts, in that order.
    """
    return [
        RowCount(tests=self._get_tests(self.row_count_tests)),
        ColumnCount(tests=self._get_tests(self.column_count_tests)),
        # The per-type column counts are always created with an explicit
        # empty test list; they do not go through ``_get_tests``.
        ColumnCount(column_type=ColumnType.Numerical, tests=[]),
        ColumnCount(column_type=ColumnType.Categorical, tests=[]),
        ColumnCount(column_type=ColumnType.Datetime, tests=[]),
        ColumnCount(column_type=ColumnType.Text, tests=[]),
        DuplicatedRowCount(tests=self._get_tests(self.duplicated_row_count_tests)),
        DuplicatedColumnsCount(tests=self._get_tests(self.duplicated_column_count_tests)),
        AlmostDuplicatedColumnsCount(tests=self._get_tests(self.almost_duplicated_column_count_tests)),
        AlmostConstantColumnsCount(tests=self._get_tests(self.almost_constant_column_count_tests)),
        EmptyRowsCount(tests=self._get_tests(self.empty_row_count_tests)),
        EmptyColumnsCount(tests=self._get_tests(self.empty_column_count_tests)),
        ConstantColumnsCount(tests=self._get_tests(self.constant_columns_count_tests)),
        DatasetMissingValueCount(tests=self._get_tests(self.dataset_missing_value_count_tests)),
    ]
345
348
346
349
def render (
@@ -375,20 +378,27 @@ def __init__(
375
378
columns : Optional [List [str ]] = None ,
376
379
row_count_tests : SingleValueMetricTests = None ,
377
380
column_tests : Optional [Dict [str , ValueStatsTests ]] = None ,
381
+ include_tests : bool = True ,
378
382
):
379
383
self ._columns = columns
380
384
self ._value_stats : List [ValueStats ] = []
381
385
self ._row_count_tests = row_count_tests
382
386
self ._column_tests = column_tests
387
+ super ().__init__ (include_tests = include_tests )
383
388
384
389
def generate_metrics(self, context: Context) -> Sequence[MetricOrContainer]:
    """Build one row-count metric followed by per-column value statistics.

    When no explicit column list was configured (``self._columns is None``)
    the columns are ``numerical_descriptors + categorical_descriptors`` from
    ``context.data_definition``. A ``ValueStats`` container is created for
    every column and kept on ``self._value_stats``.

    Args:
        context: Calculation context; supplies the data definition and is
            forwarded to each ``ValueStats.metrics`` call.

    Returns:
        A list starting with ``RowCount`` followed by the metrics of every
        per-column ``ValueStats`` container, each minus its first entry.
    """
    if self._columns is None:
        cols = context.data_definition.numerical_descriptors + context.data_definition.categorical_descriptors
    else:
        cols = self._columns

    metrics: List[MetricOrContainer] = [RowCount(tests=self._get_tests(self._row_count_tests))]

    # Resolve the optional per-column test mapping once, not once per column.
    column_tests = self._column_tests or {}
    self._value_stats = [
        ValueStats(
            column,
            # Columns without an explicit entry fall back to a default
            # ``ValueStatsTests()``; its attributes are unpacked as kwargs.
            **column_tests.get(column, ValueStatsTests()).__dict__,
            include_tests=self.include_tests,
        )
        for column in cols
    ]

    # The first metric of each container is dropped -- presumably its own
    # RowCount, which would duplicate the one added above (TODO confirm
    # against ValueStats.generate_metrics).
    metrics.extend(chain.from_iterable(vs.metrics(context)[1:] for vs in self._value_stats))
    return metrics
@@ -419,6 +429,7 @@ def __init__(
419
429
constant_columns_count_tests : SingleValueMetricTests = None ,
420
430
dataset_missing_value_count_tests : SingleValueMetricTests = None ,
421
431
column_tests : Optional [Dict [str , ValueStatsTests ]] = None ,
432
+ include_tests : bool = True ,
422
433
):
423
434
self .duplicated_row_count_tests = duplicated_row_count_tests
424
435
self .duplicated_column_count_tests = duplicated_column_count_tests
@@ -432,6 +443,7 @@ def __init__(
432
443
self .row_count_tests = row_count_tests
433
444
self ._columns = columns
434
445
self ._column_tests = column_tests
446
+ super ().__init__ (include_tests = include_tests )
435
447
436
448
def generate_metrics (self , context : Context ) -> Sequence [MetricOrContainer ]:
437
449
columns_ = context .data_definition .get_categorical_columns () + context .data_definition .get_numerical_columns ()
@@ -446,8 +458,11 @@ def generate_metrics(self, context: Context) -> Sequence[MetricOrContainer]:
446
458
empty_column_count_tests = self .empty_column_count_tests ,
447
459
constant_columns_count_tests = self .constant_columns_count_tests ,
448
460
dataset_missing_value_count_tests = self .dataset_missing_value_count_tests ,
461
+ include_tests = self .include_tests ,
462
+ )
463
+ self ._text_evals = TextEvals (
464
+ self ._columns or columns_ , column_tests = self ._column_tests , include_tests = self .include_tests
449
465
)
450
- self ._text_evals = TextEvals (self ._columns or columns_ , column_tests = self ._column_tests )
451
466
return self ._dataset_stats .metrics (context ) + self ._text_evals .metrics (context )
452
467
453
468
def render (
0 commit comments