Skip to content

Commit 17cb5d7

Browse files
authored
Merge branch 'posit-dev:main' into add-get_dataframe
2 parents 583e755 + 40a773b commit 17cb5d7

11 files changed

Lines changed: 278 additions & 54 deletions

File tree

.github/workflows/code-checks.yaml

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,4 +17,11 @@ jobs:
1717
- uses: actions/setup-python@v5
1818
with:
1919
python-version: "3.10"
20+
- name: Setup uv
21+
uses: astral-sh/setup-uv@v2
22+
- name: Check validate.pyi is up to date
23+
run: |
24+
uv run make pyi
25+
git diff --exit-code pointblank/validate.pyi || \
26+
(echo "validate.pyi is out of date — run 'make pyi' and commit the result" && exit 1)
2027
- uses: pre-commit/action@v3.0.1

.pre-commit-config.yaml

Lines changed: 10 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,15 +1,22 @@
11
exclude: "(.*\\.svg)|(.*\\.qmd)|(.*\\.ambr)|(.*\\.csv)|(.*\\.txt)|(.*\\.json)|(.*\\.ipynb)|(.*\\.html)"
22
repos:
3+
- repo: local
4+
hooks:
5+
- id: check-pyi-sync
6+
name: validate.pyi must be up to date
7+
language: system
8+
entry: bash -c 'make pyi && git diff --exit-code pointblank/validate.pyi'
9+
pass_filenames: false
10+
stages: [commit]
311
- repo: https://github.com/pre-commit/pre-commit-hooks
412
rev: v4.6.0
513
hooks:
614
- id: trailing-whitespace
715
- id: end-of-file-fixer
16+
# NOTE: ruff version must match the pin in pyproject.toml [dependency-groups] dev
817
- repo: https://github.com/astral-sh/ruff-pre-commit
9-
rev: v0.9.9
18+
rev: v0.14.10
1019
hooks:
11-
# Run the linter.
1220
- id: ruff
1321
args: [--fix]
14-
# Run the formatter.
1522
- id: ruff-format

.vscode/settings.json

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,11 +13,13 @@
1313
"editor.codeActionsOnSave": {
1414
"source.fixAll": "explicit",
1515
"source.organizeImports": "explicit"
16-
},
16+
}
1717
},
1818
"[markdown]": {
1919
"editor.formatOnSave": false
2020
},
21+
"ruff.format.args": ["--exclude", "*.pyi", "--exclude", "*.md"],
22+
"ruff.lint.args": ["--exclude", "*.pyi", "--exclude", "*.md"],
2123
"python.testing.pytestArgs": ["tests"],
2224
"python.testing.unittestEnabled": false,
2325
"python.testing.pytestEnabled": true,

CITATION.cff

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@ message: 'If you wish to cite the "Pointblank" package use:'
33
type: software
44
license: MIT
55
title: "Pointblank: data validation toolkit for assessing and monitoring data quality."
6-
version: 0.21.0
6+
version: 0.22.0
77
abstract: Validate data in Polars and Pandas DataFrames and database tables.
88
Validation pipelines can be made using easily-readable, consecutive validation
99
steps. Upon execution of the validation plan, several reporting options are available.

Makefile

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,8 @@ pyi: ## Generate .pyi stub files
66
--include-private \
77
-o .
88
@uv run scripts/generate_agg_validate_pyi.py
9+
@uv run ruff check --fix pointblank/validate.pyi
10+
@uv run ruff format pointblank/validate.pyi
911

1012
.PHONY: test
1113
test:

pointblank/_agg.py

Lines changed: 15 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -86,6 +86,20 @@ def _generic_between(real: float, lower: float, upper: float) -> bool:
8686
return bool(lower <= real <= upper)
8787

8888

89+
def split_agg_name(name: str) -> tuple[str, str]:
    """Split an aggregation method name into aggregator and comparator names.

    An optional leading ``col_`` prefix is dropped, then the remainder is split at its
    last underscore: everything before it is the aggregator name and the final token is
    the comparator name.

    Args:
        name (str): The aggregation method name (e.g., "col_sum_eq" or "sum_eq").

    Returns:
        tuple[str, str]: A tuple of (agg_name, comp_name) e.g., ("sum", "eq").

    Raises:
        ValueError: If no underscore remains after the prefix is removed, so the name
            cannot be split into two parts.
    """
    stripped = name.removeprefix("col_")
    # rpartition returns an empty separator when "_" is absent, which lets us fail with a
    # message naming the bad input instead of an opaque tuple-unpacking error.
    agg_name, sep, comp_name = stripped.rpartition("_")
    if not sep:
        raise ValueError(
            f"Cannot split {name!r} into aggregator and comparator names; "
            "expected a name like 'col_sum_eq' or 'sum_eq'."
        )
    return agg_name, comp_name
101+
102+
89103
def resolve_agg_registries(name: str) -> tuple[Aggregator, Comparator]:
90104
"""Resolve the assertion name to a valid aggregator
91105
@@ -95,8 +109,7 @@ def resolve_agg_registries(name: str) -> tuple[Aggregator, Comparator]:
95109
Returns:
96110
tuple[Aggregator, Comparator]: The aggregator and comparator functions.
97111
"""
98-
name = name.removeprefix("col_")
99-
agg_name, comp_name = name.split("_")[-2:]
112+
agg_name, comp_name = split_agg_name(name)
100113

101114
aggregator = AGGREGATOR_REGISTRY.get(agg_name)
102115
comparator = COMPARATOR_REGISTRY.get(comp_name)

pointblank/validate.py

Lines changed: 61 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,12 @@
2828
from great_tables.vals import fmt_integer, fmt_number
2929
from importlib_resources import files
3030

31-
from pointblank._agg import is_valid_agg, load_validation_method_grid, resolve_agg_registries
31+
from pointblank._agg import (
32+
is_valid_agg,
33+
load_validation_method_grid,
34+
resolve_agg_registries,
35+
split_agg_name,
36+
)
3237
from pointblank._constants import (
3338
ASSERTION_TYPE_METHOD_MAP,
3439
CHECK_MARK_SPAN,
@@ -19174,6 +19179,15 @@ def _create_autobrief_or_failure_text(
1917419179
for_failure=for_failure,
1917519180
)
1917619181

19182+
if is_valid_agg(assertion_type):
19183+
return _create_text_agg(
19184+
lang=lang,
19185+
assertion_type=assertion_type,
19186+
column=column,
19187+
values=values,
19188+
for_failure=for_failure,
19189+
)
19190+
1917719191
return None
1917819192

1917919193

@@ -19208,6 +19222,52 @@ def _create_text_comparison(
1920819222
)
1920919223

1921019224

19225+
def _create_text_agg(
    lang: str,
    assertion_type: str,
    column: str | list[str],
    values: dict[str, Any],
    for_failure: bool = False,
) -> str:
    """Compose the autobrief sentence for an aggregation step (col_sum_eq, col_avg_gt, ...).

    Args:
        lang (str): Language code used to select the text template and operator wording.
        assertion_type (str): Aggregation method name; it is split into its aggregate and
            comparator parts with `split_agg_name`.
        column (str | list[str]): Column(s) under test, handed to `_prep_column_text`.
        values (dict[str, Any]): Comparison value. For a dict, its "value" entry is used
            when present; otherwise the object itself is stringified.
        for_failure (bool): Forwarded to `_expect_failure_type` to pick the template variant.

    Returns:
        str: The `compare_*_text` template for `lang`, filled with the column phrase, the
            operator, and the value text.

    Raises:
        AssertionError: If the aggregate part has no display name in the table below.
    """
    text_kind = _expect_failure_type(for_failure=for_failure)

    agg_key, comp_key = split_agg_name(assertion_type)

    # The test `test_brief_auto_all_agg_methods` covers this table so that no aggregate
    # can be added without also getting a display name here.
    display_by_agg: dict[str, str] = {
        "sum": "sum",
        "avg": "average",
        "sd": "standard deviation",
    }
    try:
        agg_label: str = display_by_agg[agg_key]
    except KeyError as missing:  # pragma: no cover
        # Should never happen in prod; the test above catches it in CI.
        raise AssertionError from missing

    # Reuse the operator wording of the matching `col_vals_*` comparison; if the table has
    # no entry, fall back to the bare comparator token.
    operator_table = COMPARISON_OPERATORS_AR if lang == "ar" else COMPARISON_OPERATORS
    operator = operator_table.get(f"col_vals_{comp_key}", comp_key)

    column_phrase = _prep_column_text(column=column)

    raw_value = values
    if isinstance(values, dict):
        raw_value = values.get("value", values)
    value_phrase = _prep_values_text(values=str(raw_value), lang=lang, limit=3)

    # Template shape: "Expect that the {agg} of {column} should be {operator} {value}."
    template = EXPECT_FAIL_TEXT[f"compare_{text_kind}_text"][lang]

    # NOTE(review): the "the ... of ..." wrapper is hard-coded English and is inserted
    # unchanged for every `lang` -- confirm whether it should be localized.
    return template.format(
        column_text=f"the {agg_label} of {column_phrase}",
        operator=operator,
        values_text=value_phrase,
    )
19269+
19270+
1921119271
def _create_text_between(
1921219272
lang: str,
1921319273
column: str,

pointblank/validate.pyi

Lines changed: 22 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,3 @@
1-
from pointblank import Actions, Thresholds
2-
from pointblank._utils import _PBUnresolvedColumn
3-
from pointblank.column import Column, ReferenceColumn
4-
from pointblank._typing import Tolerance
5-
61
import datetime
72
from collections.abc import Collection
83
from dataclasses import dataclass
@@ -17,20 +12,20 @@ from pointblank.thresholds import Actions, FinalActions, Thresholds
1712
from typing import Any, Callable, Literal, ParamSpec, TypeVar
1813

1914
__all__ = [
20-
"Validate",
15+
"get_action_metadata",
16+
"get_validation_summary",
17+
"config",
2118
"load_dataset",
2219
"read_file",
2320
"write_file",
24-
"config",
25-
"connect_to_table",
26-
"print_database_tables",
21+
"get_data_path",
2722
"preview",
2823
"missing_vals_tbl",
29-
"get_action_metadata",
3024
"get_column_count",
31-
"get_data_path",
3225
"get_row_count",
33-
"get_validation_summary",
26+
"connect_to_table",
27+
"print_database_tables",
28+
"Validate",
3429
]
3530

3631
P = ParamSpec("P")
@@ -586,7 +581,7 @@ class Validate:
586581
thresholds: float | bool | tuple | dict | Thresholds | None = None,
587582
brief: str | bool = False,
588583
actions: Actions | None = None,
589-
active: bool = True,
584+
active: bool | Callable = True,
590585
) -> Validate:
591586
"""Assert the values in a column sum to a value eq some `value`.
592587
@@ -622,7 +617,7 @@ class Validate:
622617
thresholds: float | bool | tuple | dict | Thresholds | None = None,
623618
brief: str | bool = False,
624619
actions: Actions | None = None,
625-
active: bool = True,
620+
active: bool | Callable = True,
626621
) -> Validate:
627622
"""Assert the values in a column sum to a value gt some `value`.
628623
@@ -658,7 +653,7 @@ class Validate:
658653
thresholds: float | bool | tuple | dict | Thresholds | None = None,
659654
brief: str | bool = False,
660655
actions: Actions | None = None,
661-
active: bool = True,
656+
active: bool | Callable = True,
662657
) -> Validate:
663658
"""Assert the values in a column sum to a value ge some `value`.
664659
@@ -694,7 +689,7 @@ class Validate:
694689
thresholds: float | bool | tuple | dict | Thresholds | None = None,
695690
brief: str | bool = False,
696691
actions: Actions | None = None,
697-
active: bool = True,
692+
active: bool | Callable = True,
698693
) -> Validate:
699694
"""Assert the values in a column sum to a value lt some `value`.
700695
@@ -730,7 +725,7 @@ class Validate:
730725
thresholds: float | bool | tuple | dict | Thresholds | None = None,
731726
brief: str | bool = False,
732727
actions: Actions | None = None,
733-
active: bool = True,
728+
active: bool | Callable = True,
734729
) -> Validate:
735730
"""Assert the values in a column sum to a value le some `value`.
736731
@@ -766,7 +761,7 @@ class Validate:
766761
thresholds: float | bool | tuple | dict | Thresholds | None = None,
767762
brief: str | bool = False,
768763
actions: Actions | None = None,
769-
active: bool = True,
764+
active: bool | Callable = True,
770765
) -> Validate:
771766
"""Assert the values in a column avg to a value eq some `value`.
772767
@@ -802,7 +797,7 @@ class Validate:
802797
thresholds: float | bool | tuple | dict | Thresholds | None = None,
803798
brief: str | bool = False,
804799
actions: Actions | None = None,
805-
active: bool = True,
800+
active: bool | Callable = True,
806801
) -> Validate:
807802
"""Assert the values in a column avg to a value gt some `value`.
808803
@@ -838,7 +833,7 @@ class Validate:
838833
thresholds: float | bool | tuple | dict | Thresholds | None = None,
839834
brief: str | bool = False,
840835
actions: Actions | None = None,
841-
active: bool = True,
836+
active: bool | Callable = True,
842837
) -> Validate:
843838
"""Assert the values in a column avg to a value ge some `value`.
844839
@@ -874,7 +869,7 @@ class Validate:
874869
thresholds: float | bool | tuple | dict | Thresholds | None = None,
875870
brief: str | bool = False,
876871
actions: Actions | None = None,
877-
active: bool = True,
872+
active: bool | Callable = True,
878873
) -> Validate:
879874
"""Assert the values in a column avg to a value lt some `value`.
880875
@@ -910,7 +905,7 @@ class Validate:
910905
thresholds: float | bool | tuple | dict | Thresholds | None = None,
911906
brief: str | bool = False,
912907
actions: Actions | None = None,
913-
active: bool = True,
908+
active: bool | Callable = True,
914909
) -> Validate:
915910
"""Assert the values in a column avg to a value le some `value`.
916911
@@ -946,7 +941,7 @@ class Validate:
946941
thresholds: float | bool | tuple | dict | Thresholds | None = None,
947942
brief: str | bool = False,
948943
actions: Actions | None = None,
949-
active: bool = True,
944+
active: bool | Callable = True,
950945
) -> Validate:
951946
"""Assert the values in a column sd to a value eq some `value`.
952947
@@ -982,7 +977,7 @@ class Validate:
982977
thresholds: float | bool | tuple | dict | Thresholds | None = None,
983978
brief: str | bool = False,
984979
actions: Actions | None = None,
985-
active: bool = True,
980+
active: bool | Callable = True,
986981
) -> Validate:
987982
"""Assert the values in a column sd to a value gt some `value`.
988983
@@ -1018,7 +1013,7 @@ class Validate:
10181013
thresholds: float | bool | tuple | dict | Thresholds | None = None,
10191014
brief: str | bool = False,
10201015
actions: Actions | None = None,
1021-
active: bool = True,
1016+
active: bool | Callable = True,
10221017
) -> Validate:
10231018
"""Assert the values in a column sd to a value ge some `value`.
10241019
@@ -1054,7 +1049,7 @@ class Validate:
10541049
thresholds: float | bool | tuple | dict | Thresholds | None = None,
10551050
brief: str | bool = False,
10561051
actions: Actions | None = None,
1057-
active: bool = True,
1052+
active: bool | Callable = True,
10581053
) -> Validate:
10591054
"""Assert the values in a column sd to a value lt some `value`.
10601055
@@ -1090,7 +1085,7 @@ class Validate:
10901085
thresholds: float | bool | tuple | dict | Thresholds | None = None,
10911086
brief: str | bool = False,
10921087
actions: Actions | None = None,
1093-
active: bool = True,
1088+
active: bool | Callable = True,
10941089
) -> Validate:
10951090
"""Assert the values in a column sd to a value le some `value`.
10961091

pyproject.toml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -107,7 +107,7 @@ dev = [
107107
"pytest-xdist>=3.6.1",
108108
"pytz>=2025.2",
109109
"quartodoc>=0.8.1; python_version >= '3.9'",
110-
"ruff>=0.9.9",
110+
"ruff==0.14.10", # NOTE: must match rev in .pre-commit-config.yaml
111111
"shiny>=1.4.0",
112112
"openpyxl>=3.0.0",
113113
"mcp[cli]>=1.10.1",
@@ -136,7 +136,7 @@ testpaths = ["tests"]
136136
line-length = 100
137137

138138
[tool.ruff.lint]
139-
exclude = ["docs", ".venv", "tests/*"]
139+
exclude = ["docs", ".venv", "tests/*", "pointblank/validate.pyi"]
140140

141141
ignore = [
142142
"E402", # module level import not at top of file

0 commit comments

Comments
 (0)