Skip to content

Commit 1e4d40a

Browse files
committed
feat: add pointblank tutorial materials
1 parent 027fc5d commit 1e4d40a

9 files changed

Lines changed: 441 additions & 0 deletions

python-pointblank/README.md

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
# Validating Data With Pointblank in Python
2+
3+
Supporting code and sample data for the Real Python tutorial
4+
"Validating Data With Pointblank in Python".
5+
6+
## Requirements
7+
8+
The Python scripts use PEP 723 dependency metadata and run with
9+
[uv](https://docs.astral.sh/uv/):
10+
11+
```console
12+
$ uv run pointblank_quickstart.py
13+
$ uv run pointblank_thresholds.py
14+
$ uv run pointblank_atoms.py
15+
```
16+
17+
The command-line examples can run without a project environment:
18+
19+
```console
20+
$ uv run --no-project --with 'pointblank[pl]' -- pb scan pointblank_atoms.csv
21+
$ uv run --no-project --with 'pointblank[pl]' -- pb missing pointblank_atoms.csv
22+
$ uvx --from 'pointblank[pl]' pb run pointblank_atoms.yaml --output-html pointblank_report.html
23+
```
24+
Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
atom_id,symbol,x,y,z,fx,fy,fz
2+
0,Cu,1.0,0.5,0.1,0.1,0.0,0.0
3+
1,Pt,2.1,1.5,0.2,-0.2,0.1,-0.1
4+
2,Cu,3.2,2.5,0.3,0.3,-0.1,0.1
5+
3,Pt,4.3,3.5,0.4,-0.1,0.0,0.0
6+
4,Cu,5.4,4.5,0.5,0.2,0.1,-0.1
7+
5,Pt,6.5,5.5,0.6,-0.3,-0.1,0.1
8+
6,Cu,7.6,6.5,0.7,0.1,0.0,0.0
9+
7,Pt,8.7,7.5,0.8,-0.2,0.1,-0.1
10+
8,Cu,9.8,8.5,0.9,0.3,-0.1,0.1
11+
9,Pt,10.9,9.5,1.0,-0.1,0.0,0.0
12+
10,Zz,0.5,0.5,0.1,0.0,0.0,0.0
13+
11,Cu,,1.5,0.2,0.0,0.0,0.0
14+
12,Pt,12.1,2.5,0.3,1500.0,0.0,0.0
Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,40 @@
1+
# /// script
2+
# requires-python = ">=3.10"
3+
# dependencies = [
4+
# "pointblank[pl]",
5+
# ]
6+
# ///
7+
8+
import polars as pl
9+
import pointblank as pb
10+
11+
VALID_ELEMENTS = {"Cu", "Pt"}
12+
13+
14+
def main() -> None:
    """Validate the sample atom table and split it into clean and dirty rows.

    Reads ``pointblank_atoms.csv`` from the directory that contains this
    script, checks that ``symbol`` is an allowed element, that ``x``, ``y``
    and ``z`` are present and within 0-20, and that ``fx`` is within
    +/-1000. It then prints the number of rows in the "pass" and "fail"
    splits followed by selected columns of the failing rows.
    """
    from pathlib import Path

    # Resolve the CSV next to this script so the example also works when it
    # is launched from a different working directory.
    csv_path = Path(__file__).resolve().parent / "pointblank_atoms.csv"
    atoms = pl.read_csv(csv_path)

    validation = (
        pb.Validate(
            data=atoms,
            tbl_name="atoms_from_parser",
            label="Round-trip validation before re-export",
            thresholds=pb.Thresholds(warning=0.05, error=0.10, critical=0.25),
        )
        # Sort the allowed symbols: iteration order of a set of strings can
        # differ between interpreter runs, which would otherwise reorder the
        # values passed to (and shown for) this step.
        .col_vals_in_set(columns="symbol", set=sorted(VALID_ELEMENTS))
        .col_vals_not_null(columns=["x", "y", "z"])
        .col_vals_between(columns=["x", "y", "z"], left=0, right=20)
        .col_vals_between(columns="fx", left=-1000, right=1000)
        .interrogate()
    )

    # Split the interrogated table into its "pass" and "fail" portions.
    clean = validation.get_sundered_data(type="pass")
    dirty = validation.get_sundered_data(type="fail")

    print(f"Safe to re-export: {len(clean)} rows")
    print(f"Needs review: {len(dirty)} rows\n")
    print(dirty.select(["atom_id", "symbol", "x", "fx"]))
37+
38+
39+
if __name__ == "__main__":
40+
main()
Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
tbl: pointblank_atoms.csv
2+
df_library: polars
3+
tbl_name: "Atom Validation"
4+
label: "Tutorial YAML validation"
5+
thresholds:
6+
warning: 0.02
7+
error: 0.05
8+
critical: 0.07
9+
steps:
10+
- col_vals_in_set:
11+
columns: symbol
12+
set: [Cu, Pt]
13+
- col_vals_not_null:
14+
columns: x
15+
- col_vals_between:
16+
columns: fx
17+
left: -1000
18+
right: 1000
Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,38 @@
1+
# /// script
2+
# requires-python = ">=3.10"
3+
# dependencies = [
4+
# "pointblank[pl]",
5+
# ]
6+
# ///
7+
8+
import pointblank as pb
9+
10+
11+
def main() -> None:
    """Run three checks on the ``small_table`` dataset and print step counts.

    Builds the validation plan one step at a time, interrogates it, and
    prints the assertion type with its passed/failed counts for each step.
    """
    small_table = pb.load_dataset("small_table", tbl_type="polars")

    plan = pb.Validate(
        data=small_table,
        tbl_name="small_table",
        label="Quickstart validation",
    )
    plan = plan.col_vals_between(columns="d", left=0, right=5000)
    plan = plan.col_vals_in_set(columns="f", set=["low", "mid", "high"])
    plan = plan.col_vals_not_null(columns="c")
    validation = plan.interrogate()

    print("Validation summary:\n")
    for info in validation.validation_info:
        # print() joins its arguments with single spaces, giving one
        # aligned summary line per validation step.
        kind = f"{info.assertion_type:>20}"
        passed = f"passed={info.n_passed:>2}"
        failed = f"failed={info.n_failed:>2}"
        print(kind, passed, failed)

    notebook_hint = (
        "\nRun this same object in a notebook to see the interactive report."
    )
    print(notebook_hint)
35+
36+
37+
if __name__ == "__main__":
38+
main()

python-pointblank/pointblank_report.html

Lines changed: 235 additions & 0 deletions
Large diffs are not rendered by default.
42.9 KB
Loading
Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
# Starter Pointblank template for adapting to your own pipeline.
2+
#
3+
# The column names below are placeholders that small_table does not contain,
# so run this template against a real file with:
4+
# uv run --no-project --with 'pointblank[pl]' -- pb run pointblank_starter_validation.yaml --data your_data.csv --fail-on critical
5+
6+
tbl: small_table
7+
df_library: polars
8+
tbl_name: "Starter Validation"
9+
label: "Adapt this template to your data"
10+
thresholds:
11+
warning: 0.02
12+
error: 0.05
13+
critical: 0.10
14+
steps:
15+
- col_exists:
16+
columns: [record_id, status, amount]
17+
- col_vals_not_null:
18+
columns: record_id
19+
- col_vals_in_set:
20+
columns: status
21+
set: [pending, shipped, delivered]
22+
- col_vals_gt:
23+
columns: amount
24+
value: 0
Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,48 @@
1+
# /// script
2+
# requires-python = ">=3.10"
3+
# dependencies = [
4+
# "pointblank[pl]",
5+
# ]
6+
# ///
7+
8+
import pointblank as pb
9+
10+
11+
def main() -> None:
    """Interrogate ``small_table`` with thresholds and actions, then report.

    Configures warning/error/critical thresholds plus message actions for
    the warning and critical levels, runs three checks, and prints whether
    everything passed, the result of the error-level threshold query, and a
    message if the critical-level assertion fails.
    """
    small_table = pb.load_dataset("small_table", tbl_type="polars")
    severity_limits = pb.Thresholds(warning=0.05, error=0.10, critical=0.15)

    # Message templates; the {step}, {level} and {type} placeholders are
    # left unformatted here and handed to pb.Actions as-is.
    warning_text = (
        "Warning: step {step} reached {level} severity during {type}."
    )
    critical_text = (
        "Critical: step {step} reached {level} severity during {type}."
    )
    notifications = pb.Actions(warning=warning_text, critical=critical_text)

    plan = pb.Validate(
        data=small_table,
        tbl_name="small_table",
        label="Threshold-driven validation",
        thresholds=severity_limits,
        actions=notifications,
    )
    plan = plan.col_vals_between(columns="d", left=0, right=5000)
    plan = plan.col_vals_not_null(columns="c")
    plan = plan.rows_distinct()
    validation = plan.interrogate()

    everything_passed = validation.all_passed()
    print("All checks passed perfectly:", everything_passed)

    over_error = validation.above_threshold(level="error")
    print("Anything above the error threshold:", over_error)

    # Report a failed critical-level assertion instead of letting it
    # propagate as an uncaught exception.
    try:
        validation.assert_below_threshold(level="critical")
    except AssertionError as exc:
        print("CI gate tripped:", exc)
45+
46+
47+
if __name__ == "__main__":
48+
main()

0 commit comments

Comments
 (0)