|
5 | 5 | import argparse |
6 | 6 | import csv |
7 | 7 | import gc |
| 8 | +import importlib |
8 | 9 | import os |
9 | 10 | import shutil |
10 | 11 | import statistics |
|
20 | 21 |
|
21 | 22 | import polars_janitor as pj |
22 | 23 |
|
23 | | -if TYPE_CHECKING: |
24 | | - from collections.abc import Callable |
25 | | - from types import ModuleType |
26 | | - |
27 | 24 | try: |
28 | | - import janitor as pyjanitor |
29 | | - import pandas as pd |
| 25 | + pyjanitor = importlib.import_module("janitor") |
30 | 26 | except ImportError: |
31 | 27 | pyjanitor = None |
| 28 | + |
| 29 | +try: |
| 30 | + importlib.import_module("janitor.polars") |
| 31 | + pyjanitor_polars_available = True |
| 32 | +except ImportError: |
| 33 | + pyjanitor_polars_available = False |
| 34 | + |
| 35 | +try: |
| 36 | + pd = importlib.import_module("pandas") |
| 37 | +except ImportError: |
32 | 38 | pd = None |
33 | 39 |
|
| 40 | +if TYPE_CHECKING: |
| 41 | + from collections.abc import Callable |
| 42 | + from types import ModuleType |
| 43 | + |
34 | 44 |
|
35 | 45 | PATTERNS = [ |
36 | 46 | "Customer ID", |
@@ -265,7 +275,46 @@ def bench_polars_janitor( |
265 | 275 | ] |
266 | 276 |
|
267 | 277 |
|
268 | | -def bench_pyjanitor( |
def bench_pyjanitor_polars(
    *,
    repeats: int,
    clean_size: int,
    header_size: int,
) -> list[BenchmarkResult]:
    """Time pyjanitor's Polars methods, or return nothing if they are unavailable.

    Args:
        repeats: Forwarded to ``median_ms`` as its ``repeats`` keyword.
        clean_size: Passed to ``make_polars_frame`` and recorded as the size of
            the ``clean_names`` result.
        header_size: Passed to ``make_polars_sheet`` and recorded as the size of
            the ``row_to_names + clean_names`` result.

    Returns:
        An empty list when ``pyjanitor_polars_available`` is false; otherwise two
        ``BenchmarkResult`` entries labelled ``"pyjanitor/Polars"``, in the order
        ``clean_names`` then ``row_to_names + clean_names``.
    """
    if not pyjanitor_polars_available:
        return []

    implementation = "pyjanitor/Polars"
    # Fixtures are created once, outside the timed callables.
    frame_fixture = make_polars_frame(clean_size)
    sheet_fixture = make_polars_sheet(header_size)

    # NOTE(review): presumably these methods exist on the fixtures because
    # ``janitor.polars`` was imported at module load -- confirm.
    def clean_only():
        return frame_fixture.clean_names(strip_accents=True, remove_special=True)

    def promote_header_then_clean():
        return sheet_fixture.row_to_names(
            row_numbers=1,
            remove_rows=True,
            remove_rows_above=True,
        ).clean_names(strip_accents=True, remove_special=True)

    results = []
    for task, size, workload in (
        ("clean_names", clean_size, clean_only),
        ("row_to_names + clean_names", header_size, promote_header_then_clean),
    ):
        # Measure first, then wrap, so each case is timed and recorded in turn.
        timing = median_ms(workload, repeats=repeats)
        results.append(BenchmarkResult(task, size, implementation, timing))
    return results
| 315 | + |
| 316 | + |
| 317 | +def bench_pyjanitor_pandas( |
269 | 318 | *, |
270 | 319 | repeats: int, |
271 | 320 | clean_size: int, |
@@ -385,10 +434,10 @@ def print_markdown(results: list[BenchmarkResult]) -> None: |
385 | 434 | """Print a README-ready Markdown table.""" |
386 | 435 | by_key = {(result.task, result.size, result.implementation): result for result in results} |
387 | 436 | task_sizes = sorted({(result.task, result.size) for result in results}) |
388 | | - implementations = ["polars-janitor", "pyjanitor/pandas", "R janitor"] |
| 437 | + implementations = ["polars-janitor", "pyjanitor/Polars", "pyjanitor/pandas", "R janitor"] |
389 | 438 |
|
390 | | - print("| Task | Size | polars-janitor | pyjanitor/pandas | R janitor |") |
391 | | - print("| --- | ---: | ---: | ---: | ---: |") |
| 439 | + print("| Task | Size | polars-janitor | pyjanitor/Polars | pyjanitor/pandas | R janitor |") |
| 440 | + print("| --- | ---: | ---: | ---: | ---: | ---: |") |
392 | 441 | for task, size in task_sizes: |
393 | 442 | values = [] |
394 | 443 | for implementation in implementations: |
@@ -420,7 +469,12 @@ def main() -> None: |
420 | 469 | header_size=args.header_size, |
421 | 470 | compare_size=args.compare_size, |
422 | 471 | ), |
423 | | - *bench_pyjanitor( |
| 472 | + *bench_pyjanitor_polars( |
| 473 | + repeats=args.repeats, |
| 474 | + clean_size=args.clean_size, |
| 475 | + header_size=args.header_size, |
| 476 | + ), |
| 477 | + *bench_pyjanitor_pandas( |
424 | 478 | repeats=args.repeats, |
425 | 479 | clean_size=args.clean_size, |
426 | 480 | header_size=args.header_size, |
|
0 commit comments