Skip to content

Commit 9409309

Browse files
authored
Merge pull request #412 from legendu-net/dev
Merge dev into main
2 parents 9dda0cc + e352f23 commit 9409309

28 files changed

+1389
-2140
lines changed

aiutil/__init__.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
"""A utils Python package for data scientists.
22
"""
3+
34
from . import poetry
45

5-
__version__ = "0.82.0"
6+
__version__ = "0.83.0"

aiutil/collections.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
"""Collections related utils.
22
"""
3+
34
from typing import Callable, Any
45

56

aiutil/cv.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
"""Computer vision related utils.
22
"""
3+
34
from typing import Iterable
45
from pathlib import Path
56
from tqdm import tqdm, trange

aiutil/dataframe.py

Lines changed: 1 addition & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,8 @@
11
"""Pandas DataFrame related utils.
22
"""
3+
34
from pathlib import Path
4-
from loguru import logger
55
import pandas as pd
6-
from ydata_profiling import ProfileReport
76

87

98
def table_2w(
@@ -45,39 +44,3 @@ def read_csv(path: str | Path, **kwargs) -> pd.DataFrame:
4544
if path.is_file():
4645
return pd.read_csv(path, **kwargs)
4746
return pd.concat(pd.read_csv(csv, **kwargs) for csv in path.glob("*.csv"))
48-
49-
50-
def dump_profile(df: pd.DataFrame | str | Path, title: str, output_dir: str | Path):
51-
"""Run ydata-profiling on a DataFrame and dump the report into files.
52-
53-
:param df: A pandas DataFrame, or a path (str or Path) to a Parquet, Pickle or CSV file to read the DataFrame from.
54-
:param title: The title of the generated report.
55-
:param output_dir: The output directory for reports.
56-
:raises ValueError: If an input file other than Parquet/Pickle/CSV is provided.
57-
"""
58-
if isinstance(df, str):
59-
df = Path(df)
60-
if isinstance(df, Path):
61-
logger.info("Reading the DataFrame from {}...", df)
62-
ext = df.suffix.lower()
63-
if ext == ".parquet":
64-
df = pd.read_parquet(df)
65-
elif ext == ".pickle":
66-
df = pd.read_pickle(df)
67-
elif ext == ".csv":
68-
df = pd.read_csv(df)
69-
else:
70-
raise ValueError("Only Parquet, Pickle and CSV files are support!")
71-
logger.info("Shape of the DataFrame: {}", df.shape)
72-
logger.info("Profiling the DataFrame...")
73-
report = ProfileReport(df, title=title, minimal=True, explorative=True)
74-
if isinstance(output_dir, str):
75-
output_dir = Path(output_dir)
76-
output_dir.mkdir(exist_ok=True)
77-
# dump report
78-
logger.info("Dumping the report to HTML...")
79-
report.to_file(output_dir / "report.html")
80-
logger.info("Dumping the report to JSON...")
81-
report.to_file(output_dir / "report.json")
82-
logger.info("Dumping the report to Pickle...")
83-
report.dump(output_dir / "report.pickle")

0 commit comments

Comments
 (0)