|
1 | 1 | """Pandas DataFrame related utils. |
2 | 2 | """ |
| 3 | + |
3 | 4 | from pathlib import Path |
4 | | -from loguru import logger |
5 | 5 | import pandas as pd |
6 | | -from ydata_profiling import ProfileReport |
7 | 6 |
|
8 | 7 |
|
9 | 8 | def table_2w( |
@@ -45,39 +44,3 @@ def read_csv(path: str | Path, **kwargs) -> pd.DataFrame: |
45 | 44 | if path.is_file(): |
46 | 45 | return pd.read_csv(path, **kwargs) |
47 | 46 | return pd.concat(pd.read_csv(csv, **kwargs) for csv in path.glob("*.csv")) |
48 | | - |
49 | | - |
50 | | -def dump_profile(df: pd.DataFrame | str | Path, title: str, output_dir: str | Path): |
51 | | - """Run ydata-profiling on a DataFrame and dump the report into files. |
52 | | -
|
53 | | - :param df: A pandas DataFrame. |
54 | | - :param title: The title of the generated report. |
55 | | - :param output_dir: The output directory for reports. |
56 | | - :raises ValueError: If an input file other than Parquet/Pickle/CSV is provided. |
57 | | - """ |
58 | | - if isinstance(df, str): |
59 | | - df = Path(df) |
60 | | - if isinstance(df, Path): |
61 | | - logger.info("Reading the DataFrame from {}...", df) |
62 | | - ext = df.suffix.lower() |
63 | | - if ext == ".parquet": |
64 | | - df = pd.read_parquet(df) |
65 | | - elif ext == ".pickle": |
66 | | - df = pd.read_pickle(df) |
67 | | - elif ext == ".csv": |
68 | | - df = pd.read_csv(df) |
69 | | - else: |
70 | | - raise ValueError("Only Parquet, Pickle and CSV files are support!") |
71 | | - logger.info("Shape of the DataFrame: {}", df.shape) |
72 | | - logger.info("Profiling the DataFrame...") |
73 | | - report = ProfileReport(df, title=title, minimal=True, explorative=True) |
74 | | - if isinstance(output_dir, str): |
75 | | - output_dir = Path(output_dir) |
76 | | - output_dir.mkdir(exist_ok=True) |
77 | | - # dump report |
78 | | - logger.info("Dumping the report to HTML...") |
79 | | - report.to_file(output_dir / "report.html") |
80 | | - logger.info("Dumping the report to JSON...") |
81 | | - report.to_file(output_dir / "report.json") |
82 | | - logger.info("Dumping the report to Pickle...") |
83 | | - report.dump(output_dir / "report.pickle") |
0 commit comments