|
1 | 1 | import datetime as dt |
2 | | -import os |
| 2 | +import posixpath |
3 | 3 | import uuid |
4 | 4 | from typing import Generator |
5 | 5 |
|
|
10 | 10 | import pyarrow.parquet as pq |
11 | 11 | from fsspec import AbstractFileSystem |
12 | 12 |
|
13 | | -from ..misc import convert_large_types_to_standard, run_parallel, _dict_to_dataframe |
14 | | -from ..polars import pl |
| 13 | +from ..utils.misc import ( |
| 14 | + convert_large_types_to_standard, |
| 15 | + run_parallel, |
| 16 | + _dict_to_dataframe, |
| 17 | +) |
| 18 | +from ..utils.polars import pl |
15 | 19 |
|
16 | 20 | import importlib |
17 | 21 |
|
@@ -72,8 +76,8 @@ def _read_json( |
72 | 76 | if "**" in path: |
73 | 77 | path = self.glob(path) |
74 | 78 | else: |
75 | | - if ".json" not in os.path.basename(path): |
76 | | - path = os.path.join(path, "**/*.jsonl" if jsonlines else "**/*.json") |
| 79 | + if ".json" not in posixpath.basename(path): |
| 80 | + path = posixpath.join(path, "**/*.jsonl" if jsonlines else "**/*.json") |
77 | 81 | path = self.glob(path) |
78 | 82 |
|
79 | 83 | if isinstance(path, list): |
@@ -147,8 +151,8 @@ def _read_json_batches( |
147 | 151 | if "**" in path: |
148 | 152 | path = self.glob(path) |
149 | 153 | else: |
150 | | - if ".json" not in os.path.basename(path): |
151 | | - path = os.path.join(path, "**/*.jsonl" if jsonlines else "**/*.json") |
| 154 | + if ".json" not in posixpath.basename(path): |
| 155 | + path = posixpath.join(path, "**/*.jsonl" if jsonlines else "**/*.json") |
152 | 156 | path = self.glob(path) |
153 | 157 |
|
154 | 158 | if isinstance(path, str): |
@@ -305,8 +309,8 @@ def _read_csv( |
305 | 309 | if "**" in path: |
306 | 310 | path = self.glob(path) |
307 | 311 | else: |
308 | | - if ".csv" not in os.path.basename(path): |
309 | | - path = os.path.join(path, "**/*.csv") |
| 312 | + if ".csv" not in posixpath.basename(path): |
| 313 | + path = posixpath.join(path, "**/*.csv") |
310 | 314 | path = self.glob(path) |
311 | 315 |
|
312 | 316 | if isinstance(path, list): |
@@ -363,8 +367,8 @@ def _read_csv_batches( |
363 | 367 | if "**" in path: |
364 | 368 | path = self.glob(path) |
365 | 369 | else: |
366 | | - if ".csv" not in os.path.basename(path): |
367 | | - path = os.path.join(path, "**/*.csv") |
| 370 | + if ".csv" not in posixpath.basename(path): |
| 371 | + path = posixpath.join(path, "**/*.csv") |
368 | 372 | path = self.glob(path) |
369 | 373 |
|
370 | 374 | # Ensure path is a list |
@@ -497,12 +501,12 @@ def _read_parquet( |
497 | 501 | if isinstance(path, str): |
498 | 502 | if "**" in path: |
499 | 503 | if "*.parquet" in path: |
500 | | - path = os.path.join(path, "*.parquet") |
| 504 | + path = posixpath.join(path, "*.parquet") |
501 | 505 |
|
502 | 506 | path = self.glob(path) |
503 | 507 | else: |
504 | 508 | if ".parquet" in path: |
505 | | - path = os.path.join(path, "**/*.parquet") |
| 509 | + path = posixpath.join(path, "**/*.parquet") |
506 | 510 | path = self.glob(path) |
507 | 511 |
|
508 | 512 | if isinstance(path, list): |
@@ -568,11 +572,11 @@ def _read_parquet_batches( |
568 | 572 | if isinstance(path, str): |
569 | 573 | if "**" in path: |
570 | 574 | if "*.parquet" not in path: |
571 | | - path = os.path.join(path, "**/*.parquet") |
| 575 | + path = posixpath.join(path, "**/*.parquet") |
572 | 576 | path = self.glob(path) |
573 | 577 | else: |
574 | 578 | if ".parquet" not in path: |
575 | | - path = os.path.join(path, "**/*.parquet") |
| 579 | + path = posixpath.join(path, "**/*.parquet") |
576 | 580 | path = self.glob(path) |
577 | 581 |
|
578 | 582 | if not isinstance(path, list): |
@@ -825,7 +829,7 @@ def pyarrow_parquet_dataset( |
825 | 829 | (pds.Dataset): Pyarrow dataset. |
826 | 830 | """ |
827 | 831 | if not self.is_file(path): |
828 | | - path = os.path.join(path, "_metadata") |
| 832 | + path = posixpath.join(path, "_metadata") |
829 | 833 | return pds.dataset( |
830 | 834 | path, |
831 | 835 | filesystem=self, |
@@ -1076,7 +1080,7 @@ def _write(i, data, p, basename): |
1076 | 1080 | if mode == "delete_matching": |
1077 | 1081 | write_file(self, data[i], p, format, **kwargs) |
1078 | 1082 | elif mode == "overwrite": |
1079 | | - self.fs.rm(os.path.dirname(p), recursive=True) |
| 1083 | + self.fs.rm(posixpath.dirname(p), recursive=True) |
1080 | 1084 | write_file(self, data[i], p, format, **kwargs) |
1081 | 1085 | elif mode == "append": |
1082 | 1086 | if not self.exists(p): |
|
0 commit comments