|
32 | 32 | from pandas.core.dtypes.common import (
|
33 | 33 | ensure_str,
|
34 | 34 | is_string_dtype,
|
| 35 | + pandas_dtype, |
35 | 36 | )
|
36 | 37 | from pandas.core.dtypes.dtypes import PeriodDtype
|
37 | 38 |
|
38 | 39 | from pandas import (
|
| 40 | + ArrowDtype, |
39 | 41 | DataFrame,
|
40 | 42 | Index,
|
41 | 43 | MultiIndex,
|
@@ -942,29 +944,61 @@ def read(self) -> DataFrame | Series:
|
942 | 944 | obj: DataFrame | Series
|
943 | 945 | with self:
|
944 | 946 | if self.engine == "pyarrow":
|
945 |
| - pyarrow_json = import_optional_dependency("pyarrow.json") |
946 |
| - pa_table = pyarrow_json.read_json(self.data) |
947 |
| - return arrow_table_to_pandas(pa_table, dtype_backend=self.dtype_backend) |
| 947 | + obj = self._read_pyarrow() |
948 | 948 | elif self.engine == "ujson":
|
949 |
| - if self.lines: |
950 |
| - if self.chunksize: |
951 |
| - obj = concat(self) |
952 |
| - elif self.nrows: |
953 |
| - lines = list(islice(self.data, self.nrows)) |
954 |
| - lines_json = self._combine_lines(lines) |
955 |
| - obj = self._get_object_parser(lines_json) |
956 |
| - else: |
957 |
| - data = ensure_str(self.data) |
958 |
| - data_lines = data.split("\n") |
959 |
| - obj = self._get_object_parser(self._combine_lines(data_lines)) |
960 |
| - else: |
961 |
| - obj = self._get_object_parser(self.data) |
962 |
| - if self.dtype_backend is not lib.no_default: |
963 |
| - return obj.convert_dtypes( |
964 |
| - infer_objects=False, dtype_backend=self.dtype_backend |
965 |
| - ) |
966 |
| - else: |
967 |
| - return obj |
| 949 | + obj = self._read_ujson() |
| 950 | + |
| 951 | + return obj |
| 952 | + |
def _read_pyarrow(self) -> DataFrame:
    """
    Read JSON using the pyarrow engine.

    When ``self.dtype`` is a dict, every entry that resolves to an
    ``ArrowDtype`` is collected into an explicit pyarrow schema that is
    handed to the parser; entries resolving to any other dtype are left
    out of the schema. Fields not covered by the schema are parsed with
    ``unexpected_field_behavior="infer"``. When ``self.dtype`` is not a
    dict, no parse options are supplied.

    Returns
    -------
    DataFrame
        The parsed table converted through ``arrow_table_to_pandas`` with
        ``self.dtype_backend``.
    """
    pyarrow_json = import_optional_dependency("pyarrow.json")
    parse_options = None

    if isinstance(self.dtype, dict):
        pa = import_optional_dependency("pyarrow")
        # Resolve every requested dtype first, preserving dict order.
        resolved = [
            (column, pandas_dtype(spec)) for column, spec in self.dtype.items()
        ]
        # Only Arrow-backed dtypes carry a pyarrow type usable in a schema.
        arrow_fields = [
            (column, candidate.pyarrow_dtype)
            for column, candidate in resolved
            if isinstance(candidate, ArrowDtype)
        ]
        parse_options = pyarrow_json.ParseOptions(
            explicit_schema=pa.schema(arrow_fields),
            unexpected_field_behavior="infer",
        )

    table = pyarrow_json.read_json(self.data, parse_options=parse_options)
    return arrow_table_to_pandas(table, dtype_backend=self.dtype_backend)
| 977 | + |
def _read_ujson(self) -> DataFrame | Series:
    """
    Read JSON using the ujson engine.

    Without ``self.lines`` the data is passed to the object parser as is.
    With ``self.lines``:

    * if ``self.chunksize`` is set, the reader itself is passed to
      ``concat``;
    * else if ``self.nrows`` is set, only the first ``self.nrows`` items
      of ``self.data`` are combined and parsed;
    * otherwise the data is coerced to ``str``, split on newlines,
      combined and parsed.

    Returns
    -------
    DataFrame or Series
        The parsed object, passed through ``convert_dtypes`` when
        ``self.dtype_backend`` has been set explicitly.
    """
    parsed: DataFrame | Series
    if not self.lines:
        parsed = self._get_object_parser(self.data)
    elif self.chunksize:
        parsed = concat(self)
    else:
        if self.nrows:
            # Take just the leading ``nrows`` records from the input.
            raw_lines = list(islice(self.data, self.nrows))
        else:
            raw_lines = ensure_str(self.data).split("\n")
        parsed = self._get_object_parser(self._combine_lines(raw_lines))

    if self.dtype_backend is lib.no_default:
        return parsed
    return parsed.convert_dtypes(
        infer_objects=False, dtype_backend=self.dtype_backend
    )
968 | 1002 |
|
969 | 1003 | def _get_object_parser(self, json: str) -> DataFrame | Series:
|
970 | 1004 | """
|
|
0 commit comments