diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 97e476ee..ae4a1b4c 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -1,7 +1,7 @@ repos: - repo: https://github.com/pre-commit/pre-commit-hooks - rev: v4.5.0 + rev: v6.0.0 hooks: - id: check-added-large-files - id: check-case-conflict @@ -19,14 +19,14 @@ repos: - repo: https://github.com/astral-sh/ruff-pre-commit # Ruff version. - rev: v0.1.3 + rev: v0.15.6 hooks: - id: ruff args: ["--fix", "--show-fixes"] - id: ruff-format - repo: https://github.com/adamchainz/blacken-docs - rev: "1.16.0" + rev: "1.20.0" hooks: - id: blacken-docs additional_dependencies: @@ -40,28 +40,28 @@ repos: - id: rst-inline-touching-normal - repo: https://github.com/rstcheck/rstcheck - rev: v6.2.0 + rev: v6.2.5 hooks: - id: rstcheck additional_dependencies: [sphinx] args: ["--ignore-directives=doctest,testsetup,todo,automodule","--ignore-substitutions=release", "--report-level=error"] - repo: https://github.com/PyCQA/flake8 - rev: 3.9.2 + rev: 7.3.0 hooks: - id: flake8 files: \.rst$ additional_dependencies: [ 'flake8-rst==0.8.0', - 'flake8-bugbear==20.1.4', - 'flake8-logging-format==0.6.0', - 'flake8-implicit-str-concat==0.2.0', + 'flake8-bugbear==25.11.29', + 'flake8-logging-format==0.9.0', + 'flake8-implicit-str-concat==0.6.0', ] # We use the Python version instead of the original version which seems to require Docker # https://github.com/koalaman/shellcheck-precommit - repo: https://github.com/shellcheck-py/shellcheck-py - rev: v0.9.0.6 + rev: v0.11.0.1 hooks: - id: shellcheck name: shellcheck @@ -69,14 +69,14 @@ repos: stages: [manual] - repo: https://github.com/PyCQA/doc8 - rev: v1.1.1 + rev: v2.0.0 hooks: - id: doc8 args: ["--ignore=D001"] # ignore line length stages: [manual] - repo: https://github.com/sirosen/check-jsonschema - rev: 0.27.0 + rev: 0.37.0 hooks: - id: check-jsonschema name: "Check GitHub Workflows" @@ -86,7 +86,7 @@ repos: stages: [manual] - repo: https://github.com/ariebovenberg/slotscheck - rev: v0.17.0 + rev: v0.19.1 hooks: - id: slotscheck files: \.py$ @@ -95,13 +95,13 @@ repos: args: ["--no-strict-imports"] - repo: https://github.com/MarcoGorelli/cython-lint - rev: v0.16.2 + rev: v0.19.0 hooks: - id: cython-lint args: ["--no-pycodestyle"] - repo: https://github.com/codespell-project/codespell - rev: "v2.2.6" + rev: "v2.4.2" hooks: - id: codespell # Examples of errors or updates to justify the exceptions: diff --git a/bindings/python/benchmarks/benchmarks.py b/bindings/python/benchmarks/benchmarks.py index a1ba06ca..c95c741d 100644 --- a/bindings/python/benchmarks/benchmarks.py +++ b/bindings/python/benchmarks/benchmarks.py @@ -201,8 +201,7 @@ def setup(self): ) coll.insert_many([base_dict.copy() for _ in range(N_DOCS)]) print( - "%d docs, %dk each with %d keys" - % (N_DOCS, len(BSON.encode(base_dict)) // 1024, len(base_dict)) + f"{N_DOCS} docs, {len(BSON.encode(base_dict)) // 1024} each with {len(base_dict)} keys" ) # All of the following tests are being skipped because NumPy/Pandas/Polars do not work with nested arrays. @@ -248,8 +247,7 @@ def setup(self): ) coll.insert_many([base_dict.copy() for _ in range(N_DOCS)]) print( - "%d docs, %dk each with %d keys" - % (N_DOCS, len(BSON.encode(base_dict)) // 1024, len(base_dict)) + f"{N_DOCS} docs, {len(BSON.encode(base_dict)) // 1024} each with {len(base_dict)} keys" ) # All of the following tests are being skipped because NumPy/Pandas/Polars do not work with nested documents. @@ -287,8 +285,7 @@ def setup(self): ) coll.insert_many([base_dict.copy() for _ in range(N_DOCS)]) print( - "%d docs, %dk each with %d keys" - % (N_DOCS, len(BSON.encode(base_dict)) // 1024, len(base_dict)) + f"{N_DOCS} docs, {len(BSON.encode(base_dict)) // 1024} each with {len(base_dict)} keys" ) @@ -304,8 +301,7 @@ def setup(self): base_dict = dict([(k, math.pi) for k in self.large_doc_keys]) coll.insert_many([base_dict.copy() for _ in range(N_DOCS)]) print( - "%d docs, %dk each with %d keys" - % (N_DOCS, len(BSON.encode(base_dict)) // 1024, len(base_dict)) + f"{N_DOCS} docs, {len(BSON.encode(base_dict)) // 1024} each with {len(base_dict)} keys" ) @@ -324,8 +320,7 @@ def setup(self): ) coll.insert_many([base_dict.copy() for _ in range(N_DOCS)]) print( - "%d docs, %dk each with %d keys" - % (N_DOCS, len(BSON.encode(base_dict)) // 1024, len(base_dict)) + f"{N_DOCS} docs, {len(BSON.encode(base_dict)) // 1024} each with {len(base_dict)} keys" ) # This must be skipped because arrow can't read the Decimal128Type @@ -348,8 +343,7 @@ def setup(self): base_dict = dict([(k, Decimal128(k)) for k in self.large_doc_keys]) coll.insert_many([base_dict.copy() for _ in range(N_DOCS)]) print( - "%d docs, %dk each with %d keys" - % (N_DOCS, len(BSON.encode(base_dict)) // 1024, len(base_dict)) + f"{N_DOCS} docs, {len(BSON.encode(base_dict)) // 1024} each with {len(base_dict)} keys" ) # This must be skipped because arrow can't read the Decimal128Type @@ -371,8 +365,7 @@ def setup(self): base_dict = dict([("x", 1), ("y", math.pi)]) coll.insert_many([base_dict.copy() for _ in range(N_DOCS)]) print( - "%d docs, %dk each with %d keys" - % (N_DOCS, len(BSON.encode(base_dict)) // 1024, len(base_dict)) + f"{N_DOCS} docs, {len(BSON.encode(base_dict)) // 1024} each with {len(base_dict)} keys" ) self.arrow_table = find_arrow_all(db.benchmark, {}, schema=self.schema) self.pandas_table = find_pandas_all(db.benchmark, {}, schema=self.schema) @@ -391,8 +384,7 @@ def setup(self): base_dict = dict([(k, math.pi) for k in self.large_doc_keys]) coll.insert_many([base_dict.copy() for _ in range(N_DOCS)]) print( - "%d docs, %dk each with %d keys" - % (N_DOCS, len(BSON.encode(base_dict)) // 1024, len(base_dict)) + f"{N_DOCS} docs, {len(BSON.encode(base_dict)) // 1024} each with {len(base_dict)} keys" ) self.arrow_table = find_arrow_all(db.benchmark, {}, schema=self.schema) self.pandas_table = find_pandas_all(db.benchmark, {}, schema=self.schema) diff --git a/bindings/python/pymongoarrow/__init__.py b/bindings/python/pymongoarrow/__init__.py index bc14e94e..3f515555 100644 --- a/bindings/python/pymongoarrow/__init__.py +++ b/bindings/python/pymongoarrow/__init__.py @@ -42,8 +42,5 @@ if libbson_version is not None and _parse_version is not None: # noqa: SIM102 if _parse_version(libbson_version) < _parse_version(_MIN_LIBBSON_VERSION): - msg = ( - f"Expected libbson version {_MIN_LIBBSON_VERSION} or greater, " - f"found {libbson_version}" - ) + msg = f"Expected libbson version {_MIN_LIBBSON_VERSION} or greater, found {libbson_version}" raise ImportError(msg) diff --git a/bindings/python/pymongoarrow/api.py b/bindings/python/pymongoarrow/api.py index 1a7d5578..d8c8e3f4 100644 --- a/bindings/python/pymongoarrow/api.py +++ b/bindings/python/pymongoarrow/api.py @@ -64,16 +64,16 @@ from pymongoarrow.types import _validate_schema, get_numpy_type __all__ = [ + "Schema", "aggregate_arrow_all", - "find_arrow_all", - "aggregate_pandas_all", - "find_pandas_all", "aggregate_numpy_all", - "find_numpy_all", + "aggregate_pandas_all", "aggregate_polars_all", + "find_arrow_all", + "find_numpy_all", + "find_pandas_all", "find_polars_all", "write", - "Schema", ] diff --git a/bindings/python/pymongoarrow/lib.pyx b/bindings/python/pymongoarrow/lib.pyx index f8fdaee5..e9d735a1 100644 --- a/bindings/python/pymongoarrow/lib.pyx +++ b/bindings/python/pymongoarrow/lib.pyx @@ -836,7 +836,7 @@ cdef class DocumentBuilder(_ArrayBuilderBase): def finish(self): # Fields must be in order if we were given a schema. - return list(f.decode('utf-8') for f in self.field_map) + return [f.decode('utf-8') for f in self.field_map] cdef class ListBuilder(_ArrayBuilderBase): diff --git a/bindings/python/pymongoarrow/pandas_types.py b/bindings/python/pymongoarrow/pandas_types.py index e50fa137..ef1d8e78 100644 --- a/bindings/python/pymongoarrow/pandas_types.py +++ b/bindings/python/pymongoarrow/pandas_types.py @@ -183,6 +183,9 @@ def isna(self): dtype=bool, ) + def __hash__(self): + return hash(self.data) + def __eq__(self, other): return self.data == other @@ -267,6 +270,9 @@ def __arrow_array__(self, type=None): return pa.array(self.data, type=BinaryType(self.dtype.subtype)) + def __hash__(self): + return super().__hash__() + def __eq__(self, other): # Binary types do not support element-wise comparison. if isinstance(other, Binary): @@ -349,6 +355,9 @@ def __init__(self, values, dtype, copy=False) -> None: def _default_dtype(self): return PandasCode() + def __hash__(self): + return super().__hash__() + def __eq__(self, other): # Code types do not support element-wise comparison. if isinstance(other, Code): diff --git a/bindings/python/pymongoarrow/polars_types.py b/bindings/python/pymongoarrow/polars_types.py index 796f5169..5b074959 100644 --- a/bindings/python/pymongoarrow/polars_types.py +++ b/bindings/python/pymongoarrow/polars_types.py @@ -22,6 +22,9 @@ if pl: class PolarsExtensionBase(pl.datatypes.BaseExtension): + def __hash__(self): + return hash(self.ext_name(), self.ext_storage()) + def __eq__(self, other): return ( isinstance(other, self.__class__) diff --git a/bindings/python/pymongoarrow/schema.py b/bindings/python/pymongoarrow/schema.py index 72a91532..72e1c5ff 100644 --- a/bindings/python/pymongoarrow/schema.py +++ b/bindings/python/pymongoarrow/schema.py @@ -84,6 +84,9 @@ def _get_field_projection_value(self, fname, ftype, projection): projection[fname] = value return projection + def __hash__(self): + return hash(self.typemap) + def __eq__(self, other): if isinstance(other, type(self)): return self.typemap == other.typemap diff --git a/bindings/python/pymongoarrow/types.py b/bindings/python/pymongoarrow/types.py index e2a36f35..779f3fd9 100644 --- a/bindings/python/pymongoarrow/types.py +++ b/bindings/python/pymongoarrow/types.py @@ -255,7 +255,7 @@ def _is_code(obj): Decimal128: lambda _: Decimal128Type(), str: lambda _: string(), bool: lambda _: bool_(), - Binary: lambda subtype: BinaryType(subtype), + Binary: BinaryType, Code: lambda _: CodeType(), } diff --git a/bindings/python/pyproject.toml b/bindings/python/pyproject.toml index b37fd3e8..8ae5d30a 100644 --- a/bindings/python/pyproject.toml +++ b/bindings/python/pyproject.toml @@ -145,6 +145,7 @@ select = [ ignore = [ "PLR", # Design related pylint codes "RUF012", # Mutable class attributes should be annotated with `typing.ClassVar` + "PLC0415", # `import` should be at the top-level of a file ] unfixable = [ ] diff --git a/bindings/python/test/conftest.py b/bindings/python/test/conftest.py index 4dca5f7a..004076f2 100644 --- a/bindings/python/test/conftest.py +++ b/bindings/python/test/conftest.py @@ -11,10 +11,10 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -from test import client_context - import pytest +from test import client_context + try: import pandas as pd diff --git a/bindings/python/test/pandas_types/test_binary.py b/bindings/python/test/pandas_types/test_binary.py index 657f9546..a716d136 100644 --- a/bindings/python/test/pandas_types/test_binary.py +++ b/bindings/python/test/pandas_types/test_binary.py @@ -11,13 +11,12 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -from test.pandas_types.util import base_make_data - import numpy as np import pytest from bson import Binary from pymongoarrow.pandas_types import PandasBinary, PandasBinaryArray +from test.pandas_types.util import base_make_data try: from pandas.tests.extension import base diff --git a/bindings/python/test/pandas_types/test_code.py b/bindings/python/test/pandas_types/test_code.py index 03bce1bc..86da6b7f 100644 --- a/bindings/python/test/pandas_types/test_code.py +++ b/bindings/python/test/pandas_types/test_code.py @@ -11,13 +11,12 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -from test.pandas_types.util import base_make_data - import numpy as np import pytest from bson import Code from pymongoarrow.pandas_types import PandasCode, PandasCodeArray +from test.pandas_types.util import base_make_data try: from pandas.tests.extension import base diff --git a/bindings/python/test/pandas_types/test_decimal128.py b/bindings/python/test/pandas_types/test_decimal128.py index 90d76887..5a9e7feb 100644 --- a/bindings/python/test/pandas_types/test_decimal128.py +++ b/bindings/python/test/pandas_types/test_decimal128.py @@ -11,13 +11,12 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -from test.pandas_types.util import base_make_data - import numpy as np import pytest from bson import Decimal128 from pymongoarrow.pandas_types import PandasDecimal128, PandasDecimal128Array +from test.pandas_types.util import base_make_data try: from pandas.tests.extension import base diff --git a/bindings/python/test/pandas_types/test_objectid.py b/bindings/python/test/pandas_types/test_objectid.py index 932c33c5..d4c5821c 100644 --- a/bindings/python/test/pandas_types/test_objectid.py +++ b/bindings/python/test/pandas_types/test_objectid.py @@ -11,13 +11,12 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -from test.pandas_types.util import base_make_data - import numpy as np import pytest from bson import ObjectId from pymongoarrow.pandas_types import PandasObjectId, PandasObjectIdArray +from test.pandas_types.util import base_make_data try: from pandas.tests.extension import base diff --git a/bindings/python/test/test_arrow.py b/bindings/python/test/test_arrow.py index 61014ef0..909e7745 100644 --- a/bindings/python/test/test_arrow.py +++ b/bindings/python/test/test_arrow.py @@ -21,8 +21,6 @@ import unittest.mock as mock from datetime import date, datetime, timedelta from pathlib import Path -from test import client_context -from test.utils import AllowListEventListener, NullsTestMixin import bson import pyarrow as pa @@ -65,6 +63,8 @@ Decimal128Type, ObjectIdType, ) +from test import client_context +from test.utils import AllowListEventListener, NullsTestMixin try: import pandas as pd @@ -1171,26 +1171,28 @@ def compare_arrow_mongodb_data(self, arrow_table, mongo_data): if isinstance(arrow_value, decimal.Decimal): assert ( Decimal128(arrow_value).to_decimal() == Decimal128(mongo_value).to_decimal() - ), f"Precision loss in decimal field '{column_name}' for row {row_idx}. Expected {arrow_value}, got {mongo_value}." + ), ( + f"Precision loss in decimal field '{column_name}' for row {row_idx}. Expected {arrow_value}, got {mongo_value}." + ) elif isinstance(arrow_value, (np.datetime64, pd.Timestamp, datetime)): arrow_value_rounded = pd.Timestamp(arrow_value).round( "ms" ) # Round to milliseconds - assert ( - arrow_value_rounded.to_pydatetime() == mongo_value - ), f"Datetime mismatch in field '{column_name}' for row {row_idx}. Expected {arrow_value_rounded}, got {mongo_value}." + assert arrow_value_rounded.to_pydatetime() == mongo_value, ( + f"Datetime mismatch in field '{column_name}' for row {row_idx}. Expected {arrow_value_rounded}, got {mongo_value}." + ) elif isinstance(arrow_value, (list, np.ndarray)): - assert ( - arrow_value == mongo_value - ), f"List mismatch in field '{column_name}' for row {row_idx}. Expected {arrow_value}, got {mongo_value}." + assert arrow_value == mongo_value, ( + f"List mismatch in field '{column_name}' for row {row_idx}. Expected {arrow_value}, got {mongo_value}." + ) elif isinstance(arrow_value, timedelta): - assert ( - arrow_value == timedelta(seconds=mongo_value) - ), f"Timedelta mismatch in field '{column_name}' for row {row_idx}. Expected {arrow_value}, got {mongo_value}." + assert arrow_value == timedelta(seconds=mongo_value), ( + f"Timedelta mismatch in field '{column_name}' for row {row_idx}. Expected {arrow_value}, got {mongo_value}." + ) else: - assert ( - arrow_value == mongo_value - ), f"Value mismatch in field '{column_name}' for row {row_idx}. Expected {arrow_value}, got {mongo_value}." + assert arrow_value == mongo_value, ( + f"Value mismatch in field '{column_name}' for row {row_idx}. Expected {arrow_value}, got {mongo_value}." + ) def test_all_types(self): """ diff --git a/bindings/python/test/test_builders.py b/bindings/python/test/test_builders.py index 0db38af2..4cff9304 100644 --- a/bindings/python/test/test_builders.py +++ b/bindings/python/test/test_builders.py @@ -17,7 +17,6 @@ from bson import Binary, Code, Decimal128, ObjectId, encode from pyarrow import Array, bool_, int32, int64, timestamp - from pymongoarrow.lib import ( BinaryBuilder, BoolBuilder, diff --git a/bindings/python/test/test_datetime.py b/bindings/python/test/test_datetime.py index abd438a5..fb9523a8 100644 --- a/bindings/python/test/test_datetime.py +++ b/bindings/python/test/test_datetime.py @@ -13,7 +13,6 @@ # limitations under the License. import unittest from datetime import datetime, timedelta, timezone -from test import client_context import pytz from bson.codec_options import CodecOptions @@ -23,6 +22,7 @@ from pymongoarrow.api import Schema, find_arrow_all from pymongoarrow.context import PyMongoArrowContext +from test import client_context class TestDateTimeType(unittest.TestCase): diff --git a/bindings/python/test/test_numpy.py b/bindings/python/test/test_numpy.py index bfdfe1d0..48f7abc3 100644 --- a/bindings/python/test/test_numpy.py +++ b/bindings/python/test/test_numpy.py @@ -14,8 +14,6 @@ # from datetime import datetime, timedelta import datetime import unittest -from test import client_context -from test.utils import AllowListEventListener, NullsTestMixin from unittest import mock import numpy as np @@ -28,6 +26,8 @@ from pymongoarrow.api import Schema, aggregate_numpy_all, find_numpy_all, write from pymongoarrow.errors import ArrowWriteError from pymongoarrow.types import _TYPE_NORMALIZER_FACTORY, Decimal128Type, ObjectIdType +from test import client_context +from test.utils import AllowListEventListener, NullsTestMixin class NumpyTestBase(unittest.TestCase): diff --git a/bindings/python/test/test_pandas.py b/bindings/python/test/test_pandas.py index 2f7a9c78..bb2c7a59 100644 --- a/bindings/python/test/test_pandas.py +++ b/bindings/python/test/test_pandas.py @@ -19,8 +19,6 @@ import unittest import unittest.mock as mock import warnings -from test import client_context -from test.utils import AllowListEventListener, NullsTestMixin import numpy as np import pyarrow as pa @@ -35,6 +33,8 @@ from pymongoarrow.errors import ArrowWriteError from pymongoarrow.pandas_types import PandasBSONDtype, PandasDecimal128, PandasObjectId from pymongoarrow.types import _TYPE_NORMALIZER_FACTORY, Decimal128Type, ObjectIdType +from test import client_context +from test.utils import AllowListEventListener, NullsTestMixin try: import pandas as pd @@ -430,7 +430,7 @@ def find_fn(self, coll, query, schema): def equal_fn(self, left, right): left = left.fillna(0) right = right.fillna(0) - if type(left) == pandas.DataFrame: + if isinstance(left, pandas.DataFrame): pandas.testing.assert_frame_equal(left, right, check_dtype=False) else: pandas.testing.assert_series_equal(left, right, check_dtype=False) diff --git a/bindings/python/test/test_polars.py b/bindings/python/test/test_polars.py index 5cdce93a..4c06a43c 100644 --- a/bindings/python/test/test_polars.py +++ b/bindings/python/test/test_polars.py @@ -18,8 +18,6 @@ import unittest.mock as mock import uuid from datetime import datetime -from test import client_context -from test.utils import AllowListEventListener import bson import pyarrow as pa @@ -38,6 +36,8 @@ Decimal128Type, ObjectIdType, ) +from test import client_context +from test.utils import AllowListEventListener try: import polars as pl diff --git a/bindings/python/test/test_pymongoarrow.py b/bindings/python/test/test_pymongoarrow.py index 318a5f15..e79d9f17 100644 --- a/bindings/python/test/test_pymongoarrow.py +++ b/bindings/python/test/test_pymongoarrow.py @@ -12,11 +12,11 @@ # See the License for the specific language governing permissions and # limitations under the License. import unittest -from test import client_context from pymongoarrow.api import find_arrow_all, find_pandas_all, find_polars_all from pymongoarrow.schema import Schema from pymongoarrow.version import __version__ +from test import client_context class TestPyMongoArrow(unittest.TestCase): diff --git a/bindings/python/test/utils.py b/bindings/python/test/utils.py index 9b1f419a..a1913bc2 100644 --- a/bindings/python/test/utils.py +++ b/bindings/python/test/utils.py @@ -14,7 +14,6 @@ import datetime import unittest from collections import defaultdict -from test import client_context import numpy as np import pyarrow as pa @@ -30,6 +29,7 @@ ObjectIdType, _in_type_map, ) +from test import client_context def isnan(inp):