Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
28 changes: 14 additions & 14 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@

repos:
- repo: https://github.com/pre-commit/pre-commit-hooks
rev: v4.5.0
rev: v6.0.0
hooks:
- id: check-added-large-files
- id: check-case-conflict
Expand All @@ -19,14 +19,14 @@ repos:

- repo: https://github.com/astral-sh/ruff-pre-commit
# Ruff version.
rev: v0.1.3
rev: v0.15.6
hooks:
- id: ruff
args: ["--fix", "--show-fixes"]
- id: ruff-format

- repo: https://github.com/adamchainz/blacken-docs
rev: "1.16.0"
rev: "1.20.0"
hooks:
- id: blacken-docs
additional_dependencies:
Expand All @@ -40,43 +40,43 @@ repos:
- id: rst-inline-touching-normal

- repo: https://github.com/rstcheck/rstcheck
rev: v6.2.0
rev: v6.2.5
hooks:
- id: rstcheck
additional_dependencies: [sphinx]
args: ["--ignore-directives=doctest,testsetup,todo,automodule","--ignore-substitutions=release", "--report-level=error"]

- repo: https://github.com/PyCQA/flake8
rev: 3.9.2
rev: 7.3.0
hooks:
- id: flake8
files: \.rst$
additional_dependencies: [
'flake8-rst==0.8.0',
'flake8-bugbear==20.1.4',
'flake8-logging-format==0.6.0',
'flake8-implicit-str-concat==0.2.0',
'flake8-bugbear==25.11.29',
'flake8-logging-format==0.9.0',
'flake8-implicit-str-concat==0.6.0',
]

# We use the Python version instead of the original version which seems to require Docker
# https://github.com/koalaman/shellcheck-precommit
- repo: https://github.com/shellcheck-py/shellcheck-py
rev: v0.9.0.6
rev: v0.11.0.1
hooks:
- id: shellcheck
name: shellcheck
args: ["--severity=warning"]
stages: [manual]

- repo: https://github.com/PyCQA/doc8
rev: v1.1.1
rev: v2.0.0
hooks:
- id: doc8
args: ["--ignore=D001"] # ignore line length
stages: [manual]

- repo: https://github.com/sirosen/check-jsonschema
rev: 0.27.0
rev: 0.37.0
hooks:
- id: check-jsonschema
name: "Check GitHub Workflows"
Expand All @@ -86,7 +86,7 @@ repos:
stages: [manual]

- repo: https://github.com/ariebovenberg/slotscheck
rev: v0.17.0
rev: v0.19.1
hooks:
- id: slotscheck
files: \.py$
Expand All @@ -95,13 +95,13 @@ repos:
args: ["--no-strict-imports"]

- repo: https://github.com/MarcoGorelli/cython-lint
rev: v0.16.2
rev: v0.19.0
hooks:
- id: cython-lint
args: ["--no-pycodestyle"]

- repo: https://github.com/codespell-project/codespell
rev: "v2.2.6"
rev: "v2.4.2"
hooks:
- id: codespell
# Examples of errors or updates to justify the exceptions:
Expand Down
24 changes: 8 additions & 16 deletions bindings/python/benchmarks/benchmarks.py
Original file line number Diff line number Diff line change
Expand Up @@ -201,8 +201,7 @@ def setup(self):
)
coll.insert_many([base_dict.copy() for _ in range(N_DOCS)])
print(
"%d docs, %dk each with %d keys"
% (N_DOCS, len(BSON.encode(base_dict)) // 1024, len(base_dict))
f"{N_DOCS} docs, {len(BSON.encode(base_dict)) // 1024}k each with {len(base_dict)} keys"
)

# All of the following tests are being skipped because NumPy/Pandas/Polars do not work with nested arrays.
Expand Down Expand Up @@ -248,8 +247,7 @@ def setup(self):
)
coll.insert_many([base_dict.copy() for _ in range(N_DOCS)])
print(
"%d docs, %dk each with %d keys"
% (N_DOCS, len(BSON.encode(base_dict)) // 1024, len(base_dict))
f"{N_DOCS} docs, {len(BSON.encode(base_dict)) // 1024}k each with {len(base_dict)} keys"
)

# All of the following tests are being skipped because NumPy/Pandas/Polars do not work with nested documents.
Expand Down Expand Up @@ -287,8 +285,7 @@ def setup(self):
)
coll.insert_many([base_dict.copy() for _ in range(N_DOCS)])
print(
"%d docs, %dk each with %d keys"
% (N_DOCS, len(BSON.encode(base_dict)) // 1024, len(base_dict))
f"{N_DOCS} docs, {len(BSON.encode(base_dict)) // 1024}k each with {len(base_dict)} keys"
)


Expand All @@ -304,8 +301,7 @@ def setup(self):
base_dict = dict([(k, math.pi) for k in self.large_doc_keys])
coll.insert_many([base_dict.copy() for _ in range(N_DOCS)])
print(
"%d docs, %dk each with %d keys"
% (N_DOCS, len(BSON.encode(base_dict)) // 1024, len(base_dict))
f"{N_DOCS} docs, {len(BSON.encode(base_dict)) // 1024}k each with {len(base_dict)} keys"
)


Expand All @@ -324,8 +320,7 @@ def setup(self):
)
coll.insert_many([base_dict.copy() for _ in range(N_DOCS)])
print(
"%d docs, %dk each with %d keys"
% (N_DOCS, len(BSON.encode(base_dict)) // 1024, len(base_dict))
f"{N_DOCS} docs, {len(BSON.encode(base_dict)) // 1024}k each with {len(base_dict)} keys"
)

# This must be skipped because arrow can't read the Decimal128Type
Expand All @@ -348,8 +343,7 @@ def setup(self):
base_dict = dict([(k, Decimal128(k)) for k in self.large_doc_keys])
coll.insert_many([base_dict.copy() for _ in range(N_DOCS)])
print(
"%d docs, %dk each with %d keys"
% (N_DOCS, len(BSON.encode(base_dict)) // 1024, len(base_dict))
f"{N_DOCS} docs, {len(BSON.encode(base_dict)) // 1024}k each with {len(base_dict)} keys"
)

# This must be skipped because arrow can't read the Decimal128Type
Expand All @@ -371,8 +365,7 @@ def setup(self):
base_dict = dict([("x", 1), ("y", math.pi)])
coll.insert_many([base_dict.copy() for _ in range(N_DOCS)])
print(
"%d docs, %dk each with %d keys"
% (N_DOCS, len(BSON.encode(base_dict)) // 1024, len(base_dict))
f"{N_DOCS} docs, {len(BSON.encode(base_dict)) // 1024}k each with {len(base_dict)} keys"
)
self.arrow_table = find_arrow_all(db.benchmark, {}, schema=self.schema)
self.pandas_table = find_pandas_all(db.benchmark, {}, schema=self.schema)
Expand All @@ -391,8 +384,7 @@ def setup(self):
base_dict = dict([(k, math.pi) for k in self.large_doc_keys])
coll.insert_many([base_dict.copy() for _ in range(N_DOCS)])
print(
"%d docs, %dk each with %d keys"
% (N_DOCS, len(BSON.encode(base_dict)) // 1024, len(base_dict))
f"{N_DOCS} docs, {len(BSON.encode(base_dict)) // 1024}k each with {len(base_dict)} keys"
)
self.arrow_table = find_arrow_all(db.benchmark, {}, schema=self.schema)
self.pandas_table = find_pandas_all(db.benchmark, {}, schema=self.schema)
Expand Down
5 changes: 1 addition & 4 deletions bindings/python/pymongoarrow/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,8 +42,5 @@

if libbson_version is not None and _parse_version is not None: # noqa: SIM102
if _parse_version(libbson_version) < _parse_version(_MIN_LIBBSON_VERSION):
msg = (
f"Expected libbson version {_MIN_LIBBSON_VERSION} or greater, "
f"found {libbson_version}"
)
msg = f"Expected libbson version {_MIN_LIBBSON_VERSION} or greater, found {libbson_version}"
raise ImportError(msg)
10 changes: 5 additions & 5 deletions bindings/python/pymongoarrow/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -64,16 +64,16 @@
from pymongoarrow.types import _validate_schema, get_numpy_type

__all__ = [
"Schema",
"aggregate_arrow_all",
"find_arrow_all",
"aggregate_pandas_all",
"find_pandas_all",
"aggregate_numpy_all",
"find_numpy_all",
"aggregate_pandas_all",
"aggregate_polars_all",
"find_arrow_all",
"find_numpy_all",
"find_pandas_all",
"find_polars_all",
"write",
"Schema",
]


Expand Down
2 changes: 1 addition & 1 deletion bindings/python/pymongoarrow/lib.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -836,7 +836,7 @@ cdef class DocumentBuilder(_ArrayBuilderBase):

def finish(self):
# Fields must be in order if we were given a schema.
return list(f.decode('utf-8') for f in self.field_map)
return [f.decode('utf-8') for f in self.field_map]


cdef class ListBuilder(_ArrayBuilderBase):
Expand Down
9 changes: 9 additions & 0 deletions bindings/python/pymongoarrow/pandas_types.py
Original file line number Diff line number Diff line change
Expand Up @@ -183,6 +183,9 @@ def isna(self):
dtype=bool,
)

def __hash__(self):
return hash(self.data)

def __eq__(self, other):
return self.data == other

Expand Down Expand Up @@ -267,6 +270,9 @@ def __arrow_array__(self, type=None):

return pa.array(self.data, type=BinaryType(self.dtype.subtype))

def __hash__(self):
return super().__hash__()

def __eq__(self, other):
# Binary types do not support element-wise comparison.
if isinstance(other, Binary):
Expand Down Expand Up @@ -349,6 +355,9 @@ def __init__(self, values, dtype, copy=False) -> None:
def _default_dtype(self):
return PandasCode()

def __hash__(self):
return super().__hash__()

def __eq__(self, other):
# Code types do not support element-wise comparison.
if isinstance(other, Code):
Expand Down
3 changes: 3 additions & 0 deletions bindings/python/pymongoarrow/polars_types.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,9 @@
if pl:

class PolarsExtensionBase(pl.datatypes.BaseExtension):
def __hash__(self):
    """Return a hash built from ``ext_name()`` and ``ext_storage()``.

    The two values are packed into a single tuple because the built-in
    ``hash()`` accepts exactly one argument; passing them as separate
    arguments raises ``TypeError`` on every call.
    """
    return hash((self.ext_name(), self.ext_storage()))

def __eq__(self, other):
return (
isinstance(other, self.__class__)
Expand Down
3 changes: 3 additions & 0 deletions bindings/python/pymongoarrow/schema.py
Original file line number Diff line number Diff line change
Expand Up @@ -84,6 +84,9 @@ def _get_field_projection_value(self, fname, ftype, projection):
projection[fname] = value
return projection

def __hash__(self):
    """Return a hash of this object derived from ``self.typemap``.

    ``typemap`` is hashed directly when it is hashable. Otherwise the hash
    is computed from a ``frozenset`` of its items, so that two mappings that
    compare equal produce the same hash regardless of insertion order.
    """
    typemap = self.typemap
    try:
        return hash(typemap)
    except TypeError:
        # NOTE(review): assumes an unhashable typemap is a mapping (e.g. a
        # plain dict) whose keys and values are themselves hashable - confirm.
        return hash(frozenset(typemap.items()))

def __eq__(self, other):
if isinstance(other, type(self)):
return self.typemap == other.typemap
Expand Down
2 changes: 1 addition & 1 deletion bindings/python/pymongoarrow/types.py
Original file line number Diff line number Diff line change
Expand Up @@ -255,7 +255,7 @@ def _is_code(obj):
Decimal128: lambda _: Decimal128Type(),
str: lambda _: string(),
bool: lambda _: bool_(),
Binary: lambda subtype: BinaryType(subtype),
Binary: BinaryType,
Code: lambda _: CodeType(),
}

Expand Down
1 change: 1 addition & 0 deletions bindings/python/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -145,6 +145,7 @@ select = [
ignore = [
"PLR", # Design related pylint codes
"RUF012", # Mutable class attributes should be annotated with `typing.ClassVar`
"PLC0415", # `import` should be at the top-level of a file
]
unfixable = [
]
Expand Down
4 changes: 2 additions & 2 deletions bindings/python/test/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,10 +11,10 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from test import client_context

import pytest

from test import client_context

try:
import pandas as pd

Expand Down
3 changes: 1 addition & 2 deletions bindings/python/test/pandas_types/test_binary.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,13 +11,12 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from test.pandas_types.util import base_make_data

import numpy as np
import pytest
from bson import Binary

from pymongoarrow.pandas_types import PandasBinary, PandasBinaryArray
from test.pandas_types.util import base_make_data

try:
from pandas.tests.extension import base
Expand Down
3 changes: 1 addition & 2 deletions bindings/python/test/pandas_types/test_code.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,13 +11,12 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from test.pandas_types.util import base_make_data

import numpy as np
import pytest
from bson import Code

from pymongoarrow.pandas_types import PandasCode, PandasCodeArray
from test.pandas_types.util import base_make_data

try:
from pandas.tests.extension import base
Expand Down
3 changes: 1 addition & 2 deletions bindings/python/test/pandas_types/test_decimal128.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,13 +11,12 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from test.pandas_types.util import base_make_data

import numpy as np
import pytest
from bson import Decimal128

from pymongoarrow.pandas_types import PandasDecimal128, PandasDecimal128Array
from test.pandas_types.util import base_make_data

try:
from pandas.tests.extension import base
Expand Down
3 changes: 1 addition & 2 deletions bindings/python/test/pandas_types/test_objectid.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,13 +11,12 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from test.pandas_types.util import base_make_data

import numpy as np
import pytest
from bson import ObjectId

from pymongoarrow.pandas_types import PandasObjectId, PandasObjectIdArray
from test.pandas_types.util import base_make_data

try:
from pandas.tests.extension import base
Expand Down
Loading
Loading