Skip to content

perf: Port FrozenOrderedSet to rust#23200

Open
tobni wants to merge 4 commits into
pantsbuild:mainfrom
tobni:add/port-frozen-ordered-set
Open

perf: Port FrozenOrderedSet to rust#23200
tobni wants to merge 4 commits into
pantsbuild:mainfrom
tobni:add/port-frozen-ordered-set

Conversation

@tobni
Copy link
Copy Markdown
Contributor

@tobni tobni commented Mar 29, 2026

Followup to #22501. Same approach — FrozenOrderedSet is now a pyo3 #[pyclass] backed by Py<PyDict> with lazy hash via OnceLock. The end goal is porting more rule code to rust intrinsics.

"""Benchmark: Rust FrozenOrderedSet vs Python FrozenOrderedSet."""

import sys
import timeit
from collections.abc import Hashable, Iterable, Iterator
from typing import AbstractSet, Any, TypeVar

sys.path.insert(0, "src/python")

from pants.engine.internals.native_engine import FrozenOrderedSet as RustFrozenOrderedSet

T = TypeVar("T")


class PyFrozenOrderedSet(AbstractSet[T], Hashable):
    """The old pure-Python FrozenOrderedSet (pre-port).

    Kept here as the baseline that the Rust-backed implementation is timed
    against, so its code should stay as-is: changing it changes what the
    benchmark measures.

    Items are stored as the keys of a plain ``dict``, which gives
    deduplication and first-seen insertion order. Equality is order-sensitive;
    the hash is computed lazily and cached.
    """

    def __init__(self, iterable=None) -> None:
        """Build the set from ``iterable``.

        A falsy ``iterable`` (``None`` or an empty container) produces an
        empty set.
        """
        # Keys hold the items; every value is None.
        self._items = dict.fromkeys(iterable) if iterable else {}
        # Cached hash; None until __hash__ is first called.
        self._hash = None

    def __len__(self) -> int:
        """Return the number of distinct items."""
        return len(self._items)

    def __contains__(self, key) -> bool:
        """Return whether ``key`` is an item (a dict key lookup)."""
        return key in self._items

    def __iter__(self) -> Iterator:
        """Iterate over the items in insertion order."""
        return iter(self._items)

    def __reversed__(self):
        """Iterate over the items in reverse insertion order.

        The keys are first copied into a tuple, which is then reversed.
        """
        return reversed(tuple(self._items.keys()))

    def __eq__(self, other):
        """Compare order-sensitively against another instance of this class.

        Returns ``NotImplemented`` when ``other`` is not an instance of
        ``self.__class__``; otherwise the two are equal only if they have the
        same length and pairwise-equal items in the same order.
        """
        if not isinstance(other, self.__class__):
            return NotImplemented
        return len(self._items) == len(other._items) and all(
            x == y for x, y in zip(self._items, other._items)
        )

    def __hash__(self) -> int:
        """Return the XOR of the item hashes, computing it once and caching it.

        XOR ignores ordering, so two sets with the same items in a different
        order hash alike even though ``__eq__`` treats them as unequal. Equal
        sets still always share a hash.
        """
        if self._hash is None:
            self._hash = 0
            for item in self._items.keys():
                self._hash ^= hash(item)
        return self._hash

    def __repr__(self) -> str:
        """Return ``PyFrozenOrderedSet([...])`` listing the items in order."""
        return f"PyFrozenOrderedSet({list(self)!r})"

    def __bool__(self) -> bool:
        """Return ``True`` when the set has at least one item."""
        return bool(self._items)

    def union(self, other):
        """Return a new set: this set's items, then those of ``other`` not already present."""
        return self.__class__(list(self) + [x for x in other if x not in self._items])

    def intersection(self, other):
        """Return a new set of this set's items that also occur in ``other``.

        The result keeps this set's order. ``other`` is copied into a built-in
        ``set`` for the membership tests.
        """
        s = set(other)
        return self.__class__(x for x in self if x in s)

    def difference(self, other):
        """Return a new set of this set's items that do not occur in ``other``.

        The result keeps this set's order. ``other`` is copied into a built-in
        ``set`` for the membership tests.
        """
        s = set(other)
        return self.__class__(x for x in self if x not in s)

    def issubset(self, other) -> bool:
        """Return whether every item of this set is contained in ``other``.

        ``other`` must support ``len()`` and ``in``. A longer ``self`` is
        rejected up front without any membership tests.
        """
        if len(self) > len(other):
            return False
        return all(item in other for item in self)


# Number of untimed executions run before every measurement.
WARMUP = 1000


def measure(stmt, number, globs):
    """Return the mean cost of one execution of ``stmt``, in microseconds.

    The statement is first executed ``WARMUP`` times with the result thrown
    away, then timed over ``number`` executions.

    Args:
        stmt: Source of the statement to time.
        number: How many timed executions to average over.
        globs: Namespace in which ``stmt`` is executed.

    Returns:
        Average wall time per execution, in microseconds.
    """
    timer = timeit.Timer(stmt, globals=globs)
    timer.timeit(number=WARMUP)  # warm-up pass; elapsed time is discarded
    elapsed_seconds = timer.timeit(number=number)
    seconds_per_run = elapsed_seconds / number
    return seconds_per_run * 1_000_000


# Each factory below receives the per-dataset context as keyword arguments and
# returns ``[python_globals, rust_globals]``: the two namespaces in which the
# benchmark statement is timed. Context entries a factory does not need are
# swallowed by ``**_``.


def _construction_globs(data, py, rs, **_):
    """Namespaces for timing construction from the raw input data."""
    python_side = {"Cls": PyFrozenOrderedSet, "data": data}
    rust_side = {"Cls": RustFrozenOrderedSet, "data": data}
    return [python_side, rust_side]


def _receiver_only_globs(py, rs, **_):
    """Namespaces exposing only the set under test as ``fd``."""
    return [{"fd": py}, {"fd": rs}]


def _contains_hit_globs(py, rs, mid, **_):
    """Namespaces for a membership test on the context's ``mid`` element."""
    return [{"fd": py, "k": mid}, {"fd": rs, "k": mid}]


def _contains_miss_globs(py, rs, **_):
    """Namespaces for a membership test on the string ``"MISSING"``."""
    return [{"fd": py, "k": "MISSING"}, {"fd": rs, "k": "MISSING"}]


def _equality_globs(py, rs, py2, rs2, **_):
    """Namespaces pairing each set with its second copy from the context."""
    return [{"fd": py, "fd2": py2}, {"fd": rs, "fd2": rs2}]


def _binary_op_globs(py, rs, py_other, rs_other, **_):
    """Namespaces pairing each set with the same-implementation ``other`` operand."""
    return [{"fd": py, "other": py_other}, {"fd": rs, "other": rs_other}]


def _issubset_globs(py, rs, py_small, rs_small, **_):
    """Namespaces pairing each set with the same-implementation small set."""
    return [{"small": py_small, "fd": py}, {"small": rs_small, "fd": rs}]


def _dict_key_globs(py, rs, **_):
    """Namespaces where each set is the sole key of a one-entry dict ``d``."""
    return [{"fd": py, "d": {py: 1}}, {"fd": rs, "d": {rs: 1}}]


# (display name, statement to time, namespace factory)
BENCHMARKS = [
    ("Construction", "Cls(data)", _construction_globs),
    ("hash()", "hash(fd)", _receiver_only_globs),
    ("__contains__", "k in fd", _contains_hit_globs),
    ("__contains__ miss", "k in fd", _contains_miss_globs),
    ("__eq__", "fd == fd2", _equality_globs),
    ("iteration", "list(fd)", _receiver_only_globs),
    ("union", "fd.union(other)", _binary_op_globs),
    ("intersection", "fd.intersection(other)", _binary_op_globs),
    ("difference", "fd.difference(other)", _binary_op_globs),
    ("issubset", "small.issubset(fd)", _issubset_globs),
    ("dict key", "d[fd]", _dict_key_globs),
]

# Input sizes: every dataset is a list of consecutive ints starting at 0.
SMALL = list(range(5))
MEDIUM = list(range(20))
LARGE = list(range(200))

DATASETS = [("small (5)", SMALL), ("medium (20)", MEDIUM), ("large (200)", LARGE)]

# benchmark name -> dataset name -> (python µs per call, rust µs per call)
all_results: dict[str, dict[str, tuple[float, float]]] = {}

for ds_name, data in DATASETS:
    print(f"\n{'=' * 60}")
    print(f"  Dataset: {ds_name}")
    print(f"{'=' * 60}")

    py = PyFrozenOrderedSet(data)
    rs = RustFrozenOrderedSet(data)
    # Second, equal-but-distinct copies for the __eq__ benchmark.
    py2 = PyFrozenOrderedSet(data)
    rs2 = RustFrozenOrderedSet(data)
    # The "other" operand shares the first half of `data` and adds as many
    # values from 1000 upwards, which none of the datasets contain. It is
    # built once so both implementations are constructed from the same input.
    half = data[:len(data) // 2]
    other_data = half + list(range(1000, 1000 + len(half)))
    py_other = PyFrozenOrderedSet(other_data)
    rs_other = RustFrozenOrderedSet(other_data)
    py_small = PyFrozenOrderedSet(data[:3])
    rs_small = RustFrozenOrderedSet(data[:3])
    # Warm up lazy hashes on BOTH implementations so neither side starts a
    # measurement with an uncomputed hash.
    for obj in (py, py2, py_other, py_small, rs, rs2, rs_other, rs_small):
        hash(obj)
    # Fewer iterations for the large dataset to keep total runtime reasonable.
    n = 500_000 if len(data) <= 20 else 50_000
    # An element known to be present, used by the membership-hit benchmark.
    mid = data[len(data) // 2]

    ctx = dict(data=data, py=py, rs=rs, py2=py2, rs2=rs2, mid=mid,
               py_other=py_other, rs_other=rs_other, py_small=py_small, rs_small=rs_small)

    for bench_name, stmt, make_globs in BENCHMARKS:
        py_globs, rs_globs = make_globs(**ctx)
        py_us = measure(stmt, n, py_globs)
        rs_us = measure(stmt, n, rs_globs)
        print(f"  {bench_name:.<20s} Python {py_us:8.3f} µs  Rust {rs_us:8.3f} µs  ({py_us / rs_us:.1f}x)")
        all_results.setdefault(bench_name, {})[ds_name] = (py_us, rs_us)

# Summary table: one row per benchmark, one speedup column per dataset.
ds_names = [name for name, _ in DATASETS]
header = f"  {'Operation':<20s}" + "".join(f" | {name:>12s}" for name in ds_names)
sep = f"  {'-'*20}" + "".join(f"-+-{'-'*12}" for _ in ds_names)

print(f"\n{'=' * 60}")
print("  Summary (Python / Rust speedup)")
print(f"{'=' * 60}")
print(header)
print(sep)
for bench_name, _, _ in BENCHMARKS:
    row = f"  {bench_name:<20s}"
    for ds_name in ds_names:
        py_us, rs_us = all_results[bench_name][ds_name]
        # Values above 1.0 mean the Rust implementation was faster.
        ratio = py_us / rs_us
        row += f" | {ratio:11.1f}x"
    print(row)
  Operation            |    small (5) |  medium (20) |  large (200)
  ---------------------+--------------+--------------+-------------
  Construction         |         1.2x |         1.1x |         1.0x
  hash()               |         2.2x |         2.2x |         2.2x
  __contains__         |         1.8x |         1.7x |         1.7x
  __contains__ miss    |         1.8x |         1.8x |         1.8x
  __eq__               |         3.6x |         2.0x |         1.4x
  iteration            |         1.5x |         1.4x |         1.1x
  union                |         4.1x |         3.0x |         2.5x
  intersection         |         2.7x |         1.7x |         1.2x
  difference           |         3.0x |         1.8x |         1.3x
  issubset             |         7.1x |         6.8x |         7.0x
  dict key             |         2.1x |         2.0x |         2.0x

@tobni tobni force-pushed the add/port-frozen-ordered-set branch from 42a564e to ca376c9 Compare March 29, 2026 13:25
@tobni tobni added category:internal CI, fixes for not-yet-released features, etc. release-notes:not-required [CI] PR doesn't require mention in release notes labels Mar 29, 2026
@tobni tobni force-pushed the add/port-frozen-ordered-set branch 7 times, most recently from ee17c67 to 0bf4cba Compare March 29, 2026 18:45
@cburroughs
Copy link
Copy Markdown
Contributor

Really wish I knew rust better for all these cool performance cases. Cross referencing: #14719

@tdyas
Copy link
Copy Markdown
Contributor

tdyas commented May 19, 2026

What about just porting FrozenOrderedSet usages to the Python standard frozenset?

@tdyas
Copy link
Copy Markdown
Contributor

tdyas commented May 19, 2026

What about just porting FrozenOrderedSet usages to the Python standard frozenset?

I assume because frozenset is unordered, but is that a property we need for use cases where we are using a set type?

@tobni
Copy link
Copy Markdown
Contributor Author

tobni commented May 24, 2026

I assume because frozenset is unordered, but is that a property we need for use cases where we are using a set type?

I don't know. It is used a lot.

Copy link
Copy Markdown
Contributor

@cburroughs cburroughs left a comment

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The __new__ --> __init__ thing needs a oneline mention in the release notes I think, on the off chance someone was using the class?

I ran it through some local benchmarks which suggest single digit percentage performance improvements. I think that matches your expectations? As idle curiosity, I wonder how much of the benefit comes from the lazy hash computation.

.map(|o| to_pyset(&o))
.collect::<PyResult<Vec<_>>>()?;
filter_keys(self, py, |key| {
Ok(sets.iter().all(|s| s.contains(key).unwrap_or(false)))
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

rust clarity: Is the unwrap_or here intentional? I'm not sure why intersection should handle this type of error differently than difference?

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Not intentional. This would be set.__contains__ throwing an exception. Not sure that ever happens, but better to fix it and let the weird case be visible at least 🤷‍♂️

@tobni tobni force-pushed the add/port-frozen-ordered-set branch from c52ba37 to 5b7d8f5 Compare May 30, 2026 15:20
@tobni tobni force-pushed the add/port-frozen-ordered-set branch from 5b7d8f5 to 5bdcfaf Compare May 30, 2026 15:23
@tobni
Copy link
Copy Markdown
Contributor Author

tobni commented May 30, 2026

The __new__ --> __init__ thing needs a oneline mention in the release notes I think, on the off chance someone was using the class?

I ran it through some local benchmarks which suggest single digit percentage performance improvements. I think that matches your expectations? As idle curiosity, I wonder how much of the benefit comes from the lazy hash computation.

Aye, around 1%, it scales well with large repos with many deps. The python impl also enjoyed a lazy hash, so the port should be net-nothing if it is never accessed. It almost always is though...

I've updated release notes and fixed the correctness bug you caught

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment

Labels

category:internal CI, fixes for not-yet-released features, etc. release-notes:not-required [CI] PR doesn't require mention in release notes

Projects

None yet

Development

Successfully merging this pull request may close these issues.

3 participants