diff --git a/Cargo.lock b/Cargo.lock new file mode 100644 index 00000000..be6294d2 --- /dev/null +++ b/Cargo.lock @@ -0,0 +1,180 @@ +# This file is automatically @generated by Cargo. +# It is not intended for manual editing. +version = 4 + +[[package]] +name = "autocfg" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c08606f8c3cbf4ce6ec8e28fb0014a2c086708fe954eaa885384a6165172e7e8" + +[[package]] +name = "cfg-if" +version = "1.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9330f8b2ff13f34540b44e946ef35111825727b38d33286ef986142615121801" + +[[package]] +name = "foamlib" +version = "1.5.4" +dependencies = [ + "pyo3", +] + +[[package]] +name = "heck" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea" + +[[package]] +name = "indoc" +version = "2.0.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "79cf5c93f93228cf8efb3ba362535fb11199ac548a09ce117c9b1adc3030d706" +dependencies = [ + "rustversion", +] + +[[package]] +name = "libc" +version = "0.2.178" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "37c93d8daa9d8a012fd8ab92f088405fb202ea0b6ab73ee2482ae66af4f42091" + +[[package]] +name = "memoffset" +version = "0.9.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "488016bfae457b036d996092f6cb448677611ce4449e970ceaf42695203f218a" +dependencies = [ + "autocfg", +] + +[[package]] +name = "once_cell" +version = "1.21.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "42f5e15c9953c5e4ccceeb2e7382a716482c34515315f7b03532b8b4e8393d2d" + +[[package]] +name = "portable-atomic" +version = "1.11.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f84267b20a16ea918e43c6a88433c2d54fa145c92a811b5b047ccbe153674483" + +[[package]] +name = "proc-macro2" +version = "1.0.103" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5ee95bc4ef87b8d5ba32e8b7714ccc834865276eab0aed5c9958d00ec45f49e8" +dependencies = [ + "unicode-ident", +] + +[[package]] +name = "pyo3" +version = "0.23.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7778bffd85cf38175ac1f545509665d0b9b92a198ca7941f131f85f7a4f9a872" +dependencies = [ + "cfg-if", + "indoc", + "libc", + "memoffset", + "once_cell", + "portable-atomic", + "pyo3-build-config", + "pyo3-ffi", + "pyo3-macros", + "unindent", +] + +[[package]] +name = "pyo3-build-config" +version = "0.23.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "94f6cbe86ef3bf18998d9df6e0f3fc1050a8c5efa409bf712e661a4366e010fb" +dependencies = [ + "once_cell", + "target-lexicon", +] + +[[package]] +name = "pyo3-ffi" +version = "0.23.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e9f1b4c431c0bb1c8fb0a338709859eed0d030ff6daa34368d3b152a63dfdd8d" +dependencies = [ + "libc", + "pyo3-build-config", +] + +[[package]] +name = "pyo3-macros" +version = "0.23.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fbc2201328f63c4710f68abdf653c89d8dbc2858b88c5d88b0ff38a75288a9da" +dependencies = [ + "proc-macro2", + "pyo3-macros-backend", + "quote", + "syn", +] + +[[package]] +name = "pyo3-macros-backend" +version = "0.23.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fca6726ad0f3da9c9de093d6f116a93c1a38e417ed73bf138472cf4064f72028" +dependencies = [ + "heck", + "proc-macro2", + "pyo3-build-config", + "quote", + "syn", +] + +[[package]] +name = "quote" +version = "1.0.42" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a338cc41d27e6cc6dce6cefc13a0729dfbb81c262b1f519331575dd80ef3067f" +dependencies = [ + "proc-macro2", +] + +[[package]] +name = "rustversion" +version = "1.0.22" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b39cdef0fa800fc44525c84ccb54a029961a8215f9619753635a9c0d2538d46d" + +[[package]] +name = "syn" +version = "2.0.111" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "390cc9a294ab71bdb1aa2e99d13be9c753cd2d7bd6560c77118597410c4d2e87" +dependencies = [ + "proc-macro2", + "quote", + "unicode-ident", +] + +[[package]] +name = "target-lexicon" +version = "0.12.16" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "61c41af27dd6d1e27b1b16b489db798443478cef1f06a660c96db617ba5de3b1" + +[[package]] +name = "unicode-ident" +version = "1.0.22" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9312f7c4f6ff9069b165498234ce8be658059c6728633667c526e27dc2cf1df5" + +[[package]] +name = "unindent" +version = "0.2.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7264e107f553ccae879d21fbea1d6724ac785e8c3bfc762137959b5802826ef3" diff --git a/Cargo.toml b/Cargo.toml new file mode 100644 index 00000000..5bb87148 --- /dev/null +++ b/Cargo.toml @@ -0,0 +1,12 @@ +[package] +name = "foamlib" +version = "1.5.4" +edition = "2021" + +[lib] +name = "foamlib_rust" +crate-type = ["cdylib"] +path = "rust_src/lib.rs" + +[dependencies] +pyo3 = { version = "0.23", features = ["extension-module"] } diff --git a/pyproject.toml b/pyproject.toml index 2397a624..8d83cecf 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [build-system] -requires = ["uv_build>=0.9,<0.10"] -build-backend = "uv_build" +requires = ["maturin>=1.0,<2.0"] +build-backend = "maturin" [project] name = "foamlib" @@ -119,3 +119,7 @@ extend-ignore = [ [tool.ruff.lint.pydocstyle] convention = "pep257" + +[tool.maturin] +python-source = "src" +module-name = "foamlib.foamlib_rust" diff --git a/rust_src/lib.rs b/rust_src/lib.rs new file mode 100644 index 00000000..8b947f22 --- /dev/null +++ b/rust_src/lib.rs @@ -0,0 +1,140 @@ +use pyo3::prelude::*; + +// Whitespace lookup table +static IS_WHITESPACE: [bool; 256] = { + let mut table = [false; 256]; + table[b' ' as usize] = true; + table[b'\n' as usize] = true; + table[b'\t' as usize] = true; + table[b'\r' as usize] = true; + table[b'\x0C' as usize] = true; // \f + table[b'\x0B' as usize] = true; // \v + table +}; + +// Whitespace without newline lookup table +static IS_WHITESPACE_NO_NEWLINE: [bool; 256] = { + let mut table = [false; 256]; + table[b' ' as usize] = true; + table[b'\t' as usize] = true; + table[b'\r' as usize] = true; + table[b'\x0C' as usize] = true; // \f + table[b'\x0B' as usize] = true; // \v + table +}; + +/// Skip whitespace and comments in OpenFOAM files +/// +/// This function skips over whitespace and comments (both // and /* */ style). +/// It handles line continuations with backslash in line comments. +/// +/// # Arguments +/// * `contents` - The file contents as bytes or bytearray +/// * `pos` - Current position in the file +/// * `newline_ok` - Whether newlines should be skipped (default: True) +/// +/// # Returns +/// The new position after skipping whitespace and comments +#[pyfunction] +#[pyo3(signature = (contents, pos, *, newline_ok=true))] +fn skip(contents: &Bound<'_, pyo3::types::PyAny>, mut pos: usize, newline_ok: bool) -> PyResult { + // Extract bytes from either bytes or bytearray + let contents = if let Ok(bytes) = contents.downcast::() { + bytes.as_bytes() + } else if let Ok(bytearray) = contents.downcast::() { + // SAFETY: This is safe because: + // 1. We only read from the bytearray (immutable access) + // 2. The reference doesn't escape this function + // 3. The GIL prevents concurrent modification by Python code + unsafe { bytearray.as_bytes() } + } else { + return Err(PyErr::new::( + "contents must be bytes or bytearray" + )); + }; + let is_whitespace = if newline_ok { + &IS_WHITESPACE + } else { + &IS_WHITESPACE_NO_NEWLINE + }; + + loop { + // Skip whitespace + while pos < contents.len() && is_whitespace[contents[pos] as usize] { + pos += 1; + } + + // Check if we're at the end of content + if pos >= contents.len() { + break; + } + + // Check for comments + if pos + 1 < contents.len() { + let next1 = contents[pos]; + let next2 = contents[pos + 1]; + + // Single-line comment // + if next1 == b'/' && next2 == b'/' { + pos += 2; + loop { + if pos >= contents.len() { + break; + } + + if contents[pos] == b'\n' { + if newline_ok { + pos += 1; + } + break; + } + + // Handle line continuation + if contents[pos] == b'\\' && pos + 1 < contents.len() && contents[pos + 1] == b'\n' { + pos += 2; + continue; + } + + pos += 1; + } + continue; + } + + // Multi-line comment /* */ + if next1 == b'/' && next2 == b'*' { + pos += 2; + + // Find the closing */ + let mut found = false; + while pos + 1 < contents.len() { + if contents[pos] == b'*' && contents[pos + 1] == b'/' { + pos += 2; + found = true; + break; + } + pos += 1; + } + + if !found { + return Err(PyErr::new::( + format!("Unterminated comment at position {}", pos) + )); + } + + continue; + } + } + + // No more whitespace or comments + break; + } + + Ok(pos) +} + +/// A Python module implemented in Rust for performance-critical parsing operations. +#[pymodule] +fn foamlib_rust(m: &Bound<'_, PyModule>) -> PyResult<()> { + m.add_function(wrap_pyfunction!(skip, m)?)?; + Ok(()) +} diff --git a/src/foamlib/_files/_parsing/_parser.py b/src/foamlib/_files/_parsing/_parser.py index 0adc2fc0..b4468d6a 100644 --- a/src/foamlib/_files/_parsing/_parser.py +++ b/src/foamlib/_files/_parsing/_parser.py @@ -13,6 +13,13 @@ from typing_extensions import Unpack, assert_never import numpy as np + +# Import the Rust implementation of _skip +try: + from foamlib.foamlib_rust import skip as _skip_rust + _USE_RUST_SKIP = True +except ImportError: + _USE_RUST_SKIP = False from multicollections import MultiDict from ...typing import ( @@ -84,6 +91,23 @@ def _skip( *, newline_ok: bool = True, ) -> int: + # Use Rust implementation if available + if _USE_RUST_SKIP: + try: + return _skip_rust(contents, pos, newline_ok=newline_ok) + except ValueError as e: + # Convert ValueError from Rust to FoamFileDecodeError + # The Rust implementation raises ValueError for unterminated comments + if "Unterminated comment" in str(e): + raise FoamFileDecodeError( + contents, + len(contents), + expected="*/", + ) from e + # Re-raise other ValueErrors (e.g., type errors) + raise + + # Fallback to Python implementation is_whitespace = _IS_WHITESPACE if newline_ok else _IS_WHITESPACE_NO_NEWLINE with contextlib.suppress(IndexError): @@ -104,9 +128,8 @@ def _skip( if contents[pos] == ord("\\"): with contextlib.suppress(IndexError): if contents[pos + 1] == ord("\n"): - pos += 1 - pos += 1 - continue + pos += 2 + continue pos += 1 continue