Skip to content

Commit 67a7661

Browse files
Copilot and gerlero committed
Add _parse_number function to C extension module
Co-authored-by: gerlero <[email protected]>
1 parent a88c444 commit 67a7661

File tree

2 files changed

+217
-32
lines changed

2 files changed

+217
-32
lines changed

src/foamlib/_files/_parsing/_parser.py

Lines changed: 8 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,8 @@
2929
SubDict,
3030
Tensor,
3131
)
32+
from ._skip_ext import ParseError as _CParseError
33+
from ._skip_ext import _parse_number as _parse_number_c
3234
from ._skip_ext import _skip
3335
from .exceptions import FoamFileDecodeError
3436

@@ -47,14 +49,6 @@
4749
for c in b"0123456789._<>#$:+-*/|^%&=!":
4850
_IS_TOKEN_CONTINUATION[c] = True
4951

50-
_IS_POSSIBLE_FLOAT = [False] * 256
51-
for c in b"0123456789.-+eEinfnatyINFNATY":
52-
_IS_POSSIBLE_FLOAT[c] = True
53-
54-
_IS_POSSIBLE_INTEGER = [False] * 256
55-
for c in b"0123456789-+":
56-
_IS_POSSIBLE_INTEGER[c] = True
57-
5852
_COMMENTS = re.compile(rb"(?:(?:/\*(?:[^*]|\*(?!/))*\*/)|(?://(?:\\\n|[^\n])*))+")
5953
_SKIP = re.compile(rb"(?:\s+|" + _COMMENTS.pattern + rb")+")
6054
_POSSIBLE_FLOAT = re.compile(rb"[0-9.\-+einfatyEINFATY]+", re.ASCII)
@@ -164,31 +158,13 @@ def _parse_number(
164158
*,
165159
target: type[int] | type[float] | type[int | float] = int | float,
166160
) -> tuple[int | float, int]:
167-
is_numeric = _IS_POSSIBLE_INTEGER if target is int else _IS_POSSIBLE_FLOAT
168-
end = pos
169-
with contextlib.suppress(IndexError):
170-
while is_numeric[contents[end]]:
171-
end += 1
172-
173-
if _IS_TOKEN_CONTINUATION[contents[end]]:
174-
raise ParseError(contents, pos, expected="number")
175-
176-
if pos == end:
177-
raise ParseError(contents, pos, expected="number")
178-
179-
chars = contents[pos:end]
180-
if target is not float:
181-
try:
182-
return int(chars), end
183-
except ValueError as e:
184-
if target is int:
185-
raise ParseError(contents, pos, expected="integer") from e
161+
"""Parse a number from contents and convert C ParseError to Python ParseError."""
186162
try:
187-
return float(chars), end
188-
except ValueError as e:
189-
if target is float:
190-
raise ParseError(contents, pos, expected="float") from e
191-
raise ParseError(contents, pos, expected="number") from e
163+
return _parse_number_c(contents, pos, target=target)
164+
except _CParseError as e:
165+
# Convert C ParseError to Python ParseError
166+
# The C exception has attributes _contents, pos, and _expected
167+
raise ParseError(e._contents, e.pos, expected=e._expected) from None
192168

193169

194170
class _ASCIINumericListParser(Generic[_DType, _ElShape]):

src/foamlib/_files/_parsing/_skip_ext.c

Lines changed: 209 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,10 @@
11
#define PY_SSIZE_T_CLEAN
22
#include <Python.h>
3+
#include <string.h>
4+
#include <errno.h>
35

46
static PyObject *FoamFileDecodeError = NULL;
7+
static PyObject *ParseError = NULL;
58

69
/* Python function: _skip(contents: bytes | bytearray, pos: int, *, newline_ok: bool = True) -> int */
710
static PyObject *
@@ -118,6 +121,184 @@ skip(PyObject *self, PyObject *args, PyObject *kwargs)
118121
return PyLong_FromSsize_t(pos);
119122
}
120123

124+
/*
 * Byte-classification tables for _parse_number, indexed by byte value.
 * A non-zero entry means the byte belongs to the class.  Every entry starts
 * at zero (static storage) and is filled in by init_lookup_tables().
 */
static int _IS_POSSIBLE_FLOAT[256];
static int _IS_POSSIBLE_INTEGER[256];
static int _IS_TOKEN_CONTINUATION[256];

/* Set table[b] = 1 for every byte b of the NUL-terminated string `members`. */
static void
mark_bytes(int *table, const char *members)
{
    for (const unsigned char *p = (const unsigned char *)members; *p != '\0'; p++) {
        table[*p] = 1;
    }
}

/* Populate the three classification tables above. */
static void
init_lookup_tables(void)
{
    /* Bytes that may appear in a float literal, including the letters of
       "inf", "nan" and "infinity" in either case. */
    mark_bytes(_IS_POSSIBLE_FLOAT, "0123456789.-+eEinfnatyINFNATY");

    /* Bytes that may appear in an integer literal. */
    mark_bytes(_IS_POSSIBLE_INTEGER, "0123456789-+");

    /* A token-continuation byte is either a token-start byte or one of the
       extra continuation-only bytes. */
    mark_bytes(_IS_TOKEN_CONTINUATION,
               "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ_#$");
    mark_bytes(_IS_TOKEN_CONTINUATION, "0123456789._<>#$:+-*/|^%&=!");
}
154+
155+
/*
 * Which numeric types _parse_number may return.  The values form a bit set:
 * bit 0 allows int, bit 1 allows float.
 */
enum ParseNumberTarget {
    TARGET_INT = 1,                                  /* only an integer is acceptable */
    TARGET_FLOAT = 2,                                /* always produce a float */
    TARGET_INT_OR_FLOAT = TARGET_INT | TARGET_FLOAT  /* try int, fall back to float */
};
161+
162+
/* Helper function to raise ParseError with the given parameters */
163+
static void
164+
raise_parse_error(PyObject *contents_obj, Py_ssize_t pos, const char *expected)
165+
{
166+
/* Create ParseError instance with attributes */
167+
PyObject *exc_instance = PyObject_CallObject(ParseError, NULL);
168+
if (exc_instance == NULL) {
169+
return;
170+
}
171+
172+
/* Set attributes on the exception instance */
173+
PyObject_SetAttrString(exc_instance, "_contents", contents_obj);
174+
PyObject_SetAttrString(exc_instance, "pos", PyLong_FromSsize_t(pos));
175+
PyObject_SetAttrString(exc_instance, "_expected", PyUnicode_FromString(expected));
176+
177+
/* Set the exception */
178+
PyErr_SetObject(ParseError, exc_instance);
179+
Py_DECREF(exc_instance);
180+
}
181+
182+
/* Python function: _parse_number(contents: bytes | bytearray, pos: int, *, target: type[int|float|int|float])
183+
-> tuple[int|float, int] */
184+
static PyObject *
185+
parse_number(PyObject *self, PyObject *args, PyObject *kwargs)
186+
{
187+
Py_buffer buffer;
188+
Py_ssize_t pos;
189+
PyObject *target = NULL;
190+
const unsigned char *contents;
191+
Py_ssize_t len;
192+
enum ParseNumberTarget target_type = TARGET_INT_OR_FLOAT;
193+
194+
static char *kwlist[] = {"contents", "pos", "target", NULL};
195+
196+
if (!PyArg_ParseTupleAndKeywords(args, kwargs, "y*n|$O", kwlist,
197+
&buffer, &pos, &target)) {
198+
return NULL;
199+
}
200+
201+
contents = (const unsigned char *)buffer.buf;
202+
len = buffer.len;
203+
204+
/* Determine target type */
205+
if (target != NULL) {
206+
/* Check if target is int type */
207+
if (target == (PyObject *)&PyLong_Type) {
208+
target_type = TARGET_INT;
209+
}
210+
/* Check if target is float type */
211+
else if (target == (PyObject *)&PyFloat_Type) {
212+
target_type = TARGET_FLOAT;
213+
}
214+
/* Otherwise assume int | float */
215+
}
216+
217+
/* Select the appropriate lookup table */
218+
int *is_numeric = (target_type == TARGET_INT) ? _IS_POSSIBLE_INTEGER : _IS_POSSIBLE_FLOAT;
219+
220+
/* Find the end of the numeric string */
221+
Py_ssize_t end = pos;
222+
while (end < len && is_numeric[contents[end]]) {
223+
end++;
224+
}
225+
226+
/* Check if followed by token continuation character */
227+
if (end < len && _IS_TOKEN_CONTINUATION[contents[end]]) {
228+
PyObject *contents_obj = buffer.obj;
229+
Py_INCREF(contents_obj);
230+
PyBuffer_Release(&buffer);
231+
raise_parse_error(contents_obj, pos, "number");
232+
Py_DECREF(contents_obj);
233+
return NULL;
234+
}
235+
236+
/* Check if we found any numeric characters */
237+
if (pos == end) {
238+
PyObject *contents_obj = buffer.obj;
239+
Py_INCREF(contents_obj);
240+
PyBuffer_Release(&buffer);
241+
raise_parse_error(contents_obj, pos, "number");
242+
Py_DECREF(contents_obj);
243+
return NULL;
244+
}
245+
246+
/* Extract the numeric string */
247+
PyObject *chars = PyBytes_FromStringAndSize((const char *)(contents + pos), end - pos);
248+
if (chars == NULL) {
249+
PyBuffer_Release(&buffer);
250+
return NULL;
251+
}
252+
253+
PyObject *result_value = NULL;
254+
255+
/* Try to parse as integer first if target allows */
256+
if (target_type != TARGET_FLOAT) {
257+
PyObject *int_result = PyLong_FromString(PyBytes_AS_STRING(chars), NULL, 10);
258+
if (int_result != NULL) {
259+
result_value = int_result;
260+
} else {
261+
PyErr_Clear();
262+
if (target_type == TARGET_INT) {
263+
/* Must be an integer, so this is an error */
264+
PyObject *contents_obj = buffer.obj;
265+
Py_INCREF(contents_obj);
266+
PyBuffer_Release(&buffer);
267+
Py_DECREF(chars);
268+
raise_parse_error(contents_obj, pos, "integer");
269+
Py_DECREF(contents_obj);
270+
return NULL;
271+
}
272+
}
273+
}
274+
275+
/* Try to parse as float if we haven't got a result yet */
276+
if (result_value == NULL) {
277+
PyObject *float_result = PyFloat_FromString(chars);
278+
if (float_result != NULL) {
279+
result_value = float_result;
280+
} else {
281+
PyErr_Clear();
282+
PyObject *contents_obj = buffer.obj;
283+
Py_INCREF(contents_obj);
284+
PyBuffer_Release(&buffer);
285+
Py_DECREF(chars);
286+
const char *expected_msg = (target_type == TARGET_FLOAT) ? "float" : "number";
287+
raise_parse_error(contents_obj, pos, expected_msg);
288+
Py_DECREF(contents_obj);
289+
return NULL;
290+
}
291+
}
292+
293+
Py_DECREF(chars);
294+
PyBuffer_Release(&buffer);
295+
296+
/* Return tuple (value, end) */
297+
PyObject *result = PyTuple_Pack(2, result_value, PyLong_FromSsize_t(end));
298+
Py_DECREF(result_value);
299+
return result;
300+
}
301+
121302
static PyMethodDef skip_methods[] = {
122303
{"_skip", (PyCFunction)skip, METH_VARARGS | METH_KEYWORDS,
123304
"Skip whitespace and comments in OpenFOAM file contents.\n\n"
@@ -127,20 +308,30 @@ static PyMethodDef skip_methods[] = {
127308
" newline_ok: if False, newlines are not skipped (default: True)\n\n"
128309
"Returns:\n"
129310
" New position after skipping whitespace and comments\n"},
311+
{"_parse_number", (PyCFunction)parse_number, METH_VARARGS | METH_KEYWORDS,
312+
"Parse a number (integer or float) from OpenFOAM file contents.\n\n"
313+
"Args:\n"
314+
" contents: bytes or bytearray to parse\n"
315+
" pos: current position in contents\n"
316+
" target: target type (int, float, or int|float)\n\n"
317+
"Returns:\n"
318+
" Tuple of (number, new_position)\n"},
130319
{NULL, NULL, 0, NULL}
131320
};
132321

133322
static int
134323
skip_module_traverse(PyObject *m, visitproc visit, void *arg)
135324
{
136325
Py_VISIT(FoamFileDecodeError);
326+
Py_VISIT(ParseError);
137327
return 0;
138328
}
139329

140330
static int
141331
skip_module_clear(PyObject *m)
142332
{
143333
Py_CLEAR(FoamFileDecodeError);
334+
Py_CLEAR(ParseError);
144335
return 0;
145336
}
146337

@@ -167,6 +358,9 @@ PyInit__skip_ext(void)
167358
{
168359
PyObject *module;
169360

361+
/* Initialize lookup tables */
362+
init_lookup_tables();
363+
170364
module = PyModule_Create(&skip_module);
171365
if (module == NULL) {
172366
return NULL;
@@ -187,5 +381,20 @@ PyInit__skip_ext(void)
187381
return NULL;
188382
}
189383

384+
/* Create a simple ParseError exception class that can hold attributes.
385+
The Python code in _parser.py will define a proper ParseError class
386+
and _parse_number will be updated to import it from there. */
387+
ParseError = PyErr_NewException("_skip_ext.ParseError", PyExc_Exception, NULL);
388+
if (ParseError == NULL) {
389+
Py_DECREF(module);
390+
return NULL;
391+
}
392+
Py_INCREF(ParseError);
393+
if (PyModule_AddObject(module, "ParseError", ParseError) < 0) {
394+
Py_DECREF(ParseError);
395+
Py_DECREF(module);
396+
return NULL;
397+
}
398+
190399
return module;
191400
}

0 commit comments

Comments
 (0)