Skip to content

Commit

Permalink
gh-127787: allow retrieving the clipped slice length in `_PyUnicodeEr…
Browse files Browse the repository at this point in the history
…ror_GetParams` (GH-128980)
  • Loading branch information
picnixz authored Jan 21, 2025
1 parent bf150f6 commit 36f341c
Show file tree
Hide file tree
Showing 2 changed files with 110 additions and 15 deletions.
2 changes: 1 addition & 1 deletion Include/internal/pycore_pyerrors.h
Original file line number Diff line number Diff line change
Expand Up @@ -196,9 +196,9 @@ extern int _PyUnicodeError_GetParams(
Py_ssize_t *objlen,
Py_ssize_t *start,
Py_ssize_t *end,
Py_ssize_t *slen,
int as_bytes);


#ifdef __cplusplus
}
#endif
Expand Down
123 changes: 109 additions & 14 deletions Objects/exceptions.c
Original file line number Diff line number Diff line change
Expand Up @@ -2954,8 +2954,10 @@ unicode_error_set_end_impl(PyObject *self, Py_ssize_t end)
* The 'start' can be negative or not, but when adjusting the value,
* we clip it in [0, max(0, objlen - 1)] and do not interpret it as
* a relative offset.
*
* This function always succeeds.
*/
static inline Py_ssize_t
static Py_ssize_t
unicode_error_adjust_start(Py_ssize_t start, Py_ssize_t objlen)
{
assert(objlen >= 0);
Expand All @@ -2969,14 +2971,34 @@ unicode_error_adjust_start(Py_ssize_t start, Py_ssize_t objlen)
}


/* Assert some properties of the adjusted 'start' value. */
#ifndef NDEBUG
static void
assert_adjusted_unicode_error_start(Py_ssize_t start, Py_ssize_t objlen)
{
assert(objlen >= 0);
/* in the future, `min_start` may be something else */
Py_ssize_t min_start = 0;
assert(start >= min_start);
/* in the future, `max_start` may be something else */
Py_ssize_t max_start = Py_MAX(min_start, objlen - 1);
assert(start <= max_start);
}
#else
#define assert_adjusted_unicode_error_start(...)
#endif


/*
* Adjust the (exclusive) 'end' value of a UnicodeError object.
*
* The 'end' can be negative or not, but when adjusting the value,
* we clip it in [min(1, objlen), max(min(1, objlen), objlen)] and
* do not interpret it as a relative offset.
*
* This function always succeeds.
*/
static inline Py_ssize_t
static Py_ssize_t
unicode_error_adjust_end(Py_ssize_t end, Py_ssize_t objlen)
{
assert(objlen >= 0);
Expand All @@ -2990,6 +3012,59 @@ unicode_error_adjust_end(Py_ssize_t end, Py_ssize_t objlen)
}


/* Assert some properties of the adjusted 'end' value. */
#ifndef NDEBUG
static void
assert_adjusted_unicode_error_end(Py_ssize_t end, Py_ssize_t objlen)
{
assert(objlen >= 0);
/* in the future, `min_end` may be something else */
Py_ssize_t min_end = Py_MIN(1, objlen);
assert(end >= min_end);
/* in the future, `max_end` may be something else */
Py_ssize_t max_end = Py_MAX(min_end, objlen);
assert(end <= max_end);
}
#else
#define assert_adjusted_unicode_error_end(...)
#endif


/*
* Adjust the length of the range described by a UnicodeError object.
*
* The 'start' and 'end' arguments must have been obtained by
* unicode_error_adjust_start() and unicode_error_adjust_end().
*
* The result is clipped in [0, objlen]. By construction, it
* will always be smaller than 'objlen' as 'start' and 'end'
* are smaller than 'objlen'.
*/
static Py_ssize_t
unicode_error_adjust_len(Py_ssize_t start, Py_ssize_t end, Py_ssize_t objlen)
{
assert_adjusted_unicode_error_start(start, objlen);
assert_adjusted_unicode_error_end(end, objlen);
Py_ssize_t ranlen = end - start;
assert(ranlen <= objlen);
return ranlen < 0 ? 0 : ranlen;
}


/* Assert some properties of the adjusted range 'len' value. */
#ifndef NDEBUG
static void
assert_adjusted_unicode_error_len(Py_ssize_t ranlen, Py_ssize_t objlen)
{
assert(objlen >= 0);
assert(ranlen >= 0);
assert(ranlen <= objlen);
}
#else
#define assert_adjusted_unicode_error_len(...)
#endif


/*
* Get various common parameters of a UnicodeError object.
*
Expand All @@ -3004,22 +3079,24 @@ unicode_error_adjust_end(Py_ssize_t end, Py_ssize_t objlen)
* objlen The 'object' length.
* start The clipped 'start' attribute.
* end The clipped 'end' attribute.
* slen The length of the slice described by the clipped 'start'
* and 'end' values. It always lies in [0, objlen].
*
* An output parameter can be NULL to indicate that
* the corresponding value does not need to be stored.
*
* Input parameter:
*
* as_bytes If 1, the error's 'object' attribute must be a bytes object,
* i.e. the call is for a `UnicodeDecodeError`. Otherwise, the
* 'object' attribute must be a string.
* as_bytes If true, the error's 'object' attribute must be a `bytes`,
* i.e. 'self' is a `UnicodeDecodeError` instance. Otherwise,
* the 'object' attribute must be a string.
*
* A TypeError is raised if the 'object' type is incompatible.
*/
int
_PyUnicodeError_GetParams(PyObject *self,
PyObject **obj, Py_ssize_t *objlen,
Py_ssize_t *start, Py_ssize_t *end,
Py_ssize_t *start, Py_ssize_t *end, Py_ssize_t *slen,
int as_bytes)
{
assert(self != NULL);
Expand All @@ -3034,16 +3111,30 @@ _PyUnicodeError_GetParams(PyObject *self,
if (objlen != NULL) {
*objlen = n;
}

Py_ssize_t start_value = -1;
if (start != NULL || slen != NULL) {
start_value = unicode_error_adjust_start(exc->start, n);
}
if (start != NULL) {
*start = unicode_error_adjust_start(exc->start, n);
assert(*start >= 0);
assert(*start <= n);
assert_adjusted_unicode_error_start(start_value, n);
*start = start_value;
}

Py_ssize_t end_value = -1;
if (end != NULL || slen != NULL) {
end_value = unicode_error_adjust_end(exc->end, n);
}
if (end != NULL) {
*end = unicode_error_adjust_end(exc->end, n);
assert(*end >= 0);
assert(*end <= n);
assert_adjusted_unicode_error_end(end_value, n);
*end = end_value;
}

if (slen != NULL) {
*slen = unicode_error_adjust_len(start_value, end_value, n);
assert_adjusted_unicode_error_len(*slen, n);
}

if (obj != NULL) {
*obj = r;
}
Expand Down Expand Up @@ -3111,7 +3202,9 @@ static inline int
unicode_error_get_start_impl(PyObject *self, Py_ssize_t *start, int as_bytes)
{
assert(self != NULL);
return _PyUnicodeError_GetParams(self, NULL, NULL, start, NULL, as_bytes);
return _PyUnicodeError_GetParams(self, NULL, NULL,
start, NULL, NULL,
as_bytes);
}


Expand Down Expand Up @@ -3177,7 +3270,9 @@ static inline int
unicode_error_get_end_impl(PyObject *self, Py_ssize_t *end, int as_bytes)
{
assert(self != NULL);
return _PyUnicodeError_GetParams(self, NULL, NULL, NULL, end, as_bytes);
return _PyUnicodeError_GetParams(self, NULL, NULL,
NULL, end, NULL,
as_bytes);
}


Expand Down

0 comments on commit 36f341c

Please sign in to comment.