From 5890e40365b12ef5b965eaa570bc426a69cb486e Mon Sep 17 00:00:00 2001 From: wiredfool Date: Sat, 24 Aug 2024 13:41:23 +0100 Subject: [PATCH 01/55] WIP - Initial Pillow->Arrow support * Fixed format, only for 4 channel images --- setup.py | 1 + src/PIL/Image.py | 7 ++ src/_imaging.c | 42 ++++++++++++ src/libImaging/Arrow.c | 139 +++++++++++++++++++++++++++++++++++++++ src/libImaging/Arrow.h | 48 ++++++++++++++ src/libImaging/Imaging.h | 11 ++++ src/libImaging/Storage.c | 7 ++ 7 files changed, 255 insertions(+) create mode 100644 src/libImaging/Arrow.c create mode 100644 src/libImaging/Arrow.h diff --git a/setup.py b/setup.py index a85731db936..80289e0c219 100644 --- a/setup.py +++ b/setup.py @@ -64,6 +64,7 @@ def get_version() -> str: _LIB_IMAGING = ( "Access", "AlphaComposite", + "Arrow", "Resample", "Reduce", "Bands", diff --git a/src/PIL/Image.py b/src/PIL/Image.py index dff3d063b13..963b00fe200 100644 --- a/src/PIL/Image.py +++ b/src/PIL/Image.py @@ -748,6 +748,13 @@ def __array_interface__(self) -> dict[str, str | bytes | int | tuple[int, ...]]: new["shape"], new["typestr"] = _conv_type_shape(self) return new + + def __arrow_c_schema__(self) -> object: + return self.im.__arrow_c_schema__() + + def __arrow_c_array__(self, requested_schema: object | None = None) -> Tuple[object, object]: + return (self.im.__arrow_c_schema__(), self.im.__arrow_c_array__()) + def __getstate__(self) -> list[Any]: im_data = self.tobytes() # load image first return [self.info, self.mode, self.size, self.getpalette(), im_data] diff --git a/src/_imaging.c b/src/_imaging.c index 5d6d97bedab..eccdcb9e854 100644 --- a/src/_imaging.c +++ b/src/_imaging.c @@ -89,6 +89,7 @@ #endif #include "libImaging/Imaging.h" +#include "libImaging/Arrow.h" #define _USE_MATH_DEFINES #include @@ -223,6 +224,43 @@ PyImaging_GetBuffer(PyObject *buffer, Py_buffer *view) { return PyObject_GetBuffer(buffer, view, PyBUF_SIMPLE); } +/* -------------------------------------------------------------------- */ +/* Arrow HANDLING */ +/* -------------------------------------------------------------------- */ + +void ReleaseArrowSchemaPyCapsule(PyObject* capsule) { + struct ArrowSchema* schema = + (struct ArrowSchema*)PyCapsule_GetPointer(capsule, "arrow_schema"); + if (schema->release != NULL) { + schema->release(schema); + } + free(schema); +} + +PyObject* ExportArrowSchemaPyCapsule(ImagingObject *self) { + struct ArrowSchema* schema = + (struct ArrowSchema*)malloc(sizeof(struct ArrowSchema)); + export_uint32_type(schema); + return PyCapsule_New(schema, "arrow_schema", ReleaseArrowSchemaPyCapsule); +} + +void ReleaseArrowArrayPyCapsule(PyObject* capsule) { + struct ArrowArray* array = + (struct ArrowArray*)PyCapsule_GetPointer(capsule, "arrow_array"); + if (array->release != NULL) { + array->release(array); + } + free(array); +} + +PyObject* ExportArrowArrayPyCapsule(ImagingObject *self) { + struct ArrowArray* array = + (struct ArrowArray*)malloc(sizeof(struct ArrowArray)); + export_imaging_array(self->image, array); + return PyCapsule_New(array, "arrow_array", ReleaseArrowArrayPyCapsule); +} + + /* -------------------------------------------------------------------- */ /* EXCEPTION REROUTING */ /* -------------------------------------------------------------------- */ @@ -3678,6 +3716,10 @@ static struct PyMethodDef methods[] = { /* Misc. */ {"save_ppm", (PyCFunction)_save_ppm, METH_VARARGS}, + /* arrow */ + {"__arrow_c_schema__", (PyCFunction)ExportArrowSchemaPyCapsule, METH_VARARGS}, + {"__arrow_c_array__", (PyCFunction)ExportArrowArrayPyCapsule, METH_VARARGS}, + {NULL, NULL} /* sentinel */ }; diff --git a/src/libImaging/Arrow.c b/src/libImaging/Arrow.c new file mode 100644 index 00000000000..3a6d9dd605d --- /dev/null +++ b/src/libImaging/Arrow.c @@ -0,0 +1,139 @@ + +#include "Arrow.h" +#include "Imaging.h" + +/* struct ArrowSchema* */ +/* _arrow_schema_channel(char* channel, char* format) { */ + +/* } */ + +static void +ReleaseExportedSchema(struct ArrowSchema* array) { + // This should not be called on already released array + //assert(array->release != NULL); + + // Release children + for (int64_t i = 0; i < array->n_children; ++i) { + struct ArrowSchema* child = array->children[i]; + if (child->release != NULL) { + child->release(child); + //assert(child->release == NULL); + } + } + + // Release dictionary + struct ArrowSchema* dict = array->dictionary; + if (dict != NULL && dict->release != NULL) { + dict->release(dict); + //assert(dict->release == NULL); + } + + // TODO here: release and/or deallocate all data directly owned by + // the ArrowArray struct, such as the private_data. + + // Mark array released + array->release = NULL; +} + + +static void release_uint32_type(struct ArrowSchema* schema) { + // Mark released + schema->release = NULL; +} + +void export_uint32_type(struct ArrowSchema* schema) { + *schema = (struct ArrowSchema) { + // Type description + .format = "I", + .name = "", + .metadata = NULL, + .flags = 0, + .n_children = 0, + .children = NULL, + .dictionary = NULL, + // Bookkeeping + .release = &release_uint32_type + }; +} + +static void release_uint32_array(struct ArrowArray* array) { + //assert(array->n_buffers == 2); + // Free the buffers and the buffers array + free((void *) array->buffers[1]); + free(array->buffers); + // Mark released + array->release = NULL; +} + +void export_uint32_array(const uint32_t* data, int64_t nitems, + struct ArrowArray* array) { + // Initialize primitive fields + *array = (struct ArrowArray) { + // Data description + .length = nitems, + .offset = 0, + .null_count = 0, + .n_buffers = 2, + .n_children = 0, + .children = NULL, + .dictionary = NULL, + // Bookkeeping + .release = &release_uint32_array + }; + // Allocate list of buffers + array->buffers = (const void**) malloc(sizeof(void*) * array->n_buffers); + //assert(array->buffers != NULL); + array->buffers[0] = NULL; // no nulls, null bitmap can be omitted + array->buffers[1] = data; +} + +static void release_const_array(struct ArrowArray* array) { + Imaging im = (Imaging)array->private_data; + im->arrow_borrow--; + ImagingDelete(im); + + //assert(array->n_buffers == 2); + // Free the buffers and the buffers array + free(array->buffers); + // Mark released + array->release = NULL; +} + + +void export_imaging_array(Imaging im, struct ArrowArray* array) { + int length = im->xsize * im->ysize; + + /* undone -- for now, single block images */ + //assert (im->block_count = 0 || im->block_count = 1); + + if (im->lines_per_block && im->lines_per_block < im->ysize) { + length = im->xsize * im->lines_per_block; + } + + im->arrow_borrow++; + // Initialize primitive fields + *array = (struct ArrowArray) { + // Data description + .length = length, + .offset = 0, + .null_count = 0, + .n_buffers = 2, + .n_children = 0, + .children = NULL, + .dictionary = NULL, + // Bookkeeping + .release = &release_const_array, + .private_data = im + }; + + // Allocate list of buffers + array->buffers = (const void**) malloc(sizeof(void*) * array->n_buffers); + //assert(array->buffers != NULL); + array->buffers[0] = NULL; // no nulls, null bitmap can be omitted + + if (im->block) { + array->buffers[1] = im->block; + } else { + array->buffers[1] = im->blocks[0].ptr; + } +} diff --git a/src/libImaging/Arrow.h b/src/libImaging/Arrow.h new file mode 100644 index 00000000000..758be27b06d --- /dev/null +++ b/src/libImaging/Arrow.h @@ -0,0 +1,48 @@ +#include +#include + +// Apache License 2.0. +// Source apache arrow project +// https://arrow.apache.org/docs/format/CDataInterface.html + +#ifndef ARROW_C_DATA_INTERFACE +#define ARROW_C_DATA_INTERFACE + +#define ARROW_FLAG_DICTIONARY_ORDERED 1 +#define ARROW_FLAG_NULLABLE 2 +#define ARROW_FLAG_MAP_KEYS_SORTED 4 + +struct ArrowSchema { + // Array type description + const char* format; + const char* name; + const char* metadata; + int64_t flags; + int64_t n_children; + struct ArrowSchema** children; + struct ArrowSchema* dictionary; + + // Release callback + void (*release)(struct ArrowSchema*); + // Opaque producer-specific data + void* private_data; +}; + +struct ArrowArray { + // Array data description + int64_t length; + int64_t null_count; + int64_t offset; + int64_t n_buffers; + int64_t n_children; + const void** buffers; + struct ArrowArray** children; + struct ArrowArray* dictionary; + + // Release callback + void (*release)(struct ArrowArray*); + // Opaque producer-specific data + void* private_data; +}; + +#endif // ARROW_C_DATA_INTERFACE diff --git a/src/libImaging/Imaging.h b/src/libImaging/Imaging.h index 31052c68a97..a47ebedf0d7 100644 --- a/src/libImaging/Imaging.h +++ b/src/libImaging/Imaging.h @@ -104,6 +104,11 @@ struct ImagingMemoryInstance { /* Virtual methods */ void (*destroy)(Imaging im); + + /* arrow */ + int arrow_borrow; /* Number of arrow arrays that have been allocated */ + int blocks_count; /* Number of blocks that have been allocated */ + int lines_per_block; /* Number of lines in a block have been allocated */ }; #define IMAGING_PIXEL_1(im, x, y) ((im)->image8[(y)][(x)]) @@ -702,6 +707,12 @@ _imaging_seek_pyFd(PyObject *fd, Py_ssize_t offset, int whence); extern Py_ssize_t _imaging_tell_pyFd(PyObject *fd); +/* Arrow */ + +#include "Arrow.h" +extern void export_imaging_array(Imaging im, struct ArrowArray* array); +extern void export_uint32_type(struct ArrowSchema* schema); + /* Errcodes */ #define IMAGING_CODEC_END 1 #define IMAGING_CODEC_OVERRUN -1 diff --git a/src/libImaging/Storage.c b/src/libImaging/Storage.c index 522e9f37557..1e3d6fce0f7 100644 --- a/src/libImaging/Storage.c +++ b/src/libImaging/Storage.c @@ -240,6 +240,11 @@ ImagingDelete(Imaging im) { if (im->palette) { ImagingPaletteDelete(im->palette); + im->palette = NULL; + } + + if (im->arrow_borrow) { + return; } if (im->destroy) { @@ -396,11 +401,13 @@ ImagingAllocateArray(Imaging im, ImagingMemoryArena arena, int dirty, int block_ if (lines_per_block == 0) { lines_per_block = 1; } + im->lines_per_block = lines_per_block; blocks_count = (im->ysize + lines_per_block - 1) / lines_per_block; // printf("NEW size: %dx%d, ls: %d, lpb: %d, blocks: %d\n", // im->xsize, im->ysize, aligned_linesize, lines_per_block, blocks_count); /* One extra pointer is always NULL */ + im->blocks_count = blocks_count; im->blocks = calloc(sizeof(*im->blocks), blocks_count + 1); if (!im->blocks) { return (Imaging)ImagingError_MemoryError(); From d44212d650d9ae876170450dc6c0db508c0c305c Mon Sep 17 00:00:00 2001 From: wiredfool Date: Sat, 24 Aug 2024 16:18:27 +0100 Subject: [PATCH 02/55] WIP - Non working struct encoding * structs can't be encoded this way, they need to have one child array per struct member. * i.e. struct of arrays, rather than an array of structs. --- src/PIL/Image.py | 2 + src/_imaging.c | 6 +-- src/libImaging/Arrow.c | 91 +++++++++++++++++++++++++++++++++++++++- src/libImaging/Imaging.h | 4 ++ src/libImaging/Storage.c | 60 ++++++++++++++++++++++++++ 5 files changed, 158 insertions(+), 5 deletions(-) diff --git a/src/PIL/Image.py b/src/PIL/Image.py index 963b00fe200..49bd19ae449 100644 --- a/src/PIL/Image.py +++ b/src/PIL/Image.py @@ -750,9 +750,11 @@ def __array_interface__(self) -> dict[str, str | bytes | int | tuple[int, ...]]: def __arrow_c_schema__(self) -> object: + self.load() return self.im.__arrow_c_schema__() def __arrow_c_array__(self, requested_schema: object | None = None) -> Tuple[object, object]: + self.load() return (self.im.__arrow_c_schema__(), self.im.__arrow_c_array__()) def __getstate__(self) -> list[Any]: diff --git a/src/_imaging.c b/src/_imaging.c index eccdcb9e854..eea719a5f91 100644 --- a/src/_imaging.c +++ b/src/_imaging.c @@ -239,8 +239,8 @@ void ReleaseArrowSchemaPyCapsule(PyObject* capsule) { PyObject* ExportArrowSchemaPyCapsule(ImagingObject *self) { struct ArrowSchema* schema = - (struct ArrowSchema*)malloc(sizeof(struct ArrowSchema)); - export_uint32_type(schema); + (struct ArrowSchema*)calloc(1, sizeof(struct ArrowSchema)); + export_imaging_schema(self->image, schema); return PyCapsule_New(schema, "arrow_schema", ReleaseArrowSchemaPyCapsule); } @@ -255,7 +255,7 @@ void ReleaseArrowArrayPyCapsule(PyObject* capsule) { PyObject* ExportArrowArrayPyCapsule(ImagingObject *self) { struct ArrowArray* array = - (struct ArrowArray*)malloc(sizeof(struct ArrowArray)); + (struct ArrowArray*)calloc(1, sizeof(struct ArrowArray)); export_imaging_array(self->image, array); return PyCapsule_New(array, "arrow_array", ReleaseArrowArrayPyCapsule); } diff --git a/src/libImaging/Arrow.c b/src/libImaging/Arrow.c index 3a6d9dd605d..550fb07d812 100644 --- a/src/libImaging/Arrow.c +++ b/src/libImaging/Arrow.c @@ -1,6 +1,7 @@ #include "Arrow.h" #include "Imaging.h" +#include /* struct ArrowSchema* */ /* _arrow_schema_channel(char* channel, char* format) { */ @@ -12,6 +13,18 @@ ReleaseExportedSchema(struct ArrowSchema* array) { // This should not be called on already released array //assert(array->release != NULL); + if (!array->release) { + return; + } + if (array->format) { + free((void*)array->format); + array->format = NULL; + } + if (array->name) { + free((void*)array->name); + array->name = NULL; + } + // Release children for (int64_t i = 0; i < array->n_children; ++i) { struct ArrowSchema* child = array->children[i]; @@ -36,7 +49,80 @@ ReleaseExportedSchema(struct ArrowSchema* array) { } -static void release_uint32_type(struct ArrowSchema* schema) { + +int export_named_type(struct ArrowSchema* schema, + char* format, + char* name) { + + char* formatp; + char* namep; + size_t format_len = strlen(format) + 1; + size_t name_len = strlen(name) + 1; + + formatp = calloc(format_len, 1); + + if (!formatp) { + return 1; + } + + namep = calloc(name_len, 1); + if (!namep){ + free(formatp); + return 1; + } + + strlcpy(formatp, format, format_len); + strlcpy(namep, name, name_len); + + *schema = (struct ArrowSchema) { + // Type description + .format = formatp, + .name = namep, + .metadata = NULL, + .flags = 0, + .n_children = 0, + .children = NULL, + .dictionary = NULL, + // Bookkeeping + .release = &ReleaseExportedSchema + }; + return 0; +} + +int export_imaging_schema(Imaging im, struct ArrowSchema* schema) { + int retval = 0; + + if (strcmp(im->arrow_band_format, "") == 0) { + return 1; + } + + if (im->bands == 1) { + return export_named_type(schema, im->arrow_band_format, im->band_names[0]); + } + + retval = export_named_type(schema, "+s", ""); + if (retval) { + return retval; + } + // if it's not 1 band, it's an int32 at the moment. 4 unint8 bands. + schema->n_children = 4; + schema->children = calloc(4, sizeof(struct ArrowSchema*)); + for (int i=0; i<4; i++) { + schema->children[i] = + (struct ArrowSchema*)calloc(1, sizeof(struct ArrowSchema)); + if (export_named_type(schema->children[i], im->arrow_band_format, im->band_names[i])) { + /* error recovery */ + for (int j=i-1; i>=0; i--) { + schema->children[j]->release(schema->children[j]); + free(schema->children[j]); + } + return 2; + } + } + return 0; +} + +static void release_simple_type(struct ArrowSchema* schema) { // Mark released schema->release = NULL; } @@ -52,10 +138,11 @@ void export_uint32_type(struct ArrowSchema* schema) { .children = NULL, .dictionary = NULL, // Bookkeeping - .release = &release_uint32_type + .release = &release_simple_type }; } + static void release_uint32_array(struct ArrowArray* array) { //assert(array->n_buffers == 2); // Free the buffers and the buffers array diff --git a/src/libImaging/Imaging.h b/src/libImaging/Imaging.h index a47ebedf0d7..79c24fd3568 100644 --- a/src/libImaging/Imaging.h +++ b/src/libImaging/Imaging.h @@ -109,6 +109,9 @@ struct ImagingMemoryInstance { int arrow_borrow; /* Number of arrow arrays that have been allocated */ int blocks_count; /* Number of blocks that have been allocated */ int lines_per_block; /* Number of lines in a block have been allocated */ + + char band_names[4][3]; /* names of bands, max 2 char + null terminator */ + char arrow_band_format[2]; /* single character + null terminator */ }; #define IMAGING_PIXEL_1(im, x, y) ((im)->image8[(y)][(x)]) @@ -711,6 +714,7 @@ _imaging_tell_pyFd(PyObject *fd); #include "Arrow.h" extern void export_imaging_array(Imaging im, struct ArrowArray* array); +extern int export_imaging_schema(Imaging im, struct ArrowSchema* schema); extern void export_uint32_type(struct ArrowSchema* schema); /* Errcodes */ diff --git a/src/libImaging/Storage.c b/src/libImaging/Storage.c index 1e3d6fce0f7..49e5453676a 100644 --- a/src/libImaging/Storage.c +++ b/src/libImaging/Storage.c @@ -60,17 +60,20 @@ ImagingNewPrologueSubtype(const char *mode, int xsize, int ysize, int size) { im->ysize = ysize; im->type = IMAGING_TYPE_UINT8; + strcpy(im->arrow_band_format, "C"); if (strcmp(mode, "1") == 0) { /* 1-bit images */ im->bands = im->pixelsize = 1; im->linesize = xsize; + strcpy(im->band_names[0],"1"); } else if (strcmp(mode, "P") == 0) { /* 8-bit palette mapped images */ im->bands = im->pixelsize = 1; im->linesize = xsize; im->palette = ImagingPaletteNew("RGB"); + strcpy(im->band_names[0],"P"); } else if (strcmp(mode, "PA") == 0) { /* 8-bit palette with alpha */ @@ -78,23 +81,36 @@ ImagingNewPrologueSubtype(const char *mode, int xsize, int ysize, int size) { im->pixelsize = 4; /* store in image32 memory */ im->linesize = xsize * 4; im->palette = ImagingPaletteNew("RGB"); + strcpy(im->band_names[0],"P"); + strcpy(im->band_names[1],"X"); + strcpy(im->band_names[2],"X"); + strcpy(im->band_names[3],"A"); } else if (strcmp(mode, "L") == 0) { /* 8-bit grayscale (luminance) images */ im->bands = im->pixelsize = 1; im->linesize = xsize; + strcpy(im->band_names[0],"L"); } else if (strcmp(mode, "LA") == 0) { /* 8-bit grayscale (luminance) with alpha */ im->bands = 2; im->pixelsize = 4; /* store in image32 memory */ im->linesize = xsize * 4; + strcpy(im->band_names[0],"L"); + strcpy(im->band_names[1],"X"); + strcpy(im->band_names[2],"X"); + strcpy(im->band_names[3],"A"); } else if (strcmp(mode, "La") == 0) { /* 8-bit grayscale (luminance) with premultiplied alpha */ im->bands = 2; im->pixelsize = 4; /* store in image32 memory */ im->linesize = xsize * 4; + strcpy(im->band_names[0],"L"); + strcpy(im->band_names[1],"X"); + strcpy(im->band_names[2],"X"); + strcpy(im->band_names[3],"a"); } else if (strcmp(mode, "F") == 0) { /* 32-bit floating point images */ @@ -102,6 +118,8 @@ ImagingNewPrologueSubtype(const char *mode, int xsize, int ysize, int size) { im->pixelsize = 4; im->linesize = xsize * 4; im->type = IMAGING_TYPE_FLOAT32; + strcpy(im->arrow_band_format , "f"); + strcpy(im->band_names[0],"F"); } else if (strcmp(mode, "I") == 0) { /* 32-bit integer images */ @@ -109,6 +127,8 @@ ImagingNewPrologueSubtype(const char *mode, int xsize, int ysize, int size) { im->pixelsize = 4; im->linesize = xsize * 4; im->type = IMAGING_TYPE_INT32; + strcpy(im->arrow_band_format , "i"); + strcpy(im->band_names[0],"I"); } else if (strcmp(mode, "I;16") == 0 || strcmp(mode, "I;16L") == 0 || strcmp(mode, "I;16B") == 0 || strcmp(mode, "I;16N") == 0) { @@ -118,12 +138,18 @@ ImagingNewPrologueSubtype(const char *mode, int xsize, int ysize, int size) { im->pixelsize = 2; im->linesize = xsize * 2; im->type = IMAGING_TYPE_SPECIAL; + strcpy(im->arrow_band_format , "s"); + strcpy(im->band_names[0],"I"); } else if (strcmp(mode, "RGB") == 0) { /* 24-bit true colour images */ im->bands = 3; im->pixelsize = 4; im->linesize = xsize * 4; + strcpy(im->band_names[0],"R"); + strcpy(im->band_names[1],"G"); + strcpy(im->band_names[2],"B"); + strcpy(im->band_names[3],"X"); } else if (strcmp(mode, "BGR;15") == 0) { /* EXPERIMENTAL */ @@ -132,6 +158,8 @@ ImagingNewPrologueSubtype(const char *mode, int xsize, int ysize, int size) { im->pixelsize = 2; im->linesize = (xsize * 2 + 3) & -4; im->type = IMAGING_TYPE_SPECIAL; + /* not allowing arrow due to line length packing */ + strcpy(im->arrow_band_format , ""); } else if (strcmp(mode, "BGR;16") == 0) { /* EXPERIMENTAL */ @@ -140,6 +168,8 @@ ImagingNewPrologueSubtype(const char *mode, int xsize, int ysize, int size) { im->pixelsize = 2; im->linesize = (xsize * 2 + 3) & -4; im->type = IMAGING_TYPE_SPECIAL; + /* not allowing arrow due to line length packing */ + strcpy(im->arrow_band_format , ""); } else if (strcmp(mode, "BGR;24") == 0) { /* EXPERIMENTAL */ @@ -148,32 +178,54 @@ ImagingNewPrologueSubtype(const char *mode, int xsize, int ysize, int size) { im->pixelsize = 3; im->linesize = (xsize * 3 + 3) & -4; im->type = IMAGING_TYPE_SPECIAL; + /* not allowing arrow due to line length packing */ + strcpy(im->arrow_band_format , ""); } else if (strcmp(mode, "RGBX") == 0) { /* 32-bit true colour images with padding */ im->bands = im->pixelsize = 4; im->linesize = xsize * 4; + strcpy(im->band_names[0],"R"); + strcpy(im->band_names[1],"G"); + strcpy(im->band_names[2],"B"); + strcpy(im->band_names[3],"X"); } else if (strcmp(mode, "RGBA") == 0) { /* 32-bit true colour images with alpha */ im->bands = im->pixelsize = 4; im->linesize = xsize * 4; + strcpy(im->band_names[0],"R"); + strcpy(im->band_names[1],"G"); + strcpy(im->band_names[2],"B"); + strcpy(im->band_names[3],"A"); } else if (strcmp(mode, "RGBa") == 0) { /* 32-bit true colour images with premultiplied alpha */ im->bands = im->pixelsize = 4; im->linesize = xsize * 4; + strcpy(im->band_names[0],"R"); + strcpy(im->band_names[1],"G"); + strcpy(im->band_names[2],"B"); + strcpy(im->band_names[3],"a"); } else if (strcmp(mode, "CMYK") == 0) { /* 32-bit colour separation */ im->bands = im->pixelsize = 4; im->linesize = xsize * 4; + strcpy(im->band_names[0],"C"); + strcpy(im->band_names[1],"M"); + strcpy(im->band_names[2],"Y"); + strcpy(im->band_names[3],"K"); } else if (strcmp(mode, "YCbCr") == 0) { /* 24-bit video format */ im->bands = 3; im->pixelsize = 4; im->linesize = xsize * 4; + strcpy(im->band_names[0],"Y"); + strcpy(im->band_names[1],"Cb"); + strcpy(im->band_names[2],"Cr"); + strcpy(im->band_names[3],"X"); } else if (strcmp(mode, "LAB") == 0) { /* 24-bit color, luminance, + 2 color channels */ @@ -181,6 +233,10 @@ ImagingNewPrologueSubtype(const char *mode, int xsize, int ysize, int size) { im->bands = 3; im->pixelsize = 4; im->linesize = xsize * 4; + strcpy(im->band_names[0],"L"); + strcpy(im->band_names[1],"a"); + strcpy(im->band_names[2],"b"); + strcpy(im->band_names[3],"X"); } else if (strcmp(mode, "HSV") == 0) { /* 24-bit color, luminance, + 2 color channels */ @@ -188,6 +244,10 @@ ImagingNewPrologueSubtype(const char *mode, int xsize, int ysize, int size) { im->bands = 3; im->pixelsize = 4; im->linesize = xsize * 4; + strcpy(im->band_names[0],"H"); + strcpy(im->band_names[1],"S"); + strcpy(im->band_names[2],"V"); + strcpy(im->band_names[3],"X"); } else { free(im); From bdd4b3ae0cea4fe35314f6bf0acfdecd10b2c2f7 Mon Sep 17 00:00:00 2001 From: wiredfool Date: Sat, 24 Aug 2024 16:54:55 +0100 Subject: [PATCH 03/55] Export as fixed width pixels, or just pixel values for single channel. --- src/libImaging/Arrow.c | 104 +++++++++++++++++++++++++++++++++------ src/libImaging/Imaging.h | 2 +- 2 files changed, 90 insertions(+), 16 deletions(-) diff --git a/src/libImaging/Arrow.c b/src/libImaging/Arrow.c index 550fb07d812..d3af74c8033 100644 --- a/src/libImaging/Arrow.c +++ b/src/libImaging/Arrow.c @@ -100,24 +100,18 @@ int export_imaging_schema(Imaging im, struct ArrowSchema* schema) { return export_named_type(schema, im->arrow_band_format, im->band_names[0]); } - retval = export_named_type(schema, "+s", ""); + retval = export_named_type(schema, "+w:4", ""); if (retval) { return retval; } // if it's not 1 band, it's an int32 at the moment. 4 unint8 bands. - schema->n_children = 4; - schema->children = calloc(4, sizeof(struct ArrowSchema*)); - for (int i=0; i<4; i++) { - schema->children[i] = - (struct ArrowSchema*)calloc(1, sizeof(struct ArrowSchema)); - if (export_named_type(schema->children[i], im->arrow_band_format, im->band_names[i])) { - /* error recovery */ - for (int j=i-1; i>=0; i--) { - schema->children[j]->release(schema->children[j]); - free(schema->children[j]); - } - return 2; - } + schema->n_children = 1; + schema->children = calloc(1, sizeof(struct ArrowSchema*)); + schema->children[0] = (struct ArrowSchema*)calloc(1, sizeof(struct ArrowSchema)); + if (export_named_type(schema->children[0], im->arrow_band_format, "pixel")) { + free(schema->children[0]); + schema->release(schema); + return 2; } return 0; } @@ -187,7 +181,7 @@ static void release_const_array(struct ArrowArray* array) { } -void export_imaging_array(Imaging im, struct ArrowArray* array) { +int export_single_channel_array(Imaging im, struct ArrowArray* array){ int length = im->xsize * im->ysize; /* undone -- for now, single block images */ @@ -223,4 +217,84 @@ void export_imaging_array(Imaging im, struct ArrowArray* array) { } else { array->buffers[1] = im->blocks[0].ptr; } + return 0; +} + + +int export_fixed_pixel_array(Imaging im, struct ArrowArray* array) { + + int length = im->xsize * im->ysize; + + /* undone -- for now, single block images */ + //assert (im->block_count = 0 || im->block_count = 1); + + if (im->lines_per_block && im->lines_per_block < im->ysize) { + length = im->xsize * im->lines_per_block; + } + + im->arrow_borrow++; + // Initialize primitive fields + // Fixed length arrays are 1 buffer of validity, and the length in pixels. + // Data is in a child array. + *array = (struct ArrowArray) { + // Data description + .length = length, + .offset = 0, + .null_count = 0, + .n_buffers = 1, + .n_children = 1, + .children = NULL, + .dictionary = NULL, + // Bookkeeping + .release = &release_const_array, + .private_data = im + }; + + // Allocate list of buffers + array->buffers = (const void**) calloc(1, sizeof(void*) * array->n_buffers); + //assert(array->buffers != NULL); + array->buffers[0] = NULL; // no nulls, null bitmap can be omitted + + + // if it's not 1 band, it's an int32 at the moment. 4 unint8 bands. + array->n_children = 1; + array->children = calloc(1, sizeof(struct ArrowArray*)); + array->children[0] = (struct ArrowArray*)calloc(1, sizeof(struct ArrowArray)); + + im->arrow_borrow++; + *array->children[0] = (struct ArrowArray) { + // Data description + .length = length * 4, + .offset = 0, + .null_count = 0, + .n_buffers = 2, + .n_children = 0, + .children = NULL, + .dictionary = NULL, + // Bookkeeping + .release = &release_const_array, + .private_data = im + }; + + array->children[0]->buffers = (const void**) calloc(2, sizeof(void*) * array->n_buffers); + + if (im->block) { + array->children[0]->buffers[1] = im->block; + } else { + array->children[0]->buffers[1] = im->blocks[0].ptr; + } + return 0; +} + + +int export_imaging_array(Imaging im, struct ArrowArray* array) { + if (strcmp(im->arrow_band_format, "") == 0) { + return 1; + } + + if (im->bands == 1) { + return export_single_channel_array(im, array); + } + + return export_fixed_pixel_array(im, array); } diff --git a/src/libImaging/Imaging.h b/src/libImaging/Imaging.h index 79c24fd3568..7f074292737 100644 --- a/src/libImaging/Imaging.h +++ b/src/libImaging/Imaging.h @@ -713,7 +713,7 @@ _imaging_tell_pyFd(PyObject *fd); /* Arrow */ #include "Arrow.h" -extern void export_imaging_array(Imaging im, struct ArrowArray* array); +extern int export_imaging_array(Imaging im, struct ArrowArray* array); extern int export_imaging_schema(Imaging im, struct ArrowSchema* schema); extern void export_uint32_type(struct ArrowSchema* schema); From 56780ceaae9cda18a0f0a405fc2ded483c3bb647 Mon Sep 17 00:00:00 2001 From: wiredfool Date: Sun, 25 Aug 2024 15:54:00 +0100 Subject: [PATCH 04/55] Tests, lifetime changes --- Tests/test_arrow.py | 65 ++++++++++++++++++++++++++++++++++++++++ pyproject.toml | 1 + src/libImaging/Arrow.c | 2 +- src/libImaging/Storage.c | 12 ++++---- 4 files changed, 74 insertions(+), 6 deletions(-) create mode 100644 Tests/test_arrow.py diff --git a/Tests/test_arrow.py b/Tests/test_arrow.py new file mode 100644 index 00000000000..86309b473d2 --- /dev/null +++ b/Tests/test_arrow.py @@ -0,0 +1,65 @@ +from __future__ import annotations + +import warnings + +import pytest + +from PIL import Image + +from .helper import assert_deep_equal, assert_image, hopper, skip_unless_feature + +from typing import Any # undone + +pyarrow = pytest.importorskip("pyarrow", reason="PyArrow not installed") + +TEST_IMAGE_SIZE = (10, 10) +from numbers import Number + + +def _test_img_equals_pyarray(img: Image.Image, arr: Any, mask) -> None: + assert img.height * img.width == len(arr) + px = img.load() + assert px is not None + for x in range(0, img.size[0], int(img.size[0] / 10)): + for y in range(0, img.size[1], int(img.size[1] / 10)): + if mask: + for ix, elt in enumerate(mask): + assert px[x,y][ix] == arr[y * img.width + x].as_py()[elt] + else: + assert_deep_equal(px[x, y], arr[y * img.width + x].as_py()) + + +# really hard to get a non-nullable list type +fl_uint8_4_type = pyarrow.field("_", + pyarrow.list_( + pyarrow.field("_", + pyarrow.uint8() + ).with_nullable(False) + ,4) + ).type + +@pytest.mark.parametrize( + "mode, dtype, mask", + ( + ("L", pyarrow.uint8(), None), + ("I", pyarrow.int32(), None), + ("F", pyarrow.float32(), None), + ("LA", fl_uint8_4_type, [0,3]), + ("RGB", fl_uint8_4_type, [0,1,2]), + ("RGBA", fl_uint8_4_type, None), + ("RGBX", fl_uint8_4_type, None), + ("CMYK", fl_uint8_4_type, None), + ("YCbCr", fl_uint8_4_type, [0,1,2]), + ("HSV", fl_uint8_4_type, [0,1,2]), + ), +) +def test_to_array(mode: str, dtype: Any, mask: Any ) -> None: + img = hopper(mode) + + # Resize to non-square + img = img.crop((3, 0, 124, 127)) + assert img.size == (121, 127) + + arr = pyarrow.array(img) + _test_img_equals_pyarray(img, arr, mask) + assert arr.type == dtype diff --git a/pyproject.toml b/pyproject.toml index 2c6c7bcd077..2a048f7753e 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -66,6 +66,7 @@ optional-dependencies.tests = [ "pytest-cov", "pytest-timeout", "trove-classifiers>=2024.10.12", + "pyarrow", ] optional-dependencies.typing = [ "typing-extensions; python_version<'3.10'", diff --git a/src/libImaging/Arrow.c b/src/libImaging/Arrow.c index d3af74c8033..2e4d6a474aa 100644 --- a/src/libImaging/Arrow.c +++ b/src/libImaging/Arrow.c @@ -170,7 +170,7 @@ void export_uint32_array(const uint32_t* data, int64_t nitems, static void release_const_array(struct ArrowArray* array) { Imaging im = (Imaging)array->private_data; - im->arrow_borrow--; + ImagingDelete(im); //assert(array->n_buffers == 2); diff --git a/src/libImaging/Storage.c b/src/libImaging/Storage.c index 49e5453676a..5505d44d5e7 100644 --- a/src/libImaging/Storage.c +++ b/src/libImaging/Storage.c @@ -58,7 +58,7 @@ ImagingNewPrologueSubtype(const char *mode, int xsize, int ysize, int size) { /* Setup image descriptor */ im->xsize = xsize; im->ysize = ysize; - + im->arrow_borrow = 1; im->type = IMAGING_TYPE_UINT8; strcpy(im->arrow_band_format, "C"); @@ -298,15 +298,17 @@ ImagingDelete(Imaging im) { return; } + im->arrow_borrow--; + + if (im->arrow_borrow>0) { + return; + } + if (im->palette) { ImagingPaletteDelete(im->palette); im->palette = NULL; } - if (im->arrow_borrow) { - return; - } - if (im->destroy) { im->destroy(im); } From 6ec855e0b7512477fa87dca52f3f3148cce2f643 Mon Sep 17 00:00:00 2001 From: wiredfool Date: Sun, 25 Aug 2024 16:00:08 +0100 Subject: [PATCH 05/55] Lifetime check --- Tests/test_arrow.py | 38 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 38 insertions(+) diff --git a/Tests/test_arrow.py b/Tests/test_arrow.py index 86309b473d2..57b22443b12 100644 --- a/Tests/test_arrow.py +++ b/Tests/test_arrow.py @@ -63,3 +63,41 @@ def test_to_array(mode: str, dtype: Any, mask: Any ) -> None: arr = pyarrow.array(img) _test_img_equals_pyarray(img, arr, mask) assert arr.type == dtype + + +def test_lifetime(): + # valgrind shouldn't error out here. + # arrays should be accessible after the image is deleted. + + img = hopper('L') + + arr_1 = pyarrow.array(img) + arr_2 = pyarrow.array(img) + + del(img) + + assert arr_1.sum().as_py() > 0 + del(arr_1) + + assert arr_2.sum().as_py() > 0 + del(arr_2) + +def test_lifetime2(): + # valgrind shouldn't error out here. + # img should remain after the arrays are collected. + + img = hopper('L') + + arr_1 = pyarrow.array(img) + arr_2 = pyarrow.array(img) + + + assert arr_1.sum().as_py() > 0 + del(arr_1) + + assert arr_2.sum().as_py() > 0 + del(arr_2) + + img2 = img.copy() + px = img2.load() + assert isinstance(px[0,0], int) From e1ef083f603b26e04937803b882e672941d40488 Mon Sep 17 00:00:00 2001 From: wiredfool Date: Sun, 25 Aug 2024 16:14:36 +0100 Subject: [PATCH 06/55] fix macoxisim --- src/libImaging/Arrow.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/libImaging/Arrow.c b/src/libImaging/Arrow.c index 2e4d6a474aa..f9c06789854 100644 --- a/src/libImaging/Arrow.c +++ b/src/libImaging/Arrow.c @@ -71,8 +71,8 @@ int export_named_type(struct ArrowSchema* schema, return 1; } - strlcpy(formatp, format, format_len); - strlcpy(namep, name, name_len); + strncpy(formatp, format, format_len); + strncpy(namep, name, name_len); *schema = (struct ArrowSchema) { // Type description From 97eb7c09bad7ce19a959156c6d47a9c5ea650085 Mon Sep 17 00:00:00 2001 From: wiredfool Date: Tue, 21 Jan 2025 21:27:41 +0000 Subject: [PATCH 07/55] WIP -- First light of round trip of image -> arrow -> image * basic safety included * only respecting the types we emit --- Tests/test_arrow.py | 7 ++- src/PIL/Image.py | 20 ++++++++ src/_imaging.c | 34 ++++++++++++- src/libImaging/Imaging.h | 18 +++++-- src/libImaging/Storage.c | 104 +++++++++++++++++++++++++++++++++++++++ 5 files changed, 178 insertions(+), 5 deletions(-) diff --git a/Tests/test_arrow.py b/Tests/test_arrow.py index 57b22443b12..e8eff880db4 100644 --- a/Tests/test_arrow.py +++ b/Tests/test_arrow.py @@ -6,7 +6,7 @@ from PIL import Image -from .helper import assert_deep_equal, assert_image, hopper, skip_unless_feature +from .helper import assert_deep_equal, assert_image, hopper, skip_unless_feature, assert_image_equal from typing import Any # undone @@ -64,6 +64,11 @@ def test_to_array(mode: str, dtype: Any, mask: Any ) -> None: _test_img_equals_pyarray(img, arr, mask) assert arr.type == dtype + reloaded = Image.fromarrow(arr, mode, img.size) + + assert reloaded + + assert_image_equal(img, reloaded) def test_lifetime(): # valgrind shouldn't error out here. diff --git a/src/PIL/Image.py b/src/PIL/Image.py index 49bd19ae449..b1dcf0e0fa4 100644 --- a/src/PIL/Image.py +++ b/src/PIL/Image.py @@ -3258,6 +3258,14 @@ class SupportsArrayInterface(Protocol): def __array_interface__(self) -> dict[str, Any]: raise NotImplementedError() +class SupportsArrowArrayInterface(Protocol): + """ + An object that has an ``__arrow_c_array__`` method corresponding to the arrow c data interface. + """ + + def __arrow_c_array__(self, requested_schema:"PyCapsule"=None) -> tuple["PyCapsule", "PyCapsule"]: + raise NotImplementedError() + def fromarray(obj: SupportsArrayInterface, mode: str | None = None) -> Image: """ @@ -3347,6 +3355,18 @@ def fromarray(obj: SupportsArrayInterface, mode: str | None = None) -> Image: return frombuffer(mode, size, obj, "raw", rawmode, 0, 1) +def fromarrow(obj: SupportsArrowArrayIngerface, mode, size) -> ImageFile.ImageFile: + if not hasattr(obj, '__arrow_c_array__'): + raise ValueError("arrow_c_array interface not found") + + (schema_capsule, array_capsule) = obj.__arrow_c_array__() + _im = core.new_arrow(mode, size, schema_capsule, array_capsule) + if (_im): + return Image()._new(_im) + + return None + + def fromqimage(im: ImageQt.QImage) -> ImageFile.ImageFile: """Creates an image instance from a QImage image""" from . import ImageQt diff --git a/src/_imaging.c b/src/_imaging.c index eea719a5f91..c8bc21ac3f3 100644 --- a/src/_imaging.c +++ b/src/_imaging.c @@ -89,7 +89,6 @@ #endif #include "libImaging/Imaging.h" -#include "libImaging/Arrow.h" #define _USE_MATH_DEFINES #include @@ -261,6 +260,38 @@ PyObject* ExportArrowArrayPyCapsule(ImagingObject *self) { } +static PyObject * +_new_arrow(PyObject *self, PyObject *args) { + char *mode; + int xsize, ysize; + PyObject *schema_capsule, *array_capsule; + PyObject *ret; + + if (!PyArg_ParseTuple(args, "s(ii)OO", &mode, &xsize, &ysize, + &schema_capsule, &array_capsule)) { + return NULL; + } + + struct ArrowSchema* schema = + (struct ArrowSchema*)PyCapsule_GetPointer(schema_capsule, "arrow_schema"); + + struct ArrowArray* array = + (struct ArrowArray*)PyCapsule_GetPointer(array_capsule, "arrow_array"); + + ret = PyImagingNew(ImagingNewArrow(mode, xsize, ysize, schema, array)); + if (schema->release){ + schema->release(schema); + schema->release = NULL; + } + if (!ret && array->release) { + array->release(array); + array->release = NULL; + } + return ret; +} + + + /* -------------------------------------------------------------------- */ /* EXCEPTION REROUTING */ /* -------------------------------------------------------------------- */ @@ -4258,6 +4289,7 @@ static PyMethodDef functions[] = { {"fill", (PyCFunction)_fill, METH_VARARGS}, {"new", (PyCFunction)_new, METH_VARARGS}, {"new_block", (PyCFunction)_new_block, METH_VARARGS}, + {"new_arrow", (PyCFunction)_new_arrow, METH_VARARGS}, {"merge", (PyCFunction)_merge, METH_VARARGS}, /* Functions */ diff --git a/src/libImaging/Imaging.h b/src/libImaging/Imaging.h index 7f074292737..cf43873f390 100644 --- a/src/libImaging/Imaging.h +++ b/src/libImaging/Imaging.h @@ -20,6 +20,8 @@ extern "C" { #define M_PI 3.1415926535897932384626433832795 #endif +#include "Arrow.h" + /* -------------------------------------------------------------------- */ /* @@ -107,11 +109,17 @@ struct ImagingMemoryInstance { /* arrow */ int arrow_borrow; /* Number of arrow arrays that have been allocated */ + char band_names[4][3]; /* names of bands, max 2 char + null terminator */ + char arrow_band_format[2]; /* single character + null terminator */ + + int read_only; /* flag for read-only. set for arrow borrowed arrays */ + struct ArrowArray *arrow_array_capsule; /* upstream arrow array source */ + int blocks_count; /* Number of blocks that have been allocated */ int lines_per_block; /* Number of lines in a block have been allocated */ - char band_names[4][3]; /* names of bands, max 2 char + null terminator */ - char arrow_band_format[2]; /* single character + null terminator */ + + }; #define IMAGING_PIXEL_1(im, x, y) ((im)->image8[(y)][(x)]) @@ -195,6 +203,11 @@ ImagingDelete(Imaging im); extern Imaging ImagingNewBlock(const char *mode, int xsize, int ysize); +extern Imaging +ImagingNewArrow(const char *mode, int xsize, int ysize, + struct ArrowSchema *schema, + struct ArrowArray *external_array); + extern Imaging ImagingNewPrologue(const char *mode, int xsize, int ysize); extern Imaging @@ -712,7 +725,6 @@ _imaging_tell_pyFd(PyObject *fd); /* Arrow */ -#include "Arrow.h" extern int export_imaging_array(Imaging im, struct ArrowArray* array); extern int export_imaging_schema(Imaging im, struct ArrowSchema* schema); extern void export_uint32_type(struct ArrowSchema* schema); diff --git a/src/libImaging/Storage.c b/src/libImaging/Storage.c index 5505d44d5e7..f2e03424753 100644 --- a/src/libImaging/Storage.c +++ b/src/libImaging/Storage.c @@ -278,6 +278,7 @@ ImagingNewPrologueSubtype(const char *mode, int xsize, int ysize, int size) { break; } + // UNDONE -- not accurate for arrow MUTEX_LOCK(&ImagingDefaultArena.mutex); ImagingDefaultArena.stats_new_count += 1; MUTEX_UNLOCK(&ImagingDefaultArena.mutex); @@ -556,6 +557,62 @@ ImagingAllocateBlock(Imaging im) { return im; } +/* Borrowed Arrow Storage Type */ +/* --------------------------- */ +/* Don't allocate the image. */ + + +static void +ImagingDestroyArrow(Imaging im) { + if (im->arrow_array_capsule && im->arrow_array_capsule->release) { + im->arrow_array_capsule->release(im->arrow_array_capsule); + im->arrow_array_capsule->release = NULL; + im->arrow_array_capsule = NULL; + } +} + +Imaging +ImagingAllocateArrow(Imaging im, struct ArrowArray *external_array) { + /* don't really allocate, but naming patterns */ + Py_ssize_t y, i; + + char* borrowed_buffer = NULL; + struct ArrowArray* arr = external_array; + + /* overflow check for malloc */ + if (im->linesize && im->ysize > INT_MAX / im->linesize) { + return (Imaging)ImagingError_MemoryError(); + } + + if (arr->n_children == 1) { + arr = arr->children[0]; + } + if (arr->n_buffers == 2) { + // undone offset. offset here is # of elements, + // so depends on the size of the element + // undone image is char*, arrow is void * + // buffer 0 is the null list + // buffer 1 is the data + borrowed_buffer = (char *)arr->buffers[1]; + } + + if (! borrowed_buffer) { + // UNDONE better error here. + // currently, only wanting one where + return (Imaging)ImagingError_MemoryError(); + } + + for (y = i = 0; y < im->ysize; y++) { + im->image[y] = borrowed_buffer + i; + i += im->linesize; + } + im->read_only = 1; + im->arrow_array_capsule = external_array; + im->destroy = ImagingDestroyArrow; + + return im; +} + /* -------------------------------------------------------------------- * Create a new, internally allocated, image. */ @@ -627,6 +684,53 @@ ImagingNewBlock(const char *mode, int xsize, int ysize) { return NULL; } +Imaging +ImagingNewArrow(const char *mode, int xsize, int ysize, + struct ArrowSchema *schema, + struct ArrowArray *external_array) { + /* A borrowed arrow array */ + Imaging im; + + if (xsize < 0 || ysize < 0) { + return (Imaging)ImagingError_ValueError("bad image size"); + } + + im = ImagingNewPrologue(mode, xsize, ysize); + if (!im) { + return NULL; + } + + int64_t pixels = (int64_t)xsize * (int64_t)ysize; + + if (((strcmp(schema->format, "i") == 0 && + im->pixelsize == 4) || + (strcmp(schema->format, im->arrow_band_format) == 0 && + im->bands == 1)) + && pixels == external_array->length) { + // one arrow element per, and it matches a pixelsize*char + if (ImagingAllocateArrow(im, external_array)) { + return im; + } + } + if (strcmp(schema->format, "+w:4") == 0 + && im->pixelsize == 4 + && schema->n_children > 0 + && schema->children + && strcmp(schema->children[0]->format, "C") == 0 + && pixels == external_array->length + && external_array->n_children == 1 + && external_array->children + && 4 * pixels == external_array->children[0]->length) { + // 4 up element of char into pixelsize == 4 + if (ImagingAllocateArrow(im, external_array)) { + return im; + } + } + + ImagingDelete(im); + return NULL; +} + Imaging ImagingNew2Dirty(const char *mode, Imaging imOut, Imaging imIn) { /* allocate or validate output image */ From f1349e973d32439ebba72826b93f91af76a87ac0 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Tue, 21 Jan 2025 21:41:54 +0000 Subject: [PATCH 08/55] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- Tests/test_arrow.py | 53 ++-- pyproject.toml | 2 +- src/PIL/Image.py | 14 +- src/_imaging.c | 54 +++-- src/libImaging/Arrow.c | 505 +++++++++++++++++++-------------------- src/libImaging/Arrow.h | 54 ++--- src/libImaging/Imaging.h | 30 ++- src/libImaging/Storage.c | 155 ++++++------ 8 files changed, 433 insertions(+), 434 deletions(-) diff --git a/Tests/test_arrow.py b/Tests/test_arrow.py index e8eff880db4..68a4f96f09e 100644 --- a/Tests/test_arrow.py +++ b/Tests/test_arrow.py @@ -1,19 +1,20 @@ from __future__ import annotations -import warnings +from typing import Any # undone import pytest from PIL import Image -from .helper import assert_deep_equal, assert_image, hopper, skip_unless_feature, assert_image_equal - -from typing import Any # undone +from .helper import ( + assert_deep_equal, + assert_image_equal, + hopper, +) pyarrow = pytest.importorskip("pyarrow", reason="PyArrow not installed") TEST_IMAGE_SIZE = (10, 10) -from numbers import Number def _test_img_equals_pyarray(img: Image.Image, arr: Any, mask) -> None: @@ -24,19 +25,16 @@ def _test_img_equals_pyarray(img: Image.Image, arr: Any, mask) -> None: for y in range(0, img.size[1], int(img.size[1] / 10)): if mask: for ix, elt in enumerate(mask): - assert px[x,y][ix] == arr[y * img.width + x].as_py()[elt] + assert px[x, y][ix] == arr[y * img.width + x].as_py()[elt] else: assert_deep_equal(px[x, y], arr[y * img.width + x].as_py()) # really hard to get a non-nullable list type -fl_uint8_4_type = pyarrow.field("_", - pyarrow.list_( - pyarrow.field("_", - pyarrow.uint8() - ).with_nullable(False) - ,4) - ).type +fl_uint8_4_type = pyarrow.field( + "_", pyarrow.list_(pyarrow.field("_", pyarrow.uint8()).with_nullable(False), 4) +).type + @pytest.mark.parametrize( "mode, dtype, mask", @@ -44,16 +42,16 @@ def _test_img_equals_pyarray(img: Image.Image, arr: Any, mask) -> None: ("L", pyarrow.uint8(), None), ("I", pyarrow.int32(), None), ("F", pyarrow.float32(), None), - ("LA", fl_uint8_4_type, [0,3]), - ("RGB", fl_uint8_4_type, [0,1,2]), + ("LA", fl_uint8_4_type, [0, 3]), + ("RGB", fl_uint8_4_type, [0, 1, 2]), ("RGBA", fl_uint8_4_type, None), ("RGBX", fl_uint8_4_type, None), ("CMYK", fl_uint8_4_type, None), - ("YCbCr", fl_uint8_4_type, [0,1,2]), - ("HSV", fl_uint8_4_type, [0,1,2]), + ("YCbCr", fl_uint8_4_type, [0, 1, 2]), + ("HSV", fl_uint8_4_type, [0, 1, 2]), ), ) -def test_to_array(mode: str, dtype: Any, mask: Any ) -> None: +def test_to_array(mode: str, dtype: Any, mask: Any) -> None: img = hopper(mode) # Resize to non-square @@ -70,39 +68,40 @@ def test_to_array(mode: str, dtype: Any, mask: Any ) -> None: assert_image_equal(img, reloaded) + def test_lifetime(): # valgrind shouldn't error out here. # arrays should be accessible after the image is deleted. - img = hopper('L') + img = hopper("L") arr_1 = pyarrow.array(img) arr_2 = pyarrow.array(img) - del(img) + del img assert arr_1.sum().as_py() > 0 - del(arr_1) + del arr_1 assert arr_2.sum().as_py() > 0 - del(arr_2) + del arr_2 + def test_lifetime2(): # valgrind shouldn't error out here. # img should remain after the arrays are collected. - img = hopper('L') + img = hopper("L") arr_1 = pyarrow.array(img) arr_2 = pyarrow.array(img) - assert arr_1.sum().as_py() > 0 - del(arr_1) + del arr_1 assert arr_2.sum().as_py() > 0 - del(arr_2) + del arr_2 img2 = img.copy() px = img2.load() - assert isinstance(px[0,0], int) + assert isinstance(px[0, 0], int) diff --git a/pyproject.toml b/pyproject.toml index 2a048f7753e..9cf3344f368 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -61,12 +61,12 @@ optional-dependencies.tests = [ "markdown2", "olefile", "packaging", + "pyarrow", "pyroma", "pytest", "pytest-cov", "pytest-timeout", "trove-classifiers>=2024.10.12", - "pyarrow", ] optional-dependencies.typing = [ "typing-extensions; python_version<'3.10'", diff --git a/src/PIL/Image.py b/src/PIL/Image.py index b1dcf0e0fa4..ef0a05b74c9 100644 --- a/src/PIL/Image.py +++ b/src/PIL/Image.py @@ -748,12 +748,13 @@ def __array_interface__(self) -> dict[str, str | bytes | int | tuple[int, ...]]: new["shape"], new["typestr"] = _conv_type_shape(self) return new - def __arrow_c_schema__(self) -> object: self.load() return self.im.__arrow_c_schema__() - def __arrow_c_array__(self, requested_schema: object | None = None) -> Tuple[object, object]: + def __arrow_c_array__( + self, requested_schema: object | None = None + ) -> Tuple[object, object]: self.load() return (self.im.__arrow_c_schema__(), self.im.__arrow_c_array__()) @@ -3258,12 +3259,15 @@ class SupportsArrayInterface(Protocol): def __array_interface__(self) -> dict[str, Any]: raise NotImplementedError() + class SupportsArrowArrayInterface(Protocol): """ An object that has an ``__arrow_c_array__`` method corresponding to the arrow c data interface. """ - def __arrow_c_array__(self, requested_schema:"PyCapsule"=None) -> tuple["PyCapsule", "PyCapsule"]: + def __arrow_c_array__( + self, requested_schema: PyCapsule = None + ) -> tuple[PyCapsule, PyCapsule]: raise NotImplementedError() @@ -3356,12 +3360,12 @@ def fromarray(obj: SupportsArrayInterface, mode: str | None = None) -> Image: def fromarrow(obj: SupportsArrowArrayIngerface, mode, size) -> ImageFile.ImageFile: - if not hasattr(obj, '__arrow_c_array__'): + if not hasattr(obj, "__arrow_c_array__"): raise ValueError("arrow_c_array interface not found") (schema_capsule, array_capsule) = obj.__arrow_c_array__() _im = core.new_arrow(mode, size, schema_capsule, array_capsule) - if (_im): + if _im: return Image()._new(_im) return None diff --git a/src/_imaging.c b/src/_imaging.c index c8bc21ac3f3..09d7096dcf3 100644 --- a/src/_imaging.c +++ b/src/_imaging.c @@ -227,39 +227,42 @@ PyImaging_GetBuffer(PyObject *buffer, Py_buffer *view) { /* Arrow HANDLING */ /* -------------------------------------------------------------------- */ -void ReleaseArrowSchemaPyCapsule(PyObject* capsule) { - struct ArrowSchema* schema = - (struct ArrowSchema*)PyCapsule_GetPointer(capsule, "arrow_schema"); +void +ReleaseArrowSchemaPyCapsule(PyObject *capsule) { + struct ArrowSchema *schema = + (struct ArrowSchema *)PyCapsule_GetPointer(capsule, "arrow_schema"); if (schema->release != NULL) { schema->release(schema); } free(schema); } -PyObject* ExportArrowSchemaPyCapsule(ImagingObject *self) { - struct ArrowSchema* schema = - (struct ArrowSchema*)calloc(1, sizeof(struct ArrowSchema)); +PyObject * +ExportArrowSchemaPyCapsule(ImagingObject *self) { + struct ArrowSchema *schema = + (struct ArrowSchema *)calloc(1, sizeof(struct ArrowSchema)); export_imaging_schema(self->image, schema); return PyCapsule_New(schema, "arrow_schema", ReleaseArrowSchemaPyCapsule); } -void ReleaseArrowArrayPyCapsule(PyObject* capsule) { - struct ArrowArray* array = - (struct ArrowArray*)PyCapsule_GetPointer(capsule, "arrow_array"); +void +ReleaseArrowArrayPyCapsule(PyObject *capsule) { + struct ArrowArray *array = + (struct ArrowArray *)PyCapsule_GetPointer(capsule, "arrow_array"); if (array->release != NULL) { array->release(array); } free(array); } -PyObject* ExportArrowArrayPyCapsule(ImagingObject *self) { - struct ArrowArray* array = - (struct ArrowArray*)calloc(1, sizeof(struct ArrowArray)); +PyObject * +ExportArrowArrayPyCapsule(ImagingObject *self) { + struct ArrowArray *array = + (struct ArrowArray *)calloc(1, sizeof(struct ArrowArray)); export_imaging_array(self->image, array); return PyCapsule_New(array, "arrow_array", ReleaseArrowArrayPyCapsule); } - static PyObject * _new_arrow(PyObject *self, PyObject *args) { char *mode; @@ -267,31 +270,30 @@ _new_arrow(PyObject *self, PyObject *args) { PyObject *schema_capsule, *array_capsule; PyObject *ret; - if (!PyArg_ParseTuple(args, "s(ii)OO", &mode, &xsize, &ysize, - &schema_capsule, &array_capsule)) { + if (!PyArg_ParseTuple( + args, "s(ii)OO", &mode, &xsize, &ysize, &schema_capsule, &array_capsule + )) { return NULL; } - struct ArrowSchema* schema = - (struct ArrowSchema*)PyCapsule_GetPointer(schema_capsule, "arrow_schema"); + struct ArrowSchema *schema = + (struct ArrowSchema *)PyCapsule_GetPointer(schema_capsule, "arrow_schema"); - struct ArrowArray* array = - (struct ArrowArray*)PyCapsule_GetPointer(array_capsule, "arrow_array"); + struct ArrowArray *array = + (struct ArrowArray *)PyCapsule_GetPointer(array_capsule, "arrow_array"); ret = PyImagingNew(ImagingNewArrow(mode, xsize, ysize, schema, array)); - if (schema->release){ - schema->release(schema); - schema->release = NULL; + if (schema->release) { + schema->release(schema); + schema->release = NULL; } if (!ret && array->release) { - array->release(array); - array->release = NULL; + array->release(array); + array->release = NULL; } return ret; } - - /* -------------------------------------------------------------------- */ /* EXCEPTION REROUTING */ /* -------------------------------------------------------------------- */ diff --git a/src/libImaging/Arrow.c b/src/libImaging/Arrow.c index f9c06789854..f7bcfcf97fc 100644 --- a/src/libImaging/Arrow.c +++ b/src/libImaging/Arrow.c @@ -9,292 +9,285 @@ /* } */ static void -ReleaseExportedSchema(struct ArrowSchema* array) { - // This should not be called on already released array - //assert(array->release != NULL); - - if (!array->release) { - return; - } - if (array->format) { - free((void*)array->format); - array->format = NULL; - } - if (array->name) { - free((void*)array->name); - array->name = NULL; - } - - // Release children - for (int64_t i = 0; i < array->n_children; ++i) { - struct ArrowSchema* child = array->children[i]; - if (child->release != NULL) { - child->release(child); - //assert(child->release == NULL); +ReleaseExportedSchema(struct ArrowSchema *array) { + // This should not be called on already released array + // assert(array->release != NULL); + + if (!array->release) { + return; + } + if (array->format) { + free((void *)array->format); + array->format = NULL; + } + if (array->name) { + free((void *)array->name); + array->name = NULL; + } + + // Release children + for (int64_t i = 0; i < array->n_children; ++i) { + struct ArrowSchema *child = array->children[i]; + if (child->release != NULL) { + child->release(child); + // assert(child->release == NULL); + } } - } - // Release dictionary - struct ArrowSchema* dict = array->dictionary; - if (dict != NULL && dict->release != NULL) { - dict->release(dict); - //assert(dict->release == NULL); - } + // Release dictionary + struct ArrowSchema *dict = array->dictionary; + if (dict != NULL && dict->release != NULL) { + dict->release(dict); + // assert(dict->release == NULL); + } - // TODO here: release and/or deallocate all data directly owned by - // the ArrowArray struct, such as the private_data. + // TODO here: release and/or deallocate all data directly owned by + // the ArrowArray struct, such as the private_data. - // Mark array released - array->release = NULL; + // Mark array released + array->release = NULL; } +int +export_named_type(struct ArrowSchema *schema, char *format, char *name) { + char *formatp; + char *namep; + size_t format_len = strlen(format) + 1; + size_t name_len = strlen(name) + 1; + formatp = calloc(format_len, 1); -int export_named_type(struct ArrowSchema* schema, - char* format, - char* name) { - - char* formatp; - char* namep; - size_t format_len = strlen(format) + 1; - size_t name_len = strlen(name) + 1; - - formatp = calloc(format_len, 1); - - if (!formatp) { - return 1; - } - - namep = calloc(name_len, 1); - if (!namep){ - free(formatp); - return 1; - } - - strncpy(formatp, format, format_len); - strncpy(namep, name, name_len); - - *schema = (struct ArrowSchema) { - // Type description - .format = formatp, - .name = namep, - .metadata = NULL, - .flags = 0, - .n_children = 0, - .children = NULL, - .dictionary = NULL, - // Bookkeeping - .release = &ReleaseExportedSchema - }; - return 0; -} + if (!formatp) { + return 1; + } -int export_imaging_schema(Imaging im, struct ArrowSchema* schema) { - int retval = 0; - - if (strcmp(im->arrow_band_format, "") == 0) { - return 1; - } - - if (im->bands == 1) { - return export_named_type(schema, im->arrow_band_format, im->band_names[0]); - } - - retval = export_named_type(schema, "+w:4", ""); - if (retval) { - return retval; - } - // if it's not 1 band, it's an int32 at the moment. 4 unint8 bands. - schema->n_children = 1; - schema->children = calloc(1, sizeof(struct ArrowSchema*)); - schema->children[0] = (struct ArrowSchema*)calloc(1, sizeof(struct ArrowSchema)); - if (export_named_type(schema->children[0], im->arrow_band_format, "pixel")) { - free(schema->children[0]); - schema->release(schema); - return 2; - } - return 0; + namep = calloc(name_len, 1); + if (!namep) { + free(formatp); + return 1; + } + + strncpy(formatp, format, format_len); + strncpy(namep, name, name_len); + + *schema = (struct ArrowSchema){// Type description + .format = formatp, + .name = namep, + .metadata = NULL, + .flags = 0, + .n_children = 0, + .children = NULL, + .dictionary = NULL, + // Bookkeeping + .release = &ReleaseExportedSchema + }; + return 0; } -static void release_simple_type(struct ArrowSchema* schema) { - // Mark released - schema->release = NULL; +int +export_imaging_schema(Imaging im, struct ArrowSchema *schema) { + int retval = 0; + + if (strcmp(im->arrow_band_format, "") == 0) { + return 1; + } + + if (im->bands == 1) { + return export_named_type(schema, im->arrow_band_format, im->band_names[0]); + } + + retval = export_named_type(schema, "+w:4", ""); + if (retval) { + return retval; + } + // if it's not 1 band, it's an int32 at the moment. 4 unint8 bands. + schema->n_children = 1; + schema->children = calloc(1, sizeof(struct ArrowSchema *)); + schema->children[0] = (struct ArrowSchema *)calloc(1, sizeof(struct ArrowSchema)); + if (export_named_type(schema->children[0], im->arrow_band_format, "pixel")) { + free(schema->children[0]); + schema->release(schema); + return 2; + } + return 0; } -void export_uint32_type(struct ArrowSchema* schema) { - *schema = (struct ArrowSchema) { - // Type description - .format = "I", - .name = "", - .metadata = NULL, - .flags = 0, - .n_children = 0, - .children = NULL, - .dictionary = NULL, - // Bookkeeping - .release = &release_simple_type - }; +static void +release_simple_type(struct ArrowSchema *schema) { + // Mark released + schema->release = NULL; } +void +export_uint32_type(struct ArrowSchema *schema) { + *schema = (struct ArrowSchema){// Type description + .format = "I", + .name = "", + .metadata = NULL, + .flags = 0, + .n_children = 0, + .children = NULL, + .dictionary = NULL, + // Bookkeeping + .release = &release_simple_type + }; +} -static void release_uint32_array(struct ArrowArray* array) { - //assert(array->n_buffers == 2); - // Free the buffers and the buffers array - free((void *) array->buffers[1]); - free(array->buffers); - // Mark released - array->release = NULL; +static void +release_uint32_array(struct ArrowArray *array) { + // assert(array->n_buffers == 2); + // Free the buffers and the buffers array + free((void *)array->buffers[1]); + free(array->buffers); + // Mark released + array->release = NULL; } -void export_uint32_array(const uint32_t* data, int64_t nitems, - struct ArrowArray* array) { - // Initialize primitive fields - *array = (struct ArrowArray) { - // Data description - .length = nitems, - .offset = 0, - .null_count = 0, - .n_buffers = 2, - .n_children = 0, - .children = NULL, - .dictionary = NULL, - // Bookkeeping - .release = &release_uint32_array - }; - // Allocate list of buffers - array->buffers = (const void**) malloc(sizeof(void*) * array->n_buffers); - //assert(array->buffers != NULL); - array->buffers[0] = NULL; // no nulls, null bitmap can be omitted - array->buffers[1] = data; +void +export_uint32_array(const uint32_t *data, int64_t nitems, struct ArrowArray *array) { + // Initialize primitive fields + *array = (struct ArrowArray){// Data description + .length = nitems, + .offset = 0, + .null_count = 0, + .n_buffers = 2, + .n_children = 0, + .children = NULL, + .dictionary = NULL, + // Bookkeeping + .release = &release_uint32_array + }; + // Allocate list of buffers + array->buffers = (const void **)malloc(sizeof(void *) * array->n_buffers); + // assert(array->buffers != NULL); + array->buffers[0] = NULL; // no nulls, null bitmap can be omitted + array->buffers[1] = data; } -static void release_const_array(struct ArrowArray* array) { - Imaging im = (Imaging)array->private_data; +static void +release_const_array(struct ArrowArray *array) { + Imaging im = (Imaging)array->private_data; - ImagingDelete(im); + ImagingDelete(im); - //assert(array->n_buffers == 2); - // Free the buffers and the buffers array - free(array->buffers); - // Mark released - array->release = NULL; + // assert(array->n_buffers == 2); + // Free the buffers and the buffers array + free(array->buffers); + // Mark released + array->release = NULL; } +int +export_single_channel_array(Imaging im, struct ArrowArray *array) { + int length = im->xsize * im->ysize; -int export_single_channel_array(Imaging im, struct ArrowArray* array){ - int length = im->xsize * im->ysize; - - /* undone -- for now, single block images */ - //assert (im->block_count = 0 || im->block_count = 1); - - if (im->lines_per_block && im->lines_per_block < im->ysize) { - length = im->xsize * im->lines_per_block; - } - - im->arrow_borrow++; - // Initialize primitive fields - *array = (struct ArrowArray) { - // Data description - .length = length, - .offset = 0, - .null_count = 0, - .n_buffers = 2, - .n_children = 0, - .children = NULL, - .dictionary = NULL, - // Bookkeeping - .release = &release_const_array, - .private_data = im - }; - - // Allocate list of buffers - array->buffers = (const void**) malloc(sizeof(void*) * array->n_buffers); - //assert(array->buffers != NULL); - array->buffers[0] = NULL; // no nulls, null bitmap can be omitted - - if (im->block) { - array->buffers[1] = im->block; - } else { - array->buffers[1] = im->blocks[0].ptr; - } - return 0; -} + /* undone -- for now, single block images */ + // assert (im->block_count = 0 || im->block_count = 1); + if (im->lines_per_block && im->lines_per_block < im->ysize) { + length = im->xsize * im->lines_per_block; + } -int export_fixed_pixel_array(Imaging im, struct ArrowArray* array) { - - int length = im->xsize * im->ysize; - - /* undone -- for now, single block images */ - //assert (im->block_count = 0 || im->block_count = 1); - - if (im->lines_per_block && im->lines_per_block < im->ysize) { - length = im->xsize * im->lines_per_block; - } - - im->arrow_borrow++; - // Initialize primitive fields - // Fixed length arrays are 1 buffer of validity, and the length in pixels. - // Data is in a child array. - *array = (struct ArrowArray) { - // Data description - .length = length, - .offset = 0, - .null_count = 0, - .n_buffers = 1, - .n_children = 1, - .children = NULL, - .dictionary = NULL, - // Bookkeeping - .release = &release_const_array, - .private_data = im - }; - - // Allocate list of buffers - array->buffers = (const void**) calloc(1, sizeof(void*) * array->n_buffers); - //assert(array->buffers != NULL); - array->buffers[0] = NULL; // no nulls, null bitmap can be omitted - - - // if it's not 1 band, it's an int32 at the moment. 4 unint8 bands. - array->n_children = 1; - array->children = calloc(1, sizeof(struct ArrowArray*)); - array->children[0] = (struct ArrowArray*)calloc(1, sizeof(struct ArrowArray)); - - im->arrow_borrow++; - *array->children[0] = (struct ArrowArray) { - // Data description - .length = length * 4, - .offset = 0, - .null_count = 0, - .n_buffers = 2, - .n_children = 0, - .children = NULL, - .dictionary = NULL, - // Bookkeeping - .release = &release_const_array, - .private_data = im - }; - - array->children[0]->buffers = (const void**) calloc(2, sizeof(void*) * array->n_buffers); - - if (im->block) { - array->children[0]->buffers[1] = im->block; - } else { - array->children[0]->buffers[1] = im->blocks[0].ptr; - } - return 0; + im->arrow_borrow++; + // Initialize primitive fields + *array = (struct ArrowArray){// Data description + .length = length, + .offset = 0, + .null_count = 0, + .n_buffers = 2, + .n_children = 0, + .children = NULL, + .dictionary = NULL, + // Bookkeeping + .release = &release_const_array, + .private_data = im + }; + + // Allocate list of buffers + array->buffers = (const void **)malloc(sizeof(void *) * array->n_buffers); + // assert(array->buffers != NULL); + array->buffers[0] = NULL; // no nulls, null bitmap can be omitted + + if (im->block) { + array->buffers[1] = im->block; + } else { + array->buffers[1] = im->blocks[0].ptr; + } + return 0; } +int +export_fixed_pixel_array(Imaging im, struct ArrowArray *array) { + int length = im->xsize * im->ysize; -int export_imaging_array(Imaging im, struct ArrowArray* array) { - if (strcmp(im->arrow_band_format, "") == 0) { - return 1; - } + /* undone -- for now, single block images */ + // assert (im->block_count = 0 || im->block_count = 1); - if (im->bands == 1) { - return export_single_channel_array(im, array); - } + if (im->lines_per_block && im->lines_per_block < im->ysize) { + length = im->xsize * im->lines_per_block; + } + + im->arrow_borrow++; + // Initialize primitive fields + // Fixed length arrays are 1 buffer of validity, and the length in pixels. + // Data is in a child array. + *array = (struct ArrowArray){// Data description + .length = length, + .offset = 0, + .null_count = 0, + .n_buffers = 1, + .n_children = 1, + .children = NULL, + .dictionary = NULL, + // Bookkeeping + .release = &release_const_array, + .private_data = im + }; + + // Allocate list of buffers + array->buffers = (const void **)calloc(1, sizeof(void *) * array->n_buffers); + // assert(array->buffers != NULL); + array->buffers[0] = NULL; // no nulls, null bitmap can be omitted + + // if it's not 1 band, it's an int32 at the moment. 4 unint8 bands. + array->n_children = 1; + array->children = calloc(1, sizeof(struct ArrowArray *)); + array->children[0] = (struct ArrowArray *)calloc(1, sizeof(struct ArrowArray)); + + im->arrow_borrow++; + *array->children[0] = (struct ArrowArray){// Data description + .length = length * 4, + .offset = 0, + .null_count = 0, + .n_buffers = 2, + .n_children = 0, + .children = NULL, + .dictionary = NULL, + // Bookkeeping + .release = &release_const_array, + .private_data = im + }; + + array->children[0]->buffers = + (const void **)calloc(2, sizeof(void *) * array->n_buffers); + + if (im->block) { + array->children[0]->buffers[1] = im->block; + } else { + array->children[0]->buffers[1] = im->blocks[0].ptr; + } + return 0; +} + +int +export_imaging_array(Imaging im, struct ArrowArray *array) { + if (strcmp(im->arrow_band_format, "") == 0) { + return 1; + } + + if (im->bands == 1) { + return export_single_channel_array(im, array); + } - return export_fixed_pixel_array(im, array); + return export_fixed_pixel_array(im, array); } diff --git a/src/libImaging/Arrow.h b/src/libImaging/Arrow.h index 758be27b06d..0b285fe8053 100644 --- a/src/libImaging/Arrow.h +++ b/src/libImaging/Arrow.h @@ -13,36 +13,36 @@ #define ARROW_FLAG_MAP_KEYS_SORTED 4 struct ArrowSchema { - // Array type description - const char* format; - const char* name; - const char* metadata; - int64_t flags; - int64_t n_children; - struct ArrowSchema** children; - struct ArrowSchema* dictionary; - - // Release callback - void (*release)(struct ArrowSchema*); - // Opaque producer-specific data - void* private_data; + // Array type description + const char *format; + const char *name; + const char *metadata; + int64_t flags; + int64_t n_children; + struct ArrowSchema **children; + struct ArrowSchema *dictionary; + + // Release callback + void (*release)(struct ArrowSchema *); + // Opaque producer-specific data + void *private_data; }; struct ArrowArray { - // Array data description - int64_t length; - int64_t null_count; - int64_t offset; - int64_t n_buffers; - int64_t n_children; - const void** buffers; - struct ArrowArray** children; - struct ArrowArray* dictionary; - - // Release callback - void (*release)(struct ArrowArray*); - // Opaque producer-specific data - void* private_data; + // Array data description + int64_t length; + int64_t null_count; + int64_t offset; + int64_t n_buffers; + int64_t n_children; + const void **buffers; + struct ArrowArray **children; + struct ArrowArray *dictionary; + + // Release callback + void (*release)(struct ArrowArray *); + // Opaque producer-specific data + void *private_data; }; #endif // ARROW_C_DATA_INTERFACE diff --git a/src/libImaging/Imaging.h b/src/libImaging/Imaging.h index cf43873f390..2d1d45393d4 100644 --- a/src/libImaging/Imaging.h +++ b/src/libImaging/Imaging.h @@ -108,18 +108,15 @@ struct ImagingMemoryInstance { void (*destroy)(Imaging im); /* arrow */ - int arrow_borrow; /* Number of arrow arrays that have been allocated */ - char band_names[4][3]; /* names of bands, max 2 char + null terminator */ + int arrow_borrow; /* Number of arrow arrays that have been allocated */ + char band_names[4][3]; /* names of bands, max 2 char + null terminator */ char arrow_band_format[2]; /* single character + null terminator */ - int read_only; /* flag for read-only. set for arrow borrowed arrays */ + int read_only; /* flag for read-only. set for arrow borrowed arrays */ struct ArrowArray *arrow_array_capsule; /* upstream arrow array source */ - int blocks_count; /* Number of blocks that have been allocated */ + int blocks_count; /* Number of blocks that have been allocated */ int lines_per_block; /* Number of lines in a block have been allocated */ - - - }; #define IMAGING_PIXEL_1(im, x, y) ((im)->image8[(y)][(x)]) @@ -204,9 +201,13 @@ extern Imaging ImagingNewBlock(const char *mode, int xsize, int ysize); extern Imaging -ImagingNewArrow(const char *mode, int xsize, int ysize, - struct ArrowSchema *schema, - struct ArrowArray *external_array); +ImagingNewArrow( + const char *mode, + int xsize, + int ysize, + struct ArrowSchema *schema, + struct ArrowArray *external_array +); extern Imaging ImagingNewPrologue(const char *mode, int xsize, int ysize); @@ -725,9 +726,12 @@ _imaging_tell_pyFd(PyObject *fd); /* Arrow */ -extern int export_imaging_array(Imaging im, struct ArrowArray* array); -extern int export_imaging_schema(Imaging im, struct ArrowSchema* schema); -extern void export_uint32_type(struct ArrowSchema* schema); +extern int +export_imaging_array(Imaging im, struct ArrowArray *array); +extern int +export_imaging_schema(Imaging im, struct ArrowSchema *schema); +extern void +export_uint32_type(struct ArrowSchema *schema); /* Errcodes */ #define IMAGING_CODEC_END 1 diff --git a/src/libImaging/Storage.c b/src/libImaging/Storage.c index f2e03424753..31604706a9c 100644 --- a/src/libImaging/Storage.c +++ b/src/libImaging/Storage.c @@ -66,14 +66,14 @@ ImagingNewPrologueSubtype(const char *mode, int xsize, int ysize, int size) { /* 1-bit images */ im->bands = im->pixelsize = 1; im->linesize = xsize; - strcpy(im->band_names[0],"1"); + strcpy(im->band_names[0], "1"); } else if (strcmp(mode, "P") == 0) { /* 8-bit palette mapped images */ im->bands = im->pixelsize = 1; im->linesize = xsize; im->palette = ImagingPaletteNew("RGB"); - strcpy(im->band_names[0],"P"); + strcpy(im->band_names[0], "P"); } else if (strcmp(mode, "PA") == 0) { /* 8-bit palette with alpha */ @@ -81,36 +81,36 @@ ImagingNewPrologueSubtype(const char *mode, int xsize, int ysize, int size) { im->pixelsize = 4; /* store in image32 memory */ im->linesize = xsize * 4; im->palette = ImagingPaletteNew("RGB"); - strcpy(im->band_names[0],"P"); - strcpy(im->band_names[1],"X"); - strcpy(im->band_names[2],"X"); - strcpy(im->band_names[3],"A"); + strcpy(im->band_names[0], "P"); + strcpy(im->band_names[1], "X"); + strcpy(im->band_names[2], "X"); + strcpy(im->band_names[3], "A"); } else if (strcmp(mode, "L") == 0) { /* 8-bit grayscale (luminance) images */ im->bands = im->pixelsize = 1; im->linesize = xsize; - strcpy(im->band_names[0],"L"); + strcpy(im->band_names[0], "L"); } else if (strcmp(mode, "LA") == 0) { /* 8-bit grayscale (luminance) with alpha */ im->bands = 2; im->pixelsize = 4; /* store in image32 memory */ im->linesize = xsize * 4; - strcpy(im->band_names[0],"L"); - strcpy(im->band_names[1],"X"); - strcpy(im->band_names[2],"X"); - strcpy(im->band_names[3],"A"); + strcpy(im->band_names[0], "L"); + strcpy(im->band_names[1], "X"); + strcpy(im->band_names[2], "X"); + strcpy(im->band_names[3], "A"); } else if (strcmp(mode, "La") == 0) { /* 8-bit grayscale (luminance) with premultiplied alpha */ im->bands = 2; im->pixelsize = 4; /* store in image32 memory */ im->linesize = xsize * 4; - strcpy(im->band_names[0],"L"); - strcpy(im->band_names[1],"X"); - strcpy(im->band_names[2],"X"); - strcpy(im->band_names[3],"a"); + strcpy(im->band_names[0], "L"); + strcpy(im->band_names[1], "X"); + strcpy(im->band_names[2], "X"); + strcpy(im->band_names[3], "a"); } else if (strcmp(mode, "F") == 0) { /* 32-bit floating point images */ @@ -118,8 +118,8 @@ ImagingNewPrologueSubtype(const char *mode, int xsize, int ysize, int size) { im->pixelsize = 4; im->linesize = xsize * 4; im->type = IMAGING_TYPE_FLOAT32; - strcpy(im->arrow_band_format , "f"); - strcpy(im->band_names[0],"F"); + strcpy(im->arrow_band_format, "f"); + strcpy(im->band_names[0], "F"); } else if (strcmp(mode, "I") == 0) { /* 32-bit integer images */ @@ -127,8 +127,8 @@ ImagingNewPrologueSubtype(const char *mode, int xsize, int ysize, int size) { im->pixelsize = 4; im->linesize = xsize * 4; im->type = IMAGING_TYPE_INT32; - strcpy(im->arrow_band_format , "i"); - strcpy(im->band_names[0],"I"); + strcpy(im->arrow_band_format, "i"); + strcpy(im->band_names[0], "I"); } else if (strcmp(mode, "I;16") == 0 || strcmp(mode, "I;16L") == 0 || strcmp(mode, "I;16B") == 0 || strcmp(mode, "I;16N") == 0) { @@ -138,18 +138,18 @@ ImagingNewPrologueSubtype(const char *mode, int xsize, int ysize, int size) { im->pixelsize = 2; im->linesize = xsize * 2; im->type = IMAGING_TYPE_SPECIAL; - strcpy(im->arrow_band_format , "s"); - strcpy(im->band_names[0],"I"); + strcpy(im->arrow_band_format, "s"); + strcpy(im->band_names[0], "I"); } else if (strcmp(mode, "RGB") == 0) { /* 24-bit true colour images */ im->bands = 3; im->pixelsize = 4; im->linesize = xsize * 4; - strcpy(im->band_names[0],"R"); - strcpy(im->band_names[1],"G"); - strcpy(im->band_names[2],"B"); - strcpy(im->band_names[3],"X"); + strcpy(im->band_names[0], "R"); + strcpy(im->band_names[1], "G"); + strcpy(im->band_names[2], "B"); + strcpy(im->band_names[3], "X"); } else if (strcmp(mode, "BGR;15") == 0) { /* EXPERIMENTAL */ @@ -159,7 +159,7 @@ ImagingNewPrologueSubtype(const char *mode, int xsize, int ysize, int size) { im->linesize = (xsize * 2 + 3) & -4; im->type = IMAGING_TYPE_SPECIAL; /* not allowing arrow due to line length packing */ - strcpy(im->arrow_band_format , ""); + strcpy(im->arrow_band_format, ""); } else if (strcmp(mode, "BGR;16") == 0) { /* EXPERIMENTAL */ @@ -169,7 +169,7 @@ ImagingNewPrologueSubtype(const char *mode, int xsize, int ysize, int size) { im->linesize = (xsize * 2 + 3) & -4; im->type = IMAGING_TYPE_SPECIAL; /* not allowing arrow due to line length packing */ - strcpy(im->arrow_band_format , ""); + strcpy(im->arrow_band_format, ""); } else if (strcmp(mode, "BGR;24") == 0) { /* EXPERIMENTAL */ @@ -179,53 +179,53 @@ ImagingNewPrologueSubtype(const char *mode, int xsize, int ysize, int size) { im->linesize = (xsize * 3 + 3) & -4; im->type = IMAGING_TYPE_SPECIAL; /* not allowing arrow due to line length packing */ - strcpy(im->arrow_band_format , ""); + strcpy(im->arrow_band_format, ""); } else if (strcmp(mode, "RGBX") == 0) { /* 32-bit true colour images with padding */ im->bands = im->pixelsize = 4; im->linesize = xsize * 4; - strcpy(im->band_names[0],"R"); - strcpy(im->band_names[1],"G"); - strcpy(im->band_names[2],"B"); - strcpy(im->band_names[3],"X"); + strcpy(im->band_names[0], "R"); + strcpy(im->band_names[1], "G"); + strcpy(im->band_names[2], "B"); + strcpy(im->band_names[3], "X"); } else if (strcmp(mode, "RGBA") == 0) { /* 32-bit true colour images with alpha */ im->bands = im->pixelsize = 4; im->linesize = xsize * 4; - strcpy(im->band_names[0],"R"); - strcpy(im->band_names[1],"G"); - strcpy(im->band_names[2],"B"); - strcpy(im->band_names[3],"A"); + strcpy(im->band_names[0], "R"); + strcpy(im->band_names[1], "G"); + strcpy(im->band_names[2], "B"); + strcpy(im->band_names[3], "A"); } else if (strcmp(mode, "RGBa") == 0) { /* 32-bit true colour images with premultiplied alpha */ im->bands = im->pixelsize = 4; im->linesize = xsize * 4; - strcpy(im->band_names[0],"R"); - strcpy(im->band_names[1],"G"); - strcpy(im->band_names[2],"B"); - strcpy(im->band_names[3],"a"); + strcpy(im->band_names[0], "R"); + strcpy(im->band_names[1], "G"); + strcpy(im->band_names[2], "B"); + strcpy(im->band_names[3], "a"); } else if (strcmp(mode, "CMYK") == 0) { /* 32-bit colour separation */ im->bands = im->pixelsize = 4; im->linesize = xsize * 4; - strcpy(im->band_names[0],"C"); - strcpy(im->band_names[1],"M"); - strcpy(im->band_names[2],"Y"); - strcpy(im->band_names[3],"K"); + strcpy(im->band_names[0], "C"); + strcpy(im->band_names[1], "M"); + strcpy(im->band_names[2], "Y"); + strcpy(im->band_names[3], "K"); } else if (strcmp(mode, "YCbCr") == 0) { /* 24-bit video format */ im->bands = 3; im->pixelsize = 4; im->linesize = xsize * 4; - strcpy(im->band_names[0],"Y"); - strcpy(im->band_names[1],"Cb"); - strcpy(im->band_names[2],"Cr"); - strcpy(im->band_names[3],"X"); + strcpy(im->band_names[0], "Y"); + strcpy(im->band_names[1], "Cb"); + strcpy(im->band_names[2], "Cr"); + strcpy(im->band_names[3], "X"); } else if (strcmp(mode, "LAB") == 0) { /* 24-bit color, luminance, + 2 color channels */ @@ -233,10 +233,10 @@ ImagingNewPrologueSubtype(const char *mode, int xsize, int ysize, int size) { im->bands = 3; im->pixelsize = 4; im->linesize = xsize * 4; - strcpy(im->band_names[0],"L"); - strcpy(im->band_names[1],"a"); - strcpy(im->band_names[2],"b"); - strcpy(im->band_names[3],"X"); + strcpy(im->band_names[0], "L"); + strcpy(im->band_names[1], "a"); + strcpy(im->band_names[2], "b"); + strcpy(im->band_names[3], "X"); } else if (strcmp(mode, "HSV") == 0) { /* 24-bit color, luminance, + 2 color channels */ @@ -244,10 +244,10 @@ ImagingNewPrologueSubtype(const char *mode, int xsize, int ysize, int size) { im->bands = 3; im->pixelsize = 4; im->linesize = xsize * 4; - strcpy(im->band_names[0],"H"); - strcpy(im->band_names[1],"S"); - strcpy(im->band_names[2],"V"); - strcpy(im->band_names[3],"X"); + strcpy(im->band_names[0], "H"); + strcpy(im->band_names[1], "S"); + strcpy(im->band_names[2], "V"); + strcpy(im->band_names[3], "X"); } else { free(im); @@ -301,8 +301,8 @@ ImagingDelete(Imaging im) { im->arrow_borrow--; - if (im->arrow_borrow>0) { - return; + if (im->arrow_borrow > 0) { + return; } if (im->palette) { @@ -561,7 +561,6 @@ ImagingAllocateBlock(Imaging im) { /* --------------------------- */ /* Don't allocate the image. */ - static void ImagingDestroyArrow(Imaging im) { if (im->arrow_array_capsule && im->arrow_array_capsule->release) { @@ -576,8 +575,8 @@ ImagingAllocateArrow(Imaging im, struct ArrowArray *external_array) { /* don't really allocate, but naming patterns */ Py_ssize_t y, i; - char* borrowed_buffer = NULL; - struct ArrowArray* arr = external_array; + char *borrowed_buffer = NULL; + struct ArrowArray *arr = external_array; /* overflow check for malloc */ if (im->linesize && im->ysize > INT_MAX / im->linesize) { @@ -596,7 +595,7 @@ ImagingAllocateArrow(Imaging im, struct ArrowArray *external_array) { borrowed_buffer = (char *)arr->buffers[1]; } - if (! borrowed_buffer) { + if (!borrowed_buffer) { // UNDONE better error here. // currently, only wanting one where return (Imaging)ImagingError_MemoryError(); @@ -685,9 +684,13 @@ ImagingNewBlock(const char *mode, int xsize, int ysize) { } Imaging -ImagingNewArrow(const char *mode, int xsize, int ysize, - struct ArrowSchema *schema, - struct ArrowArray *external_array) { +ImagingNewArrow( + const char *mode, + int xsize, + int ysize, + struct ArrowSchema *schema, + struct ArrowArray *external_array +) { /* A borrowed arrow array */ Imaging im; @@ -702,25 +705,19 @@ ImagingNewArrow(const char *mode, int xsize, int ysize, int64_t pixels = (int64_t)xsize * (int64_t)ysize; - if (((strcmp(schema->format, "i") == 0 && - im->pixelsize == 4) || - (strcmp(schema->format, im->arrow_band_format) == 0 && - im->bands == 1)) - && pixels == external_array->length) { + if (((strcmp(schema->format, "i") == 0 && im->pixelsize == 4) || + (strcmp(schema->format, im->arrow_band_format) == 0 && im->bands == 1)) && + pixels == external_array->length) { // one arrow element per, and it matches a pixelsize*char if (ImagingAllocateArrow(im, external_array)) { return im; } } - if (strcmp(schema->format, "+w:4") == 0 - && im->pixelsize == 4 - && schema->n_children > 0 - && schema->children - && strcmp(schema->children[0]->format, "C") == 0 - && pixels == external_array->length - && external_array->n_children == 1 - && external_array->children - && 4 * pixels == external_array->children[0]->length) { + if (strcmp(schema->format, "+w:4") == 0 && im->pixelsize == 4 && + schema->n_children > 0 && schema->children && + strcmp(schema->children[0]->format, "C") == 0 && + pixels == external_array->length && external_array->n_children == 1 && + external_array->children && 4 * pixels == external_array->children[0]->length) { // 4 up element of char into pixelsize == 4 if (ImagingAllocateArrow(im, external_array)) { return im; From 9d584a1014f36f9c297324c4a7c62e3020f05db4 Mon Sep 17 00:00:00 2001 From: wiredfool Date: Sat, 25 Jan 2025 13:42:32 +0000 Subject: [PATCH 09/55] Lint Co-authored-by: Andrew Murray <3112309+radarhere@users.noreply.github.com> --- src/PIL/Image.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/PIL/Image.py b/src/PIL/Image.py index ef0a05b74c9..3409f575732 100644 --- a/src/PIL/Image.py +++ b/src/PIL/Image.py @@ -754,7 +754,7 @@ def __arrow_c_schema__(self) -> object: def __arrow_c_array__( self, requested_schema: object | None = None - ) -> Tuple[object, object]: + ) -> tuple[object, object]: self.load() return (self.im.__arrow_c_schema__(), self.im.__arrow_c_array__()) From 2b88b1c49db974dfb103f181c137a2900f81a7a6 Mon Sep 17 00:00:00 2001 From: wiredfool Date: Sat, 25 Jan 2025 13:42:42 +0000 Subject: [PATCH 10/55] Typing Lint Co-authored-by: Andrew Murray <3112309+radarhere@users.noreply.github.com> --- src/PIL/Image.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/PIL/Image.py b/src/PIL/Image.py index 3409f575732..fbc24790a24 100644 --- a/src/PIL/Image.py +++ b/src/PIL/Image.py @@ -3361,7 +3361,8 @@ def fromarray(obj: SupportsArrayInterface, mode: str | None = None) -> Image: def fromarrow(obj: SupportsArrowArrayIngerface, mode, size) -> ImageFile.ImageFile: if not hasattr(obj, "__arrow_c_array__"): - raise ValueError("arrow_c_array interface not found") + msg = "arrow_c_array interface not found" + raise ValueError(msg) (schema_capsule, array_capsule) = obj.__arrow_c_array__() _im = core.new_arrow(mode, size, schema_capsule, array_capsule) From af6425014409787078bc657e892f1035f196555d Mon Sep 17 00:00:00 2001 From: wiredfool Date: Sat, 25 Jan 2025 13:42:53 +0000 Subject: [PATCH 11/55] Lint Co-authored-by: Andrew Murray <3112309+radarhere@users.noreply.github.com> --- src/PIL/Image.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/PIL/Image.py b/src/PIL/Image.py index fbc24790a24..f5005adbcb8 100644 --- a/src/PIL/Image.py +++ b/src/PIL/Image.py @@ -3262,7 +3262,8 @@ def __array_interface__(self) -> dict[str, Any]: class SupportsArrowArrayInterface(Protocol): """ - An object that has an ``__arrow_c_array__`` method corresponding to the arrow c data interface. + An object that has an ``__arrow_c_array__`` method corresponding to the arrow c + data interface. """ def __arrow_c_array__( From 244dded7a8ba35d14822bb3a6c58d2fafce6e9cf Mon Sep 17 00:00:00 2001 From: wiredfool Date: Sat, 25 Jan 2025 13:43:01 +0000 Subject: [PATCH 12/55] Typing Lint Co-authored-by: Andrew Murray <3112309+radarhere@users.noreply.github.com> --- src/PIL/Image.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/PIL/Image.py b/src/PIL/Image.py index f5005adbcb8..96521657fa4 100644 --- a/src/PIL/Image.py +++ b/src/PIL/Image.py @@ -3360,7 +3360,7 @@ def fromarray(obj: SupportsArrayInterface, mode: str | None = None) -> Image: return frombuffer(mode, size, obj, "raw", rawmode, 0, 1) -def fromarrow(obj: SupportsArrowArrayIngerface, mode, size) -> ImageFile.ImageFile: +def fromarrow(obj: SupportsArrowArrayInterface, mode, size) -> Image: if not hasattr(obj, "__arrow_c_array__"): msg = "arrow_c_array interface not found" raise ValueError(msg) From dbe0304bb3145e1aaf6a51383821648b772c8002 Mon Sep 17 00:00:00 2001 From: wiredfool Date: Sat, 25 Jan 2025 14:12:31 +0000 Subject: [PATCH 13/55] Tests for destructors of the PyCapsules --- Tests/test_arrow.py | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/Tests/test_arrow.py b/Tests/test_arrow.py index 68a4f96f09e..f1ab5ab6dcc 100644 --- a/Tests/test_arrow.py +++ b/Tests/test_arrow.py @@ -105,3 +105,17 @@ def test_lifetime2(): img2 = img.copy() px = img2.load() assert isinstance(px[0, 0], int) + + +def test_release_schema(): + # these should not error out, valgrind should be clean + img = hopper('L') + schema = img.__arrow_c_schema__() + del(schema) + +def test_release_array(): + # these should not error out, valgrind should be clean + img = hopper('L') + array, schema = img.__arrow_c_array__() + del(array) + del(schema) From 388da5c4a4ac34f4bfe280699731693eae79b13a Mon Sep 17 00:00:00 2001 From: wiredfool Date: Sat, 25 Jan 2025 15:12:41 +0000 Subject: [PATCH 14/55] Test rejection of incorrect modes --- Tests/test_arrow.py | 20 ++++++++++++++++++++ src/_imaging.c | 1 + src/libImaging/Storage.c | 29 ++++++++++++++++++++--------- 3 files changed, 41 insertions(+), 9 deletions(-) diff --git a/Tests/test_arrow.py b/Tests/test_arrow.py index f1ab5ab6dcc..fa39275c625 100644 --- a/Tests/test_arrow.py +++ b/Tests/test_arrow.py @@ -68,6 +68,26 @@ def test_to_array(mode: str, dtype: Any, mask: Any) -> None: assert_image_equal(img, reloaded) +@pytest.mark.parametrize( + "mode, dest_modes", + ( + ("L", ["I", "F", "LA", "RGB", "RGBA", "RGBX", "CMYK", "YCbCr", "HSV"]), + ("I", ["L", "F"]), # Technically I32 can work for any 4x8bit storage. + ("F", ["I", "L", "LA", "RGB", "RGBA", "RGBX", "CMYK", "YCbCr", "HSV"]), + ("LA", ["L", "F"]), + ("RGB", ["L", "F"]), + ("RGBA", ["L", "F"]), + ("RGBX", ["L", "F"]), + ("CMYK", ["L", "F"]), + ("YCbCr", ["L", "F"]), + ("HSV", ["L", "F"]), + ), +) +def test_invalid_array_type(mode: str, dest_modes: List[str]) -> None: + img = hopper(mode) + for dest_mode in dest_modes: + with pytest.raises(ValueError): + Image.fromarrow(img, dest_mode, img.size) def test_lifetime(): # valgrind shouldn't error out here. diff --git a/src/_imaging.c b/src/_imaging.c index 09d7096dcf3..727e8a49461 100644 --- a/src/_imaging.c +++ b/src/_imaging.c @@ -290,6 +290,7 @@ _new_arrow(PyObject *self, PyObject *args) { if (!ret && array->release) { array->release(array); array->release = NULL; + return ImagingError_ValueError("Invalid arrow array mode or size mismatch"); } return ret; } diff --git a/src/libImaging/Storage.c b/src/libImaging/Storage.c index 31604706a9c..c49949d7170 100644 --- a/src/libImaging/Storage.c +++ b/src/libImaging/Storage.c @@ -705,25 +705,36 @@ ImagingNewArrow( int64_t pixels = (int64_t)xsize * (int64_t)ysize; - if (((strcmp(schema->format, "i") == 0 && im->pixelsize == 4) || - (strcmp(schema->format, im->arrow_band_format) == 0 && im->bands == 1)) && - pixels == external_array->length) { + // fmt:off // don't reformat this + if (((strcmp(schema->format, "I") == 0 + && im->pixelsize == 4 + && im->bands >= 2) // INT32 into any INT32 Storage mode + || + (strcmp(schema->format, im->arrow_band_format) == 0 + && im->bands == 1)) // Single band match + && pixels == external_array->length) { // one arrow element per, and it matches a pixelsize*char if (ImagingAllocateArrow(im, external_array)) { return im; } } - if (strcmp(schema->format, "+w:4") == 0 && im->pixelsize == 4 && - schema->n_children > 0 && schema->children && - strcmp(schema->children[0]->format, "C") == 0 && - pixels == external_array->length && external_array->n_children == 1 && - external_array->children && 4 * pixels == external_array->children[0]->length) { + // linter: don't mess with the formatting here + if (strcmp(schema->format, "+w:4") == 0 // 4 up array + && im->pixelsize == 4 // storage as 32 bpc + && schema->n_children > 0 // make sure schema is well formed. + && schema->children // make sure schema is well formed + && strcmp(schema->children[0]->format, "C") == 0 // Expected format + && strcmp(im->arrow_band_format, "C") == 0 // Expected Format + && pixels == external_array->length // expected length + && external_array->n_children == 1 // array is well formed + && external_array->children // array is well formed + && 4 * pixels == external_array->children[0]->length) { // 4 up element of char into pixelsize == 4 if (ImagingAllocateArrow(im, external_array)) { return im; } } - + // fmt: on ImagingDelete(im); return NULL; } From 55f5351e3de1834bea55a55fc25d8ba6320ec6e9 Mon Sep 17 00:00:00 2001 From: wiredfool Date: Sat, 25 Jan 2025 15:43:22 +0000 Subject: [PATCH 15/55] Test for size, add offset support --- Tests/test_arrow.py | 8 ++++++++ src/libImaging/Storage.c | 24 ++++++++---------------- 2 files changed, 16 insertions(+), 16 deletions(-) diff --git a/Tests/test_arrow.py b/Tests/test_arrow.py index fa39275c625..5d7d3cdb595 100644 --- a/Tests/test_arrow.py +++ b/Tests/test_arrow.py @@ -89,6 +89,14 @@ def test_invalid_array_type(mode: str, dest_modes: List[str]) -> None: with pytest.raises(ValueError): Image.fromarrow(img, dest_mode, img.size) +def test_invalid_array_size(): + img = hopper('RGB') + + assert img.size != (10,10) + with pytest.raises(ValueError): + Image.fromarrow(img, 'RGB', (10,10)) + + def test_lifetime(): # valgrind shouldn't error out here. # arrays should be accessible after the image is deleted. diff --git a/src/libImaging/Storage.c b/src/libImaging/Storage.c index c49949d7170..701e89cefe1 100644 --- a/src/libImaging/Storage.c +++ b/src/libImaging/Storage.c @@ -571,34 +571,26 @@ ImagingDestroyArrow(Imaging im) { } Imaging -ImagingAllocateArrow(Imaging im, struct ArrowArray *external_array) { - /* don't really allocate, but naming patterns */ +ImagingBorrowArrow(Imaging im, + struct ArrowArray *external_array, + int offset_width) { + // offset_width is the # of char* for a single offset from arrow Py_ssize_t y, i; char *borrowed_buffer = NULL; struct ArrowArray *arr = external_array; - /* overflow check for malloc */ - if (im->linesize && im->ysize > INT_MAX / im->linesize) { - return (Imaging)ImagingError_MemoryError(); - } - if (arr->n_children == 1) { arr = arr->children[0]; } if (arr->n_buffers == 2) { - // undone offset. offset here is # of elements, - // so depends on the size of the element - // undone image is char*, arrow is void * // buffer 0 is the null list // buffer 1 is the data - borrowed_buffer = (char *)arr->buffers[1]; + borrowed_buffer = (char *)arr->buffers[1] + (offset_width*arr->offset); } if (!borrowed_buffer) { - // UNDONE better error here. - // currently, only wanting one where - return (Imaging)ImagingError_MemoryError(); + return (Imaging)ImagingError_ValueError("Arrow Array, exactly 2 buffers required"); } for (y = i = 0; y < im->ysize; y++) { @@ -714,7 +706,7 @@ ImagingNewArrow( && im->bands == 1)) // Single band match && pixels == external_array->length) { // one arrow element per, and it matches a pixelsize*char - if (ImagingAllocateArrow(im, external_array)) { + if (ImagingBorrowArrow(im, external_array, im->pixelsize)) { return im; } } @@ -730,7 +722,7 @@ ImagingNewArrow( && external_array->children // array is well formed && 4 * pixels == external_array->children[0]->length) { // 4 up element of char into pixelsize == 4 - if (ImagingAllocateArrow(im, external_array)) { + if (ImagingBorrowArrow(im, external_array, 1)) { return im; } } From ad492ee4d80c5578dc75afe5ee4d8abba3c7a9f2 Mon Sep 17 00:00:00 2001 From: wiredfool Date: Sat, 25 Jan 2025 16:21:15 +0000 Subject: [PATCH 16/55] Pull readonly in from the C level --- Tests/test_arrow.py | 8 ++++++++ src/PIL/Image.py | 8 ++++++++ src/_imaging.c | 6 ++++++ 3 files changed, 22 insertions(+) diff --git a/Tests/test_arrow.py b/Tests/test_arrow.py index 5d7d3cdb595..c5237c902b3 100644 --- a/Tests/test_arrow.py +++ b/Tests/test_arrow.py @@ -147,3 +147,11 @@ def test_release_array(): array, schema = img.__arrow_c_array__() del(array) del(schema) + + +def test_readonly(): + img = hopper('L') + reloaded = Image.fromarrow(img, img.mode, img.size) + assert reloaded.readonly == 1 + reloaded._readonly = 0 + assert reloaded.readonly == 1 diff --git a/src/PIL/Image.py b/src/PIL/Image.py index 96521657fa4..ecca5a52ae4 100644 --- a/src/PIL/Image.py +++ b/src/PIL/Image.py @@ -584,6 +584,14 @@ def size(self) -> tuple[int, int]: def mode(self) -> str: return self._mode + @property + def readonly(self) -> int: + return (self._im and self._im.readonly) or self._readonly + + @readonly.setter + def readonly(self, readonly: int) -> None: + self._readonly = readonly + def _new(self, im: core.ImagingCore) -> Image: new = Image() new.im = im diff --git a/src/_imaging.c b/src/_imaging.c index 727e8a49461..c7ea0b9329f 100644 --- a/src/_imaging.c +++ b/src/_imaging.c @@ -3821,6 +3821,11 @@ _getattr_unsafe_ptrs(ImagingObject *self, void *closure) { ); } +static PyObject * +_getattr_readonly(ImagingObject *self, void *closure) { + return PyLong_FromLong(self->image->read_only); +} + static struct PyGetSetDef getsetters[] = { {"mode", (getter)_getattr_mode}, {"size", (getter)_getattr_size}, @@ -3828,6 +3833,7 @@ static struct PyGetSetDef getsetters[] = { {"id", (getter)_getattr_id}, {"ptr", (getter)_getattr_ptr}, {"unsafe_ptrs", (getter)_getattr_unsafe_ptrs}, + {"readonly", (getter)_getattr_readonly}, {NULL} }; From d02417e411d0a49e00aec0ec2bbe223a38da28ff Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Sat, 25 Jan 2025 16:25:41 +0000 Subject: [PATCH 17/55] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- Tests/test_arrow.py | 23 +++++++++++++---------- src/libImaging/Storage.c | 37 +++++++++++++++++-------------------- 2 files changed, 30 insertions(+), 30 deletions(-) diff --git a/Tests/test_arrow.py b/Tests/test_arrow.py index c5237c902b3..69193394db8 100644 --- a/Tests/test_arrow.py +++ b/Tests/test_arrow.py @@ -68,11 +68,12 @@ def test_to_array(mode: str, dtype: Any, mask: Any) -> None: assert_image_equal(img, reloaded) + @pytest.mark.parametrize( "mode, dest_modes", ( ("L", ["I", "F", "LA", "RGB", "RGBA", "RGBX", "CMYK", "YCbCr", "HSV"]), - ("I", ["L", "F"]), # Technically I32 can work for any 4x8bit storage. + ("I", ["L", "F"]), # Technically I32 can work for any 4x8bit storage. ("F", ["I", "L", "LA", "RGB", "RGBA", "RGBX", "CMYK", "YCbCr", "HSV"]), ("LA", ["L", "F"]), ("RGB", ["L", "F"]), @@ -89,12 +90,13 @@ def test_invalid_array_type(mode: str, dest_modes: List[str]) -> None: with pytest.raises(ValueError): Image.fromarrow(img, dest_mode, img.size) + def test_invalid_array_size(): - img = hopper('RGB') + img = hopper("RGB") - assert img.size != (10,10) + assert img.size != (10, 10) with pytest.raises(ValueError): - Image.fromarrow(img, 'RGB', (10,10)) + Image.fromarrow(img, "RGB", (10, 10)) def test_lifetime(): @@ -137,20 +139,21 @@ def test_lifetime2(): def test_release_schema(): # these should not error out, valgrind should be clean - img = hopper('L') + img = hopper("L") schema = img.__arrow_c_schema__() - del(schema) + del schema + def test_release_array(): # these should not error out, valgrind should be clean - img = hopper('L') + img = hopper("L") array, schema = img.__arrow_c_array__() - del(array) - del(schema) + del array + del schema def test_readonly(): - img = hopper('L') + img = hopper("L") reloaded = Image.fromarrow(img, img.mode, img.size) assert reloaded.readonly == 1 reloaded._readonly = 0 diff --git a/src/libImaging/Storage.c b/src/libImaging/Storage.c index 701e89cefe1..7ac5f63a5cb 100644 --- a/src/libImaging/Storage.c +++ b/src/libImaging/Storage.c @@ -571,9 +571,7 @@ ImagingDestroyArrow(Imaging im) { } Imaging -ImagingBorrowArrow(Imaging im, - struct ArrowArray *external_array, - int offset_width) { +ImagingBorrowArrow(Imaging im, struct ArrowArray *external_array, int offset_width) { // offset_width is the # of char* for a single offset from arrow Py_ssize_t y, i; @@ -586,11 +584,12 @@ ImagingBorrowArrow(Imaging im, if (arr->n_buffers == 2) { // buffer 0 is the null list // buffer 1 is the data - borrowed_buffer = (char *)arr->buffers[1] + (offset_width*arr->offset); + borrowed_buffer = (char *)arr->buffers[1] + (offset_width * arr->offset); } if (!borrowed_buffer) { - return (Imaging)ImagingError_ValueError("Arrow Array, exactly 2 buffers required"); + return (Imaging + )ImagingError_ValueError("Arrow Array, exactly 2 buffers required"); } for (y = i = 0; y < im->ysize; y++) { @@ -698,12 +697,10 @@ ImagingNewArrow( int64_t pixels = (int64_t)xsize * (int64_t)ysize; // fmt:off // don't reformat this - if (((strcmp(schema->format, "I") == 0 - && im->pixelsize == 4 - && im->bands >= 2) // INT32 into any INT32 Storage mode - || - (strcmp(schema->format, im->arrow_band_format) == 0 - && im->bands == 1)) // Single band match + if (((strcmp(schema->format, "I") == 0 && im->pixelsize == 4 && im->bands >= 2 + ) // INT32 into any INT32 Storage mode + || (strcmp(schema->format, im->arrow_band_format) == 0 && im->bands == 1) + ) // Single band match && pixels == external_array->length) { // one arrow element per, and it matches a pixelsize*char if (ImagingBorrowArrow(im, external_array, im->pixelsize)) { @@ -711,15 +708,15 @@ ImagingNewArrow( } } // linter: don't mess with the formatting here - if (strcmp(schema->format, "+w:4") == 0 // 4 up array - && im->pixelsize == 4 // storage as 32 bpc - && schema->n_children > 0 // make sure schema is well formed. - && schema->children // make sure schema is well formed - && strcmp(schema->children[0]->format, "C") == 0 // Expected format - && strcmp(im->arrow_band_format, "C") == 0 // Expected Format - && pixels == external_array->length // expected length - && external_array->n_children == 1 // array is well formed - && external_array->children // array is well formed + if (strcmp(schema->format, "+w:4") == 0 // 4 up array + && im->pixelsize == 4 // storage as 32 bpc + && schema->n_children > 0 // make sure schema is well formed. + && schema->children // make sure schema is well formed + && strcmp(schema->children[0]->format, "C") == 0 // Expected format + && strcmp(im->arrow_band_format, "C") == 0 // Expected Format + && pixels == external_array->length // expected length + && external_array->n_children == 1 // array is well formed + && external_array->children // array is well formed && 4 * pixels == external_array->children[0]->length) { // 4 up element of char into pixelsize == 4 if (ImagingBorrowArrow(im, external_array, 1)) { From 6fad11a926bc8fb15620b77503b5a5f5cb9efdcc Mon Sep 17 00:00:00 2001 From: wiredfool Date: Thu, 30 Jan 2025 21:00:02 +0000 Subject: [PATCH 18/55] added mutex around refcount, renamed arrow_borrow to refcount --- src/libImaging/Arrow.c | 12 +++++++++--- src/libImaging/Imaging.h | 8 +++++++- src/libImaging/Storage.c | 21 +++++++++++++-------- 3 files changed, 29 insertions(+), 12 deletions(-) diff --git a/src/libImaging/Arrow.c b/src/libImaging/Arrow.c index f7bcfcf97fc..0ad049f6b15 100644 --- a/src/libImaging/Arrow.c +++ b/src/libImaging/Arrow.c @@ -188,7 +188,9 @@ export_single_channel_array(Imaging im, struct ArrowArray *array) { length = im->xsize * im->lines_per_block; } - im->arrow_borrow++; + MUTEX_LOCK(im->mutex); + im->refcount++; + MUTEX_UNLOCK(im->mutex); // Initialize primitive fields *array = (struct ArrowArray){// Data description .length = length, @@ -227,7 +229,9 @@ export_fixed_pixel_array(Imaging im, struct ArrowArray *array) { length = im->xsize * im->lines_per_block; } - im->arrow_borrow++; + MUTEX_LOCK(im->mutex); + im->refcount++; + MUTEX_UNLOCK(im->mutex); // Initialize primitive fields // Fixed length arrays are 1 buffer of validity, and the length in pixels. // Data is in a child array. @@ -254,7 +258,9 @@ export_fixed_pixel_array(Imaging im, struct ArrowArray *array) { array->children = calloc(1, sizeof(struct ArrowArray *)); array->children[0] = (struct ArrowArray *)calloc(1, sizeof(struct ArrowArray)); - im->arrow_borrow++; + MUTEX_LOCK(im->mutex); + im->refcount++; + MUTEX_UNLOCK(im->mutex); *array->children[0] = (struct ArrowArray){// Data description .length = length * 4, .offset = 0, diff --git a/src/libImaging/Imaging.h b/src/libImaging/Imaging.h index 2d1d45393d4..9fddf5eab4e 100644 --- a/src/libImaging/Imaging.h +++ b/src/libImaging/Imaging.h @@ -108,7 +108,7 @@ struct ImagingMemoryInstance { void (*destroy)(Imaging im); /* arrow */ - int arrow_borrow; /* Number of arrow arrays that have been allocated */ + int refcount; /* Number of arrow arrays that have been allocated */ char band_names[4][3]; /* names of bands, max 2 char + null terminator */ char arrow_band_format[2]; /* single character + null terminator */ @@ -117,6 +117,12 @@ struct ImagingMemoryInstance { int blocks_count; /* Number of blocks that have been allocated */ int lines_per_block; /* Number of lines in a block have been allocated */ + +#ifdef Py_GIL_DISABLED + PyMutex mutex; +#endif + + }; #define IMAGING_PIXEL_1(im, x, y) ((im)->image8[(y)][(x)]) diff --git a/src/libImaging/Storage.c b/src/libImaging/Storage.c index 7ac5f63a5cb..9e6439b881f 100644 --- a/src/libImaging/Storage.c +++ b/src/libImaging/Storage.c @@ -58,7 +58,7 @@ ImagingNewPrologueSubtype(const char *mode, int xsize, int ysize, int size) { /* Setup image descriptor */ im->xsize = xsize; im->ysize = ysize; - im->arrow_borrow = 1; + im->refcount = 1; im->type = IMAGING_TYPE_UINT8; strcpy(im->arrow_band_format, "C"); @@ -299,11 +299,14 @@ ImagingDelete(Imaging im) { return; } - im->arrow_borrow--; + MUTEX_LOCK(im->mutex); + im->refcount--; - if (im->arrow_borrow > 0) { - return; + if (im->refcount > 0) { + MUTEX_UNLOCK(im->mutex); + return; } + MUTEX_UNLOCK(im->mutex); if (im->palette) { ImagingPaletteDelete(im->palette); @@ -697,10 +700,12 @@ ImagingNewArrow( int64_t pixels = (int64_t)xsize * (int64_t)ysize; // fmt:off // don't reformat this - if (((strcmp(schema->format, "I") == 0 && im->pixelsize == 4 && im->bands >= 2 - ) // INT32 into any INT32 Storage mode - || (strcmp(schema->format, im->arrow_band_format) == 0 && im->bands == 1) - ) // Single band match + if (((strcmp(schema->format, "I") == 0 // int32 + && im->pixelsize == 4 // 4xchar* storage + && im->bands >= 2) // INT32 into any INT32 Storage mode + || + (strcmp(schema->format, im->arrow_band_format) == 0 // same mode + && im->bands == 1)) // Single band match && pixels == external_array->length) { // one arrow element per, and it matches a pixelsize*char if (ImagingBorrowArrow(im, external_array, im->pixelsize)) { From 4fc8328fe9d648fa70cd6e9bed96e84ebc6cfc19 Mon Sep 17 00:00:00 2001 From: wiredfool Date: Mon, 3 Feb 2025 11:48:09 +0000 Subject: [PATCH 19/55] remove unused code --- src/libImaging/Arrow.c | 50 ---------------------------------------- src/libImaging/Imaging.h | 2 -- 2 files changed, 52 deletions(-) diff --git a/src/libImaging/Arrow.c b/src/libImaging/Arrow.c index 0ad049f6b15..7dcd04d5ccb 100644 --- a/src/libImaging/Arrow.c +++ b/src/libImaging/Arrow.c @@ -112,57 +112,7 @@ export_imaging_schema(Imaging im, struct ArrowSchema *schema) { return 0; } -static void -release_simple_type(struct ArrowSchema *schema) { - // Mark released - schema->release = NULL; -} - -void -export_uint32_type(struct ArrowSchema *schema) { - *schema = (struct ArrowSchema){// Type description - .format = "I", - .name = "", - .metadata = NULL, - .flags = 0, - .n_children = 0, - .children = NULL, - .dictionary = NULL, - // Bookkeeping - .release = &release_simple_type - }; -} - -static void -release_uint32_array(struct ArrowArray *array) { - // assert(array->n_buffers == 2); - // Free the buffers and the buffers array - free((void *)array->buffers[1]); - free(array->buffers); - // Mark released - array->release = NULL; -} -void -export_uint32_array(const uint32_t *data, int64_t nitems, struct ArrowArray *array) { - // Initialize primitive fields - *array = (struct ArrowArray){// Data description - .length = nitems, - .offset = 0, - .null_count = 0, - .n_buffers = 2, - .n_children = 0, - .children = NULL, - .dictionary = NULL, - // Bookkeeping - .release = &release_uint32_array - }; - // Allocate list of buffers - array->buffers = (const void **)malloc(sizeof(void *) * array->n_buffers); - // assert(array->buffers != NULL); - array->buffers[0] = NULL; // no nulls, null bitmap can be omitted - array->buffers[1] = data; -} static void release_const_array(struct ArrowArray *array) { diff --git a/src/libImaging/Imaging.h b/src/libImaging/Imaging.h index 9fddf5eab4e..750e8a71e26 100644 --- a/src/libImaging/Imaging.h +++ b/src/libImaging/Imaging.h @@ -736,8 +736,6 @@ extern int export_imaging_array(Imaging im, struct ArrowArray *array); extern int export_imaging_schema(Imaging im, struct ArrowSchema *schema); -extern void -export_uint32_type(struct ArrowSchema *schema); /* Errcodes */ #define IMAGING_CODEC_END 1 From e7bd152aac559ec5c8c501bb696055e4272c9cc7 Mon Sep 17 00:00:00 2001 From: wiredfool Date: Mon, 3 Feb 2025 15:23:55 +0000 Subject: [PATCH 20/55] Error handling: * Return memory error for allocation errors * Return value error for invalid layout (block) or bad mode) * Free children on releasing the array * Only decrement refcount on the leaf array release --- Tests/test_arrow.py | 41 ++++++++++++++++ src/_imaging.c | 32 ++++++++++-- src/libImaging/Arrow.c | 102 +++++++++++++++++++++++++++++---------- src/libImaging/Imaging.h | 3 ++ src/libImaging/Storage.c | 2 +- 5 files changed, 150 insertions(+), 30 deletions(-) diff --git a/Tests/test_arrow.py b/Tests/test_arrow.py index 69193394db8..51b680aa18c 100644 --- a/Tests/test_arrow.py +++ b/Tests/test_arrow.py @@ -158,3 +158,44 @@ def test_readonly(): assert reloaded.readonly == 1 reloaded._readonly = 0 assert reloaded.readonly == 1 + +def test_multiblock_l_image(): + block_size = Image.core.get_block_size() + + # check a 2 block image in single channel mode + size = (4096, 2*block_size//4096) + img = Image.new('L', size, 128) + + with pytest.raises(ValueError): + (schema, arr) = img.__arrow_c_array__() + +def test_multiblock__rgba_image(): + block_size = Image.core.get_block_size() + + # check a 2 block image in 4 channel mode + size = (4096, (block_size//4096) //2) + img = Image.new('RGBA', size, (128,127,126,125)) + + with pytest.raises(ValueError): + (schema, arr) = img.__arrow_c_array__() + + +def test_multiblock_l_schema(): + block_size = Image.core.get_block_size() + + # check a 2 block image in single channel mode + size = (4096, 2*block_size//4096) + img = Image.new('L', size, 128) + + with pytest.raises(ValueError): + schema = img.__arrow_c_schema__() + +def test_multiblock__rgba_schema(): + block_size = Image.core.get_block_size() + + # check a 2 block image in 4 channel mode + size = (4096, (block_size//4096) //2) + img = Image.new('RGBA', size, (128,127,126,125)) + + with pytest.raises(ValueError): + schema= img.__arrow_c_schema__() diff --git a/src/_imaging.c b/src/_imaging.c index c7ea0b9329f..3d2b034b796 100644 --- a/src/_imaging.c +++ b/src/_imaging.c @@ -227,6 +227,20 @@ PyImaging_GetBuffer(PyObject *buffer, Py_buffer *view) { /* Arrow HANDLING */ /* -------------------------------------------------------------------- */ +PyObject * +ArrowError(int err) { + if (err == IMAGING_CODEC_MEMORY) { + return ImagingError_MemoryError(); + } + if (err == IMAGING_ARROW_INCOMPATIBLE_MODE) { + return ImagingError_ValueError("Incompatible Pillow mode for Arrow Array"); + } + if (err == IMAGING_ARROW_MEMORY_LAYOUT) { + return ImagingError_ValueError("Image is in multiple array blocks, use imaging_new_block for zero copy"); + } + return ImagingError_ValueError("Unknown Error"); +} + void ReleaseArrowSchemaPyCapsule(PyObject *capsule) { struct ArrowSchema *schema = @@ -241,8 +255,12 @@ PyObject * ExportArrowSchemaPyCapsule(ImagingObject *self) { struct ArrowSchema *schema = (struct ArrowSchema *)calloc(1, sizeof(struct ArrowSchema)); - export_imaging_schema(self->image, schema); - return PyCapsule_New(schema, "arrow_schema", ReleaseArrowSchemaPyCapsule); + int err = export_imaging_schema(self->image, schema); + if (err == 0) { + return PyCapsule_New(schema, "arrow_schema", ReleaseArrowSchemaPyCapsule); + } + free(schema); + return ArrowError(err); } void @@ -259,10 +277,16 @@ PyObject * ExportArrowArrayPyCapsule(ImagingObject *self) { struct ArrowArray *array = (struct ArrowArray *)calloc(1, sizeof(struct ArrowArray)); - export_imaging_array(self->image, array); - return PyCapsule_New(array, "arrow_array", ReleaseArrowArrayPyCapsule); + int err = export_imaging_array(self->image, array); + if (err == 0) { + return PyCapsule_New(array, "arrow_array", ReleaseArrowArrayPyCapsule); + } + free(array); + // raise error here + return ArrowError(err); } + static PyObject * _new_arrow(PyObject *self, PyObject *args) { char *mode; diff --git a/src/libImaging/Arrow.c b/src/libImaging/Arrow.c index 7dcd04d5ccb..dd6c8fc385d 100644 --- a/src/libImaging/Arrow.c +++ b/src/libImaging/Arrow.c @@ -24,21 +24,26 @@ ReleaseExportedSchema(struct ArrowSchema *array) { free((void *)array->name); array->name = NULL; } + if (array->metadata) { + free((void *)array->metadata); + array->metadata = NULL; + } // Release children for (int64_t i = 0; i < array->n_children; ++i) { struct ArrowSchema *child = array->children[i]; if (child->release != NULL) { child->release(child); - // assert(child->release == NULL); + child->release = NULL; } + // UNDONE -- should I be releasing the children? } // Release dictionary struct ArrowSchema *dict = array->dictionary; if (dict != NULL && dict->release != NULL) { dict->release(dict); - // assert(dict->release == NULL); + dict->release = NULL; } // TODO here: release and/or deallocate all data directly owned by @@ -58,28 +63,28 @@ export_named_type(struct ArrowSchema *schema, char *format, char *name) { formatp = calloc(format_len, 1); if (!formatp) { - return 1; + return IMAGING_CODEC_MEMORY; } namep = calloc(name_len, 1); if (!namep) { free(formatp); - return 1; + return IMAGING_CODEC_MEMORY; } strncpy(formatp, format, format_len); strncpy(namep, name, name_len); *schema = (struct ArrowSchema){// Type description - .format = formatp, - .name = namep, - .metadata = NULL, - .flags = 0, - .n_children = 0, - .children = NULL, - .dictionary = NULL, - // Bookkeeping - .release = &ReleaseExportedSchema + .format = formatp, + .name = namep, + .metadata = NULL, + .flags = 0, + .n_children = 0, + .children = NULL, + .dictionary = NULL, + // Bookkeeping + .release = &ReleaseExportedSchema }; return 0; } @@ -89,7 +94,12 @@ export_imaging_schema(Imaging im, struct ArrowSchema *schema) { int retval = 0; if (strcmp(im->arrow_band_format, "") == 0) { - return 1; + return IMAGING_ARROW_INCOMPATIBLE_MODE; + } + + /* for now, single block images */ + if (!(im->blocks_count == 0 || im->blocks_count == 1)){ + return IMAGING_ARROW_MEMORY_LAYOUT; } if (im->bands == 1) { @@ -97,17 +107,18 @@ export_imaging_schema(Imaging im, struct ArrowSchema *schema) { } retval = export_named_type(schema, "+w:4", ""); - if (retval) { + if (retval != 0) { return retval; } // if it's not 1 band, it's an int32 at the moment. 4 unint8 bands. schema->n_children = 1; schema->children = calloc(1, sizeof(struct ArrowSchema *)); schema->children[0] = (struct ArrowSchema *)calloc(1, sizeof(struct ArrowSchema)); - if (export_named_type(schema->children[0], im->arrow_band_format, "pixel")) { + retval = export_named_type(schema->children[0], im->arrow_band_format, "pixel"); + if (retval != 0) { free(schema->children[0]); schema->release(schema); - return 2; + return retval; } return 0; } @@ -118,11 +129,27 @@ static void release_const_array(struct ArrowArray *array) { Imaging im = (Imaging)array->private_data; - ImagingDelete(im); + if (array->n_children == 0) { + ImagingDelete(im); + } - // assert(array->n_buffers == 2); // Free the buffers and the buffers array - free(array->buffers); + if (array->buffers) { + free(array->buffers); + array->buffers = NULL; + } + if (array->children) { + // undone -- does arrow release all the children recursively. + for (int i=0; i < array->n_children; i++) { + if (array->children[i]->release){ + array->children[i]->release(array->children[i]); + array->children[i]->release = NULL; + free(array->children[i]); + } + } + free(array->children); + array->children = NULL; + } // Mark released array->release = NULL; } @@ -131,8 +158,10 @@ int export_single_channel_array(Imaging im, struct ArrowArray *array) { int length = im->xsize * im->ysize; - /* undone -- for now, single block images */ - // assert (im->block_count = 0 || im->block_count = 1); + /* for now, single block images */ + if (!(im->blocks_count == 0 || im->blocks_count == 1)){ + return IMAGING_ARROW_MEMORY_LAYOUT; + } if (im->lines_per_block && im->lines_per_block < im->ysize) { length = im->xsize * im->lines_per_block; @@ -172,8 +201,10 @@ int export_fixed_pixel_array(Imaging im, struct ArrowArray *array) { int length = im->xsize * im->ysize; - /* undone -- for now, single block images */ - // assert (im->block_count = 0 || im->block_count = 1); + /* for now, single block images */ + if (!(im->blocks_count == 0 || im->blocks_count == 1)) { + return IMAGING_ARROW_MEMORY_LAYOUT; + } if (im->lines_per_block && im->lines_per_block < im->ysize) { length = im->xsize * im->lines_per_block; @@ -200,13 +231,22 @@ export_fixed_pixel_array(Imaging im, struct ArrowArray *array) { // Allocate list of buffers array->buffers = (const void **)calloc(1, sizeof(void *) * array->n_buffers); + if (!array->buffers) { + goto err; + } // assert(array->buffers != NULL); array->buffers[0] = NULL; // no nulls, null bitmap can be omitted // if it's not 1 band, it's an int32 at the moment. 4 unint8 bands. array->n_children = 1; array->children = calloc(1, sizeof(struct ArrowArray *)); + if (!array->children) { + goto err; + } array->children[0] = (struct ArrowArray *)calloc(1, sizeof(struct ArrowArray)); + if (!array->children[0]) { + goto err; + } MUTEX_LOCK(im->mutex); im->refcount++; @@ -233,12 +273,24 @@ export_fixed_pixel_array(Imaging im, struct ArrowArray *array) { array->children[0]->buffers[1] = im->blocks[0].ptr; } return 0; + + err: + if (array->children[0]) { + free(array->children[0]); + } + if (array->children) { + free(array->children); + } + if (array->buffers) { + free(array->buffers); + } + return IMAGING_CODEC_MEMORY; } int export_imaging_array(Imaging im, struct ArrowArray *array) { if (strcmp(im->arrow_band_format, "") == 0) { - return 1; + return IMAGING_ARROW_INCOMPATIBLE_MODE; } if (im->bands == 1) { diff --git a/src/libImaging/Imaging.h b/src/libImaging/Imaging.h index 750e8a71e26..7136e7ff965 100644 --- a/src/libImaging/Imaging.h +++ b/src/libImaging/Imaging.h @@ -744,6 +744,9 @@ export_imaging_schema(Imaging im, struct ArrowSchema *schema); #define IMAGING_CODEC_UNKNOWN -3 #define IMAGING_CODEC_CONFIG -8 #define IMAGING_CODEC_MEMORY -9 +#define IMAGING_ARROW_INCOMPATIBLE_MODE -10 +#define IMAGING_ARROW_MEMORY_LAYOUT -11 + #include "ImagingUtils.h" extern UINT8 *clip8_lookups; diff --git a/src/libImaging/Storage.c b/src/libImaging/Storage.c index 9e6439b881f..fd75a55849a 100644 --- a/src/libImaging/Storage.c +++ b/src/libImaging/Storage.c @@ -703,7 +703,7 @@ ImagingNewArrow( if (((strcmp(schema->format, "I") == 0 // int32 && im->pixelsize == 4 // 4xchar* storage && im->bands >= 2) // INT32 into any INT32 Storage mode - || + || // (()||()) && (strcmp(schema->format, im->arrow_band_format) == 0 // same mode && im->bands == 1)) // Single band match && pixels == external_array->length) { From 9f94d4f9f8997c0c3a73d5fd19338ba4ea0a5b25 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Mon, 3 Feb 2025 15:29:27 +0000 Subject: [PATCH 21/55] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- Tests/test_arrow.py | 21 ++++++++++++--------- src/_imaging.c | 13 +++++++------ src/libImaging/Arrow.c | 30 ++++++++++++++---------------- src/libImaging/Imaging.h | 3 --- src/libImaging/Storage.c | 12 ++++++------ 5 files changed, 39 insertions(+), 40 deletions(-) diff --git a/Tests/test_arrow.py b/Tests/test_arrow.py index 51b680aa18c..231db651242 100644 --- a/Tests/test_arrow.py +++ b/Tests/test_arrow.py @@ -159,22 +159,24 @@ def test_readonly(): reloaded._readonly = 0 assert reloaded.readonly == 1 + def test_multiblock_l_image(): block_size = Image.core.get_block_size() # check a 2 block image in single channel mode - size = (4096, 2*block_size//4096) - img = Image.new('L', size, 128) + size = (4096, 2 * block_size // 4096) + img = Image.new("L", size, 128) with pytest.raises(ValueError): (schema, arr) = img.__arrow_c_array__() + def test_multiblock__rgba_image(): block_size = Image.core.get_block_size() # check a 2 block image in 4 channel mode - size = (4096, (block_size//4096) //2) - img = Image.new('RGBA', size, (128,127,126,125)) + size = (4096, (block_size // 4096) // 2) + img = Image.new("RGBA", size, (128, 127, 126, 125)) with pytest.raises(ValueError): (schema, arr) = img.__arrow_c_array__() @@ -184,18 +186,19 @@ def test_multiblock_l_schema(): block_size = Image.core.get_block_size() # check a 2 block image in single channel mode - size = (4096, 2*block_size//4096) - img = Image.new('L', size, 128) + size = (4096, 2 * block_size // 4096) + img = Image.new("L", size, 128) with pytest.raises(ValueError): schema = img.__arrow_c_schema__() + def test_multiblock__rgba_schema(): block_size = Image.core.get_block_size() # check a 2 block image in 4 channel mode - size = (4096, (block_size//4096) //2) - img = Image.new('RGBA', size, (128,127,126,125)) + size = (4096, (block_size // 4096) // 2) + img = Image.new("RGBA", size, (128, 127, 126, 125)) with pytest.raises(ValueError): - schema= img.__arrow_c_schema__() + schema = img.__arrow_c_schema__() diff --git a/src/_imaging.c b/src/_imaging.c index 3d2b034b796..fe4c806cc45 100644 --- a/src/_imaging.c +++ b/src/_imaging.c @@ -230,13 +230,15 @@ PyImaging_GetBuffer(PyObject *buffer, Py_buffer *view) { PyObject * ArrowError(int err) { if (err == IMAGING_CODEC_MEMORY) { - return ImagingError_MemoryError(); + return ImagingError_MemoryError(); } if (err == IMAGING_ARROW_INCOMPATIBLE_MODE) { - return ImagingError_ValueError("Incompatible Pillow mode for Arrow Array"); + return ImagingError_ValueError("Incompatible Pillow mode for Arrow Array"); } if (err == IMAGING_ARROW_MEMORY_LAYOUT) { - return ImagingError_ValueError("Image is in multiple array blocks, use imaging_new_block for zero copy"); + return ImagingError_ValueError( + "Image is in multiple array blocks, use imaging_new_block for zero copy" + ); } return ImagingError_ValueError("Unknown Error"); } @@ -257,7 +259,7 @@ ExportArrowSchemaPyCapsule(ImagingObject *self) { (struct ArrowSchema *)calloc(1, sizeof(struct ArrowSchema)); int err = export_imaging_schema(self->image, schema); if (err == 0) { - return PyCapsule_New(schema, "arrow_schema", ReleaseArrowSchemaPyCapsule); + return PyCapsule_New(schema, "arrow_schema", ReleaseArrowSchemaPyCapsule); } free(schema); return ArrowError(err); @@ -279,14 +281,13 @@ ExportArrowArrayPyCapsule(ImagingObject *self) { (struct ArrowArray *)calloc(1, sizeof(struct ArrowArray)); int err = export_imaging_array(self->image, array); if (err == 0) { - return PyCapsule_New(array, "arrow_array", ReleaseArrowArrayPyCapsule); + return PyCapsule_New(array, "arrow_array", ReleaseArrowArrayPyCapsule); } free(array); // raise error here return ArrowError(err); } - static PyObject * _new_arrow(PyObject *self, PyObject *args) { char *mode; diff --git a/src/libImaging/Arrow.c b/src/libImaging/Arrow.c index dd6c8fc385d..29aadb5e100 100644 --- a/src/libImaging/Arrow.c +++ b/src/libImaging/Arrow.c @@ -76,15 +76,15 @@ export_named_type(struct ArrowSchema *schema, char *format, char *name) { strncpy(namep, name, name_len); *schema = (struct ArrowSchema){// Type description - .format = formatp, - .name = namep, - .metadata = NULL, - .flags = 0, - .n_children = 0, - .children = NULL, - .dictionary = NULL, - // Bookkeeping - .release = &ReleaseExportedSchema + .format = formatp, + .name = namep, + .metadata = NULL, + .flags = 0, + .n_children = 0, + .children = NULL, + .dictionary = NULL, + // Bookkeeping + .release = &ReleaseExportedSchema }; return 0; } @@ -98,7 +98,7 @@ export_imaging_schema(Imaging im, struct ArrowSchema *schema) { } /* for now, single block images */ - if (!(im->blocks_count == 0 || im->blocks_count == 1)){ + if (!(im->blocks_count == 0 || im->blocks_count == 1)) { return IMAGING_ARROW_MEMORY_LAYOUT; } @@ -123,8 +123,6 @@ export_imaging_schema(Imaging im, struct ArrowSchema *schema) { return 0; } - - static void release_const_array(struct ArrowArray *array) { Imaging im = (Imaging)array->private_data; @@ -140,8 +138,8 @@ release_const_array(struct ArrowArray *array) { } if (array->children) { // undone -- does arrow release all the children recursively. - for (int i=0; i < array->n_children; i++) { - if (array->children[i]->release){ + for (int i = 0; i < array->n_children; i++) { + if (array->children[i]->release) { array->children[i]->release(array->children[i]); array->children[i]->release = NULL; free(array->children[i]); @@ -159,7 +157,7 @@ export_single_channel_array(Imaging im, struct ArrowArray *array) { int length = im->xsize * im->ysize; /* for now, single block images */ - if (!(im->blocks_count == 0 || im->blocks_count == 1)){ + if (!(im->blocks_count == 0 || im->blocks_count == 1)) { return IMAGING_ARROW_MEMORY_LAYOUT; } @@ -274,7 +272,7 @@ export_fixed_pixel_array(Imaging im, struct ArrowArray *array) { } return 0; - err: +err: if (array->children[0]) { free(array->children[0]); } diff --git a/src/libImaging/Imaging.h b/src/libImaging/Imaging.h index 7136e7ff965..ffda969267f 100644 --- a/src/libImaging/Imaging.h +++ b/src/libImaging/Imaging.h @@ -121,8 +121,6 @@ struct ImagingMemoryInstance { #ifdef Py_GIL_DISABLED PyMutex mutex; #endif - - }; #define IMAGING_PIXEL_1(im, x, y) ((im)->image8[(y)][(x)]) @@ -747,7 +745,6 @@ export_imaging_schema(Imaging im, struct ArrowSchema *schema); #define IMAGING_ARROW_INCOMPATIBLE_MODE -10 #define IMAGING_ARROW_MEMORY_LAYOUT -11 - #include "ImagingUtils.h" extern UINT8 *clip8_lookups; diff --git a/src/libImaging/Storage.c b/src/libImaging/Storage.c index fd75a55849a..953cd6f5f3f 100644 --- a/src/libImaging/Storage.c +++ b/src/libImaging/Storage.c @@ -303,8 +303,8 @@ ImagingDelete(Imaging im) { im->refcount--; if (im->refcount > 0) { - MUTEX_UNLOCK(im->mutex); - return; + MUTEX_UNLOCK(im->mutex); + return; } MUTEX_UNLOCK(im->mutex); @@ -702,10 +702,10 @@ ImagingNewArrow( // fmt:off // don't reformat this if (((strcmp(schema->format, "I") == 0 // int32 && im->pixelsize == 4 // 4xchar* storage - && im->bands >= 2) // INT32 into any INT32 Storage mode - || // (()||()) && - (strcmp(schema->format, im->arrow_band_format) == 0 // same mode - && im->bands == 1)) // Single band match + && im->bands >= 2) // INT32 into any INT32 Storage mode + || // (()||()) && + (strcmp(schema->format, im->arrow_band_format) == 0 // same mode + && im->bands == 1)) // Single band match && pixels == external_array->length) { // one arrow element per, and it matches a pixelsize*char if (ImagingBorrowArrow(im, external_array, im->pixelsize)) { From 13e33010e0c5648b8217b46e331253c4942b3ae3 Mon Sep 17 00:00:00 2001 From: wiredfool Date: Mon, 3 Feb 2025 17:07:05 +0000 Subject: [PATCH 22/55] Fix handling of capsule destruct sequencing * PyCapsules call the destructor on Del * Need to make sure that lifetime matches the array lifetime. --- src/_imaging.c | 17 +++-------------- src/libImaging/Imaging.h | 6 +++--- src/libImaging/Storage.c | 24 +++++++++++++++--------- 3 files changed, 21 insertions(+), 26 deletions(-) diff --git a/src/_imaging.c b/src/_imaging.c index fe4c806cc45..b776267bc10 100644 --- a/src/_imaging.c +++ b/src/_imaging.c @@ -301,20 +301,9 @@ _new_arrow(PyObject *self, PyObject *args) { return NULL; } - struct ArrowSchema *schema = - (struct ArrowSchema *)PyCapsule_GetPointer(schema_capsule, "arrow_schema"); - - struct ArrowArray *array = - (struct ArrowArray *)PyCapsule_GetPointer(array_capsule, "arrow_array"); - - ret = PyImagingNew(ImagingNewArrow(mode, xsize, ysize, schema, array)); - if (schema->release) { - schema->release(schema); - schema->release = NULL; - } - if (!ret && array->release) { - array->release(array); - array->release = NULL; + // ImagingBorrowArrow is responsible for retaining the array_capsule + ret = PyImagingNew(ImagingNewArrow(mode, xsize, ysize, schema_capsule, array_capsule)); + if (!ret) { return ImagingError_ValueError("Invalid arrow array mode or size mismatch"); } return ret; diff --git a/src/libImaging/Imaging.h b/src/libImaging/Imaging.h index ffda969267f..09155765b67 100644 --- a/src/libImaging/Imaging.h +++ b/src/libImaging/Imaging.h @@ -113,7 +113,7 @@ struct ImagingMemoryInstance { char arrow_band_format[2]; /* single character + null terminator */ int read_only; /* flag for read-only. set for arrow borrowed arrays */ - struct ArrowArray *arrow_array_capsule; /* upstream arrow array source */ + PyObject *arrow_array_capsule; /* upstream arrow array source */ int blocks_count; /* Number of blocks that have been allocated */ int lines_per_block; /* Number of lines in a block have been allocated */ @@ -209,8 +209,8 @@ ImagingNewArrow( const char *mode, int xsize, int ysize, - struct ArrowSchema *schema, - struct ArrowArray *external_array + PyObject *schema_capsule, + PyObject *array_capsule ); extern Imaging diff --git a/src/libImaging/Storage.c b/src/libImaging/Storage.c index 953cd6f5f3f..6b1937a56a8 100644 --- a/src/libImaging/Storage.c +++ b/src/libImaging/Storage.c @@ -566,15 +566,15 @@ ImagingAllocateBlock(Imaging im) { static void ImagingDestroyArrow(Imaging im) { - if (im->arrow_array_capsule && im->arrow_array_capsule->release) { - im->arrow_array_capsule->release(im->arrow_array_capsule); - im->arrow_array_capsule->release = NULL; + // Rely on the internal python destructor for the array capsule. + if (im->arrow_array_capsule){ + Py_DECREF(im->arrow_array_capsule); im->arrow_array_capsule = NULL; } } Imaging -ImagingBorrowArrow(Imaging im, struct ArrowArray *external_array, int offset_width) { +ImagingBorrowArrow(Imaging im, struct ArrowArray *external_array, int offset_width, PyObject* arrow_capsule) { // offset_width is the # of char* for a single offset from arrow Py_ssize_t y, i; @@ -600,7 +600,8 @@ ImagingBorrowArrow(Imaging im, struct ArrowArray *external_array, int offset_wid i += im->linesize; } im->read_only = 1; - im->arrow_array_capsule = external_array; + Py_INCREF(arrow_capsule); + im->arrow_array_capsule = arrow_capsule; im->destroy = ImagingDestroyArrow; return im; @@ -682,11 +683,16 @@ ImagingNewArrow( const char *mode, int xsize, int ysize, - struct ArrowSchema *schema, - struct ArrowArray *external_array + PyObject *schema_capsule, + PyObject *array_capsule ) { /* A borrowed arrow array */ Imaging im; + struct ArrowSchema *schema = + (struct ArrowSchema *)PyCapsule_GetPointer(schema_capsule, "arrow_schema"); + + struct ArrowArray *external_array = + (struct ArrowArray *)PyCapsule_GetPointer(array_capsule, "arrow_array"); if (xsize < 0 || ysize < 0) { return (Imaging)ImagingError_ValueError("bad image size"); @@ -708,7 +714,7 @@ ImagingNewArrow( && im->bands == 1)) // Single band match && pixels == external_array->length) { // one arrow element per, and it matches a pixelsize*char - if (ImagingBorrowArrow(im, external_array, im->pixelsize)) { + if (ImagingBorrowArrow(im, external_array, im->pixelsize, array_capsule)) { return im; } } @@ -724,7 +730,7 @@ ImagingNewArrow( && external_array->children // array is well formed && 4 * pixels == external_array->children[0]->length) { // 4 up element of char into pixelsize == 4 - if (ImagingBorrowArrow(im, external_array, 1)) { + if (ImagingBorrowArrow(im, external_array, 1, array_capsule)) { return im; } } From 240175733e2824122427a7d401d52abfbeb96c91 Mon Sep 17 00:00:00 2001 From: wiredfool Date: Mon, 3 Feb 2025 17:40:10 +0000 Subject: [PATCH 23/55] Add a way to force the use of the old block allocator * Test that arrow can be exported when the block allocator is forced. --- Tests/test_arrow.py | 66 +++++++++++++++++++++++++++++++++++++--- src/_imaging.c | 18 +++++++++++ src/libImaging/Imaging.h | 3 ++ src/libImaging/Storage.c | 13 ++++++++ 4 files changed, 95 insertions(+), 5 deletions(-) diff --git a/Tests/test_arrow.py b/Tests/test_arrow.py index 231db651242..ee61340018f 100644 --- a/Tests/test_arrow.py +++ b/Tests/test_arrow.py @@ -170,8 +170,7 @@ def test_multiblock_l_image(): with pytest.raises(ValueError): (schema, arr) = img.__arrow_c_array__() - -def test_multiblock__rgba_image(): +def test_multiblock_rgba_image(): block_size = Image.core.get_block_size() # check a 2 block image in 4 channel mode @@ -192,8 +191,7 @@ def test_multiblock_l_schema(): with pytest.raises(ValueError): schema = img.__arrow_c_schema__() - -def test_multiblock__rgba_schema(): +def test_multiblock_rgba_schema(): block_size = Image.core.get_block_size() # check a 2 block image in 4 channel mode @@ -201,4 +199,62 @@ def test_multiblock__rgba_schema(): img = Image.new("RGBA", size, (128, 127, 126, 125)) with pytest.raises(ValueError): - schema = img.__arrow_c_schema__() + schema= img.__arrow_c_schema__() + + +def test_singleblock_l_image(): + Image.core.set_use_block_allocator(1) + + block_size = Image.core.get_block_size() + + # check a 2 block image in 4 channel mode + size = (4096, 2* (block_size//4096)) + img = Image.new('L', size, 128) + assert img.im.isblock() + + (schema, arr) = img.__arrow_c_array__() + assert schema + assert arr + + Image.core.set_use_block_allocator(0) + +def test_singleblock_rgba_image(): + Image.core.set_use_block_allocator(1) + block_size = Image.core.get_block_size() + + # check a 2 block image in 4 channel mode + size = (4096, (block_size//4096) //2) + img = Image.new('RGBA', size, (128,127,126,125)) + assert img.im.isblock() + + (schema, arr) = img.__arrow_c_array__() + assert schema + assert arr + Image.core.set_use_block_allocator(0) + + +def test_singleblock_l_schema(): + Image.core.set_use_block_allocator(1) + block_size = Image.core.get_block_size() + + # check a 2 block image in single channel mode + size = (4096, 2*block_size//4096) + img = Image.new('L', size, 128) + assert img.im.isblock() + + schema = img.__arrow_c_schema__() + assert schema + Image.core.set_use_block_allocator(0) + +def test_singleblock_rgba_schema(): + Image.core.set_use_block_allocator(1) + block_size = Image.core.get_block_size() + + # check a 2 block image in 4 channel mode + size = (4096, (block_size//4096) //2) + img = Image.new('RGBA', size, (128,127,126,125)) + assert img.im.isblock() + + schema= img.__arrow_c_schema__() + assert schema + Image.core.set_use_block_allocator(0) diff --git a/src/_imaging.c b/src/_imaging.c index b776267bc10..54ec59ac738 100644 --- a/src/_imaging.c +++ b/src/_imaging.c @@ -4192,6 +4192,22 @@ _set_blocks_max(PyObject *self, PyObject *args) { return Py_None; } +static PyObject * +_set_use_block_allocator(PyObject *self, PyObject *args) { + int use_block_allocator; + if (!PyArg_ParseTuple(args, "i:set_use_block_allocator", &use_block_allocator)) { + return NULL; + } + ImagingMemorySetBlockAllocator(&ImagingDefaultArena, use_block_allocator); + Py_RETURN_NONE; +} + + +static PyObject * +_get_use_block_allocator(PyObject *self, PyObject *args) { + return PyLong_FromLong(ImagingDefaultArena.use_block_allocator); +} + static PyObject * _clear_cache(PyObject *self, PyObject *args) { int i = 0; @@ -4399,9 +4415,11 @@ static PyMethodDef functions[] = { {"get_alignment", (PyCFunction)_get_alignment, METH_VARARGS}, {"get_block_size", (PyCFunction)_get_block_size, METH_VARARGS}, {"get_blocks_max", (PyCFunction)_get_blocks_max, METH_VARARGS}, + {"get_use_block_allocator", (PyCFunction)_get_use_block_allocator, METH_VARARGS}, {"set_alignment", (PyCFunction)_set_alignment, METH_VARARGS}, {"set_block_size", (PyCFunction)_set_block_size, METH_VARARGS}, {"set_blocks_max", (PyCFunction)_set_blocks_max, METH_VARARGS}, + {"set_use_block_allocator", (PyCFunction)_set_use_block_allocator, METH_VARARGS}, {"clear_cache", (PyCFunction)_clear_cache, METH_VARARGS}, {NULL, NULL} /* sentinel */ diff --git a/src/libImaging/Imaging.h b/src/libImaging/Imaging.h index 09155765b67..fb3d31a99d6 100644 --- a/src/libImaging/Imaging.h +++ b/src/libImaging/Imaging.h @@ -178,6 +178,7 @@ typedef struct ImagingMemoryArena { int stats_reallocated_blocks; /* Number of blocks which were actually reallocated after retrieving */ int stats_freed_blocks; /* Number of freed blocks */ + int use_block_allocator; /* don't use arena, use block allocator */ #ifdef Py_GIL_DISABLED PyMutex mutex; #endif @@ -191,6 +192,8 @@ extern int ImagingMemorySetBlocksMax(ImagingMemoryArena arena, int blocks_max); extern void ImagingMemoryClearCache(ImagingMemoryArena arena, int new_size); +extern void +ImagingMemorySetBlockAllocator(ImagingMemoryArena arena, int use_block_allocator); extern Imaging ImagingNew(const char *mode, int xsize, int ysize); diff --git a/src/libImaging/Storage.c b/src/libImaging/Storage.c index 6b1937a56a8..a1241098e7a 100644 --- a/src/libImaging/Storage.c +++ b/src/libImaging/Storage.c @@ -341,6 +341,7 @@ struct ImagingMemoryArena ImagingDefaultArena = { 0, 0, 0, // Stats + 0, // use_block_allocator #ifdef Py_GIL_DISABLED {0}, #endif @@ -373,6 +374,12 @@ ImagingMemorySetBlocksMax(ImagingMemoryArena arena, int blocks_max) { return 1; } +void +ImagingMemorySetBlockAllocator(ImagingMemoryArena arena, int use_block_allocator) { + arena->use_block_allocator = use_block_allocator; +} + + void ImagingMemoryClearCache(ImagingMemoryArena arena, int new_size) { while (arena->blocks_cached > new_size) { @@ -649,11 +656,17 @@ ImagingNewInternal(const char *mode, int xsize, int ysize, int dirty) { Imaging ImagingNew(const char *mode, int xsize, int ysize) { + if (ImagingDefaultArena.use_block_allocator) { + return ImagingNewBlock(mode, xsize, ysize); + } return ImagingNewInternal(mode, xsize, ysize, 0); } Imaging ImagingNewDirty(const char *mode, int xsize, int ysize) { + if (ImagingDefaultArena.use_block_allocator) { + return ImagingNewBlock(mode, xsize, ysize); + } return ImagingNewInternal(mode, xsize, ysize, 1); } From be3b0fd05cf3026a828ef1a5863340bdd5a264ea Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Mon, 3 Feb 2025 17:43:07 +0000 Subject: [PATCH 24/55] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- Tests/test_arrow.py | 24 ++++++++++++++---------- src/_imaging.c | 5 +++-- src/libImaging/Storage.c | 10 +++++++--- 3 files changed, 24 insertions(+), 15 deletions(-) diff --git a/Tests/test_arrow.py b/Tests/test_arrow.py index ee61340018f..f236360b403 100644 --- a/Tests/test_arrow.py +++ b/Tests/test_arrow.py @@ -170,6 +170,7 @@ def test_multiblock_l_image(): with pytest.raises(ValueError): (schema, arr) = img.__arrow_c_array__() + def test_multiblock_rgba_image(): block_size = Image.core.get_block_size() @@ -191,6 +192,7 @@ def test_multiblock_l_schema(): with pytest.raises(ValueError): schema = img.__arrow_c_schema__() + def test_multiblock_rgba_schema(): block_size = Image.core.get_block_size() @@ -199,7 +201,7 @@ def test_multiblock_rgba_schema(): img = Image.new("RGBA", size, (128, 127, 126, 125)) with pytest.raises(ValueError): - schema= img.__arrow_c_schema__() + schema = img.__arrow_c_schema__() def test_singleblock_l_image(): @@ -208,8 +210,8 @@ def test_singleblock_l_image(): block_size = Image.core.get_block_size() # check a 2 block image in 4 channel mode - size = (4096, 2* (block_size//4096)) - img = Image.new('L', size, 128) + size = (4096, 2 * (block_size // 4096)) + img = Image.new("L", size, 128) assert img.im.isblock() (schema, arr) = img.__arrow_c_array__() @@ -218,13 +220,14 @@ def test_singleblock_l_image(): Image.core.set_use_block_allocator(0) + def test_singleblock_rgba_image(): Image.core.set_use_block_allocator(1) block_size = Image.core.get_block_size() # check a 2 block image in 4 channel mode - size = (4096, (block_size//4096) //2) - img = Image.new('RGBA', size, (128,127,126,125)) + size = (4096, (block_size // 4096) // 2) + img = Image.new("RGBA", size, (128, 127, 126, 125)) assert img.im.isblock() (schema, arr) = img.__arrow_c_array__() @@ -238,23 +241,24 @@ def test_singleblock_l_schema(): block_size = Image.core.get_block_size() # check a 2 block image in single channel mode - size = (4096, 2*block_size//4096) - img = Image.new('L', size, 128) + size = (4096, 2 * block_size // 4096) + img = Image.new("L", size, 128) assert img.im.isblock() schema = img.__arrow_c_schema__() assert schema Image.core.set_use_block_allocator(0) + def test_singleblock_rgba_schema(): Image.core.set_use_block_allocator(1) block_size = Image.core.get_block_size() # check a 2 block image in 4 channel mode - size = (4096, (block_size//4096) //2) - img = Image.new('RGBA', size, (128,127,126,125)) + size = (4096, (block_size // 4096) // 2) + img = Image.new("RGBA", size, (128, 127, 126, 125)) assert img.im.isblock() - schema= img.__arrow_c_schema__() + schema = img.__arrow_c_schema__() assert schema Image.core.set_use_block_allocator(0) diff --git a/src/_imaging.c b/src/_imaging.c index 54ec59ac738..f07473ee710 100644 --- a/src/_imaging.c +++ b/src/_imaging.c @@ -302,7 +302,9 @@ _new_arrow(PyObject *self, PyObject *args) { } // ImagingBorrowArrow is responsible for retaining the array_capsule - ret = PyImagingNew(ImagingNewArrow(mode, xsize, ysize, schema_capsule, array_capsule)); + ret = + PyImagingNew(ImagingNewArrow(mode, xsize, ysize, schema_capsule, array_capsule) + ); if (!ret) { return ImagingError_ValueError("Invalid arrow array mode or size mismatch"); } @@ -4202,7 +4204,6 @@ _set_use_block_allocator(PyObject *self, PyObject *args) { Py_RETURN_NONE; } - static PyObject * _get_use_block_allocator(PyObject *self, PyObject *args) { return PyLong_FromLong(ImagingDefaultArena.use_block_allocator); diff --git a/src/libImaging/Storage.c b/src/libImaging/Storage.c index a1241098e7a..76e5108aff7 100644 --- a/src/libImaging/Storage.c +++ b/src/libImaging/Storage.c @@ -379,7 +379,6 @@ ImagingMemorySetBlockAllocator(ImagingMemoryArena arena, int use_block_allocator arena->use_block_allocator = use_block_allocator; } - void ImagingMemoryClearCache(ImagingMemoryArena arena, int new_size) { while (arena->blocks_cached > new_size) { @@ -574,14 +573,19 @@ ImagingAllocateBlock(Imaging im) { static void ImagingDestroyArrow(Imaging im) { // Rely on the internal python destructor for the array capsule. - if (im->arrow_array_capsule){ + if (im->arrow_array_capsule) { Py_DECREF(im->arrow_array_capsule); im->arrow_array_capsule = NULL; } } Imaging -ImagingBorrowArrow(Imaging im, struct ArrowArray *external_array, int offset_width, PyObject* arrow_capsule) { +ImagingBorrowArrow( + Imaging im, + struct ArrowArray *external_array, + int offset_width, + PyObject *arrow_capsule +) { // offset_width is the # of char* for a single offset from arrow Py_ssize_t y, i; From 159ffe3bc335b27bf74ceee25c0d0fae9f13d6da Mon Sep 17 00:00:00 2001 From: wiredfool Date: Mon, 3 Feb 2025 20:34:40 +0000 Subject: [PATCH 25/55] Fix mutex for Free-tread pythons --- src/libImaging/Arrow.c | 12 ++++++------ src/libImaging/Storage.c | 6 +++--- 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/src/libImaging/Arrow.c b/src/libImaging/Arrow.c index 29aadb5e100..493c2a87e10 100644 --- a/src/libImaging/Arrow.c +++ b/src/libImaging/Arrow.c @@ -165,9 +165,9 @@ export_single_channel_array(Imaging im, struct ArrowArray *array) { length = im->xsize * im->lines_per_block; } - MUTEX_LOCK(im->mutex); + MUTEX_LOCK(&im->mutex); im->refcount++; - MUTEX_UNLOCK(im->mutex); + MUTEX_UNLOCK(&im->mutex); // Initialize primitive fields *array = (struct ArrowArray){// Data description .length = length, @@ -208,9 +208,9 @@ export_fixed_pixel_array(Imaging im, struct ArrowArray *array) { length = im->xsize * im->lines_per_block; } - MUTEX_LOCK(im->mutex); + MUTEX_LOCK(&im->mutex); im->refcount++; - MUTEX_UNLOCK(im->mutex); + MUTEX_UNLOCK(&im->mutex); // Initialize primitive fields // Fixed length arrays are 1 buffer of validity, and the length in pixels. // Data is in a child array. @@ -246,9 +246,9 @@ export_fixed_pixel_array(Imaging im, struct ArrowArray *array) { goto err; } - MUTEX_LOCK(im->mutex); + MUTEX_LOCK(&im->mutex); im->refcount++; - MUTEX_UNLOCK(im->mutex); + MUTEX_UNLOCK(&im->mutex); *array->children[0] = (struct ArrowArray){// Data description .length = length * 4, .offset = 0, diff --git a/src/libImaging/Storage.c b/src/libImaging/Storage.c index 76e5108aff7..4ee1af436e7 100644 --- a/src/libImaging/Storage.c +++ b/src/libImaging/Storage.c @@ -299,14 +299,14 @@ ImagingDelete(Imaging im) { return; } - MUTEX_LOCK(im->mutex); + MUTEX_LOCK(&im->mutex); im->refcount--; if (im->refcount > 0) { - MUTEX_UNLOCK(im->mutex); + MUTEX_UNLOCK(&im->mutex); return; } - MUTEX_UNLOCK(im->mutex); + MUTEX_UNLOCK(&im->mutex); if (im->palette) { ImagingPaletteDelete(im->palette); From 91e275965f08603ec7a174d9ab4bd63618e33df9 Mon Sep 17 00:00:00 2001 From: wiredfool Date: Fri, 14 Feb 2025 17:47:43 +0000 Subject: [PATCH 26/55] split pyarrow tests out from plain loopback arrow tests --- Tests/test_arrow.py | 93 ------------------------------------ Tests/test_pyarrow.py | 108 ++++++++++++++++++++++++++++++++++++++++++ pyproject.toml | 6 ++- 3 files changed, 113 insertions(+), 94 deletions(-) create mode 100644 Tests/test_pyarrow.py diff --git a/Tests/test_arrow.py b/Tests/test_arrow.py index f236360b403..e5b48f2b62a 100644 --- a/Tests/test_arrow.py +++ b/Tests/test_arrow.py @@ -12,62 +12,6 @@ hopper, ) -pyarrow = pytest.importorskip("pyarrow", reason="PyArrow not installed") - -TEST_IMAGE_SIZE = (10, 10) - - -def _test_img_equals_pyarray(img: Image.Image, arr: Any, mask) -> None: - assert img.height * img.width == len(arr) - px = img.load() - assert px is not None - for x in range(0, img.size[0], int(img.size[0] / 10)): - for y in range(0, img.size[1], int(img.size[1] / 10)): - if mask: - for ix, elt in enumerate(mask): - assert px[x, y][ix] == arr[y * img.width + x].as_py()[elt] - else: - assert_deep_equal(px[x, y], arr[y * img.width + x].as_py()) - - -# really hard to get a non-nullable list type -fl_uint8_4_type = pyarrow.field( - "_", pyarrow.list_(pyarrow.field("_", pyarrow.uint8()).with_nullable(False), 4) -).type - - -@pytest.mark.parametrize( - "mode, dtype, mask", - ( - ("L", pyarrow.uint8(), None), - ("I", pyarrow.int32(), None), - ("F", pyarrow.float32(), None), - ("LA", fl_uint8_4_type, [0, 3]), - ("RGB", fl_uint8_4_type, [0, 1, 2]), - ("RGBA", fl_uint8_4_type, None), - ("RGBX", fl_uint8_4_type, None), - ("CMYK", fl_uint8_4_type, None), - ("YCbCr", fl_uint8_4_type, [0, 1, 2]), - ("HSV", fl_uint8_4_type, [0, 1, 2]), - ), -) -def test_to_array(mode: str, dtype: Any, mask: Any) -> None: - img = hopper(mode) - - # Resize to non-square - img = img.crop((3, 0, 124, 127)) - assert img.size == (121, 127) - - arr = pyarrow.array(img) - _test_img_equals_pyarray(img, arr, mask) - assert arr.type == dtype - - reloaded = Image.fromarrow(arr, mode, img.size) - - assert reloaded - - assert_image_equal(img, reloaded) - @pytest.mark.parametrize( "mode, dest_modes", @@ -99,43 +43,6 @@ def test_invalid_array_size(): Image.fromarrow(img, "RGB", (10, 10)) -def test_lifetime(): - # valgrind shouldn't error out here. - # arrays should be accessible after the image is deleted. - - img = hopper("L") - - arr_1 = pyarrow.array(img) - arr_2 = pyarrow.array(img) - - del img - - assert arr_1.sum().as_py() > 0 - del arr_1 - - assert arr_2.sum().as_py() > 0 - del arr_2 - - -def test_lifetime2(): - # valgrind shouldn't error out here. - # img should remain after the arrays are collected. - - img = hopper("L") - - arr_1 = pyarrow.array(img) - arr_2 = pyarrow.array(img) - - assert arr_1.sum().as_py() > 0 - del arr_1 - - assert arr_2.sum().as_py() > 0 - del arr_2 - - img2 = img.copy() - px = img2.load() - assert isinstance(px[0, 0], int) - def test_release_schema(): # these should not error out, valgrind should be clean diff --git a/Tests/test_pyarrow.py b/Tests/test_pyarrow.py new file mode 100644 index 00000000000..8616d88ad48 --- /dev/null +++ b/Tests/test_pyarrow.py @@ -0,0 +1,108 @@ +from __future__ import annotations + +from typing import Any # undone + +import pytest + +from PIL import Image + +from .helper import ( + assert_deep_equal, + assert_image_equal, + hopper, +) + +pyarrow = pytest.importorskip("pyarrow", reason="PyArrow not installed") + +TEST_IMAGE_SIZE = (10, 10) + + +def _test_img_equals_pyarray(img: Image.Image, arr: Any, mask) -> None: + assert img.height * img.width == len(arr) + px = img.load() + assert px is not None + for x in range(0, img.size[0], int(img.size[0] / 10)): + for y in range(0, img.size[1], int(img.size[1] / 10)): + if mask: + for ix, elt in enumerate(mask): + assert px[x, y][ix] == arr[y * img.width + x].as_py()[elt] + else: + assert_deep_equal(px[x, y], arr[y * img.width + x].as_py()) + + +# really hard to get a non-nullable list type +fl_uint8_4_type = pyarrow.field( + "_", pyarrow.list_(pyarrow.field("_", pyarrow.uint8()).with_nullable(False), 4) +).type + + +@pytest.mark.parametrize( + "mode, dtype, mask", + ( + ("L", pyarrow.uint8(), None), + ("I", pyarrow.int32(), None), + ("F", pyarrow.float32(), None), + ("LA", fl_uint8_4_type, [0, 3]), + ("RGB", fl_uint8_4_type, [0, 1, 2]), + ("RGBA", fl_uint8_4_type, None), + ("RGBX", fl_uint8_4_type, None), + ("CMYK", fl_uint8_4_type, None), + ("YCbCr", fl_uint8_4_type, [0, 1, 2]), + ("HSV", fl_uint8_4_type, [0, 1, 2]), + ), +) +def test_to_array(mode: str, dtype: Any, mask: Any) -> None: + img = hopper(mode) + + # Resize to non-square + img = img.crop((3, 0, 124, 127)) + assert img.size == (121, 127) + + arr = pyarrow.array(img) + _test_img_equals_pyarray(img, arr, mask) + assert arr.type == dtype + + reloaded = Image.fromarrow(arr, mode, img.size) + + assert reloaded + + assert_image_equal(img, reloaded) + + + +def test_lifetime(): + # valgrind shouldn't error out here. + # arrays should be accessible after the image is deleted. + + img = hopper("L") + + arr_1 = pyarrow.array(img) + arr_2 = pyarrow.array(img) + + del img + + assert arr_1.sum().as_py() > 0 + del arr_1 + + assert arr_2.sum().as_py() > 0 + del arr_2 + + +def test_lifetime2(): + # valgrind shouldn't error out here. + # img should remain after the arrays are collected. + + img = hopper("L") + + arr_1 = pyarrow.array(img) + arr_2 = pyarrow.array(img) + + assert arr_1.sum().as_py() > 0 + del arr_1 + + assert arr_2.sum().as_py() > 0 + del arr_2 + + img2 = img.copy() + px = img2.load() + assert isinstance(px[0, 0], int) diff --git a/pyproject.toml b/pyproject.toml index 9cf3344f368..a7e5ecde48e 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -61,13 +61,17 @@ optional-dependencies.tests = [ "markdown2", "olefile", "packaging", - "pyarrow", "pyroma", "pytest", "pytest-cov", "pytest-timeout", "trove-classifiers>=2024.10.12", ] + +optional-dependencies.test-arrow = [ + "pyarrow", +] + optional-dependencies.typing = [ "typing-extensions; python_version<'3.10'", ] From 05ae6e9172ccadaf4d53fe30336021a44ce8849b Mon Sep 17 00:00:00 2001 From: wiredfool Date: Fri, 14 Feb 2025 17:52:44 +0000 Subject: [PATCH 27/55] install pyarrow for those test platforms where it's available --- .ci/install.sh | 1 + .github/workflows/macos-install.sh | 1 + .github/workflows/test-windows.yml | 13 +++++++++++-- 3 files changed, 13 insertions(+), 2 deletions(-) diff --git a/.ci/install.sh b/.ci/install.sh index 5c20e7f3727..3543d3da655 100755 --- a/.ci/install.sh +++ b/.ci/install.sh @@ -36,6 +36,7 @@ python3 -m pip install -U pytest python3 -m pip install -U pytest-cov python3 -m pip install -U pytest-timeout python3 -m pip install pyroma +python3 -m pip install pyarrow if [[ $(uname) != CYGWIN* ]]; then python3 -m pip install numpy diff --git a/.github/workflows/macos-install.sh b/.github/workflows/macos-install.sh index 2301a3a7ef3..6b50138c0de 100755 --- a/.github/workflows/macos-install.sh +++ b/.github/workflows/macos-install.sh @@ -30,6 +30,7 @@ python3 -m pip install -U pytest-cov python3 -m pip install -U pytest-timeout python3 -m pip install pyroma python3 -m pip install numpy +python3 -m pip install pyarrow # extra test images pushd depends && ./install_extra_test_images.sh && popd diff --git a/.github/workflows/test-windows.yml b/.github/workflows/test-windows.yml index d905a392585..352aead37fc 100644 --- a/.github/workflows/test-windows.yml +++ b/.github/workflows/test-windows.yml @@ -28,6 +28,7 @@ concurrency: env: COVERAGE_CORE: sysmon + TEST_REQUIREMENTS: "tests, test-arrow" jobs: build: @@ -35,7 +36,15 @@ jobs: strategy: fail-fast: false matrix: - python-version: ["pypy3.10", "3.9", "3.10", "3.11", "3.12", "3.13"] + python-version: + - "pypy3.10", + env: TEST_REQUIREMENTS: tests + - "3.9" + env: TEST_REQUIREMENTS: tests + - "3.10" + - "3.11" + - "3.12" + - "3.13" timeout-minutes: 30 @@ -180,7 +189,7 @@ jobs: - name: Build Pillow run: | $FLAGS="-C raqm=vendor -C fribidi=vendor" - cmd /c "winbuild\build\build_env.cmd && $env:pythonLocation\python.exe -m pip install -v $FLAGS .[tests]" + cmd /c "winbuild\build\build_env.cmd && $env:pythonLocation\python.exe -m pip install -v $FLAGS .[$TEST_REQUIREMENTS]" & $env:pythonLocation\python.exe selftest.py --installed shell: pwsh From dd18facb2282d07e54b3dfcee271602276fa1c26 Mon Sep 17 00:00:00 2001 From: wiredfool Date: Fri, 14 Feb 2025 18:04:05 +0000 Subject: [PATCH 28/55] fix yml --- .github/workflows/test-windows.yml | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/.github/workflows/test-windows.yml b/.github/workflows/test-windows.yml index 96e26b86c68..767c8dd22f0 100644 --- a/.github/workflows/test-windows.yml +++ b/.github/workflows/test-windows.yml @@ -38,7 +38,8 @@ jobs: matrix: python-version: - "pypy3.10", - env: TEST_REQUIREMENTS: tests + env: + TEST_REQUIREMENTS: tests - "3.10" - "3.11" - "3.12" @@ -48,7 +49,7 @@ jobs: os: ["windows-latest"] include: # Test the oldest Python on 32-bit - - { python-version: "3.9", architecture: "x86", os: "windows-2019", env: "TEST_REQUIREMENTS: tests" } + - { python-version: "3.9", architecture: "x86", os: "windows-2019", env: {TEST_REQUIREMENTS: "tests"} } timeout-minutes: 30 From b2210d171fe19bd88eaf602258fcc8c40e3dadef Mon Sep 17 00:00:00 2001 From: wiredfool Date: Tue, 18 Feb 2025 19:52:45 +0000 Subject: [PATCH 29/55] mac/linux: install binary only pyarrow, and don't fail if there's no binary --- .ci/install.sh | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/.ci/install.sh b/.ci/install.sh index 3543d3da655..60f01202619 100755 --- a/.ci/install.sh +++ b/.ci/install.sh @@ -36,7 +36,9 @@ python3 -m pip install -U pytest python3 -m pip install -U pytest-cov python3 -m pip install -U pytest-timeout python3 -m pip install pyroma -python3 -m pip install pyarrow +# optional test dependency, only install if there's a binary package. +# fails on beta 3.14 and pypy3.10 +python3 -m pip install --only-binary=:all: pyarrow || true if [[ $(uname) != CYGWIN* ]]; then python3 -m pip install numpy From 9021829f7e3fb58c4f9b5e2550e751981e2edbeb Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Tue, 18 Feb 2025 19:56:39 +0000 Subject: [PATCH 30/55] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- Tests/test_arrow.py | 5 ----- Tests/test_pyarrow.py | 1 - pyproject.toml | 8 ++++---- 3 files changed, 4 insertions(+), 10 deletions(-) diff --git a/Tests/test_arrow.py b/Tests/test_arrow.py index e5b48f2b62a..1a9361c9eb4 100644 --- a/Tests/test_arrow.py +++ b/Tests/test_arrow.py @@ -1,14 +1,10 @@ from __future__ import annotations -from typing import Any # undone - import pytest from PIL import Image from .helper import ( - assert_deep_equal, - assert_image_equal, hopper, ) @@ -43,7 +39,6 @@ def test_invalid_array_size(): Image.fromarrow(img, "RGB", (10, 10)) - def test_release_schema(): # these should not error out, valgrind should be clean img = hopper("L") diff --git a/Tests/test_pyarrow.py b/Tests/test_pyarrow.py index 8616d88ad48..68a4f96f09e 100644 --- a/Tests/test_pyarrow.py +++ b/Tests/test_pyarrow.py @@ -69,7 +69,6 @@ def test_to_array(mode: str, dtype: Any, mask: Any) -> None: assert_image_equal(img, reloaded) - def test_lifetime(): # valgrind shouldn't error out here. # arrays should be accessible after the image is deleted. diff --git a/pyproject.toml b/pyproject.toml index 2a46a994891..a43c53bda50 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -54,6 +54,10 @@ optional-dependencies.fpx = [ optional-dependencies.mic = [ "olefile", ] +optional-dependencies.test-arrow = [ + "pyarrow", +] + optional-dependencies.tests = [ "check-manifest", "coverage>=7.4.2", @@ -68,10 +72,6 @@ optional-dependencies.tests = [ "trove-classifiers>=2024.10.12", ] -optional-dependencies.test-arrow = [ - "pyarrow", -] - optional-dependencies.typing = [ "typing-extensions; python_version<'3.10'", ] From 038f62ce93be1e65099f1a3f1b62543b4c153561 Mon Sep 17 00:00:00 2001 From: wiredfool Date: Tue, 18 Feb 2025 20:08:48 +0000 Subject: [PATCH 31/55] fix macos pyarrow test install --- .github/workflows/macos-install.sh | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/.github/workflows/macos-install.sh b/.github/workflows/macos-install.sh index 10146cfde47..1f90eb5de9e 100755 --- a/.github/workflows/macos-install.sh +++ b/.github/workflows/macos-install.sh @@ -26,7 +26,9 @@ python3 -m pip install -U pytest-cov python3 -m pip install -U pytest-timeout python3 -m pip install pyroma python3 -m pip install numpy -python3 -m pip install pyarrow +# optional test dependency, only install if there's a binary package. +# fails on beta 3.14 and pypy3.10 +python3 -m pip install --only-binary=:all: pyarrow || true # extra test images pushd depends && ./install_extra_test_images.sh && popd From 01998fc71d946fc04149d3c0428d626696394e2f Mon Sep 17 00:00:00 2001 From: wiredfool Date: Tue, 18 Feb 2025 21:27:42 +0000 Subject: [PATCH 32/55] Added docs --- docs/reference/Image.rst | 3 ++ docs/reference/arrow_support.rst | 85 ++++++++++++++++++++++++++++++ docs/reference/internal_design.rst | 1 + src/PIL/Image.py | 36 +++++++++++++ 4 files changed, 125 insertions(+) create mode 100644 docs/reference/arrow_support.rst diff --git a/docs/reference/Image.rst b/docs/reference/Image.rst index bc3758218db..6a682cabc4a 100644 --- a/docs/reference/Image.rst +++ b/docs/reference/Image.rst @@ -79,6 +79,7 @@ Constructing images .. autofunction:: new .. autofunction:: fromarray +.. autofunction:: fromarrow .. autofunction:: frombytes .. autofunction:: frombuffer @@ -370,6 +371,8 @@ Protocols .. autoclass:: SupportsArrayInterface :show-inheritance: +.. autoclass:: SupportsArrowInterface + :show-inheritance: .. autoclass:: SupportsGetData :show-inheritance: diff --git a/docs/reference/arrow_support.rst b/docs/reference/arrow_support.rst new file mode 100644 index 00000000000..964446000e8 --- /dev/null +++ b/docs/reference/arrow_support.rst @@ -0,0 +1,85 @@ +============= +Arrow Support +============= + +Arrow is an in memory data exchange format that is the spritual +successor to the numpy array interface. It provides for zero copy +access to columnar data, which in our case is Image data. + +The goal with Arrow is to provide native zero-copy interop with any +arrow provider or consumer in the Python ecosystem. + +.. warning:: Zero-copy does not mean zero allocation -- The internal +memory layout of Pillow images contains an allocation for row +pointers, so there is a non-zero, but significantly smaller than a +full copy memory cost to reading an arrow image. + + +Data Formats +============ + +Pillow currently supports exporting arrow images in all modes +**except** for ``BGR;15``, ``BGR;16`` and ``BGR;24``. This is due to +line length packing in these modes making for non-continuous memory. + +For single band images, the exported array is width*height elements, +with each pixel corresponding to the appropriate arrow type. + +For multiband images, the exported array is width*height fixed length +4 element arrays of uint8. This is memory compatible with the raw +image storage of 4 bytes per pixel. + +Mode ``1`` images are exported as 1 uint8 byte/pixel, as this is +consistent with the internal storage. + +Pillow will accept, but not produce, one other format. For any +multichannel image with 32 bit storage per pixel, Pillow will accept +an array of width*height int32 elements, which will then be +interpreted using the mode specific interpretation of the bytes. + +The image mode must match the arrow band format when reading single +channel images + +Memory Allocator +================ + +Pillow's default memory allocator, the :ref:`block_allocator`, +allocates up to a 16MB block for images by default. Larger images +overflow into additional blocks. Arrow requires a single continuous +memory allocation, so images allocated in multiple blocks cannot be +exported in the arrow format. + +To enable the single block allocator:: + + from PIL import Image + Image.core.set_use_block_allocator(1) + +Note that this is a global setting, not a per image setting. + +Unsupported Features +==================== + +* Table/Dataframe protocol. We currently support a single array. +* Null markers, producing or consuming. Null values are inferred from + the mode. e.g. RGB images are stored in the first three bytes of + each 32 bit pixel, and the last byte is an implied null. +* Schema Negotiation. There is an optional schema for the requested + datatype in the arrow source interface. We currently ignore that + parameter. +* Array Metadata. + +Internal Details +================ + +Python Arrow C interface: +https://arrow.apache.org/docs/format/CDataInterface/PyCapsuleInterface.html + +The memory that is exported from the arrow interface is shared -- not +copied, so the lifetime of the memory allocation is no longer strictly +tied to the life of the python object. + +The core imaging struct now has a refcount associated with it, and the +lifetime of the core image struct is now divorced from the python +image object. Creating an arrow reference to the image increments the +refcount, and the imaging struct is only released when the refcount +reaches 0. diff --git a/docs/reference/internal_design.rst b/docs/reference/internal_design.rst index 99a18e9ea99..0411779535b 100644 --- a/docs/reference/internal_design.rst +++ b/docs/reference/internal_design.rst @@ -9,3 +9,4 @@ Internal Reference block_allocator internal_modules c_extension_debugging + arrow_support diff --git a/src/PIL/Image.py b/src/PIL/Image.py index 328d2202d2e..ebfd49b0076 100644 --- a/src/PIL/Image.py +++ b/src/PIL/Image.py @@ -3305,6 +3305,42 @@ def fromarray(obj: SupportsArrayInterface, mode: str | None = None) -> Image: def fromarrow(obj: SupportsArrowArrayInterface, mode, size) -> Image: + """Creates an image with zero copy shared memory from an object exporting + the arrow_c_array interface protocol:: + + from PIL import Image + import pyarrow as pa + arr = pa.array([0]*(5*5*4), type=pa.uint8()) + im = Image.fromarrow(arr, 'RGBA', (5, 5)) + + If the data representation of the ``obj`` is not compatible with + Pillow internal storage, a ValueError is raised. + + Pillow images can also be converted to arrow objects:: + + from PIL import Image + import pyarrow as pa + im = Image.open('hopper.jpg') + arr = pa.array(im) + + As with array support, when converting Pillow images to arrays, + only pixel values are transferred. This means that P and PA mode + image will lose their palette. + + :param obj: Object with an arrow_c_array interface + :param mode: Image mode. + :param size: Image size. This must match the storage of the arrow object. + :returns: An Image Object + + Note that according to the arrow spec, both the producer and the + consumer should consider the exported array to be immutable, as + unsynchronized updates will potentially cause inconsistent data. + + See: :ref:`arrow-support` for more detailed information + + .. versionadded:: 11.2 + + """ if not hasattr(obj, "__arrow_c_array__"): msg = "arrow_c_array interface not found" raise ValueError(msg) From 4ea8ac80a8e4ffc01e300bc1b17e9d6d0963ce5c Mon Sep 17 00:00:00 2001 From: wiredfool Date: Tue, 18 Feb 2025 21:51:03 +0000 Subject: [PATCH 33/55] Fix the windows test matrix? --- .github/workflows/test-windows.yml | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/.github/workflows/test-windows.yml b/.github/workflows/test-windows.yml index 75b85e24a7e..1711276f6ab 100644 --- a/.github/workflows/test-windows.yml +++ b/.github/workflows/test-windows.yml @@ -37,12 +37,6 @@ jobs: fail-fast: false matrix: python-version: - - "pypy3.11" - env: - TEST_REQUIREMENTS: tests - - "pypy3.10" - env: - TEST_REQUIREMENTS: tests - "3.10" - "3.11" - "3.12" @@ -53,7 +47,9 @@ jobs: include: # Test the oldest Python on 32-bit - { python-version: "3.9", architecture: "x86", os: "windows-2019", env: {TEST_REQUIREMENTS: "tests"} } - + # test the non-pyarrow capable ones + - { python-version: "pypy3.11", env: {TEST_REQUIREMENTS: "tests"} } + - { python-version: "pypy3.10", env: {TEST_REQUIREMENTS: "tests"} } timeout-minutes: 30 name: Python ${{ matrix.python-version }} (${{ matrix.architecture }}) From e81b669fec9e0bf03b1c0b640b702df324fd8311 Mon Sep 17 00:00:00 2001 From: wiredfool Date: Tue, 18 Feb 2025 21:51:31 +0000 Subject: [PATCH 34/55] Lint --- Tests/test_arrow.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/Tests/test_arrow.py b/Tests/test_arrow.py index 1a9361c9eb4..3338b6c6a4c 100644 --- a/Tests/test_arrow.py +++ b/Tests/test_arrow.py @@ -24,7 +24,7 @@ ("HSV", ["L", "F"]), ), ) -def test_invalid_array_type(mode: str, dest_modes: List[str]) -> None: +def test_invalid_array_type(mode: str, dest_modes: list[str]) -> None: img = hopper(mode) for dest_mode in dest_modes: with pytest.raises(ValueError): @@ -92,7 +92,7 @@ def test_multiblock_l_schema(): img = Image.new("L", size, 128) with pytest.raises(ValueError): - schema = img.__arrow_c_schema__() + img.__arrow_c_schema__() def test_multiblock_rgba_schema(): @@ -103,7 +103,7 @@ def test_multiblock_rgba_schema(): img = Image.new("RGBA", size, (128, 127, 126, 125)) with pytest.raises(ValueError): - schema = img.__arrow_c_schema__() + img.__arrow_c_schema__() def test_singleblock_l_image(): From 7ac90faabcfbabe2d90a37e7e2b3d74ff453a16f Mon Sep 17 00:00:00 2001 From: wiredfool Date: Tue, 18 Feb 2025 21:51:52 +0000 Subject: [PATCH 35/55] PyCapsules aren't actually importable in python. They're only a C-api thing, so we can only get their type from an instance. They're not in Typing or Types. --- src/PIL/Image.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/PIL/Image.py b/src/PIL/Image.py index ebfd49b0076..738702623ee 100644 --- a/src/PIL/Image.py +++ b/src/PIL/Image.py @@ -3211,8 +3211,8 @@ class SupportsArrowArrayInterface(Protocol): """ def __arrow_c_array__( - self, requested_schema: PyCapsule = None - ) -> tuple[PyCapsule, PyCapsule]: + self, requested_schema: 'PyCapsule' = None + ) -> tuple['PyCapsule', 'PyCapsule']: raise NotImplementedError() From 2418a2382f2a3c0cfc1fa4d25daef39184825846 Mon Sep 17 00:00:00 2001 From: wiredfool Date: Tue, 18 Feb 2025 21:56:12 +0000 Subject: [PATCH 36/55] Yaml --- .github/workflows/test-windows.yml | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/.github/workflows/test-windows.yml b/.github/workflows/test-windows.yml index 1711276f6ab..ab399b19784 100644 --- a/.github/workflows/test-windows.yml +++ b/.github/workflows/test-windows.yml @@ -36,12 +36,7 @@ jobs: strategy: fail-fast: false matrix: - python-version: - - "3.10" - - "3.11" - - "3.12" - - "3.13" - - "3.14" + python-version: [ "3.10", "3.11", "3.12", "3.13", "3.14" ] architecture: ["x64"] os: ["windows-latest"] include: From a129efd1318c5a2edc50ebfbc945d184c5ab1ebe Mon Sep 17 00:00:00 2001 From: wiredfool Date: Tue, 18 Feb 2025 22:01:50 +0000 Subject: [PATCH 37/55] Workflow yaml --- .github/workflows/test-windows.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/test-windows.yml b/.github/workflows/test-windows.yml index ab399b19784..e528c6267e0 100644 --- a/.github/workflows/test-windows.yml +++ b/.github/workflows/test-windows.yml @@ -43,8 +43,8 @@ jobs: # Test the oldest Python on 32-bit - { python-version: "3.9", architecture: "x86", os: "windows-2019", env: {TEST_REQUIREMENTS: "tests"} } # test the non-pyarrow capable ones - - { python-version: "pypy3.11", env: {TEST_REQUIREMENTS: "tests"} } - - { python-version: "pypy3.10", env: {TEST_REQUIREMENTS: "tests"} } + - { python-version: "pypy3.11", architecture: "x64", os: "windows-latest", env: {TEST_REQUIREMENTS: "tests"} } + - { python-version: "pypy3.10", architecture: "x64", os: "windows-latest", env: {TEST_REQUIREMENTS: "tests"} } timeout-minutes: 30 name: Python ${{ matrix.python-version }} (${{ matrix.architecture }}) From afc16e5e7825e75febd85dd428b498ff99e7eebb Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Tue, 18 Feb 2025 22:02:58 +0000 Subject: [PATCH 38/55] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- src/PIL/Image.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/PIL/Image.py b/src/PIL/Image.py index 738702623ee..ebfd49b0076 100644 --- a/src/PIL/Image.py +++ b/src/PIL/Image.py @@ -3211,8 +3211,8 @@ class SupportsArrowArrayInterface(Protocol): """ def __arrow_c_array__( - self, requested_schema: 'PyCapsule' = None - ) -> tuple['PyCapsule', 'PyCapsule']: + self, requested_schema: PyCapsule = None + ) -> tuple[PyCapsule, PyCapsule]: raise NotImplementedError() From bba1b1339f5ecd509b7a6c06424bed1e9005215d Mon Sep 17 00:00:00 2001 From: wiredfool Date: Tue, 18 Feb 2025 22:30:08 +0000 Subject: [PATCH 39/55] docs tweaks --- docs/reference/Image.rst | 2 +- docs/reference/arrow_support.rst | 2 ++ docs/reference/block_allocator.rst | 3 +++ 3 files changed, 6 insertions(+), 1 deletion(-) diff --git a/docs/reference/Image.rst b/docs/reference/Image.rst index 6a682cabc4a..a3ba8cfd8e1 100644 --- a/docs/reference/Image.rst +++ b/docs/reference/Image.rst @@ -371,7 +371,7 @@ Protocols .. autoclass:: SupportsArrayInterface :show-inheritance: -.. autoclass:: SupportsArrowInterface +.. autoclass:: SupportsArrowArrayInterface :show-inheritance: .. autoclass:: SupportsGetData :show-inheritance: diff --git a/docs/reference/arrow_support.rst b/docs/reference/arrow_support.rst index 964446000e8..9669092b51e 100644 --- a/docs/reference/arrow_support.rst +++ b/docs/reference/arrow_support.rst @@ -1,3 +1,5 @@ +.. _arrow-support: + ============= Arrow Support ============= diff --git a/docs/reference/block_allocator.rst b/docs/reference/block_allocator.rst index 1abe5280fbf..f4d27e24e57 100644 --- a/docs/reference/block_allocator.rst +++ b/docs/reference/block_allocator.rst @@ -1,3 +1,6 @@ + +.. _block_allocator: + Block Allocator =============== From 19eb3e446a829cc28830b0fd92b499bd5a2efb53 Mon Sep 17 00:00:00 2001 From: wiredfool Date: Tue, 18 Feb 2025 22:34:12 +0000 Subject: [PATCH 40/55] environment reference in test-windows.yml --- .github/workflows/test-windows.yml | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/.github/workflows/test-windows.yml b/.github/workflows/test-windows.yml index e528c6267e0..502d750950b 100644 --- a/.github/workflows/test-windows.yml +++ b/.github/workflows/test-windows.yml @@ -36,13 +36,14 @@ jobs: strategy: fail-fast: false matrix: - python-version: [ "3.10", "3.11", "3.12", "3.13", "3.14" ] + python-version: [ "3.10", "3.11", "3.12", "3.13" ] architecture: ["x64"] os: ["windows-latest"] include: # Test the oldest Python on 32-bit - { python-version: "3.9", architecture: "x86", os: "windows-2019", env: {TEST_REQUIREMENTS: "tests"} } # test the non-pyarrow capable ones + - { python-version: "3.14", architecture: "x64", os: "windows-latest", env: {TEST_REQUIREMENTS: "tests"} } - { python-version: "pypy3.11", architecture: "x64", os: "windows-latest", env: {TEST_REQUIREMENTS: "tests"} } - { python-version: "pypy3.10", architecture: "x64", os: "windows-latest", env: {TEST_REQUIREMENTS: "tests"} } timeout-minutes: 30 @@ -189,7 +190,7 @@ jobs: - name: Build Pillow run: | $FLAGS="-C raqm=vendor -C fribidi=vendor" - cmd /c "winbuild\build\build_env.cmd && $env:pythonLocation\python.exe -m pip install -v $FLAGS .[$TEST_REQUIREMENTS]" + cmd /c "winbuild\build\build_env.cmd && $env:pythonLocation\python.exe -m pip install -v $FLAGS .[$env:TEST_REQUIREMENTS]" & $env:pythonLocation\python.exe selftest.py --installed shell: pwsh From cba8e09494e902a1ad05ea1d2415b8c8c7743ca2 Mon Sep 17 00:00:00 2001 From: wiredfool Date: Tue, 18 Feb 2025 22:39:46 +0000 Subject: [PATCH 41/55] Fix pre-commit.ci lint. --- src/PIL/Image.py | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/src/PIL/Image.py b/src/PIL/Image.py index ebfd49b0076..d52985da0fc 100644 --- a/src/PIL/Image.py +++ b/src/PIL/Image.py @@ -3210,9 +3210,16 @@ class SupportsArrowArrayInterface(Protocol): data interface. """ - def __arrow_c_array__( - self, requested_schema: PyCapsule = None - ) -> tuple[PyCapsule, PyCapsule]: + # Sorry, no types for you until pre-commit.ci stops changing stringy + # PyCapsule type to an unimportable value type, which then fails lint. + # PyCapsules are not importable, and only available in the C-API layer. + # def __arrow_c_array__( + # self, requested_schema: 'PyCapsule' = None + # ) -> tuple['PyCapsule', 'PyCapsule']: + # raise NotImplementedError() + + # old not typed definition. + def __arrow_c_array__(self, requested_schema = None) -> tuple: raise NotImplementedError() From 7e59428f001ac2b36bb0c4870f51e9271ae160ff Mon Sep 17 00:00:00 2001 From: wiredfool Date: Tue, 18 Feb 2025 22:53:15 +0000 Subject: [PATCH 42/55] Take 4: not environment variables --- .github/workflows/test-windows.yml | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/.github/workflows/test-windows.yml b/.github/workflows/test-windows.yml index 502d750950b..e248eafc0be 100644 --- a/.github/workflows/test-windows.yml +++ b/.github/workflows/test-windows.yml @@ -28,7 +28,6 @@ concurrency: env: COVERAGE_CORE: sysmon - TEST_REQUIREMENTS: "tests, test-arrow" jobs: build: @@ -39,13 +38,14 @@ jobs: python-version: [ "3.10", "3.11", "3.12", "3.13" ] architecture: ["x64"] os: ["windows-latest"] + pyarrow: ["true"] include: # Test the oldest Python on 32-bit - - { python-version: "3.9", architecture: "x86", os: "windows-2019", env: {TEST_REQUIREMENTS: "tests"} } + - { python-version: "3.9", architecture: "x86", os: "windows-2019", pyarrow: "false" } # test the non-pyarrow capable ones - - { python-version: "3.14", architecture: "x64", os: "windows-latest", env: {TEST_REQUIREMENTS: "tests"} } - - { python-version: "pypy3.11", architecture: "x64", os: "windows-latest", env: {TEST_REQUIREMENTS: "tests"} } - - { python-version: "pypy3.10", architecture: "x64", os: "windows-latest", env: {TEST_REQUIREMENTS: "tests"} } + - { python-version: "3.14", architecture: "x64", os: "windows-latest", pyarrow: "false" } + - { python-version: "pypy3.11", architecture: "x64", os: "windows-latest", pyarrow: "false" } + - { python-version: "pypy3.10", architecture: "x64", os: "windows-latest", pyarrow: "false" } timeout-minutes: 30 name: Python ${{ matrix.python-version }} (${{ matrix.architecture }}) @@ -92,6 +92,11 @@ jobs: run: | python3 -m pip install PyQt6 + - name: Install PyArrow Test Dependency + if: "matrix.pyarrow == 'true'" + run: | + python3 -m pip install --only-binary=:all: pyarrow + - name: Install dependencies id: install run: | @@ -190,7 +195,7 @@ jobs: - name: Build Pillow run: | $FLAGS="-C raqm=vendor -C fribidi=vendor" - cmd /c "winbuild\build\build_env.cmd && $env:pythonLocation\python.exe -m pip install -v $FLAGS .[$env:TEST_REQUIREMENTS]" + cmd /c "winbuild\build\build_env.cmd && $env:pythonLocation\python.exe -m pip install -v $FLAGS .[tests]" & $env:pythonLocation\python.exe selftest.py --installed shell: pwsh From a8d819c442cbcee9e22d7f27e667a43e2adea55a Mon Sep 17 00:00:00 2001 From: wiredfool Date: Tue, 18 Feb 2025 23:01:34 +0000 Subject: [PATCH 43/55] Mypy error -- can't have a bare tuple So you don't even get that typing. --- src/PIL/Image.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/PIL/Image.py b/src/PIL/Image.py index d52985da0fc..478f8289bd0 100644 --- a/src/PIL/Image.py +++ b/src/PIL/Image.py @@ -3219,7 +3219,7 @@ class SupportsArrowArrayInterface(Protocol): # raise NotImplementedError() # old not typed definition. - def __arrow_c_array__(self, requested_schema = None) -> tuple: + def __arrow_c_array__(self, requested_schema = None): raise NotImplementedError() From 7d498c3c489e437be400be9a1423e21144ed833e Mon Sep 17 00:00:00 2001 From: wiredfool Date: Tue, 18 Feb 2025 23:02:26 +0000 Subject: [PATCH 44/55] Mypy error -- doesn't like the none return --- src/PIL/Image.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/PIL/Image.py b/src/PIL/Image.py index 478f8289bd0..5bb4265db0d 100644 --- a/src/PIL/Image.py +++ b/src/PIL/Image.py @@ -3357,7 +3357,7 @@ def fromarrow(obj: SupportsArrowArrayInterface, mode, size) -> Image: if _im: return Image()._new(_im) - return None + raise ValueError("new_arrow returned None without an exception") def fromqimage(im: ImageQt.QImage) -> ImageFile.ImageFile: From e4ad2c07a550398b4669fb7212e0af2c3e78e50a Mon Sep 17 00:00:00 2001 From: wiredfool Date: Tue, 18 Feb 2025 23:04:45 +0000 Subject: [PATCH 45/55] doc indent --- docs/reference/arrow_support.rst | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/docs/reference/arrow_support.rst b/docs/reference/arrow_support.rst index 9669092b51e..c9f12c64b26 100644 --- a/docs/reference/arrow_support.rst +++ b/docs/reference/arrow_support.rst @@ -12,9 +12,9 @@ The goal with Arrow is to provide native zero-copy interop with any arrow provider or consumer in the Python ecosystem. .. warning:: Zero-copy does not mean zero allocation -- The internal -memory layout of Pillow images contains an allocation for row -pointers, so there is a non-zero, but significantly smaller than a -full copy memory cost to reading an arrow image. + memory layout of Pillow images contains an allocation for row + pointers, so there is a non-zero, but significantly smaller than a + full copy memory cost to reading an arrow image. Data Formats From cb146720d8cbe470c6b16c92dfc0fda2e8acccca Mon Sep 17 00:00:00 2001 From: wiredfool Date: Tue, 18 Feb 2025 23:12:38 +0000 Subject: [PATCH 46/55] le-sigh --- src/PIL/Image.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/PIL/Image.py b/src/PIL/Image.py index 5bb4265db0d..148ba462e0f 100644 --- a/src/PIL/Image.py +++ b/src/PIL/Image.py @@ -3357,7 +3357,8 @@ def fromarrow(obj: SupportsArrowArrayInterface, mode, size) -> Image: if _im: return Image()._new(_im) - raise ValueError("new_arrow returned None without an exception") + msg = "new_arrow returned None without an exception" + raise ValueError(msg) def fromqimage(im: ImageQt.QImage) -> ImageFile.ImageFile: From bafb968872b913e28d83586647c69b39811c50fa Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Tue, 18 Feb 2025 23:26:36 +0000 Subject: [PATCH 47/55] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- src/PIL/Image.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/PIL/Image.py b/src/PIL/Image.py index 148ba462e0f..aa18f90d94c 100644 --- a/src/PIL/Image.py +++ b/src/PIL/Image.py @@ -3219,7 +3219,7 @@ class SupportsArrowArrayInterface(Protocol): # raise NotImplementedError() # old not typed definition. - def __arrow_c_array__(self, requested_schema = None): + def __arrow_c_array__(self, requested_schema=None): raise NotImplementedError() From cca80e2a2372387ed6b9868ab4fcfffad2df2333 Mon Sep 17 00:00:00 2001 From: wiredfool Date: Wed, 19 Feb 2025 21:53:16 +0000 Subject: [PATCH 48/55] make mypy happy --- Tests/test_pyarrow.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/Tests/test_pyarrow.py b/Tests/test_pyarrow.py index 68a4f96f09e..72c546c0d65 100644 --- a/Tests/test_pyarrow.py +++ b/Tests/test_pyarrow.py @@ -25,7 +25,9 @@ def _test_img_equals_pyarray(img: Image.Image, arr: Any, mask) -> None: for y in range(0, img.size[1], int(img.size[1] / 10)): if mask: for ix, elt in enumerate(mask): - assert px[x, y][ix] == arr[y * img.width + x].as_py()[elt] + pixel = px[x, y] + assert isinstance(pixel, tuple) + assert pixel[ix] == arr[y * img.width + x].as_py()[elt] else: assert_deep_equal(px[x, y], arr[y * img.width + x].as_py()) From ae187cacb2a18323e3a0d9bfe5f8739a25d5153b Mon Sep 17 00:00:00 2001 From: wiredfool Date: Sat, 29 Mar 2025 23:18:48 +0000 Subject: [PATCH 49/55] Apply suggestions from code review Co-authored-by: Hugo van Kemenade <1324225+hugovk@users.noreply.github.com> --- .github/workflows/test-windows.yml | 13 ++---- Tests/test_arrow.py | 4 +- docs/reference/arrow_support.rst | 63 +++++++++++++++--------------- src/PIL/Image.py | 21 ++++------ src/_imaging.c | 6 +-- 5 files changed, 46 insertions(+), 61 deletions(-) diff --git a/.github/workflows/test-windows.yml b/.github/workflows/test-windows.yml index e248eafc0be..0b45f93c97b 100644 --- a/.github/workflows/test-windows.yml +++ b/.github/workflows/test-windows.yml @@ -35,17 +35,11 @@ jobs: strategy: fail-fast: false matrix: - python-version: [ "3.10", "3.11", "3.12", "3.13" ] + python-version: ["pypy3.11", "pypy3.10", "3.10", "3.11", "3.12", "3.13", "3.14"] architecture: ["x64"] os: ["windows-latest"] - pyarrow: ["true"] include: # Test the oldest Python on 32-bit - - { python-version: "3.9", architecture: "x86", os: "windows-2019", pyarrow: "false" } - # test the non-pyarrow capable ones - - { python-version: "3.14", architecture: "x64", os: "windows-latest", pyarrow: "false" } - - { python-version: "pypy3.11", architecture: "x64", os: "windows-latest", pyarrow: "false" } - - { python-version: "pypy3.10", architecture: "x64", os: "windows-latest", pyarrow: "false" } timeout-minutes: 30 name: Python ${{ matrix.python-version }} (${{ matrix.architecture }}) @@ -92,10 +86,9 @@ jobs: run: | python3 -m pip install PyQt6 - - name: Install PyArrow Test Dependency - if: "matrix.pyarrow == 'true'" + - name: Install PyArrow dependency run: | - python3 -m pip install --only-binary=:all: pyarrow + python3 -m pip install --only-binary=:all: pyarrow || true - name: Install dependencies id: install diff --git a/Tests/test_arrow.py b/Tests/test_arrow.py index 3338b6c6a4c..a8675721606 100644 --- a/Tests/test_arrow.py +++ b/Tests/test_arrow.py @@ -4,9 +4,7 @@ from PIL import Image -from .helper import ( - hopper, -) +from .helper import hopper @pytest.mark.parametrize( diff --git a/docs/reference/arrow_support.rst b/docs/reference/arrow_support.rst index c9f12c64b26..9f14f140701 100644 --- a/docs/reference/arrow_support.rst +++ b/docs/reference/arrow_support.rst @@ -4,71 +4,72 @@ Arrow Support ============= -Arrow is an in memory data exchange format that is the spritual -successor to the numpy array interface. It provides for zero copy -access to columnar data, which in our case is Image data. +`Arrow `__ +is an in-memory data exchange format that is the spiritual +successor to the NumPy array interface. It provides for zero-copy +access to columnar data, which in our case is ``Image`` data. -The goal with Arrow is to provide native zero-copy interop with any -arrow provider or consumer in the Python ecosystem. +The goal with Arrow is to provide native zero-copy interoperability +with any Arrow provider or consumer in the Python ecosystem. -.. warning:: Zero-copy does not mean zero allocation -- The internal +.. warning:: Zero-copy does not mean zero allocation -- the internal memory layout of Pillow images contains an allocation for row pointers, so there is a non-zero, but significantly smaller than a - full copy memory cost to reading an arrow image. + full-copy memory cost to reading an Arrow image. Data Formats ============ -Pillow currently supports exporting arrow images in all modes +Pillow currently supports exporting Arrow images in all modes **except** for ``BGR;15``, ``BGR;16`` and ``BGR;24``. This is due to -line length packing in these modes making for non-continuous memory. +line-length packing in these modes making for non-continuous memory. -For single band images, the exported array is width*height elements, -with each pixel corresponding to the appropriate arrow type. +For single-band images, the exported array is width*height elements, +with each pixel corresponding to the appropriate Arrow type. -For multiband images, the exported array is width*height fixed length -4 element arrays of uint8. This is memory compatible with the raw -image storage of 4 bytes per pixel. +For multiband images, the exported array is width*height fixed-length +four-element arrays of uint8. This is memory compatible with the raw +image storage of four bytes per pixel. -Mode ``1`` images are exported as 1 uint8 byte/pixel, as this is +Mode ``1`` images are exported as one uint8 byte/pixel, as this is consistent with the internal storage. Pillow will accept, but not produce, one other format. For any -multichannel image with 32 bit storage per pixel, Pillow will accept +multichannel image with 32-bit storage per pixel, Pillow will accept an array of width*height int32 elements, which will then be -interpreted using the mode specific interpretation of the bytes. +interpreted using the mode-specific interpretation of the bytes. -The image mode must match the arrow band format when reading single -channel images +The image mode must match the Arrow band format when reading single +channel images. Memory Allocator ================ Pillow's default memory allocator, the :ref:`block_allocator`, -allocates up to a 16MB block for images by default. Larger images +allocates up to a 16 MB block for images by default. Larger images overflow into additional blocks. Arrow requires a single continuous memory allocation, so images allocated in multiple blocks cannot be -exported in the arrow format. +exported in the Arrow format. To enable the single block allocator:: from PIL import Image Image.core.set_use_block_allocator(1) -Note that this is a global setting, not a per image setting. +Note that this is a global setting, not a per-image setting. Unsupported Features ==================== -* Table/Dataframe protocol. We currently support a single array. +* Table/dataframe protocol. We support a single array. * Null markers, producing or consuming. Null values are inferred from the mode. e.g. RGB images are stored in the first three bytes of - each 32 bit pixel, and the last byte is an implied null. -* Schema Negotiation. There is an optional schema for the requested - datatype in the arrow source interface. We currently ignore that + each 32-bit pixel, and the last byte is an implied null. +* Schema negotiation. There is an optional schema for the requested + datatype in the Arrow source interface. We ignore that parameter. -* Array Metadata. +* Array metadata. Internal Details ================ @@ -76,12 +77,12 @@ Internal Details Python Arrow C interface: https://arrow.apache.org/docs/format/CDataInterface/PyCapsuleInterface.html -The memory that is exported from the arrow interface is shared -- not +The memory that is exported from the Arrow interface is shared -- not copied, so the lifetime of the memory allocation is no longer strictly -tied to the life of the python object. +tied to the life of the Python object. The core imaging struct now has a refcount associated with it, and the -lifetime of the core image struct is now divorced from the python +lifetime of the core image struct is now divorced from the Python image object. Creating an arrow reference to the image increments the refcount, and the imaging struct is only released when the refcount -reaches 0. +reaches zero. diff --git a/src/PIL/Image.py b/src/PIL/Image.py index aa18f90d94c..0dbb24d91d9 100644 --- a/src/PIL/Image.py +++ b/src/PIL/Image.py @@ -3210,16 +3210,9 @@ class SupportsArrowArrayInterface(Protocol): data interface. """ - # Sorry, no types for you until pre-commit.ci stops changing stringy - # PyCapsule type to an unimportable value type, which then fails lint. - # PyCapsules are not importable, and only available in the C-API layer. - # def __arrow_c_array__( - # self, requested_schema: 'PyCapsule' = None - # ) -> tuple['PyCapsule', 'PyCapsule']: - # raise NotImplementedError() - - # old not typed definition. - def __arrow_c_array__(self, requested_schema=None): + def __arrow_c_array__( + self, requested_schema: "PyCapsule" = None # type: ignore[name-defined] # noqa: F821, UP037 + ) -> tuple["PyCapsule", "PyCapsule"]: # type: ignore[name-defined] # noqa: F821, UP037 raise NotImplementedError() @@ -3312,7 +3305,7 @@ def fromarray(obj: SupportsArrayInterface, mode: str | None = None) -> Image: def fromarrow(obj: SupportsArrowArrayInterface, mode, size) -> Image: - """Creates an image with zero copy shared memory from an object exporting + """Creates an image with zero-copy shared memory from an object exporting the arrow_c_array interface protocol:: from PIL import Image @@ -3323,7 +3316,7 @@ def fromarrow(obj: SupportsArrowArrayInterface, mode, size) -> Image: If the data representation of the ``obj`` is not compatible with Pillow internal storage, a ValueError is raised. - Pillow images can also be converted to arrow objects:: + Pillow images can also be converted to Arrow objects:: from PIL import Image import pyarrow as pa @@ -3339,13 +3332,13 @@ def fromarrow(obj: SupportsArrowArrayInterface, mode, size) -> Image: :param size: Image size. This must match the storage of the arrow object. :returns: An Image Object - Note that according to the arrow spec, both the producer and the + Note that according to the Arrow spec, both the producer and the consumer should consider the exported array to be immutable, as unsynchronized updates will potentially cause inconsistent data. See: :ref:`arrow-support` for more detailed information - .. versionadded:: 11.2 + .. versionadded:: 11.2.0 """ if not hasattr(obj, "__arrow_c_array__"): diff --git a/src/_imaging.c b/src/_imaging.c index 24ce901bca4..9eee3c82cd4 100644 --- a/src/_imaging.c +++ b/src/_imaging.c @@ -240,14 +240,14 @@ ArrowError(int err) { return ImagingError_MemoryError(); } if (err == IMAGING_ARROW_INCOMPATIBLE_MODE) { - return ImagingError_ValueError("Incompatible Pillow mode for Arrow Array"); + return ImagingError_ValueError("Incompatible Pillow mode for Arrow array"); } if (err == IMAGING_ARROW_MEMORY_LAYOUT) { return ImagingError_ValueError( "Image is in multiple array blocks, use imaging_new_block for zero copy" ); } - return ImagingError_ValueError("Unknown Error"); + return ImagingError_ValueError("Unknown error"); } void @@ -313,7 +313,7 @@ _new_arrow(PyObject *self, PyObject *args) { PyImagingNew(ImagingNewArrow(mode, xsize, ysize, schema_capsule, array_capsule) ); if (!ret) { - return ImagingError_ValueError("Invalid arrow array mode or size mismatch"); + return ImagingError_ValueError("Invalid Arrow array mode or size mismatch"); } return ret; } From 42d0682a370e0462fb6e058dc1969f7d575eb053 Mon Sep 17 00:00:00 2001 From: wiredfool Date: Sat, 29 Mar 2025 23:34:16 +0000 Subject: [PATCH 50/55] re-add include item --- .github/workflows/test-windows.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/test-windows.yml b/.github/workflows/test-windows.yml index 0b45f93c97b..0f4ef82576d 100644 --- a/.github/workflows/test-windows.yml +++ b/.github/workflows/test-windows.yml @@ -40,6 +40,7 @@ jobs: os: ["windows-latest"] include: # Test the oldest Python on 32-bit + - { python-version: "3.9", architecture: "x86", os: "windows-2019" } timeout-minutes: 30 name: Python ${{ matrix.python-version }} (${{ matrix.architecture }}) From 48bbc64c7281b134a52e6818dedbc4e52b08a57c Mon Sep 17 00:00:00 2001 From: wiredfool Date: Sun, 30 Mar 2025 14:50:00 +0100 Subject: [PATCH 51/55] Test Typing, consistency/style issues Co-authored-by: Andrew Murray <3112309+radarhere@users.noreply.github.com> --- .ci/install.sh | 2 +- .github/workflows/macos-install.sh | 2 +- Tests/test_arrow.py | 26 +++++++++++++------------- Tests/test_pyarrow.py | 4 ++-- src/PIL/Image.py | 2 +- src/_imaging.c | 1 - src/libImaging/Storage.c | 2 +- 7 files changed, 19 insertions(+), 20 deletions(-) diff --git a/.ci/install.sh b/.ci/install.sh index d21cae74e70..622a36dab4e 100755 --- a/.ci/install.sh +++ b/.ci/install.sh @@ -37,7 +37,7 @@ python3 -m pip install -U pytest-cov python3 -m pip install -U pytest-timeout python3 -m pip install pyroma # optional test dependency, only install if there's a binary package. -# fails on beta 3.14 and pypy3.10 +# fails on beta 3.14 and PyPy python3 -m pip install --only-binary=:all: pyarrow || true if [[ $(uname) != CYGWIN* ]]; then diff --git a/.github/workflows/macos-install.sh b/.github/workflows/macos-install.sh index 1f90eb5de9e..5ba2aaba3f5 100755 --- a/.github/workflows/macos-install.sh +++ b/.github/workflows/macos-install.sh @@ -27,7 +27,7 @@ python3 -m pip install -U pytest-timeout python3 -m pip install pyroma python3 -m pip install numpy # optional test dependency, only install if there's a binary package. -# fails on beta 3.14 and pypy3.10 +# fails on beta 3.14 and PyPy python3 -m pip install --only-binary=:all: pyarrow || true # extra test images diff --git a/Tests/test_arrow.py b/Tests/test_arrow.py index a8675721606..b86c77b9aa8 100644 --- a/Tests/test_arrow.py +++ b/Tests/test_arrow.py @@ -11,7 +11,7 @@ "mode, dest_modes", ( ("L", ["I", "F", "LA", "RGB", "RGBA", "RGBX", "CMYK", "YCbCr", "HSV"]), - ("I", ["L", "F"]), # Technically I32 can work for any 4x8bit storage. + ("I", ["L", "F"]), # Technically I;32 can work for any 4x8bit storage. ("F", ["I", "L", "LA", "RGB", "RGBA", "RGBX", "CMYK", "YCbCr", "HSV"]), ("LA", ["L", "F"]), ("RGB", ["L", "F"]), @@ -29,7 +29,7 @@ def test_invalid_array_type(mode: str, dest_modes: list[str]) -> None: Image.fromarrow(img, dest_mode, img.size) -def test_invalid_array_size(): +def test_invalid_array_size() -> None: img = hopper("RGB") assert img.size != (10, 10) @@ -37,14 +37,14 @@ def test_invalid_array_size(): Image.fromarrow(img, "RGB", (10, 10)) -def test_release_schema(): +def test_release_schema() -> None: # these should not error out, valgrind should be clean img = hopper("L") schema = img.__arrow_c_schema__() del schema -def test_release_array(): +def test_release_array() -> None: # these should not error out, valgrind should be clean img = hopper("L") array, schema = img.__arrow_c_array__() @@ -52,7 +52,7 @@ def test_release_array(): del schema -def test_readonly(): +def test_readonly() -> None: img = hopper("L") reloaded = Image.fromarrow(img, img.mode, img.size) assert reloaded.readonly == 1 @@ -60,7 +60,7 @@ def test_readonly(): assert reloaded.readonly == 1 -def test_multiblock_l_image(): +def test_multiblock_l_image() -> None: block_size = Image.core.get_block_size() # check a 2 block image in single channel mode @@ -71,7 +71,7 @@ def test_multiblock_l_image(): (schema, arr) = img.__arrow_c_array__() -def test_multiblock_rgba_image(): +def test_multiblock_rgba_image() -> None: block_size = Image.core.get_block_size() # check a 2 block image in 4 channel mode @@ -82,7 +82,7 @@ def test_multiblock_rgba_image(): (schema, arr) = img.__arrow_c_array__() -def test_multiblock_l_schema(): +def test_multiblock_l_schema() -> None: block_size = Image.core.get_block_size() # check a 2 block image in single channel mode @@ -93,7 +93,7 @@ def test_multiblock_l_schema(): img.__arrow_c_schema__() -def test_multiblock_rgba_schema(): +def test_multiblock_rgba_schema() -> None: block_size = Image.core.get_block_size() # check a 2 block image in 4 channel mode @@ -104,7 +104,7 @@ def test_multiblock_rgba_schema(): img.__arrow_c_schema__() -def test_singleblock_l_image(): +def test_singleblock_l_image() -> None: Image.core.set_use_block_allocator(1) block_size = Image.core.get_block_size() @@ -121,7 +121,7 @@ def test_singleblock_l_image(): Image.core.set_use_block_allocator(0) -def test_singleblock_rgba_image(): +def test_singleblock_rgba_image() -> None: Image.core.set_use_block_allocator(1) block_size = Image.core.get_block_size() @@ -136,7 +136,7 @@ def test_singleblock_rgba_image(): Image.core.set_use_block_allocator(0) -def test_singleblock_l_schema(): +def test_singleblock_l_schema() -> None: Image.core.set_use_block_allocator(1) block_size = Image.core.get_block_size() @@ -150,7 +150,7 @@ def test_singleblock_l_schema(): Image.core.set_use_block_allocator(0) -def test_singleblock_rgba_schema(): +def test_singleblock_rgba_schema() -> None: Image.core.set_use_block_allocator(1) block_size = Image.core.get_block_size() diff --git a/Tests/test_pyarrow.py b/Tests/test_pyarrow.py index 72c546c0d65..1f0bdf6b501 100644 --- a/Tests/test_pyarrow.py +++ b/Tests/test_pyarrow.py @@ -71,7 +71,7 @@ def test_to_array(mode: str, dtype: Any, mask: Any) -> None: assert_image_equal(img, reloaded) -def test_lifetime(): +def test_lifetime() -> None: # valgrind shouldn't error out here. # arrays should be accessible after the image is deleted. @@ -89,7 +89,7 @@ def test_lifetime(): del arr_2 -def test_lifetime2(): +def test_lifetime2() -> None: # valgrind shouldn't error out here. # img should remain after the arrays are collected. diff --git a/src/PIL/Image.py b/src/PIL/Image.py index 0dbb24d91d9..04a80fb9a8f 100644 --- a/src/PIL/Image.py +++ b/src/PIL/Image.py @@ -3325,7 +3325,7 @@ def fromarrow(obj: SupportsArrowArrayInterface, mode, size) -> Image: As with array support, when converting Pillow images to arrays, only pixel values are transferred. This means that P and PA mode - image will lose their palette. + images will lose their palette. :param obj: Object with an arrow_c_array interface :param mode: Image mode. diff --git a/src/_imaging.c b/src/_imaging.c index 9eee3c82cd4..e4a985a8249 100644 --- a/src/_imaging.c +++ b/src/_imaging.c @@ -291,7 +291,6 @@ ExportArrowArrayPyCapsule(ImagingObject *self) { return PyCapsule_New(array, "arrow_array", ReleaseArrowArrayPyCapsule); } free(array); - // raise error here return ArrowError(err); } diff --git a/src/libImaging/Storage.c b/src/libImaging/Storage.c index 4ee1af436e7..fb29602316d 100644 --- a/src/libImaging/Storage.c +++ b/src/libImaging/Storage.c @@ -572,7 +572,7 @@ ImagingAllocateBlock(Imaging im) { static void ImagingDestroyArrow(Imaging im) { - // Rely on the internal python destructor for the array capsule. + // Rely on the internal Python destructor for the array capsule. if (im->arrow_array_capsule) { Py_DECREF(im->arrow_array_capsule); im->arrow_array_capsule = NULL; From 088f80d20df559e059eae2c0e51333ceafafd2ab Mon Sep 17 00:00:00 2001 From: wiredfool Date: Sun, 30 Mar 2025 16:30:02 +0100 Subject: [PATCH 52/55] Fix mypy --- Tests/test_pyarrow.py | 1 + 1 file changed, 1 insertion(+) diff --git a/Tests/test_pyarrow.py b/Tests/test_pyarrow.py index 1f0bdf6b501..cb2843b8519 100644 --- a/Tests/test_pyarrow.py +++ b/Tests/test_pyarrow.py @@ -106,4 +106,5 @@ def test_lifetime2() -> None: img2 = img.copy() px = img2.load() + assert px # make mypy happy assert isinstance(px[0, 0], int) From b729f64810db8e6ed7ee70fc6fc5bb6b176fc1d5 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Sun, 30 Mar 2025 15:30:52 +0000 Subject: [PATCH 53/55] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- Tests/test_pyarrow.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Tests/test_pyarrow.py b/Tests/test_pyarrow.py index cb2843b8519..76075bda2a7 100644 --- a/Tests/test_pyarrow.py +++ b/Tests/test_pyarrow.py @@ -106,5 +106,5 @@ def test_lifetime2() -> None: img2 = img.copy() px = img2.load() - assert px # make mypy happy + assert px # make mypy happy assert isinstance(px[0, 0], int) From 3e28e05c76d78ea88a5a961551858b59369fd279 Mon Sep 17 00:00:00 2001 From: Hugo van Kemenade <1324225+hugovk@users.noreply.github.com> Date: Mon, 31 Mar 2025 13:31:54 +0300 Subject: [PATCH 54/55] Types and typos Co-authored-by: Andrew Murray <3112309+radarhere@users.noreply.github.com> --- Tests/test_pyarrow.py | 6 ++++-- src/PIL/Image.py | 2 +- src/libImaging/Arrow.c | 6 +++--- 3 files changed, 8 insertions(+), 6 deletions(-) diff --git a/Tests/test_pyarrow.py b/Tests/test_pyarrow.py index 76075bda2a7..ece9f8f2657 100644 --- a/Tests/test_pyarrow.py +++ b/Tests/test_pyarrow.py @@ -17,7 +17,9 @@ TEST_IMAGE_SIZE = (10, 10) -def _test_img_equals_pyarray(img: Image.Image, arr: Any, mask) -> None: +def _test_img_equals_pyarray( + img: Image.Image, arr: Any, mask: list[int] | None +) -> None: assert img.height * img.width == len(arr) px = img.load() assert px is not None @@ -53,7 +55,7 @@ def _test_img_equals_pyarray(img: Image.Image, arr: Any, mask) -> None: ("HSV", fl_uint8_4_type, [0, 1, 2]), ), ) -def test_to_array(mode: str, dtype: Any, mask: Any) -> None: +def test_to_array(mode: str, dtype: Any, mask: list[int] | None) -> None: img = hopper(mode) # Resize to non-square diff --git a/src/PIL/Image.py b/src/PIL/Image.py index 04a80fb9a8f..188cb91f157 100644 --- a/src/PIL/Image.py +++ b/src/PIL/Image.py @@ -3330,7 +3330,7 @@ def fromarrow(obj: SupportsArrowArrayInterface, mode, size) -> Image: :param obj: Object with an arrow_c_array interface :param mode: Image mode. :param size: Image size. This must match the storage of the arrow object. - :returns: An Image Object + :returns: An Image object Note that according to the Arrow spec, both the producer and the consumer should consider the exported array to be immutable, as diff --git a/src/libImaging/Arrow.c b/src/libImaging/Arrow.c index 493c2a87e10..33ff2ce779a 100644 --- a/src/libImaging/Arrow.c +++ b/src/libImaging/Arrow.c @@ -110,7 +110,7 @@ export_imaging_schema(Imaging im, struct ArrowSchema *schema) { if (retval != 0) { return retval; } - // if it's not 1 band, it's an int32 at the moment. 4 unint8 bands. + // if it's not 1 band, it's an int32 at the moment. 4 uint8 bands. schema->n_children = 1; schema->children = calloc(1, sizeof(struct ArrowSchema *)); schema->children[0] = (struct ArrowSchema *)calloc(1, sizeof(struct ArrowSchema)); @@ -137,7 +137,7 @@ release_const_array(struct ArrowArray *array) { array->buffers = NULL; } if (array->children) { - // undone -- does arrow release all the children recursively. + // undone -- does arrow release all the children recursively? for (int i = 0; i < array->n_children; i++) { if (array->children[i]->release) { array->children[i]->release(array->children[i]); @@ -235,7 +235,7 @@ export_fixed_pixel_array(Imaging im, struct ArrowArray *array) { // assert(array->buffers != NULL); array->buffers[0] = NULL; // no nulls, null bitmap can be omitted - // if it's not 1 band, it's an int32 at the moment. 4 unint8 bands. + // if it's not 1 band, it's an int32 at the moment. 4 uint8 bands. array->n_children = 1; array->children = calloc(1, sizeof(struct ArrowArray *)); if (!array->children) { From 167864104ae7e3aae17f8b3fe761495b40050ce9 Mon Sep 17 00:00:00 2001 From: Hugo van Kemenade <1324225+hugovk@users.noreply.github.com> Date: Tue, 1 Apr 2025 07:16:14 +0300 Subject: [PATCH 55/55] Apply suggestions from code review Co-authored-by: Andrew Murray <3112309+radarhere@users.noreply.github.com> --- docs/reference/arrow_support.rst | 2 +- src/libImaging/Storage.c | 3 +-- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/docs/reference/arrow_support.rst b/docs/reference/arrow_support.rst index 9f14f140701..4a5c45e8624 100644 --- a/docs/reference/arrow_support.rst +++ b/docs/reference/arrow_support.rst @@ -64,7 +64,7 @@ Unsupported Features * Table/dataframe protocol. We support a single array. * Null markers, producing or consuming. Null values are inferred from - the mode. e.g. RGB images are stored in the first three bytes of + the mode, e.g. RGB images are stored in the first three bytes of each 32-bit pixel, and the last byte is an implied null. * Schema negotiation. There is an optional schema for the requested datatype in the Arrow source interface. We ignore that diff --git a/src/libImaging/Storage.c b/src/libImaging/Storage.c index fb29602316d..4fa4ecd1ce4 100644 --- a/src/libImaging/Storage.c +++ b/src/libImaging/Storage.c @@ -586,7 +586,7 @@ ImagingBorrowArrow( int offset_width, PyObject *arrow_capsule ) { - // offset_width is the # of char* for a single offset from arrow + // offset_width is the number of char* for a single offset from arrow Py_ssize_t y, i; char *borrowed_buffer = NULL; @@ -735,7 +735,6 @@ ImagingNewArrow( return im; } } - // linter: don't mess with the formatting here if (strcmp(schema->format, "+w:4") == 0 // 4 up array && im->pixelsize == 4 // storage as 32 bpc && schema->n_children > 0 // make sure schema is well formed.