From 9b130e6d6c1c10b25740ef3f1fe758cf78915859 Mon Sep 17 00:00:00 2001 From: Kevin Newton Date: Fri, 24 Jan 2025 14:14:33 -0500 Subject: [PATCH 1/3] Reduce memory arena contention Previously there was one memory arena for all threads, making it the bottleneck for multi-threaded performance. As the number of threads increased, the contention for the lock on the arena would grow, causing other threads to wait to acquire it. This commit makes it use 8 memory arenas, and round-robins how they are assigned to threads. Threads keep track of the index that they should use into the arena array, assigned the first time the arena is accessed on a given thread. When an image is first created, it is allocated from an arena. When the logic to have multiple arenas is enabled, it then keeps track of the index on the image, so that when deleted it can be returned to the correct arena. Effectively this means that in single-threaded programs, this should not really have an effect. We also do not do this logic if the GIL is enabled, as it effectively acts as the lock on the default arena for us. As expected, this approach has no real noticeable effect on regular CPython. On free-threaded CPython, however, there is a massive difference (measuring up to about 70%). 
--- setup.py | 64 +++++++++++++++++--- src/_imaging.c | 116 +++++++++++++++++++++++------------- src/libImaging/Imaging.h | 48 +++++++++++++++ src/libImaging/Storage.c | 124 ++++++++++++++++++++++++++++++++------- 4 files changed, 284 insertions(+), 68 deletions(-) diff --git a/setup.py b/setup.py index a85731db936..55e71959a2a 100644 --- a/setup.py +++ b/setup.py @@ -8,18 +8,21 @@ # ------------------------------ from __future__ import annotations +import distutils.ccompiler import os import re import shutil import struct import subprocess import sys +import tempfile import warnings from collections.abc import Iterator from typing import Any from setuptools import Extension, setup from setuptools.command.build_ext import build_ext +from setuptools.errors import CompileError def get_version() -> str: @@ -292,6 +295,47 @@ def _pkg_config(name: str) -> tuple[list[str], list[str]] | None: return None +def _try_compile(compiler: distutils.ccompiler.CCompiler, code: str) -> bool: + try: + with tempfile.TemporaryDirectory() as d: + fn = os.path.join(d, "test.c") + with open(fn, "w") as f: + f.write(code) + compiler.compile([fn], output_dir=d, extra_preargs=["-Werror"]) + return True + except CompileError: + return False + + +def _try_compile_attr(compiler: distutils.ccompiler.CCompiler, attr: str) -> bool: + code = f""" + #pragma GCC diagnostic error "-Wattributes" + #pragma clang diagnostic error "-Wattributes" + + int {attr} foo; + int main() {{ + return 0; + }} + """ + + return _try_compile(compiler, code) + + +def _try_compile_tls_define_macros( + compiler: distutils.ccompiler.CCompiler, +) -> list[tuple[str, str | None]]: + if _try_compile_attr(compiler, "thread_local"): # C23 + return [("HAVE_THREAD_LOCAL", None)] + elif _try_compile_attr(compiler, "_Thread_local"): # C11/C17 + return [("HAVE__THREAD_LOCAL", None)] + elif _try_compile_attr(compiler, "__thread"): # GCC/clang + return [("HAVE___THREAD", None)] + elif _try_compile_attr(compiler, "__declspec(thread)"): 
# MSVC + return [("HAVE___DECLSPEC_THREAD_", None)] + else: + return [] + + class pil_build_ext(build_ext): class ext_feature: features = [ @@ -426,13 +470,14 @@ def finalize_options(self) -> None: def _update_extension( self, name: str, - libraries: list[str] | list[str | bool | None], + libraries: list[str] | list[str | bool | None] | None = None, define_macros: list[tuple[str, str | None]] | None = None, sources: list[str] | None = None, ) -> None: for extension in self.extensions: if extension.name == name: - extension.libraries += libraries + if libraries is not None: + extension.libraries += libraries if define_macros is not None: extension.define_macros += define_macros if sources is not None: @@ -890,7 +935,10 @@ def build_extensions(self) -> None: defs.append(("PILLOW_VERSION", f'"{PILLOW_VERSION}"')) - self._update_extension("PIL._imaging", libs, defs) + tls_define_macros = _try_compile_tls_define_macros(self.compiler) + self._update_extension("PIL._imaging", libs, defs + tls_define_macros) + self._update_extension("PIL._imagingmath", define_macros=tls_define_macros) + self._update_extension("PIL._imagingmorph", define_macros=tls_define_macros) # # additional libraries @@ -913,7 +961,9 @@ def build_extensions(self) -> None: libs.append(feature.get("fribidi")) else: # building FriBiDi shim from src/thirdparty srcs.append("src/thirdparty/fribidi-shim/fribidi.c") - self._update_extension("PIL._imagingft", libs, defs, srcs) + self._update_extension( + "PIL._imagingft", libs, defs + tls_define_macros, srcs + ) else: self._remove_extension("PIL._imagingft") @@ -922,19 +972,19 @@ def build_extensions(self) -> None: libs = [feature.get("lcms")] if sys.platform == "win32": libs.extend(["user32", "gdi32"]) - self._update_extension("PIL._imagingcms", libs) + self._update_extension("PIL._imagingcms", libs, tls_define_macros) else: self._remove_extension("PIL._imagingcms") webp = feature.get("webp") if isinstance(webp, str): libs = [webp, webp + "mux", webp + "demux"] 
- self._update_extension("PIL._webp", libs) + self._update_extension("PIL._webp", libs, tls_define_macros) else: self._remove_extension("PIL._webp") tk_libs = ["psapi"] if sys.platform in ("win32", "cygwin") else [] - self._update_extension("PIL._imagingtk", tk_libs) + self._update_extension("PIL._imagingtk", tk_libs, tls_define_macros) build_ext.build_extensions(self) diff --git a/src/_imaging.c b/src/_imaging.c index 2fd2deffbe6..8aadad915c1 100644 --- a/src/_imaging.c +++ b/src/_imaging.c @@ -3912,34 +3912,49 @@ _get_stats(PyObject *self, PyObject *args) { return NULL; } - MUTEX_LOCK(&ImagingDefaultArena.mutex); - ImagingMemoryArena arena = &ImagingDefaultArena; - - v = PyLong_FromLong(arena->stats_new_count); + long stats_new_count = 0; + long stats_allocated_blocks = 0; + long stats_reused_blocks = 0; + long stats_reallocated_blocks = 0; + long stats_freed_blocks = 0; + long blocks_cached = 0; + + ImagingMemoryArena arena; + IMAGING_ARENAS_FOREACH(arena) { + MUTEX_LOCK(&arena->mutex); + stats_new_count += arena->stats_new_count; + stats_allocated_blocks += arena->stats_allocated_blocks; + stats_reused_blocks += arena->stats_reused_blocks; + stats_reallocated_blocks += arena->stats_reallocated_blocks; + stats_freed_blocks += arena->stats_freed_blocks; + blocks_cached += arena->blocks_cached; + MUTEX_UNLOCK(&arena->mutex); + } + + v = PyLong_FromLong(stats_new_count); PyDict_SetItemString(d, "new_count", v ? v : Py_None); Py_XDECREF(v); - v = PyLong_FromLong(arena->stats_allocated_blocks); + v = PyLong_FromLong(stats_allocated_blocks); PyDict_SetItemString(d, "allocated_blocks", v ? v : Py_None); Py_XDECREF(v); - v = PyLong_FromLong(arena->stats_reused_blocks); + v = PyLong_FromLong(stats_reused_blocks); PyDict_SetItemString(d, "reused_blocks", v ? v : Py_None); Py_XDECREF(v); - v = PyLong_FromLong(arena->stats_reallocated_blocks); + v = PyLong_FromLong(stats_reallocated_blocks); PyDict_SetItemString(d, "reallocated_blocks", v ? 
v : Py_None); Py_XDECREF(v); - v = PyLong_FromLong(arena->stats_freed_blocks); + v = PyLong_FromLong(stats_freed_blocks); PyDict_SetItemString(d, "freed_blocks", v ? v : Py_None); Py_XDECREF(v); - v = PyLong_FromLong(arena->blocks_cached); + v = PyLong_FromLong(blocks_cached); PyDict_SetItemString(d, "blocks_cached", v ? v : Py_None); Py_XDECREF(v); - MUTEX_UNLOCK(&ImagingDefaultArena.mutex); return d; } @@ -3949,14 +3964,16 @@ _reset_stats(PyObject *self, PyObject *args) { return NULL; } - MUTEX_LOCK(&ImagingDefaultArena.mutex); - ImagingMemoryArena arena = &ImagingDefaultArena; - arena->stats_new_count = 0; - arena->stats_allocated_blocks = 0; - arena->stats_reused_blocks = 0; - arena->stats_reallocated_blocks = 0; - arena->stats_freed_blocks = 0; - MUTEX_UNLOCK(&ImagingDefaultArena.mutex); + ImagingMemoryArena arena; + IMAGING_ARENAS_FOREACH(arena) { + MUTEX_LOCK(&arena->mutex); + arena->stats_new_count = 0; + arena->stats_allocated_blocks = 0; + arena->stats_reused_blocks = 0; + arena->stats_reallocated_blocks = 0; + arena->stats_freed_blocks = 0; + MUTEX_UNLOCK(&arena->mutex); + } Py_RETURN_NONE; } @@ -3967,9 +3984,10 @@ _get_alignment(PyObject *self, PyObject *args) { return NULL; } - MUTEX_LOCK(&ImagingDefaultArena.mutex); - int alignment = ImagingDefaultArena.alignment; - MUTEX_UNLOCK(&ImagingDefaultArena.mutex); + ImagingMemoryArena arena = ImagingGetArena(); + MUTEX_LOCK(&arena->mutex); + int alignment = arena->alignment; + MUTEX_UNLOCK(&arena->mutex); return PyLong_FromLong(alignment); } @@ -3979,9 +3997,10 @@ _get_block_size(PyObject *self, PyObject *args) { return NULL; } - MUTEX_LOCK(&ImagingDefaultArena.mutex); - int block_size = ImagingDefaultArena.block_size; - MUTEX_UNLOCK(&ImagingDefaultArena.mutex); + ImagingMemoryArena arena = ImagingGetArena(); + MUTEX_LOCK(&arena->mutex); + int block_size = arena->block_size; + MUTEX_UNLOCK(&arena->mutex); return PyLong_FromLong(block_size); } @@ -3991,9 +4010,10 @@ _get_blocks_max(PyObject *self, PyObject 
*args) { return NULL; } - MUTEX_LOCK(&ImagingDefaultArena.mutex); - int blocks_max = ImagingDefaultArena.blocks_max; - MUTEX_UNLOCK(&ImagingDefaultArena.mutex); + ImagingMemoryArena arena = ImagingGetArena(); + MUTEX_LOCK(&arena->mutex); + int blocks_max = arena->blocks_max; + MUTEX_UNLOCK(&arena->mutex); return PyLong_FromLong(blocks_max); } @@ -4014,9 +4034,12 @@ _set_alignment(PyObject *self, PyObject *args) { return NULL; } - MUTEX_LOCK(&ImagingDefaultArena.mutex); - ImagingDefaultArena.alignment = alignment; - MUTEX_UNLOCK(&ImagingDefaultArena.mutex); + ImagingMemoryArena arena; + IMAGING_ARENAS_FOREACH(arena) { + MUTEX_LOCK(&arena->mutex); + arena->alignment = alignment; + MUTEX_UNLOCK(&arena->mutex); + } Py_RETURN_NONE; } @@ -4038,9 +4061,12 @@ _set_block_size(PyObject *self, PyObject *args) { return NULL; } - MUTEX_LOCK(&ImagingDefaultArena.mutex); - ImagingDefaultArena.block_size = block_size; - MUTEX_UNLOCK(&ImagingDefaultArena.mutex); + ImagingMemoryArena arena; + IMAGING_ARENAS_FOREACH(arena) { + MUTEX_LOCK(&arena->mutex); + arena->block_size = block_size; + MUTEX_UNLOCK(&arena->mutex); + } Py_RETURN_NONE; } @@ -4058,15 +4084,20 @@ _set_blocks_max(PyObject *self, PyObject *args) { } if ((unsigned long)blocks_max > - SIZE_MAX / sizeof(ImagingDefaultArena.blocks_pool[0])) { + SIZE_MAX / sizeof(ImagingGetArena()->blocks_pool[0])) { PyErr_SetString(PyExc_ValueError, "blocks_max is too large"); return NULL; } - MUTEX_LOCK(&ImagingDefaultArena.mutex); - int status = ImagingMemorySetBlocksMax(&ImagingDefaultArena, blocks_max); - MUTEX_UNLOCK(&ImagingDefaultArena.mutex); - if (!status) { + int error = 0; + ImagingMemoryArena arena; + IMAGING_ARENAS_FOREACH(arena) { + MUTEX_LOCK(&arena->mutex); + error |= ImagingMemorySetBlocksMax(arena, blocks_max); + MUTEX_UNLOCK(&arena->mutex); + } + + if (error) { return ImagingError_MemoryError(); } @@ -4081,9 +4112,12 @@ _clear_cache(PyObject *self, PyObject *args) { return NULL; } - MUTEX_LOCK(&ImagingDefaultArena.mutex); 
- ImagingMemoryClearCache(&ImagingDefaultArena, i); - MUTEX_UNLOCK(&ImagingDefaultArena.mutex); + ImagingMemoryArena arena; + IMAGING_ARENAS_FOREACH(arena) { + MUTEX_LOCK(&arena->mutex); + ImagingMemoryClearCache(arena, i); + MUTEX_UNLOCK(&arena->mutex); + } Py_RETURN_NONE; } diff --git a/src/libImaging/Imaging.h b/src/libImaging/Imaging.h index 0c2d3fc2e34..5933b0ecdb7 100644 --- a/src/libImaging/Imaging.h +++ b/src/libImaging/Imaging.h @@ -51,6 +51,20 @@ extern "C" { * extensions, see http://www.effbot.org/zone/pil-extending.htm */ +#ifdef Py_GIL_DISABLED + #if defined(__cplusplus) + #define IMAGING_TLS thread_local + #elif defined(HAVE_THREAD_LOCAL) + #define IMAGING_TLS thread_local + #elif defined(HAVE__THREAD_LOCAL) + #define IMAGING_TLS _Thread_local + #elif defined(HAVE___THREAD) + #define IMAGING_TLS __thread + #elif defined(HAVE___DECLSPEC_THREAD_) + #define IMAGING_TLS __declspec(thread) + #endif +#endif + /* Handles */ typedef struct ImagingMemoryInstance *Imaging; @@ -104,6 +118,10 @@ struct ImagingMemoryInstance { /* Virtual methods */ void (*destroy)(Imaging im); + +#ifdef IMAGING_TLS + int arenaindex; /* Index of the arena this image is associated with. */ +#endif }; #define IMAGING_PIXEL_1(im, x, y) ((im)->image8[(y)][(x)]) @@ -161,6 +179,9 @@ typedef struct ImagingMemoryArena { int stats_reallocated_blocks; /* Number of blocks which were actually reallocated after retrieving */ int stats_freed_blocks; /* Number of freed blocks */ +#ifdef IMAGING_TLS + int index; /* Index of the arena in the global array. */ +#endif #ifdef Py_GIL_DISABLED PyMutex mutex; #endif @@ -169,7 +190,34 @@ typedef struct ImagingMemoryArena { /* Objects */ /* ------- */ +#ifdef IMAGING_TLS +/* In this case we both do not have the GIL and have thread-local storage, so we + * will allocate a set of arenas and associated them with threads one at a time. 
+ */ +#define IMAGING_ARENAS_COUNT 8 +extern struct ImagingMemoryArena ImagingArenas[IMAGING_ARENAS_COUNT]; + +/* Provide a macro that loops through each arena that has been + * statically-allocated. This is necessary to properly handle stats. + */ +#define IMAGING_ARENAS_FOREACH(arena) \ + for ((arena) = &ImagingArenas[0]; arena->block_size; ++(arena)) +#else +/* In this case we either have the GIL or do not have thread-local storage, in + * which case we will only allocate a single arena. + */ extern struct ImagingMemoryArena ImagingDefaultArena; + +/* Provide a macro that loops through each arena that has been + * statically-allocated. In this case because there is only one, this is + * effectively a single block of code. + */ +#define IMAGING_ARENAS_FOREACH(arena) \ + for ((arena) = &ImagingDefaultArena; (arena); (arena) = NULL) +#endif + +ImagingMemoryArena ImagingGetArena(void); + extern int ImagingMemorySetBlocksMax(ImagingMemoryArena arena, int blocks_max); extern void diff --git a/src/libImaging/Storage.c b/src/libImaging/Storage.c index 522e9f37557..8960d76e74c 100644 --- a/src/libImaging/Storage.c +++ b/src/libImaging/Storage.c @@ -218,9 +218,10 @@ ImagingNewPrologueSubtype(const char *mode, int xsize, int ysize, int size) { break; } - MUTEX_LOCK(&ImagingDefaultArena.mutex); - ImagingDefaultArena.stats_new_count += 1; - MUTEX_UNLOCK(&ImagingDefaultArena.mutex); + ImagingMemoryArena arena = ImagingGetArena(); + MUTEX_LOCK(&arena->mutex); + arena->stats_new_count += 1; + MUTEX_UNLOCK(&arena->mutex); return im; } @@ -258,23 +259,102 @@ ImagingDelete(Imaging im) { /* Allocate image as an array of line buffers. */ #define IMAGING_PAGE_SIZE (4096) +#define IMAGING_ARENA_BLOCK_SIZE (16 * 1024 * 1024) +#ifdef IMAGING_TLS +/* This is the overall process-level index that keeps track of the next index + * that will be assigned to a thread. 
+ */ +static uint64_t ImagingArenaIndex = 0; + +/* This is the thread-local index that associated a thread with an arena in the + * statically-allocated list. + */ +static IMAGING_TLS uint64_t ImagingArenaThreadIndex = UINT64_MAX; + +/* These are the statically-allocated arenas. */ +struct ImagingMemoryArena ImagingArenas[IMAGING_ARENAS_COUNT] = { + { 1, IMAGING_ARENA_BLOCK_SIZE, 0, 0, NULL, 0, 0, 0, 0, 0, 0, {0} }, + { 1, IMAGING_ARENA_BLOCK_SIZE, 0, 0, NULL, 0, 0, 0, 0, 0, 1, {0} }, + { 1, IMAGING_ARENA_BLOCK_SIZE, 0, 0, NULL, 0, 0, 0, 0, 0, 2, {0} }, + { 1, IMAGING_ARENA_BLOCK_SIZE, 0, 0, NULL, 0, 0, 0, 0, 0, 3, {0} }, + { 1, IMAGING_ARENA_BLOCK_SIZE, 0, 0, NULL, 0, 0, 0, 0, 0, 4, {0} }, + { 1, IMAGING_ARENA_BLOCK_SIZE, 0, 0, NULL, 0, 0, 0, 0, 0, 5, {0} }, + { 1, IMAGING_ARENA_BLOCK_SIZE, 0, 0, NULL, 0, 0, 0, 0, 0, 6, {0} }, + { 1, IMAGING_ARENA_BLOCK_SIZE, 0, 0, NULL, 0, 0, 0, 0, 0, 7, {0} }, + { 0 } +}; + +/* Get a pointer to the correct arena for this context. In this case where we + * are using a round-robin approach to the statically allocated arenas, we will + * return the arena that is assigned to the thread on first use. + */ +ImagingMemoryArena ImagingGetArena(void) { + if (ImagingArenaThreadIndex == UINT64_MAX) { + ImagingArenaThreadIndex = _Py_atomic_add_uint64(&ImagingArenaIndex, 1) % IMAGING_ARENAS_COUNT; + } + return &ImagingArenas[ImagingArenaThreadIndex]; +} + +/* Return the arena associated with the given image. In this case the index of + * the arena is stored on the image itself. + */ +ImagingMemoryArena ImagingGetArenaFromImaging(Imaging im) { + int arenaindex = im->arenaindex; + assert(arenaindex >= 0 && arenaindex < IMAGING_ARENAS_COUNT); + return &ImagingArenas[arenaindex]; +} + +/* Set the arena index on the given image based on the index of the arena. This + * is necessary in order to return the blocks to the correct arena when the + * image is destroyed. 
+ */ +static void ImagingSetArenaOnImaging(Imaging im, ImagingMemoryArena arena) { + im->arenaindex = arena->index; +} +#else +/* Because we have the GIL (or do not have thread-local storage), we only have a + * single arena. + */ struct ImagingMemoryArena ImagingDefaultArena = { - 1, // alignment - 16 * 1024 * 1024, // block_size - 0, // blocks_max - 0, // blocks_cached - NULL, // blocks_pool + 1, // alignment + IMAGING_ARENA_BLOCK_SIZE, // block_size + 0, // blocks_max + 0, // blocks_cached + NULL, // blocks_pool 0, 0, 0, 0, 0, // Stats #ifdef Py_GIL_DISABLED + /* On the very off-chance that someone is running free-threaded Python on a + * platform that does not support thread-local storage, we need a mutex + * here. + */ {0}, #endif }; +/* Get a pointer to the correct arena for this context. In this case where we + * either have the GIL or we do not have TLS, we will return only the default + * arena. + */ +ImagingMemoryArena ImagingGetArena(void) { + return &ImagingDefaultArena; +} + +/* Return the arena associated with the given image. In this case because we + * only have one arena, we always return the default arena. + */ +#define ImagingGetArenaFromImaging(im) &ImagingDefaultArena + +/* Set the arena index on the given image based on the index of the arena. In + * this case because we only have one arena, we do not need to do anything. 
+ */ +#define ImagingSetArenaOnImaging(im, arena) +#endif + int ImagingMemorySetBlocksMax(ImagingMemoryArena arena, int blocks_max) { void *p; @@ -288,18 +368,18 @@ ImagingMemorySetBlocksMax(ImagingMemoryArena arena, int blocks_max) { p = realloc(arena->blocks_pool, sizeof(*arena->blocks_pool) * blocks_max); if (!p) { // Leave previous blocks_max value - return 0; + return 1; } arena->blocks_pool = p; } else { arena->blocks_pool = calloc(sizeof(*arena->blocks_pool), blocks_max); if (!arena->blocks_pool) { - return 0; + return 1; } } arena->blocks_max = blocks_max; - return 1; + return 0; } void @@ -369,12 +449,13 @@ ImagingDestroyArray(Imaging im) { int y = 0; if (im->blocks) { - MUTEX_LOCK(&ImagingDefaultArena.mutex); + ImagingMemoryArena arena = ImagingGetArenaFromImaging(im); + MUTEX_LOCK(&arena->mutex); while (im->blocks[y].ptr) { - memory_return_block(&ImagingDefaultArena, im->blocks[y]); + memory_return_block(arena, im->blocks[y]); y += 1; } - MUTEX_UNLOCK(&ImagingDefaultArena.mutex); + MUTEX_UNLOCK(&arena->mutex); free(im->blocks); } } @@ -504,11 +585,14 @@ ImagingNewInternal(const char *mode, int xsize, int ysize, int dirty) { return NULL; } - MUTEX_LOCK(&ImagingDefaultArena.mutex); + ImagingMemoryArena arena = ImagingGetArena(); + ImagingSetArenaOnImaging(im, arena); + + MUTEX_LOCK(&arena->mutex); Imaging tmp = ImagingAllocateArray( - im, &ImagingDefaultArena, dirty, ImagingDefaultArena.block_size + im, arena, dirty, arena->block_size ); - MUTEX_UNLOCK(&ImagingDefaultArena.mutex); + MUTEX_UNLOCK(&arena->mutex); if (tmp) { return im; } @@ -516,9 +600,9 @@ ImagingNewInternal(const char *mode, int xsize, int ysize, int dirty) { ImagingError_Clear(); // Try to allocate the image once more with smallest possible block size - MUTEX_LOCK(&ImagingDefaultArena.mutex); - tmp = ImagingAllocateArray(im, &ImagingDefaultArena, dirty, IMAGING_PAGE_SIZE); - MUTEX_UNLOCK(&ImagingDefaultArena.mutex); + MUTEX_LOCK(&arena->mutex); + tmp = ImagingAllocateArray(im, arena, 
dirty, IMAGING_PAGE_SIZE); + MUTEX_UNLOCK(&arena->mutex); if (tmp) { return im; } From cff21412c5b0d47b7545d6af32e21c6927ddcca9 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Thu, 30 Jan 2025 13:27:18 +0000 Subject: [PATCH 2/3] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- src/libImaging/Imaging.h | 25 +++++++++++++------------ src/libImaging/Storage.c | 37 ++++++++++++++++++++----------------- 2 files changed, 33 insertions(+), 29 deletions(-) diff --git a/src/libImaging/Imaging.h b/src/libImaging/Imaging.h index 5933b0ecdb7..6bb93d5403f 100644 --- a/src/libImaging/Imaging.h +++ b/src/libImaging/Imaging.h @@ -52,17 +52,17 @@ extern "C" { */ #ifdef Py_GIL_DISABLED - #if defined(__cplusplus) - #define IMAGING_TLS thread_local - #elif defined(HAVE_THREAD_LOCAL) - #define IMAGING_TLS thread_local - #elif defined(HAVE__THREAD_LOCAL) - #define IMAGING_TLS _Thread_local - #elif defined(HAVE___THREAD) - #define IMAGING_TLS __thread - #elif defined(HAVE___DECLSPEC_THREAD_) - #define IMAGING_TLS __declspec(thread) - #endif +#if defined(__cplusplus) +#define IMAGING_TLS thread_local +#elif defined(HAVE_THREAD_LOCAL) +#define IMAGING_TLS thread_local +#elif defined(HAVE__THREAD_LOCAL) +#define IMAGING_TLS _Thread_local +#elif defined(HAVE___THREAD) +#define IMAGING_TLS __thread +#elif defined(HAVE___DECLSPEC_THREAD_) +#define IMAGING_TLS __declspec(thread) +#endif #endif /* Handles */ @@ -216,7 +216,8 @@ extern struct ImagingMemoryArena ImagingDefaultArena; for ((arena) = &ImagingDefaultArena; (arena); (arena) = NULL) #endif -ImagingMemoryArena ImagingGetArena(void); +ImagingMemoryArena +ImagingGetArena(void); extern int ImagingMemorySetBlocksMax(ImagingMemoryArena arena, int blocks_max); diff --git a/src/libImaging/Storage.c b/src/libImaging/Storage.c index 8960d76e74c..16f07816a9a 100644 --- a/src/libImaging/Storage.c +++ b/src/libImaging/Storage.c @@ 
-274,24 +274,26 @@ static IMAGING_TLS uint64_t ImagingArenaThreadIndex = UINT64_MAX; /* These are the statically-allocated arenas. */ struct ImagingMemoryArena ImagingArenas[IMAGING_ARENAS_COUNT] = { - { 1, IMAGING_ARENA_BLOCK_SIZE, 0, 0, NULL, 0, 0, 0, 0, 0, 0, {0} }, - { 1, IMAGING_ARENA_BLOCK_SIZE, 0, 0, NULL, 0, 0, 0, 0, 0, 1, {0} }, - { 1, IMAGING_ARENA_BLOCK_SIZE, 0, 0, NULL, 0, 0, 0, 0, 0, 2, {0} }, - { 1, IMAGING_ARENA_BLOCK_SIZE, 0, 0, NULL, 0, 0, 0, 0, 0, 3, {0} }, - { 1, IMAGING_ARENA_BLOCK_SIZE, 0, 0, NULL, 0, 0, 0, 0, 0, 4, {0} }, - { 1, IMAGING_ARENA_BLOCK_SIZE, 0, 0, NULL, 0, 0, 0, 0, 0, 5, {0} }, - { 1, IMAGING_ARENA_BLOCK_SIZE, 0, 0, NULL, 0, 0, 0, 0, 0, 6, {0} }, - { 1, IMAGING_ARENA_BLOCK_SIZE, 0, 0, NULL, 0, 0, 0, 0, 0, 7, {0} }, - { 0 } + {1, IMAGING_ARENA_BLOCK_SIZE, 0, 0, NULL, 0, 0, 0, 0, 0, 0, {0}}, + {1, IMAGING_ARENA_BLOCK_SIZE, 0, 0, NULL, 0, 0, 0, 0, 0, 1, {0}}, + {1, IMAGING_ARENA_BLOCK_SIZE, 0, 0, NULL, 0, 0, 0, 0, 0, 2, {0}}, + {1, IMAGING_ARENA_BLOCK_SIZE, 0, 0, NULL, 0, 0, 0, 0, 0, 3, {0}}, + {1, IMAGING_ARENA_BLOCK_SIZE, 0, 0, NULL, 0, 0, 0, 0, 0, 4, {0}}, + {1, IMAGING_ARENA_BLOCK_SIZE, 0, 0, NULL, 0, 0, 0, 0, 0, 5, {0}}, + {1, IMAGING_ARENA_BLOCK_SIZE, 0, 0, NULL, 0, 0, 0, 0, 0, 6, {0}}, + {1, IMAGING_ARENA_BLOCK_SIZE, 0, 0, NULL, 0, 0, 0, 0, 0, 7, {0}}, + {0} }; /* Get a pointer to the correct arena for this context. In this case where we * are using a round-robin approach to the statically allocated arenas, we will * return the arena that is assigned to the thread on first use. 
*/ -ImagingMemoryArena ImagingGetArena(void) { +ImagingMemoryArena +ImagingGetArena(void) { if (ImagingArenaThreadIndex == UINT64_MAX) { - ImagingArenaThreadIndex = _Py_atomic_add_uint64(&ImagingArenaIndex, 1) % IMAGING_ARENAS_COUNT; + ImagingArenaThreadIndex = + _Py_atomic_add_uint64(&ImagingArenaIndex, 1) % IMAGING_ARENAS_COUNT; } return &ImagingArenas[ImagingArenaThreadIndex]; } @@ -299,7 +301,8 @@ ImagingMemoryArena ImagingGetArena(void) { /* Return the arena associated with the given image. In this case the index of * the arena is stored on the image itself. */ -ImagingMemoryArena ImagingGetArenaFromImaging(Imaging im) { +ImagingMemoryArena +ImagingGetArenaFromImaging(Imaging im) { int arenaindex = im->arenaindex; assert(arenaindex >= 0 && arenaindex < IMAGING_ARENAS_COUNT); return &ImagingArenas[arenaindex]; @@ -309,7 +312,8 @@ ImagingMemoryArena ImagingGetArenaFromImaging(Imaging im) { * is necessary in order to return the blocks to the correct arena when the * image is destroyed. */ -static void ImagingSetArenaOnImaging(Imaging im, ImagingMemoryArena arena) { +static void +ImagingSetArenaOnImaging(Imaging im, ImagingMemoryArena arena) { im->arenaindex = arena->index; } #else @@ -340,7 +344,8 @@ struct ImagingMemoryArena ImagingDefaultArena = { * either have the GIL or we do not have TLS, we will return only the default * arena. 
*/ -ImagingMemoryArena ImagingGetArena(void) { +ImagingMemoryArena +ImagingGetArena(void) { return &ImagingDefaultArena; } @@ -589,9 +594,7 @@ ImagingNewInternal(const char *mode, int xsize, int ysize, int dirty) { ImagingSetArenaOnImaging(im, arena); MUTEX_LOCK(&arena->mutex); - Imaging tmp = ImagingAllocateArray( - im, arena, dirty, arena->block_size - ); + Imaging tmp = ImagingAllocateArray(im, arena, dirty, arena->block_size); MUTEX_UNLOCK(&arena->mutex); if (tmp) { return im; From e2a96a5c2c39f29b01bb27b598a861806db459ec Mon Sep 17 00:00:00 2001 From: Andrew Murray Date: Sat, 25 Jan 2025 18:23:34 +1100 Subject: [PATCH 3/3] Only import distutils when type checking --- setup.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/setup.py b/setup.py index 55e71959a2a..ba510d57c2b 100644 --- a/setup.py +++ b/setup.py @@ -8,7 +8,6 @@ # ------------------------------ from __future__ import annotations -import distutils.ccompiler import os import re import shutil @@ -18,12 +17,15 @@ import tempfile import warnings from collections.abc import Iterator -from typing import Any +from typing import TYPE_CHECKING, Any from setuptools import Extension, setup from setuptools.command.build_ext import build_ext from setuptools.errors import CompileError +if TYPE_CHECKING: + import distutils.ccompiler + def get_version() -> str: version_file = "src/PIL/_version.py"