diff --git a/Doc/library/array.rst b/Doc/library/array.rst index 1f04f697c7507f..5592bd7089ba49 100644 --- a/Doc/library/array.rst +++ b/Doc/library/array.rst @@ -9,7 +9,7 @@ -------------- This module defines an object type which can compactly represent an array of -basic values: characters, integers, floating-point numbers. Arrays are sequence +basic values: characters, integers, floating-point numbers. Arrays are mutable :term:`sequence` types and behave very much like lists, except that the type of objects stored in them is constrained. The type is specified at object creation time by using a :dfn:`type code`, which is a single character. The following type codes are @@ -93,7 +93,7 @@ The module defines the following type: otherwise, the initializer's iterator is passed to the :meth:`extend` method to add initial items to the array. - Array objects support the ordinary sequence operations of indexing, slicing, + Array objects support the ordinary :ref:`mutable ` :term:`sequence` operations of indexing, slicing, concatenation, and multiplication. When using slice assignment, the assigned value must be an array object with the same type code; in all other cases, :exc:`TypeError` is raised. Array objects also implement the buffer interface, diff --git a/Doc/library/stdtypes.rst b/Doc/library/stdtypes.rst index 6118d7c556a617..d03001f1cade05 100644 --- a/Doc/library/stdtypes.rst +++ b/Doc/library/stdtypes.rst @@ -1093,11 +1093,14 @@ Notes: still ``0``. (4) - The slice of *s* from *i* to *j* is defined as the sequence of items with index - *k* such that ``i <= k < j``. If *i* or *j* is greater than ``len(s)``, use - ``len(s)``. If *i* is omitted or ``None``, use ``0``. If *j* is omitted or - ``None``, use ``len(s)``. If *i* is greater than or equal to *j*, the slice is - empty. + The slice of *s* from *i* to *j* is defined as the sequence of items with + index *k* such that ``i <= k < j``. + + * If *i* is omitted or ``None``, use ``0``. + * If *j* is omitted or ``None``, use ``len(s)``. + * If *i* or *j* is less than ``-len(s)``, use ``0``. + * If *i* or *j* is greater than ``len(s)``, use ``len(s)``. + * If *i* is greater than or equal to *j*, the slice is empty. (5) The slice of *s* from *i* to *j* with step *k* is defined as the sequence of diff --git a/Doc/library/typing.rst b/Doc/library/typing.rst index 4e9946fd4567cd..73236413cbb80f 100644 --- a/Doc/library/typing.rst +++ b/Doc/library/typing.rst @@ -2369,7 +2369,7 @@ These functions and classes should not be used directly as annotations. Their intended purpose is to be building blocks for creating and declaring types. -.. class:: NamedTuple +.. function:: NamedTuple Typed version of :func:`collections.namedtuple`. @@ -2442,6 +2442,10 @@ types. Removed the ``_field_types`` attribute in favor of the more standard ``__annotations__`` attribute which has the same information. + .. versionchanged:: 3.9 + ``NamedTuple`` is now a function rather than a class. + It can still be used as a class base, as described above. + .. versionchanged:: 3.11 Added support for generic namedtuples. @@ -2585,10 +2589,10 @@ types. for more details. -.. class:: TypedDict(dict) +.. function:: TypedDict Special construct to add type hints to a dictionary. - At runtime it is a plain :class:`dict`. + At runtime ":class:`!TypedDict` instances" are simply :class:`dicts `. ``TypedDict`` declares a dictionary type that expects all of its instances to have a certain set of keys, where each key is @@ -2811,6 +2815,10 @@ types. .. versionadded:: 3.8 + .. versionchanged:: 3.9 + ``TypedDict`` is now a function rather than a class. + It can still be used as a class base, as described above. + .. versionchanged:: 3.11 Added support for marking individual keys as :data:`Required` or :data:`NotRequired`. See :pep:`655`. diff --git a/Lib/test/test_pickle.py b/Lib/test/test_pickle.py index 22c70327fb061d..48375cf459ea0b 100644 --- a/Lib/test/test_pickle.py +++ b/Lib/test/test_pickle.py @@ -419,6 +419,46 @@ def test_issue18339(self): unpickler.memo = {-1: None} unpickler.memo = {1: None} + def test_concurrent_pickler_dump(self): + f = io.BytesIO() + pickler = self.pickler_class(f) + class X: + def __reduce__(slf): + self.assertRaises(RuntimeError, pickler.dump, 42) + return list, () + pickler.dump(X()) # should not crash + self.assertEqual(pickle.loads(f.getvalue()), []) + + def test_concurrent_pickler_dump_and_init(self): + f = io.BytesIO() + pickler = self.pickler_class(f) + class X: + def __reduce__(slf): + self.assertRaises(RuntimeError, pickler.__init__, f) + return list, () + pickler.dump([X()]) # should not fail + self.assertEqual(pickle.loads(f.getvalue()), [[]]) + + def test_concurrent_unpickler_load(self): + global reducer + def reducer(): + self.assertRaises(RuntimeError, unpickler.load) + return 42 + f = io.BytesIO(b'(c%b\nreducer\n(tRl.' % (__name__.encode(),)) + unpickler = self.unpickler_class(f) + unpickled = unpickler.load() # should not fail + self.assertEqual(unpickled, [42]) + + def test_concurrent_unpickler_load_and_init(self): + global reducer + def reducer(): + self.assertRaises(RuntimeError, unpickler.__init__, f) + return 42 + f = io.BytesIO(b'(c%b\nreducer\n(tRl.' % (__name__.encode(),)) + unpickler = self.unpickler_class(f) + unpickled = unpickler.load() # should not crash + self.assertEqual(unpickled, [42]) + class CDispatchTableTests(AbstractDispatchTableTests, unittest.TestCase): pickler_class = pickle.Pickler def get_dispatch_table(self): @@ -467,7 +507,7 @@ class SizeofTests(unittest.TestCase): check_sizeof = support.check_sizeof def test_pickler(self): - basesize = support.calcobjsize('7P2n3i2n3i2P') + basesize = support.calcobjsize('7P2n3i2n4i2P') p = _pickle.Pickler(io.BytesIO()) self.assertEqual(object.__sizeof__(p), basesize) MT_size = struct.calcsize('3nP0n') @@ -484,7 +524,7 @@ def test_pickler(self): 0) # Write buffer is cleared after every dump(). def test_unpickler(self): - basesize = support.calcobjsize('2P2n3P 2P2n2i5P 2P3n8P2n2i') + basesize = support.calcobjsize('2P2n3P 2P2n2i5P 2P3n8P2n3i') unpickler = _pickle.Unpickler P = struct.calcsize('P') # Size of memo table entry. n = struct.calcsize('n') # Size of mark table entry. diff --git a/Lib/test/test_struct.py b/Lib/test/test_struct.py index bbfe19a4e0bab7..88662fec60fe4a 100644 --- a/Lib/test/test_struct.py +++ b/Lib/test/test_struct.py @@ -816,6 +816,18 @@ def test_endian_table_init_subinterpreters(self): results = executor.map(exec, [code] * 5) self.assertListEqual(list(results), [None] * 5) + def test_operations_on_half_initialized_Struct(self): + S = struct.Struct.__new__(struct.Struct) + + spam = array.array('b', b' ') + self.assertRaises(RuntimeError, S.iter_unpack, spam) + self.assertRaises(RuntimeError, S.pack, 1) + self.assertRaises(RuntimeError, S.pack_into, spam, 1) + self.assertRaises(RuntimeError, S.unpack, spam) + self.assertRaises(RuntimeError, S.unpack_from, spam) + self.assertRaises(RuntimeError, getattr, S, 'format') + self.assertEqual(S.size, -1) + class UnpackIteratorTest(unittest.TestCase): """ diff --git a/Misc/NEWS.d/next/Library/2026-01-10-10-04-08.gh-issue-78724.xkXfxX.rst b/Misc/NEWS.d/next/Library/2026-01-10-10-04-08.gh-issue-78724.xkXfxX.rst new file mode 100644 index 00000000000000..8a4bec4e1653d1 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2026-01-10-10-04-08.gh-issue-78724.xkXfxX.rst @@ -0,0 +1,3 @@ +Raise :exc:`RuntimeError`'s when user attempts to call methods on +half-initialized :class:`~struct.Struct` objects, For example, created by +``Struct.__new__(Struct)``. Patch by Sergey B Kirpichev. diff --git a/Misc/NEWS.d/next/Library/2026-01-10-16-42-47.gh-issue-143638.du5G7d.rst b/Misc/NEWS.d/next/Library/2026-01-10-16-42-47.gh-issue-143638.du5G7d.rst new file mode 100644 index 00000000000000..fd71db9c8e0723 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2026-01-10-16-42-47.gh-issue-143638.du5G7d.rst @@ -0,0 +1,4 @@ +Forbid reentrant calls of the :class:`pickle.Pickler` and +:class:`pickle.Unpickler` methods for the C implementation. Previously, this +could cause crash or data corruption, now concurrent calls of methods of the +same object raise :exc:`RuntimeError`. diff --git a/Modules/_pickle.c b/Modules/_pickle.c index d4a610e7909b5e..063547c9a4d020 100644 --- a/Modules/_pickle.c +++ b/Modules/_pickle.c @@ -641,6 +641,7 @@ typedef struct PicklerObject { int fast_nesting; int fix_imports; /* Indicate whether Pickler should fix the name of globals for Python 2.x. */ + int running; /* True when a method of Pickler is executing. */ PyObject *fast_memo; PyObject *buffer_callback; /* Callback for out-of-band buffers, or NULL */ } PicklerObject; @@ -685,6 +686,8 @@ typedef struct UnpicklerObject { int proto; /* Protocol of the pickle loaded. */ int fix_imports; /* Indicate whether Unpickler should fix the name of globals pickled by Python 2.x. */ + int running; /* True when a method of Unpickler is executing. */ + } UnpicklerObject; typedef struct { @@ -702,6 +705,32 @@ typedef struct { #define PicklerMemoProxyObject_CAST(op) ((PicklerMemoProxyObject *)(op)) #define UnpicklerMemoProxyObject_CAST(op) ((UnpicklerMemoProxyObject *)(op)) +#define BEGIN_USING_PICKLER(SELF, RET) do { \ + if ((SELF)->running) { \ + PyErr_SetString(PyExc_RuntimeError, \ + "Pickler object is already used"); \ + return (RET); \ + } \ + (SELF)->running = 1; \ + } while (0) + +#define END_USING_PICKLER(SELF) do { \ + (SELF)->running = 0; \ + } while (0) + +#define BEGIN_USING_UNPICKLER(SELF, RET) do { \ + if ((SELF)->running) { \ + PyErr_SetString(PyExc_RuntimeError, \ + "Unpickler object is already used"); \ + return (RET); \ + } \ + (SELF)->running = 1; \ + } while (0) + +#define END_USING_UNPICKLER(SELF) do { \ + (SELF)->running = 0; \ + } while (0) + /* Forward declarations */ static int save(PickleState *state, PicklerObject *, PyObject *, int); static int save_reduce(PickleState *, PicklerObject *, PyObject *, PyObject *); @@ -1131,6 +1160,7 @@ _Pickler_New(PickleState *st) self->fast = 0; self->fast_nesting = 0; self->fix_imports = 0; + self->running = 0; self->fast_memo = NULL; self->buffer_callback = NULL; @@ -1708,6 +1738,7 @@ _Unpickler_New(PyObject *module) self->marks_size = 0; self->proto = 0; self->fix_imports = 0; + self->running = 0; PyObject_GC_Track(self); return self; @@ -4764,17 +4795,23 @@ _pickle_Pickler_dump_impl(PicklerObject *self, PyTypeObject *cls, Py_TYPE(self)->tp_name); return NULL; } + BEGIN_USING_PICKLER(self, NULL); - if (_Pickler_ClearBuffer(self) < 0) - return NULL; - - if (dump(st, self, obj) < 0) - return NULL; - - if (_Pickler_FlushToFile(self) < 0) - return NULL; - + if (_Pickler_ClearBuffer(self) < 0) { + goto error; + } + if (dump(st, self, obj) < 0) { + goto error; + } + if (_Pickler_FlushToFile(self) < 0) { + goto error; + } + END_USING_PICKLER(self); Py_RETURN_NONE; + +error: + END_USING_PICKLER(self); + return NULL; } /*[clinic input] @@ -4915,47 +4952,54 @@ _pickle_Pickler___init___impl(PicklerObject *self, PyObject *file, PyObject *buffer_callback) /*[clinic end generated code: output=0abedc50590d259b input=cddc50f66b770002]*/ { + BEGIN_USING_PICKLER(self, -1); /* In case of multiple __init__() calls, clear previous content. */ if (self->write != NULL) (void)Pickler_clear((PyObject *)self); - if (_Pickler_SetProtocol(self, protocol, fix_imports) < 0) - return -1; - - if (_Pickler_SetOutputStream(self, file) < 0) - return -1; - - if (_Pickler_SetBufferCallback(self, buffer_callback) < 0) - return -1; - + if (_Pickler_SetProtocol(self, protocol, fix_imports) < 0) { + goto error; + } + if (_Pickler_SetOutputStream(self, file) < 0) { + goto error; + } + if (_Pickler_SetBufferCallback(self, buffer_callback) < 0) { + goto error; + } /* memo and output_buffer may have already been created in _Pickler_New */ if (self->memo == NULL) { self->memo = PyMemoTable_New(); - if (self->memo == NULL) - return -1; + if (self->memo == NULL) { + goto error; + } } self->output_len = 0; if (self->output_buffer == NULL) { self->max_output_len = WRITE_BUF_SIZE; self->output_buffer = PyBytes_FromStringAndSize(NULL, self->max_output_len); - if (self->output_buffer == NULL) - return -1; + if (self->output_buffer == NULL) { + goto error; + } } self->fast = 0; self->fast_nesting = 0; self->fast_memo = NULL; - if (self->dispatch_table != NULL) { - return 0; - } - if (PyObject_GetOptionalAttr((PyObject *)self, &_Py_ID(dispatch_table), - &self->dispatch_table) < 0) { - return -1; + if (self->dispatch_table == NULL) { + if (PyObject_GetOptionalAttr((PyObject *)self, &_Py_ID(dispatch_table), + &self->dispatch_table) < 0) { + goto error; + } } + END_USING_PICKLER(self); return 0; + +error: + END_USING_PICKLER(self); + return -1; } @@ -7157,22 +7201,22 @@ static PyObject * _pickle_Unpickler_load_impl(UnpicklerObject *self, PyTypeObject *cls) /*[clinic end generated code: output=cc88168f608e3007 input=f5d2f87e61d5f07f]*/ { - UnpicklerObject *unpickler = (UnpicklerObject*)self; - PickleState *st = _Pickle_GetStateByClass(cls); /* Check whether the Unpickler was initialized correctly. This prevents segfaulting if a subclass overridden __init__ with a function that does not call Unpickler.__init__(). Here, we simply ensure that self->read is not NULL. */ - if (unpickler->read == NULL) { + if (self->read == NULL) { PyErr_Format(st->UnpicklingError, "Unpickler.__init__() was not called by %s.__init__()", - Py_TYPE(unpickler)->tp_name); + Py_TYPE(self)->tp_name); return NULL; } - - return load(st, unpickler); + BEGIN_USING_UNPICKLER(self, NULL); + PyObject *res = load(st, self); + END_USING_UNPICKLER(self); + return res; } /* The name of find_class() is misleading. In newer pickle protocols, this @@ -7436,35 +7480,41 @@ _pickle_Unpickler___init___impl(UnpicklerObject *self, PyObject *file, const char *errors, PyObject *buffers) /*[clinic end generated code: output=09f0192649ea3f85 input=ca4c1faea9553121]*/ { + BEGIN_USING_UNPICKLER(self, -1); /* In case of multiple __init__() calls, clear previous content. */ if (self->read != NULL) (void)Unpickler_clear((PyObject *)self); - if (_Unpickler_SetInputStream(self, file) < 0) - return -1; - - if (_Unpickler_SetInputEncoding(self, encoding, errors) < 0) - return -1; - - if (_Unpickler_SetBuffers(self, buffers) < 0) - return -1; - + if (_Unpickler_SetInputStream(self, file) < 0) { + goto error; + } + if (_Unpickler_SetInputEncoding(self, encoding, errors) < 0) { + goto error; + } + if (_Unpickler_SetBuffers(self, buffers) < 0) { + goto error; + } self->fix_imports = fix_imports; PyTypeObject *tp = Py_TYPE(self); PickleState *state = _Pickle_FindStateByType(tp); self->stack = (Pdata *)Pdata_New(state); - if (self->stack == NULL) - return -1; - + if (self->stack == NULL) { + goto error; + } self->memo_size = 32; self->memo = _Unpickler_NewMemo(self->memo_size); - if (self->memo == NULL) - return -1; - + if (self->memo == NULL) { + goto error; + } self->proto = 0; + END_USING_UNPICKLER(self); return 0; + +error: + END_USING_UNPICKLER(self); + return -1; } diff --git a/Modules/_struct.c b/Modules/_struct.c index 2acb3df3a30395..a8e9021f0a303a 100644 --- a/Modules/_struct.c +++ b/Modules/_struct.c @@ -1698,8 +1698,6 @@ prepare_s(PyStructObject *self) return -1; } - self->s_size = size; - self->s_len = len; codes = PyMem_Malloc((ncodes + 1) * sizeof(formatcode)); if (codes == NULL) { PyErr_NoMemory(); @@ -1709,6 +1707,8 @@ prepare_s(PyStructObject *self) if (self->s_codes != NULL) PyMem_Free(self->s_codes); self->s_codes = codes; + self->s_size = size; + self->s_len = len; s = fmt; size = 0; @@ -1897,6 +1897,14 @@ s_unpack_internal(PyStructObject *soself, const char *startfrom, return NULL; } +#define ENSURE_STRUCT_IS_READY(self) \ + do { \ + if (!(self)->s_codes) { \ + PyErr_SetString(PyExc_RuntimeError, \ + "Struct object is not initialized"); \ + return NULL; \ + } \ + } while (0); /*[clinic input] Struct.unpack @@ -1917,7 +1925,7 @@ Struct_unpack_impl(PyStructObject *self, Py_buffer *buffer) /*[clinic end generated code: output=873a24faf02e848a input=3113f8e7038b2f6c]*/ { _structmodulestate *state = get_struct_state_structinst(self); - assert(self->s_codes != NULL); + ENSURE_STRUCT_IS_READY(self); if (buffer->len != self->s_size) { PyErr_Format(state->StructError, "unpack requires a buffer of %zd bytes", @@ -1949,7 +1957,7 @@ Struct_unpack_from_impl(PyStructObject *self, Py_buffer *buffer, /*[clinic end generated code: output=57fac875e0977316 input=cafd4851d473c894]*/ { _structmodulestate *state = get_struct_state_structinst(self); - assert(self->s_codes != NULL); + ENSURE_STRUCT_IS_READY(self); if (offset < 0) { if (offset + self->s_size > 0) { @@ -2101,8 +2109,7 @@ Struct_iter_unpack_impl(PyStructObject *self, PyObject *buffer) { _structmodulestate *state = get_struct_state_structinst(self); unpackiterobject *iter; - - assert(self->s_codes != NULL); + ENSURE_STRUCT_IS_READY(self); if (self->s_size == 0) { PyErr_Format(state->StructError, @@ -2242,8 +2249,8 @@ s_pack(PyObject *self, PyObject *const *args, Py_ssize_t nargs) /* Validate arguments. */ soself = PyStructObject_CAST(self); + ENSURE_STRUCT_IS_READY(soself); assert(PyStruct_Check(self, state)); - assert(soself->s_codes != NULL); if (nargs != soself->s_len) { PyErr_Format(state->StructError, @@ -2285,8 +2292,8 @@ s_pack_into(PyObject *self, PyObject *const *args, Py_ssize_t nargs) /* Validate arguments. +1 is for the first arg as buffer. */ soself = PyStructObject_CAST(self); + ENSURE_STRUCT_IS_READY(soself); assert(PyStruct_Check(self, state)); - assert(soself->s_codes != NULL); if (nargs != (soself->s_len + 2)) { if (nargs == 0) { @@ -2373,6 +2380,7 @@ static PyObject * s_get_format(PyObject *op, void *Py_UNUSED(closure)) { PyStructObject *self = PyStructObject_CAST(op); + ENSURE_STRUCT_IS_READY(self); return PyUnicode_FromStringAndSize(PyBytes_AS_STRING(self->s_format), PyBytes_GET_SIZE(self->s_format)); }