diff --git a/Include/internal/pycore_backoff.h b/Include/internal/pycore_backoff.h index 71066f1bd9f19b..7f60eb495080ae 100644 --- a/Include/internal/pycore_backoff.h +++ b/Include/internal/pycore_backoff.h @@ -22,33 +22,48 @@ extern "C" { Another use is for the Tier 2 optimizer to decide when to create a new Tier 2 trace (executor). Again, exponential backoff is used. - The 16-bit counter is structured as a 12-bit unsigned 'value' - and a 4-bit 'backoff' field. When resetting the counter, the + The 16-bit counter is structured as a 13-bit unsigned 'value' + and a 3-bit 'backoff' field. When resetting the counter, the backoff field is incremented (until it reaches a limit) and the - value is set to a bit mask representing the value 2**backoff - 1. - The maximum backoff is 12 (the number of bits in the value). + value is set to a precomputed prime number minus 1. + The new value and backoff for each backoff level are computed once + at compile time and stored in the value_and_backoff_next table. + The maximum backoff is 6, since 7 is reserved for UNREACHABLE_BACKOFF. There is an exceptional value which must not be updated, 0xFFFF. */ -#define BACKOFF_BITS 4 -#define MAX_BACKOFF 12 -#define UNREACHABLE_BACKOFF 15 - -static inline bool -is_unreachable_backoff_counter(_Py_BackoffCounter counter) -{ - return counter.value_and_backoff == UNREACHABLE_BACKOFF; -} +#define BACKOFF_BITS 3 +#define BACKOFF_MASK 7 +#define MAX_BACKOFF 6 +#define UNREACHABLE_BACKOFF 7 +#define MAX_VALUE 0x1FFF + +#define MAKE_VALUE_AND_BACKOFF(value, backoff) \ + ((value << BACKOFF_BITS) | backoff) + +// For a previous backoff b we use a value x such that +// x + 1 is close to 2**(2*b+1) and x + 1 is prime. +static const uint16_t value_and_backoff_next[] = { + MAKE_VALUE_AND_BACKOFF(1, 1), + MAKE_VALUE_AND_BACKOFF(6, 2), + MAKE_VALUE_AND_BACKOFF(30, 3), + MAKE_VALUE_AND_BACKOFF(126, 4), + MAKE_VALUE_AND_BACKOFF(508, 5), + MAKE_VALUE_AND_BACKOFF(2052, 6), + // We use the same backoff counter for all backoffs >= MAX_BACKOFF. + MAKE_VALUE_AND_BACKOFF(8190, 6), + MAKE_VALUE_AND_BACKOFF(8190, 6), +}; static inline _Py_BackoffCounter make_backoff_counter(uint16_t value, uint16_t backoff) { - assert(backoff <= 15); - assert(value <= 0xFFF); - _Py_BackoffCounter result; - result.value_and_backoff = (value << BACKOFF_BITS) | backoff; - return result; + assert(backoff <= UNREACHABLE_BACKOFF); + assert(value <= MAX_VALUE); + return ((_Py_BackoffCounter){ + .value_and_backoff = MAKE_VALUE_AND_BACKOFF(value, backoff) + }); } static inline _Py_BackoffCounter @@ -62,14 +77,11 @@ forge_backoff_counter(uint16_t counter) static inline _Py_BackoffCounter restart_backoff_counter(_Py_BackoffCounter counter) { - assert(!is_unreachable_backoff_counter(counter)); - int backoff = counter.value_and_backoff & 15; - if (backoff < MAX_BACKOFF) { - return make_backoff_counter((1 << (backoff + 1)) - 1, backoff + 1); - } - else { - return make_backoff_counter((1 << MAX_BACKOFF) - 1, MAX_BACKOFF); - } + uint16_t backoff = counter.value_and_backoff & BACKOFF_MASK; + assert(backoff <= MAX_BACKOFF); + return ((_Py_BackoffCounter){ + .value_and_backoff = value_and_backoff_next[backoff] + }); } static inline _Py_BackoffCounter @@ -113,7 +125,7 @@ trigger_backoff_counter(void) // as we always end up tracing the loop iteration's // exhaustion iteration. Which aborts our current tracer.
#define JUMP_BACKWARD_INITIAL_VALUE 4000 -#define JUMP_BACKWARD_INITIAL_BACKOFF 12 +#define JUMP_BACKWARD_INITIAL_BACKOFF 6 static inline _Py_BackoffCounter initial_jump_backoff_counter(void) { @@ -126,7 +138,7 @@ initial_jump_backoff_counter(void) * otherwise when a side exit warms up we may construct * a new trace before the Tier 1 code has properly re-specialized. */ #define SIDE_EXIT_INITIAL_VALUE 4000 -#define SIDE_EXIT_INITIAL_BACKOFF 12 +#define SIDE_EXIT_INITIAL_BACKOFF 6 static inline _Py_BackoffCounter initial_temperature_backoff_counter(void) diff --git a/Include/internal/pycore_critical_section.h b/Include/internal/pycore_critical_section.h index 2601de40737e85..60b6fc4a72e88f 100644 --- a/Include/internal/pycore_critical_section.h +++ b/Include/internal/pycore_critical_section.h @@ -32,7 +32,7 @@ extern "C" { const bool _should_lock_cs = PyList_CheckExact(_orig_seq); \ PyCriticalSection _cs; \ if (_should_lock_cs) { \ - _PyCriticalSection_Begin(&_cs, _orig_seq); \ + PyCriticalSection_Begin(&_cs, _orig_seq); \ } # define Py_END_CRITICAL_SECTION_SEQUENCE_FAST() \ @@ -77,10 +77,10 @@ _PyCriticalSection_Resume(PyThreadState *tstate); // (private) slow path for locking the mutex PyAPI_FUNC(void) -_PyCriticalSection_BeginSlow(PyCriticalSection *c, PyMutex *m); +_PyCriticalSection_BeginSlow(PyThreadState *tstate, PyCriticalSection *c, PyMutex *m); PyAPI_FUNC(void) -_PyCriticalSection2_BeginSlow(PyCriticalSection2 *c, PyMutex *m1, PyMutex *m2, +_PyCriticalSection2_BeginSlow(PyThreadState *tstate, PyCriticalSection2 *c, PyMutex *m1, PyMutex *m2, int is_m1_locked); PyAPI_FUNC(void) @@ -95,34 +95,30 @@ _PyCriticalSection_IsActive(uintptr_t tag) } static inline void -_PyCriticalSection_BeginMutex(PyCriticalSection *c, PyMutex *m) +_PyCriticalSection_BeginMutex(PyThreadState *tstate, PyCriticalSection *c, PyMutex *m) { if (PyMutex_LockFast(m)) { - PyThreadState *tstate = _PyThreadState_GET(); c->_cs_mutex = m; c->_cs_prev = tstate->critical_section; tstate->critical_section = (uintptr_t)c; } else { - _PyCriticalSection_BeginSlow(c, m); + _PyCriticalSection_BeginSlow(tstate, c, m); } } -#define PyCriticalSection_BeginMutex _PyCriticalSection_BeginMutex static inline void -_PyCriticalSection_Begin(PyCriticalSection *c, PyObject *op) +_PyCriticalSection_Begin(PyThreadState *tstate, PyCriticalSection *c, PyObject *op) { - _PyCriticalSection_BeginMutex(c, &op->ob_mutex); + _PyCriticalSection_BeginMutex(tstate, c, &op->ob_mutex); } -#define PyCriticalSection_Begin _PyCriticalSection_Begin // Removes the top-most critical section from the thread's stack of critical // sections. If the new top-most critical section is inactive, then it is // resumed. 
static inline void -_PyCriticalSection_Pop(PyCriticalSection *c) +_PyCriticalSection_Pop(PyThreadState *tstate, PyCriticalSection *c) { - PyThreadState *tstate = _PyThreadState_GET(); uintptr_t prev = c->_cs_prev; tstate->critical_section = prev; @@ -132,7 +128,7 @@ _PyCriticalSection_Pop(PyCriticalSection *c) } static inline void -_PyCriticalSection_End(PyCriticalSection *c) +_PyCriticalSection_End(PyThreadState *tstate, PyCriticalSection *c) { // If the mutex is NULL, we used the fast path in // _PyCriticalSection_BeginSlow for locks already held in the top-most @@ -141,18 +137,17 @@ _PyCriticalSection_End(PyCriticalSection *c) return; } PyMutex_Unlock(c->_cs_mutex); - _PyCriticalSection_Pop(c); + _PyCriticalSection_Pop(tstate, c); } -#define PyCriticalSection_End _PyCriticalSection_End static inline void -_PyCriticalSection2_BeginMutex(PyCriticalSection2 *c, PyMutex *m1, PyMutex *m2) +_PyCriticalSection2_BeginMutex(PyThreadState *tstate, PyCriticalSection2 *c, PyMutex *m1, PyMutex *m2) { if (m1 == m2) { // If the two mutex arguments are the same, treat this as a critical // section with a single mutex. c->_cs_mutex2 = NULL; - _PyCriticalSection_BeginMutex(&c->_cs_base, m1); + _PyCriticalSection_BeginMutex(tstate, &c->_cs_base, m1); return; } @@ -167,7 +162,6 @@ _PyCriticalSection2_BeginMutex(PyCriticalSection2 *c, PyMutex *m1, PyMutex *m2) if (PyMutex_LockFast(m1)) { if (PyMutex_LockFast(m2)) { - PyThreadState *tstate = _PyThreadState_GET(); c->_cs_base._cs_mutex = m1; c->_cs_mutex2 = m2; c->_cs_base._cs_prev = tstate->critical_section; @@ -176,24 +170,22 @@ _PyCriticalSection2_BeginMutex(PyCriticalSection2 *c, PyMutex *m1, PyMutex *m2) tstate->critical_section = p; } else { - _PyCriticalSection2_BeginSlow(c, m1, m2, 1); + _PyCriticalSection2_BeginSlow(tstate, c, m1, m2, 1); } } else { - _PyCriticalSection2_BeginSlow(c, m1, m2, 0); + _PyCriticalSection2_BeginSlow(tstate, c, m1, m2, 0); } } -#define PyCriticalSection2_BeginMutex _PyCriticalSection2_BeginMutex static inline void -_PyCriticalSection2_Begin(PyCriticalSection2 *c, PyObject *a, PyObject *b) +_PyCriticalSection2_Begin(PyThreadState *tstate, PyCriticalSection2 *c, PyObject *a, PyObject *b) { - _PyCriticalSection2_BeginMutex(c, &a->ob_mutex, &b->ob_mutex); + _PyCriticalSection2_BeginMutex(tstate, c, &a->ob_mutex, &b->ob_mutex); } -#define PyCriticalSection2_Begin _PyCriticalSection2_Begin static inline void -_PyCriticalSection2_End(PyCriticalSection2 *c) +_PyCriticalSection2_End(PyThreadState *tstate, PyCriticalSection2 *c) { // if mutex1 is NULL, we used the fast path in // _PyCriticalSection_BeginSlow for mutexes that are already held, @@ -207,9 +199,8 @@ _PyCriticalSection2_End(PyCriticalSection2 *c) PyMutex_Unlock(c->_cs_mutex2); } PyMutex_Unlock(c->_cs_base._cs_mutex); - _PyCriticalSection_Pop(&c->_cs_base); + _PyCriticalSection_Pop(tstate, &c->_cs_base); } -#define PyCriticalSection2_End _PyCriticalSection2_End static inline void _PyCriticalSection_AssertHeld(PyMutex *mutex) @@ -251,6 +242,45 @@ _PyCriticalSection_AssertHeldObj(PyObject *op) #endif } + +#undef Py_BEGIN_CRITICAL_SECTION +# define Py_BEGIN_CRITICAL_SECTION(op) \ + { \ + PyCriticalSection _py_cs; \ + PyThreadState *_cs_tstate = _PyThreadState_GET(); \ + _PyCriticalSection_Begin(_cs_tstate, &_py_cs, _PyObject_CAST(op)) + +#undef Py_BEGIN_CRITICAL_SECTION_MUTEX +# define Py_BEGIN_CRITICAL_SECTION_MUTEX(mutex) \ + { \ + PyCriticalSection _py_cs; \ + PyThreadState *_cs_tstate = _PyThreadState_GET(); \ + _PyCriticalSection_BeginMutex(_cs_tstate, &_py_cs, mutex) + 
+#undef Py_END_CRITICAL_SECTION +# define Py_END_CRITICAL_SECTION() \ + _PyCriticalSection_End(_cs_tstate, &_py_cs); \ + } + +#undef Py_BEGIN_CRITICAL_SECTION2 +# define Py_BEGIN_CRITICAL_SECTION2(a, b) \ + { \ + PyCriticalSection2 _py_cs2; \ + PyThreadState *_cs_tstate = _PyThreadState_GET(); \ + _PyCriticalSection2_Begin(_cs_tstate, &_py_cs2, _PyObject_CAST(a), _PyObject_CAST(b)) + +#undef Py_BEGIN_CRITICAL_SECTION2_MUTEX +# define Py_BEGIN_CRITICAL_SECTION2_MUTEX(m1, m2) \ + { \ + PyCriticalSection2 _py_cs2; \ + PyThreadState *_cs_tstate = _PyThreadState_GET(); \ + _PyCriticalSection2_BeginMutex(_cs_tstate, &_py_cs2, m1, m2) + +#undef Py_END_CRITICAL_SECTION2 +# define Py_END_CRITICAL_SECTION2() \ + _PyCriticalSection2_End(_cs_tstate, &_py_cs2); \ + } + #endif /* Py_GIL_DISABLED */ #ifdef __cplusplus diff --git a/Lib/data.bin b/Lib/data.bin deleted file mode 100644 index 1d1fab72c4aaa4..00000000000000 Binary files a/Lib/data.bin and /dev/null differ diff --git a/Lib/test/test_free_threading/test_csv.py b/Lib/test/test_free_threading/test_csv.py new file mode 100644 index 00000000000000..beb4510a1281b8 --- /dev/null +++ b/Lib/test/test_free_threading/test_csv.py @@ -0,0 +1,50 @@ +import csv +import io +import unittest + +from test.support import threading_helper +from test.support.threading_helper import run_concurrently + + +NTHREADS = 10 + + +@threading_helper.requires_working_threading() +class TestCSV(unittest.TestCase): + def test_concurrent_reader_next(self): + input_rows = [f"{i},{i},{i}" for i in range(50)] + input_stream = io.StringIO("\n".join(input_rows)) + reader = csv.reader(input_stream) + output_rows = [] + + def read_row(): + for row in reader: + self.assertEqual(len(row), 3) + output_rows.append(",".join(row)) + + run_concurrently(worker_func=read_row, nthreads=NTHREADS) + self.assertSetEqual(set(input_rows), set(output_rows)) + + def test_concurrent_writer_writerow(self): + output_stream = io.StringIO() + writer = csv.writer(output_stream) + row_per_thread = 10 + expected_rows = [] + + def write_row(): + for i in range(row_per_thread): + writer.writerow([i, i, i]) + expected_rows.append(f"{i},{i},{i}") + + run_concurrently(worker_func=write_row, nthreads=NTHREADS) + + # Rewind to the start of the stream and parse the rows + output_stream.seek(0) + output_rows = [line.strip() for line in output_stream.readlines()] + + self.assertEqual(len(output_rows), NTHREADS * row_per_thread) + self.assertListEqual(sorted(output_rows), sorted(expected_rows)) + + +if __name__ == "__main__": + unittest.main() diff --git a/Lib/test/test_opcache.py b/Lib/test/test_opcache.py index c7eea75117de8c..4113b79ef5c80b 100644 --- a/Lib/test/test_opcache.py +++ b/Lib/test/test_opcache.py @@ -590,7 +590,7 @@ def make_deferred_ref_count_obj(): class TestRacesDoNotCrash(TestBase): # Careful with these. Bigger numbers have a higher chance of catching bugs, # but you can also burn through a *ton* of type/dict/function versions: - ITEMS = 1000 + ITEMS = 1400 LOOPS = 4 WRITERS = 2 diff --git a/Lib/test/test_perf_profiler.py b/Lib/test/test_perf_profiler.py index e6852c93e69830..66348619073909 100644 --- a/Lib/test/test_perf_profiler.py +++ b/Lib/test/test_perf_profiler.py @@ -160,6 +160,16 @@ def baz(): self.assertIn(f"py::bar_fork:{script}", child_perf_file_contents) self.assertIn(f"py::baz_fork:{script}", child_perf_file_contents) + # The parent's map should not contain the child's symbols. 
+ self.assertNotIn(f"py::foo_fork:{script}", perf_file_contents) + self.assertNotIn(f"py::bar_fork:{script}", perf_file_contents) + self.assertNotIn(f"py::baz_fork:{script}", perf_file_contents) + + # The child's map should not contain the parent's symbols. + self.assertNotIn(f"py::foo:{script}", child_perf_file_contents) + self.assertNotIn(f"py::bar:{script}", child_perf_file_contents) + self.assertNotIn(f"py::baz:{script}", child_perf_file_contents) + @unittest.skipIf(support.check_bolt_optimized(), "fails on BOLT instrumented binaries") def test_sys_api(self): for define_eval_hook in (False, True): diff --git a/Makefile.pre.in b/Makefile.pre.in index 13108b1baf976a..1c793b6db7852c 100644 --- a/Makefile.pre.in +++ b/Makefile.pre.in @@ -2573,6 +2573,7 @@ LIBSUBDIRS= asyncio \ profile \ profiling profiling/sampling profiling/tracing \ profiling/sampling/_assets \ + profiling/sampling/live_collector \ profiling/sampling/_vendor/d3/7.8.5 \ profiling/sampling/_vendor/d3-flame-graph/4.1.3 \ pydoc_data \ diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2025-11-10-00-14-20.gh-issue-116738.IxliC_.rst b/Misc/NEWS.d/next/Core_and_Builtins/2025-11-10-00-14-20.gh-issue-116738.IxliC_.rst new file mode 100644 index 00000000000000..8b08bccafd73eb --- /dev/null +++ b/Misc/NEWS.d/next/Core_and_Builtins/2025-11-10-00-14-20.gh-issue-116738.IxliC_.rst @@ -0,0 +1,2 @@ +Make the :mod:`csv` module thread-safe on the :term:`free threaded <free threading>` +build. diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2025-11-15-14-04-35.gh-issue-141589.VfdMDD.rst b/Misc/NEWS.d/next/Core_and_Builtins/2025-11-15-14-04-35.gh-issue-141589.VfdMDD.rst new file mode 100644 index 00000000000000..5eb0e0c8b89f7a --- /dev/null +++ b/Misc/NEWS.d/next/Core_and_Builtins/2025-11-15-14-04-35.gh-issue-141589.VfdMDD.rst @@ -0,0 +1,3 @@ +Change the backoff counter to use prime numbers instead of powers of 2. +Use only 3 bits for the ``backoff`` field and 13 bits for the ``value``. +This allows values up to 8191 to be supported. Patch by Mikhail Efimov.
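The restart sequence introduced in pycore_backoff.h is easiest to see in isolation. The standalone sketch below is not part of the patch: it copies BACKOFF_BITS, BACKOFF_MASK and the value_and_backoff_next table from the header hunk above, but replaces _Py_BackoffCounter with a bare uint16_t and starts from an arbitrary zero counter, purely to print how each restart advances the value through the prime-minus-one steps (1, 6, 30, 126, 508, 2052, then 8190 repeatedly).

```c
/* Standalone illustration only -- not the CPython header. */
#include <stdint.h>
#include <stdio.h>

#define BACKOFF_BITS 3
#define BACKOFF_MASK 7
#define MAKE_VALUE_AND_BACKOFF(value, backoff) \
    (((value) << BACKOFF_BITS) | (backoff))

static const uint16_t value_and_backoff_next[] = {
    MAKE_VALUE_AND_BACKOFF(1, 1),
    MAKE_VALUE_AND_BACKOFF(6, 2),
    MAKE_VALUE_AND_BACKOFF(30, 3),
    MAKE_VALUE_AND_BACKOFF(126, 4),
    MAKE_VALUE_AND_BACKOFF(508, 5),
    MAKE_VALUE_AND_BACKOFF(2052, 6),
    MAKE_VALUE_AND_BACKOFF(8190, 6),  /* all backoffs >= MAX_BACKOFF share this entry */
    MAKE_VALUE_AND_BACKOFF(8190, 6),
};

int main(void)
{
    uint16_t counter = MAKE_VALUE_AND_BACKOFF(0, 0);  /* arbitrary starting point */
    for (int restart = 0; restart < 10; restart++) {
        unsigned backoff = counter & BACKOFF_MASK;
        unsigned value = counter >> BACKOFF_BITS;
        printf("restart %d: value=%u backoff=%u\n", restart, value, backoff);
        /* Mirrors restart_backoff_counter(): one table lookup, no branches. */
        counter = value_and_backoff_next[backoff];
    }
    /* Prints values 0, 1, 6, 30, 126, 508, 2052, then 8190 forever; each
     * nonzero value is one less than a prime near the old 2**n thresholds. */
    return 0;
}
```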
diff --git a/Modules/_csv.c b/Modules/_csv.c index 87be7a8f1fb136..1f41976e95fdb1 100644 --- a/Modules/_csv.c +++ b/Modules/_csv.c @@ -918,7 +918,7 @@ parse_reset(ReaderObj *self) } static PyObject * -Reader_iternext(PyObject *op) +Reader_iternext_lock_held(PyObject *op) { ReaderObj *self = _ReaderObj_CAST(op); @@ -985,6 +985,16 @@ Reader_iternext(PyObject *op) return fields; } +static PyObject * +Reader_iternext(PyObject *op) +{ + PyObject *result; + Py_BEGIN_CRITICAL_SECTION(op); + result = Reader_iternext_lock_held(op); + Py_END_CRITICAL_SECTION(); + return result; +} + static void Reader_dealloc(PyObject *op) { @@ -1303,15 +1313,8 @@ join_append_lineterminator(WriterObj *self) return 1; } -PyDoc_STRVAR(csv_writerow_doc, -"writerow($self, row, /)\n" -"--\n\n" -"Construct and write a CSV record from an iterable of fields.\n" -"\n" -"Non-string elements will be converted to string."); - static PyObject * -csv_writerow(PyObject *op, PyObject *seq) +csv_writerow_lock_held(PyObject *op, PyObject *seq) { WriterObj *self = _WriterObj_CAST(op); DialectObj *dialect = self->dialect; @@ -1414,6 +1417,23 @@ csv_writerow(PyObject *op, PyObject *seq) return result; } +PyDoc_STRVAR(csv_writerow_doc, +"writerow($self, row, /)\n" +"--\n\n" +"Construct and write a CSV record from an iterable of fields.\n" +"\n" +"Non-string elements will be converted to string."); + +static PyObject * +csv_writerow(PyObject *op, PyObject *seq) +{ + PyObject *result; + Py_BEGIN_CRITICAL_SECTION(op); + result = csv_writerow_lock_held(op, seq); + Py_END_CRITICAL_SECTION(); + return result; +} + PyDoc_STRVAR(csv_writerows_doc, "writerows($self, rows, /)\n" "--\n\n" diff --git a/Python/critical_section.c b/Python/critical_section.c index 218b580e95176d..2c2152f5de4716 100644 --- a/Python/critical_section.c +++ b/Python/critical_section.c @@ -17,10 +17,9 @@ untag_critical_section(uintptr_t tag) #endif void -_PyCriticalSection_BeginSlow(PyCriticalSection *c, PyMutex *m) +_PyCriticalSection_BeginSlow(PyThreadState *tstate, PyCriticalSection *c, PyMutex *m) { #ifdef Py_GIL_DISABLED - PyThreadState *tstate = _PyThreadState_GET(); // As an optimisation for locking the same object recursively, skip // locking if the mutex is currently locked by the top-most critical // section. 
@@ -53,11 +52,10 @@ _PyCriticalSection_BeginSlow(PyCriticalSection *c, PyMutex *m) } void -_PyCriticalSection2_BeginSlow(PyCriticalSection2 *c, PyMutex *m1, PyMutex *m2, +_PyCriticalSection2_BeginSlow(PyThreadState *tstate, PyCriticalSection2 *c, PyMutex *m1, PyMutex *m2, int is_m1_locked) { #ifdef Py_GIL_DISABLED - PyThreadState *tstate = _PyThreadState_GET(); c->_cs_base._cs_mutex = NULL; c->_cs_mutex2 = NULL; c->_cs_base._cs_prev = tstate->critical_section; @@ -139,7 +137,7 @@ void PyCriticalSection_Begin(PyCriticalSection *c, PyObject *op) { #ifdef Py_GIL_DISABLED - _PyCriticalSection_Begin(c, op); + _PyCriticalSection_Begin(_PyThreadState_GET(), c, op); #endif } @@ -148,7 +146,7 @@ void PyCriticalSection_BeginMutex(PyCriticalSection *c, PyMutex *m) { #ifdef Py_GIL_DISABLED - _PyCriticalSection_BeginMutex(c, m); + _PyCriticalSection_BeginMutex(_PyThreadState_GET(), c, m); #endif } @@ -157,7 +155,7 @@ void PyCriticalSection_End(PyCriticalSection *c) { #ifdef Py_GIL_DISABLED - _PyCriticalSection_End(c); + _PyCriticalSection_End(_PyThreadState_GET(), c); #endif } @@ -166,7 +164,7 @@ void PyCriticalSection2_Begin(PyCriticalSection2 *c, PyObject *a, PyObject *b) { #ifdef Py_GIL_DISABLED - _PyCriticalSection2_Begin(c, a, b); + _PyCriticalSection2_Begin(_PyThreadState_GET(), c, a, b); #endif } @@ -175,7 +173,7 @@ void PyCriticalSection2_BeginMutex(PyCriticalSection2 *c, PyMutex *m1, PyMutex *m2) { #ifdef Py_GIL_DISABLED - _PyCriticalSection2_BeginMutex(c, m1, m2); + _PyCriticalSection2_BeginMutex(_PyThreadState_GET(), c, m1, m2); #endif } @@ -184,6 +182,6 @@ void PyCriticalSection2_End(PyCriticalSection2 *c) { #ifdef Py_GIL_DISABLED - _PyCriticalSection2_End(c); + _PyCriticalSection2_End(_PyThreadState_GET(), c); #endif }
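Taken together, the critical-section changes thread the PyThreadState through every Begin/End pair so _PyThreadState_GET() is evaluated once per section rather than once per helper, while the _csv.c hunks move the real work into *_lock_held helpers. The sketch below is illustration only, not part of the patch: MyObject_method and its _lock_held twin are made-up names, and it assumes a Py_BUILD_CORE translation unit that picks up the redefined internal macros (directly or via another pycore header), which is the convention the _csv.c hunks follow.

```c
/* Hypothetical example of the wrapper convention used in Modules/_csv.c. */
#include "Python.h"
#include "pycore_critical_section.h"   /* redefined Py_BEGIN/END_CRITICAL_SECTION */

static PyObject *
MyObject_method_lock_held(PyObject *op, PyObject *arg)
{
    /* Mutate op's state here; on the free-threaded build the object's
     * mutex is held, so concurrent callers are serialized. */
    Py_RETURN_NONE;
}

static PyObject *
MyObject_method(PyObject *op, PyObject *arg)
{
    PyObject *result;
    /* On the free-threaded build this now expands to roughly:
     *   { PyCriticalSection _py_cs;
     *     PyThreadState *_cs_tstate = _PyThreadState_GET();
     *     _PyCriticalSection_Begin(_cs_tstate, &_py_cs, _PyObject_CAST(op));
     * On the default (GIL) build the macros reduce to plain braces. */
    Py_BEGIN_CRITICAL_SECTION(op);
    result = MyObject_method_lock_held(op, arg);
    /* ...and this to: _PyCriticalSection_End(_cs_tstate, &_py_cs); } */
    Py_END_CRITICAL_SECTION();
    return result;
}
```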