diff --git a/Doc/library/http.cookies.rst b/Doc/library/http.cookies.rst index 88e978d7f5eafb..50b65459d2f699 100644 --- a/Doc/library/http.cookies.rst +++ b/Doc/library/http.cookies.rst @@ -294,9 +294,9 @@ The following example demonstrates how to use the :mod:`http.cookies` module. Set-Cookie: chips=ahoy Set-Cookie: vienna=finger >>> C = cookies.SimpleCookie() - >>> C.load('keebler="E=everybody; L=\\"Loves\\"; fudge=\\012;";') + >>> C.load('keebler="E=everybody; L=\\"Loves\\"; fudge=;";') >>> print(C) - Set-Cookie: keebler="E=everybody; L=\"Loves\"; fudge=\012;" + Set-Cookie: keebler="E=everybody; L=\"Loves\"; fudge=;" >>> C = cookies.SimpleCookie() >>> C["oreo"] = "doublestuff" >>> C["oreo"]["path"] = "/" diff --git a/Include/internal/pycore_interp_structs.h b/Include/internal/pycore_interp_structs.h index f11448b06696ad..723657e4cef10d 100644 --- a/Include/internal/pycore_interp_structs.h +++ b/Include/internal/pycore_interp_structs.h @@ -411,6 +411,7 @@ typedef struct _PyOptimizationConfig { // Optimization flags bool specialization_enabled; + bool uops_optimize_enabled; } _PyOptimizationConfig; struct diff --git a/Lib/http/cookies.py b/Lib/http/cookies.py index 74349bb63d66e2..917280037d4dbb 100644 --- a/Lib/http/cookies.py +++ b/Lib/http/cookies.py @@ -87,9 +87,9 @@ such trickeries do not confuse it. >>> C = cookies.SimpleCookie() - >>> C.load('keebler="E=everybody; L=\\"Loves\\"; fudge=\\012;";') + >>> C.load('keebler="E=everybody; L=\\"Loves\\"; fudge=;";') >>> print(C) - Set-Cookie: keebler="E=everybody; L=\"Loves\"; fudge=\012;" + Set-Cookie: keebler="E=everybody; L=\"Loves\"; fudge=;" Each element of the Cookie also supports all of the RFC 2109 Cookie attributes. Here's an example which sets the Path @@ -170,6 +170,15 @@ class CookieError(Exception): }) _is_legal_key = re.compile('[%s]+' % re.escape(_LegalChars)).fullmatch +_control_character_re = re.compile(r'[\x00-\x1F\x7F]') + + +def _has_control_character(*val): + """Detects control characters within a value. + Supports any type, as header values can be any type. + """ + return any(_control_character_re.search(str(v)) for v in val) + def _quote(str): r"""Quote a string for use in a cookie header. @@ -294,12 +303,16 @@ def __setitem__(self, K, V): K = K.lower() if not K in self._reserved: raise CookieError("Invalid attribute %r" % (K,)) + if _has_control_character(K, V): + raise CookieError(f"Control characters are not allowed in cookies {K!r} {V!r}") dict.__setitem__(self, K, V) def setdefault(self, key, val=None): key = key.lower() if key not in self._reserved: raise CookieError("Invalid attribute %r" % (key,)) + if _has_control_character(key, val): + raise CookieError("Control characters are not allowed in cookies %r %r" % (key, val,)) return dict.setdefault(self, key, val) def __eq__(self, morsel): @@ -335,6 +348,9 @@ def set(self, key, val, coded_val): raise CookieError('Attempt to set a reserved key %r' % (key,)) if not _is_legal_key(key): raise CookieError('Illegal key %r' % (key,)) + if _has_control_character(key, val, coded_val): + raise CookieError( + "Control characters are not allowed in cookies %r %r %r" % (key, val, coded_val,)) # It's a good key, so save it. self._key = key @@ -488,7 +504,10 @@ def output(self, attrs=None, header="Set-Cookie:", sep="\015\012"): result = [] items = sorted(self.items()) for key, value in items: - result.append(value.output(attrs, header)) + value_output = value.output(attrs, header) + if _has_control_character(value_output): + raise CookieError("Control characters are not allowed in cookies") + result.append(value_output) return sep.join(result) __str__ = output diff --git a/Lib/imaplib.py b/Lib/imaplib.py index 22a0afcd981519..cb3edceae0d9f1 100644 --- a/Lib/imaplib.py +++ b/Lib/imaplib.py @@ -129,7 +129,7 @@ # We compile these in _mode_xxx. _Literal = br'.*{(?P\d+)}$' _Untagged_status = br'\* (?P\d+) (?P[A-Z-]+)( (?P.*))?' - +_control_chars = re.compile(b'[\x00-\x1F\x7F]') class IMAP4: @@ -1105,6 +1105,8 @@ def _command(self, name, *args): if arg is None: continue if isinstance(arg, str): arg = bytes(arg, self._encoding) + if _control_chars.search(arg): + raise ValueError("Control characters not allowed in commands") data = data + b' ' + arg literal = self.literal diff --git a/Lib/poplib.py b/Lib/poplib.py index 4469bff44b4c45..b97274c5c32ee6 100644 --- a/Lib/poplib.py +++ b/Lib/poplib.py @@ -122,6 +122,8 @@ def _putline(self, line): def _putcmd(self, line): if self._debugging: print('*cmd*', repr(line)) line = bytes(line, self.encoding) + if re.search(b'[\x00-\x1F\x7F]', line): + raise ValueError('Control characters not allowed in commands') self._putline(line) diff --git a/Lib/test/test_coroutines.py b/Lib/test/test_coroutines.py index 6ad7e7994f32b0..93e9e7a8393cb1 100644 --- a/Lib/test/test_coroutines.py +++ b/Lib/test/test_coroutines.py @@ -2265,6 +2265,20 @@ def c(): # before fixing, visible stack from throw would be shorter than from send. self.assertEqual(len_send, len_throw) + def test_call_generator_in_frame_clear(self): + # gh-143939: Running a generator while clearing the coroutine's frame + # should not be misinterpreted as a yield. + class CallGeneratorOnDealloc: + def __del__(self): + next(x for x in [1]) + + async def coro(): + obj = CallGeneratorOnDealloc() + return 42 + + yielded, result = run_async(coro()) + self.assertEqual(yielded, []) + self.assertEqual(result, 42) @unittest.skipIf( support.is_emscripten or support.is_wasi, diff --git a/Lib/test/test_http_cookies.py b/Lib/test/test_http_cookies.py index c2ed30831b2e0e..7d072d5fd67ca7 100644 --- a/Lib/test/test_http_cookies.py +++ b/Lib/test/test_http_cookies.py @@ -17,10 +17,10 @@ def test_basic(self): 'repr': "", 'output': 'Set-Cookie: chips=ahoy\nSet-Cookie: vienna=finger'}, - {'data': 'keebler="E=mc2; L=\\"Loves\\"; fudge=\\012;"', - 'dict': {'keebler' : 'E=mc2; L="Loves"; fudge=\012;'}, - 'repr': '''''', - 'output': 'Set-Cookie: keebler="E=mc2; L=\\"Loves\\"; fudge=\\012;"'}, + {'data': 'keebler="E=mc2; L=\\"Loves\\"; fudge=;"', + 'dict': {'keebler' : 'E=mc2; L="Loves"; fudge=;'}, + 'repr': '''''', + 'output': 'Set-Cookie: keebler="E=mc2; L=\\"Loves\\"; fudge=;"'}, # Check illegal cookies that have an '=' char in an unquoted value {'data': 'keebler=E=mc2', @@ -594,6 +594,50 @@ def test_repr(self): r'Set-Cookie: key=coded_val; ' r'expires=\w+, \d+ \w+ \d+ \d+:\d+:\d+ \w+') + def test_control_characters(self): + for c0 in support.control_characters_c0(): + morsel = cookies.Morsel() + + # .__setitem__() + with self.assertRaises(cookies.CookieError): + morsel[c0] = "val" + with self.assertRaises(cookies.CookieError): + morsel["path"] = c0 + + # .setdefault() + with self.assertRaises(cookies.CookieError): + morsel.setdefault("path", c0) + with self.assertRaises(cookies.CookieError): + morsel.setdefault(c0, "val") + + # .set() + with self.assertRaises(cookies.CookieError): + morsel.set(c0, "val", "coded-value") + with self.assertRaises(cookies.CookieError): + morsel.set("path", c0, "coded-value") + with self.assertRaises(cookies.CookieError): + morsel.set("path", "val", c0) + + def test_control_characters_output(self): + # Tests that even if the internals of Morsel are modified + # that a call to .output() has control character safeguards. + for c0 in support.control_characters_c0(): + morsel = cookies.Morsel() + morsel.set("key", "value", "coded-value") + morsel._key = c0 # Override private variable. + cookie = cookies.SimpleCookie() + cookie["cookie"] = morsel + with self.assertRaises(cookies.CookieError): + cookie.output() + + morsel = cookies.Morsel() + morsel.set("key", "value", "coded-value") + morsel._coded_value = c0 # Override private variable. + cookie = cookies.SimpleCookie() + cookie["cookie"] = morsel + with self.assertRaises(cookies.CookieError): + cookie.output() + def load_tests(loader, tests, pattern): tests.addTest(doctest.DocTestSuite(cookies)) diff --git a/Lib/test/test_imaplib.py b/Lib/test/test_imaplib.py index 430fa71fa29f59..cb5454b40eccf9 100644 --- a/Lib/test/test_imaplib.py +++ b/Lib/test/test_imaplib.py @@ -657,6 +657,12 @@ def test_unselect(self): self.assertEqual(data[0], b'Returned to authenticated state. (Success)') self.assertEqual(client.state, 'AUTH') + def test_control_characters(self): + client, _ = self._setup(SimpleIMAPHandler) + for c0 in support.control_characters_c0(): + with self.assertRaises(ValueError): + client.login(f'user{c0}', 'pass') + # property tests def test_file_property_should_not_be_accessed(self): diff --git a/Lib/test/test_poplib.py b/Lib/test/test_poplib.py index ef2da97f86734a..18ca7cb556836e 100644 --- a/Lib/test/test_poplib.py +++ b/Lib/test/test_poplib.py @@ -17,6 +17,7 @@ from test.support import threading_helper from test.support import asynchat from test.support import asyncore +from test.support import control_characters_c0 test_support.requires_working_socket(module=True) @@ -395,6 +396,13 @@ def test_quit(self): self.assertIsNone(self.client.sock) self.assertIsNone(self.client.file) + def test_control_characters(self): + for c0 in control_characters_c0(): + with self.assertRaises(ValueError): + self.client.user(f'user{c0}') + with self.assertRaises(ValueError): + self.client.pass_(f'{c0}pass') + @requires_ssl def test_stls_capa(self): capa = self.client.capa() diff --git a/Lib/test/test_urllib.py b/Lib/test/test_urllib.py index ae524c5ffba6b1..2dd739b77b8e4d 100644 --- a/Lib/test/test_urllib.py +++ b/Lib/test/test_urllib.py @@ -10,6 +10,7 @@ from test import support from test.support import os_helper from test.support import socket_helper +from test.support import control_characters_c0 import os import socket try: @@ -590,6 +591,13 @@ def test_invalid_base64_data(self): # missing padding character self.assertRaises(ValueError,urllib.request.urlopen,'data:;base64,Cg=') + def test_invalid_mediatype(self): + for c0 in control_characters_c0(): + self.assertRaises(ValueError,urllib.request.urlopen, + f'data:text/html;{c0},data') + for c0 in control_characters_c0(): + self.assertRaises(ValueError,urllib.request.urlopen, + f'data:text/html{c0};base64,ZGF0YQ==') class urlretrieve_FileTests(unittest.TestCase): """Test urllib.urlretrieve() on local files""" diff --git a/Lib/urllib/request.py b/Lib/urllib/request.py index f32de189b1353a..f5f17f223a4585 100644 --- a/Lib/urllib/request.py +++ b/Lib/urllib/request.py @@ -1636,6 +1636,11 @@ def data_open(self, req): scheme, data = url.split(":",1) mediatype, data = data.split(",",1) + # Disallow control characters within mediatype. + if re.search(r"[\x00-\x1F\x7F]", mediatype): + raise ValueError( + "Control characters not allowed in data: mediatype") + # even base64 encoded data URLs might be quoted so unquote in any case: data = unquote_to_bytes(data) if mediatype.endswith(";base64"): diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2026-01-16-23-19-38.gh-issue-143939.w9TWch.rst b/Misc/NEWS.d/next/Core_and_Builtins/2026-01-16-23-19-38.gh-issue-143939.w9TWch.rst new file mode 100644 index 00000000000000..47423663e07864 --- /dev/null +++ b/Misc/NEWS.d/next/Core_and_Builtins/2026-01-16-23-19-38.gh-issue-143939.w9TWch.rst @@ -0,0 +1,3 @@ +Fix erroneous "cannot reuse already awaited coroutine" error that could +occur when a generator was run during the process of clearing a coroutine's +frame. diff --git a/Misc/NEWS.d/next/Security/2026-01-16-11-13-15.gh-issue-143919.kchwZV.rst b/Misc/NEWS.d/next/Security/2026-01-16-11-13-15.gh-issue-143919.kchwZV.rst new file mode 100644 index 00000000000000..788c3e4ac2ebf7 --- /dev/null +++ b/Misc/NEWS.d/next/Security/2026-01-16-11-13-15.gh-issue-143919.kchwZV.rst @@ -0,0 +1 @@ +Reject control characters in :class:`http.cookies.Morsel` fields and values. diff --git a/Misc/NEWS.d/next/Security/2026-01-16-11-41-06.gh-issue-143921.AeCOor.rst b/Misc/NEWS.d/next/Security/2026-01-16-11-41-06.gh-issue-143921.AeCOor.rst new file mode 100644 index 00000000000000..4e13fe92bc60fb --- /dev/null +++ b/Misc/NEWS.d/next/Security/2026-01-16-11-41-06.gh-issue-143921.AeCOor.rst @@ -0,0 +1 @@ +Reject control characters in IMAP commands. diff --git a/Misc/NEWS.d/next/Security/2026-01-16-11-43-47.gh-issue-143923.DuytMe.rst b/Misc/NEWS.d/next/Security/2026-01-16-11-43-47.gh-issue-143923.DuytMe.rst new file mode 100644 index 00000000000000..3cde4df3e0069f --- /dev/null +++ b/Misc/NEWS.d/next/Security/2026-01-16-11-43-47.gh-issue-143923.DuytMe.rst @@ -0,0 +1 @@ +Reject control characters in POP3 commands. diff --git a/Misc/NEWS.d/next/Security/2026-01-16-11-51-19.gh-issue-143925.mrtcHW.rst b/Misc/NEWS.d/next/Security/2026-01-16-11-51-19.gh-issue-143925.mrtcHW.rst new file mode 100644 index 00000000000000..46109dfbef3ee7 --- /dev/null +++ b/Misc/NEWS.d/next/Security/2026-01-16-11-51-19.gh-issue-143925.mrtcHW.rst @@ -0,0 +1 @@ +Reject control characters in ``data:`` URL media types. diff --git a/Objects/genobject.c b/Objects/genobject.c index 09407d60af62be..fcdb9017a35f5b 100644 --- a/Objects/genobject.c +++ b/Objects/genobject.c @@ -280,6 +280,9 @@ gen_send_ex2(PyGenObject *gen, PyObject *arg, PyObject **presult, int exc) if (return_kind == GENERATOR_YIELD) { assert(result != NULL && !_PyErr_Occurred(tstate)); +#ifndef Py_GIL_DISABLED + assert(FRAME_STATE_SUSPENDED(gen->gi_frame_state)); +#endif *presult = result; return PYGEN_NEXT; } diff --git a/Python/ceval.c b/Python/ceval.c index 87481ba6d0377f..bdf1e9bb742333 100644 --- a/Python/ceval.c +++ b/Python/ceval.c @@ -1914,7 +1914,6 @@ clear_gen_frame(PyThreadState *tstate, _PyInterpreterFrame * frame) assert(frame->owner == FRAME_OWNED_BY_GENERATOR); PyGenObject *gen = _PyGen_GetGeneratorFromFrame(frame); FT_ATOMIC_STORE_INT8_RELEASE(gen->gi_frame_state, FRAME_CLEARED); - ((_PyThreadStateImpl *)tstate)->generator_return_kind = GENERATOR_RETURN; assert(tstate->exc_info == &gen->gi_exc_state); tstate->exc_info = gen->gi_exc_state.previous_item; gen->gi_exc_state.previous_item = NULL; @@ -1922,6 +1921,9 @@ clear_gen_frame(PyThreadState *tstate, _PyInterpreterFrame * frame) frame->previous = NULL; _PyFrame_ClearExceptCode(frame); _PyErr_ClearExcState(&gen->gi_exc_state); + // gh-143939: There must not be any escaping calls between setting + // the generator return kind and returning from _PyEval_EvalFrame. + ((_PyThreadStateImpl *)tstate)->generator_return_kind = GENERATOR_RETURN; } void diff --git a/Python/gc_free_threading.c b/Python/gc_free_threading.c index beb3fa588f40e7..0ec9c58a792e6d 100644 --- a/Python/gc_free_threading.c +++ b/Python/gc_free_threading.c @@ -308,18 +308,17 @@ disable_deferred_refcounting(PyObject *op) // should also be disabled when we turn off deferred refcounting. _PyObject_DisablePerThreadRefcounting(op); } - if (_PyObject_GC_IS_TRACKED(op)) { - // Generators and frame objects may contain deferred references to other - // objects. If the pointed-to objects are part of cyclic trash, we may - // have disabled deferred refcounting on them and need to ensure that we - // use strong references, in case the generator or frame object is - // resurrected by a finalizer. - if (PyGen_CheckExact(op) || PyCoro_CheckExact(op) || PyAsyncGen_CheckExact(op)) { - frame_disable_deferred_refcounting(&((PyGenObject *)op)->gi_iframe); - } - else if (PyFrame_Check(op)) { - frame_disable_deferred_refcounting(((PyFrameObject *)op)->f_frame); - } + + // Generators and frame objects may contain deferred references to other + // objects. If the pointed-to objects are part of cyclic trash, we may + // have disabled deferred refcounting on them and need to ensure that we + // use strong references, in case the generator or frame object is + // resurrected by a finalizer. + if (PyGen_CheckExact(op) || PyCoro_CheckExact(op) || PyAsyncGen_CheckExact(op)) { + frame_disable_deferred_refcounting(&((PyGenObject *)op)->gi_iframe); + } + else if (PyFrame_Check(op)) { + frame_disable_deferred_refcounting(((PyFrameObject *)op)->f_frame); } } @@ -507,6 +506,10 @@ gc_visit_thread_stacks(PyInterpreterState *interp, struct collection_state *stat static bool gc_maybe_untrack(PyObject *op) { + if (_PyObject_HasDeferredRefcount(op)) { + // deferred refcounting only works if the object is tracked + return false; + } // Currently we only check for tuples containing only non-GC objects. In // theory we could check other immutable objects that contain references // to non-GC objects. @@ -1019,7 +1022,7 @@ update_refs(const mi_heap_t *heap, const mi_heap_area_t *area, } _PyObject_ASSERT(op, refcount >= 0); - if (refcount > 0 && !_PyObject_HasDeferredRefcount(op)) { + if (refcount > 0) { if (gc_maybe_untrack(op)) { gc_restore_refs(op); return true; @@ -1241,30 +1244,19 @@ scan_heap_visitor(const mi_heap_t *heap, const mi_heap_area_t *area, return true; } + if (state->reason == _Py_GC_REASON_SHUTDOWN) { + // Disable deferred refcounting for reachable objects as well during + // interpreter shutdown. This ensures that these objects are collected + // immediately when their last reference is removed. + disable_deferred_refcounting(op); + } + // object is reachable, restore `ob_tid`; we're done with these objects gc_restore_tid(op); gc_clear_alive(op); return true; } -// Disable deferred refcounting for reachable objects during interpreter -// shutdown. This ensures that these objects are collected immediately when -// their last reference is removed. This needs to consider both tracked and -// untracked GC objects, since either might have deferred refcounts enabled. -static bool -scan_heap_disable_deferred(const mi_heap_t *heap, const mi_heap_area_t *area, - void *block, size_t block_size, void *args) -{ - PyObject *op = op_from_block_all_gc(block, args); - if (op == NULL) { - return true; - } - if (!_Py_IsImmortal(op)) { - disable_deferred_refcounting(op); - } - return true; -} - static int move_legacy_finalizer_reachable(struct collection_state *state); @@ -1499,10 +1491,6 @@ deduce_unreachable_heap(PyInterpreterState *interp, // Restores ob_tid for reachable objects. gc_visit_heaps(interp, &scan_heap_visitor, &state->base); - if (state->reason == _Py_GC_REASON_SHUTDOWN) { - gc_visit_heaps(interp, &scan_heap_disable_deferred, &state->base); - } - if (state->legacy_finalizers.head) { // There may be objects reachable from legacy finalizers that are in // the unreachable set. We need to mark them as reachable. diff --git a/Python/optimizer.c b/Python/optimizer.c index ab0ef3db4e4882..15a1eb5a17745b 100644 --- a/Python/optimizer.c +++ b/Python/optimizer.c @@ -1514,11 +1514,7 @@ uop_optimize( _PyBloomFilter *dependencies = &_tstate->jit_tracer_state->prev_state.dependencies; _PyUOpInstruction *buffer = _tstate->jit_tracer_state->code_buffer; OPT_STAT_INC(attempts); - char *env_var = Py_GETENV("PYTHON_UOPS_OPTIMIZE"); - bool is_noopt = true; - if (env_var == NULL || *env_var == '\0' || *env_var > '0') { - is_noopt = false; - } + bool is_noopt = !tstate->interp->opt_config.uops_optimize_enabled; int curr_stackentries = _tstate->jit_tracer_state->initial_state.stack_depth; int length = _tstate->jit_tracer_state->prev_state.code_curr_size; if (length <= CODE_SIZE_NO_PROGRESS) { diff --git a/Python/pystate.c b/Python/pystate.c index 86dee70734a097..89374e16722494 100644 --- a/Python/pystate.c +++ b/Python/pystate.c @@ -522,6 +522,13 @@ is_env_enabled(const char *env_name) return env && *env != '\0' && *env != '0'; } +static inline bool +is_env_disabled(const char *env_name) +{ + char *env = Py_GETENV(env_name); + return env != NULL && *env == '0'; +} + static inline void init_policy(uint16_t *target, const char *env_name, uint16_t default_value, long min_value, long max_value) @@ -619,6 +626,7 @@ init_interpreter(PyInterpreterState *interp, SIDE_EXIT_INITIAL_BACKOFF, 0, MAX_BACKOFF); interp->opt_config.specialization_enabled = !is_env_enabled("PYTHON_SPECIALIZATION_OFF"); + interp->opt_config.uops_optimize_enabled = !is_env_disabled("PYTHON_UOPS_OPTIMIZE"); if (interp != &runtime->_main_interpreter) { /* Fix the self-referential, statically initialized fields. */ interp->dtoa = (struct _dtoa_state)_dtoa_state_INIT(interp); diff --git a/Python/specialize.c b/Python/specialize.c index 432053f85221a3..845416a1d5be35 100644 --- a/Python/specialize.c +++ b/Python/specialize.c @@ -362,7 +362,7 @@ static uint32_t function_get_version(PyObject *o, int opcode); static void maybe_enable_deferred_ref_count(PyObject *op) { - if (!_Py_IsOwnedByCurrentThread(op)) { + if (!_Py_IsOwnedByCurrentThread(op) && _PyObject_GC_IS_TRACKED(op)) { // For module level variables that are heavily used from multiple // threads, deferred reference counting provides good scaling // benefits. The downside is that the object will only be deallocated