From 5b1862bdd8021b5295df95d730c2d2efa827fa88 Mon Sep 17 00:00:00 2001 From: "Gregory P. Smith" <68491+gpshead@users.noreply.github.com> Date: Fri, 28 Nov 2025 22:07:03 -0800 Subject: [PATCH 1/5] gh-87512: Fix `subprocess` using `timeout=` on Windows blocking with a large `input=` (GH-142058) On Windows, Popen._communicate() previously wrote to stdin synchronously, which could block indefinitely if the subprocess didn't consume input= quickly and the pipe buffer filled up. The timeout= parameter was only checked when joining the reader threads, not during the stdin write. This change moves the Windows stdin writing to a background thread (similar to how stdout/stderr are read in threads), allowing the timeout to be properly enforced. If timeout expires, TimeoutExpired is raised promptly and the writer thread continues in the background. Subsequent calls to communicate() will join the existing writer thread. Adds test_communicate_timeout_large_input to verify that TimeoutExpired is raised promptly when communicate() is called with large input and a timeout, even when the subprocess doesn't consume stdin quickly. This test already passed on POSIX (where select() is used) but failed on Windows where the stdin write blocks without checking the timeout. Co-authored-by: Claude Opus 4.5 --- Lib/subprocess.py | 23 +++++++- Lib/test/test_subprocess.py | 56 +++++++++++++++++++ ...5-11-29-03-02-45.gh-issue-87512.bn4xbm.rst | 5 ++ 3 files changed, 82 insertions(+), 2 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2025-11-29-03-02-45.gh-issue-87512.bn4xbm.rst diff --git a/Lib/subprocess.py b/Lib/subprocess.py index 99e2d0755d7e46..4d5ab6fbff0a46 100644 --- a/Lib/subprocess.py +++ b/Lib/subprocess.py @@ -1613,6 +1613,10 @@ def _readerthread(self, fh, buffer): fh.close() + def _writerthread(self, input): + self._stdin_write(input) + + def _communicate(self, input, endtime, orig_timeout): # Start reader threads feeding into a list hanging off of this # object, unless they've already been started. @@ -1631,8 +1635,23 @@ def _communicate(self, input, endtime, orig_timeout): self.stderr_thread.daemon = True self.stderr_thread.start() - if self.stdin: - self._stdin_write(input) + # Start writer thread to send input to stdin, unless already + # started. The thread writes input and closes stdin when done, + # or continues in the background on timeout. + if self.stdin and not hasattr(self, "_stdin_thread"): + self._stdin_thread = \ + threading.Thread(target=self._writerthread, + args=(input,)) + self._stdin_thread.daemon = True + self._stdin_thread.start() + + # Wait for the writer thread, or time out. If we time out, the + # thread remains writing and the fd left open in case the user + # calls communicate again. + if hasattr(self, "_stdin_thread"): + self._stdin_thread.join(self._remaining_time(endtime)) + if self._stdin_thread.is_alive(): + raise TimeoutExpired(self.args, orig_timeout) # Wait for the reader threads, or time out. If we time out, the # threads remain reading and the fds left open in case the user diff --git a/Lib/test/test_subprocess.py b/Lib/test/test_subprocess.py index 2d717f985a3ac3..806a1e3fa303eb 100644 --- a/Lib/test/test_subprocess.py +++ b/Lib/test/test_subprocess.py @@ -1034,6 +1034,62 @@ def test_communicate_timeout_large_output(self): (stdout, _) = p.communicate() self.assertEqual(len(stdout), 4 * 64 * 1024) + def test_communicate_timeout_large_input(self): + # Test that timeout is enforced when writing large input to a + # slow-to-read subprocess, and that partial input is preserved + # for continuation after timeout (gh-141473). + # + # This is a regression test for Windows matching POSIX behavior. + # On POSIX, select() is used to multiplex I/O with timeout checking. + # On Windows, stdin writing must also honor the timeout rather than + # blocking indefinitely when the pipe buffer fills. + + # Input larger than typical pipe buffer (4-64KB on Windows) + input_data = b"x" * (128 * 1024) + + p = subprocess.Popen( + [sys.executable, "-c", + "import sys, time; " + "time.sleep(30); " # Don't read stdin for a long time + "sys.stdout.buffer.write(sys.stdin.buffer.read())"], + stdin=subprocess.PIPE, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE) + + try: + timeout = 0.2 + start = time.monotonic() + try: + p.communicate(input_data, timeout=timeout) + # If we get here without TimeoutExpired, the timeout was ignored + elapsed = time.monotonic() - start + self.fail( + f"TimeoutExpired not raised. communicate() completed in " + f"{elapsed:.2f}s, but subprocess sleeps for 30s. " + "Stdin writing blocked without enforcing timeout.") + except subprocess.TimeoutExpired: + elapsed = time.monotonic() - start + + # Timeout should occur close to the specified timeout value, + # not after waiting for the subprocess to finish sleeping. + # Allow generous margin for slow CI, but must be well under + # the subprocess sleep time. + self.assertLess(elapsed, 5.0, + f"TimeoutExpired raised after {elapsed:.2f}s; expected ~{timeout}s. " + "Stdin writing blocked without checking timeout.") + + # After timeout, continue communication. The remaining input + # should be sent and we should receive all data back. + stdout, stderr = p.communicate() + + # Verify all input was eventually received by the subprocess + self.assertEqual(len(stdout), len(input_data), + f"Expected {len(input_data)} bytes output but got {len(stdout)}") + self.assertEqual(stdout, input_data) + finally: + p.kill() + p.wait() + # Test for the fd leak reported in http://bugs.python.org/issue2791. def test_communicate_pipe_fd_leak(self): for stdin_pipe in (False, True): diff --git a/Misc/NEWS.d/next/Library/2025-11-29-03-02-45.gh-issue-87512.bn4xbm.rst b/Misc/NEWS.d/next/Library/2025-11-29-03-02-45.gh-issue-87512.bn4xbm.rst new file mode 100644 index 00000000000000..091350108eea1b --- /dev/null +++ b/Misc/NEWS.d/next/Library/2025-11-29-03-02-45.gh-issue-87512.bn4xbm.rst @@ -0,0 +1,5 @@ +Fix :func:`subprocess.Popen.communicate` timeout handling on Windows +when writing large input. Previously, the timeout was ignored during +stdin writing, causing the method to block indefinitely if the child +process did not consume input quickly. The stdin write is now performed +in a background thread, allowing the timeout to be properly enforced. From 5e749d3743621be4c8bf5618ed3249e9fde9a599 Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Sat, 29 Nov 2025 08:00:14 +0100 Subject: [PATCH 2/5] Fix multiprocessing queue test_get() (GH-142024) * Replace sleep() with support.sleeping_retry(). * Test get_nowait() first. * Restore previously disabled test. Fix the failure: FAIL: test_get (test.test_multiprocessing_spawn.test_processes.WithProcessesTestQueue.test_get) ---------------------------------------------------------------------- Traceback (most recent call last): File "Lib/test/_test_multiprocessing.py", line 1208, in test_get self.assertEqual(queue_empty(queue), False) ~~~~~~~~~~~~~~~~^^^^^^^^^^^^^^^^^^^^^^^^^^^ AssertionError: True != False --- Lib/test/_test_multiprocessing.py | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/Lib/test/_test_multiprocessing.py b/Lib/test/_test_multiprocessing.py index 9f8412fe9394eb..d718a27231897f 100644 --- a/Lib/test/_test_multiprocessing.py +++ b/Lib/test/_test_multiprocessing.py @@ -1204,7 +1204,7 @@ def test_put(self): @classmethod def _test_get(cls, queue, child_can_start, parent_can_continue): child_can_start.wait() - #queue.put(1) + queue.put(1) queue.put(2) queue.put(3) queue.put(4) @@ -1229,15 +1229,16 @@ def test_get(self): child_can_start.set() parent_can_continue.wait() - time.sleep(DELTA) + for _ in support.sleeping_retry(support.SHORT_TIMEOUT): + if not queue_empty(queue): + break self.assertEqual(queue_empty(queue), False) - # Hangs unexpectedly, remove for now - #self.assertEqual(queue.get(), 1) + self.assertEqual(queue.get_nowait(), 1) self.assertEqual(queue.get(True, None), 2) self.assertEqual(queue.get(True), 3) self.assertEqual(queue.get(timeout=1), 4) - self.assertEqual(queue.get_nowait(), 5) + self.assertEqual(queue.get(), 5) self.assertEqual(queue_empty(queue), True) From 440bcb94560937888cd9bcb28a138acc2c6a6cbc Mon Sep 17 00:00:00 2001 From: Sebastian Pipping Date: Sat, 29 Nov 2025 08:08:17 +0100 Subject: [PATCH 3/5] gh-141994: Warn of XXE vulnerability in documentation of SAX feature `xml.sax.handler.feature_external_ges` (GH-141996) Doc/library/xml.sax.handler.rst: Warn of XXE with feature_external_ges Related to commit baa9f338971c6a13433a8232db77cd45e6b87b77 --- Doc/library/xml.sax.handler.rst | 8 ++++++++ .../2025-11-26-23-30-09.gh-issue-141994.arBEG6.rst | 4 ++++ 2 files changed, 12 insertions(+) create mode 100644 Misc/NEWS.d/next/Documentation/2025-11-26-23-30-09.gh-issue-141994.arBEG6.rst diff --git a/Doc/library/xml.sax.handler.rst b/Doc/library/xml.sax.handler.rst index 38ca4507d81e76..f1af7253e437b4 100644 --- a/Doc/library/xml.sax.handler.rst +++ b/Doc/library/xml.sax.handler.rst @@ -96,6 +96,14 @@ for the feature and property names. .. data:: feature_external_ges + .. warning:: + + Enabling opens a vulnerability to + `external entity attacks `_ + if the parser is used with user-provided XML content. + Please reflect on your `threat model `_ + before enabling this feature. + | value: ``"http://xml.org/sax/features/external-general-entities"`` | true: Include all external general (text) entities. | false: Do not include external general entities. diff --git a/Misc/NEWS.d/next/Documentation/2025-11-26-23-30-09.gh-issue-141994.arBEG6.rst b/Misc/NEWS.d/next/Documentation/2025-11-26-23-30-09.gh-issue-141994.arBEG6.rst new file mode 100644 index 00000000000000..c370e8a86e1766 --- /dev/null +++ b/Misc/NEWS.d/next/Documentation/2025-11-26-23-30-09.gh-issue-141994.arBEG6.rst @@ -0,0 +1,4 @@ +:mod:`xml.sax.handler`: Make Documentation of +:data:`xml.sax.handler.feature_external_ges` warn of opening up to `external +entity attacks `_. +Patch by Sebastian Pipping. From 890fe5aad55d4f28cda56834f9457ff79e9e8c60 Mon Sep 17 00:00:00 2001 From: Hugo van Kemenade <1324225+hugovk@users.noreply.github.com> Date: Sat, 29 Nov 2025 09:11:59 +0200 Subject: [PATCH 4/5] Docs: multi-disk ZIP files -> multipart ZIP files (GH-141962) * Remove some old currentlies * multi-disk -> multipart * Sentence case headings --- Doc/library/zipfile.rst | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/Doc/library/zipfile.rst b/Doc/library/zipfile.rst index 5a8bbc8c1aedf7..ae4e25b13b92cd 100644 --- a/Doc/library/zipfile.rst +++ b/Doc/library/zipfile.rst @@ -16,10 +16,10 @@ provides tools to create, read, write, append, and list a ZIP file. Any advanced use of this module will require an understanding of the format, as defined in `PKZIP Application Note`_. -This module does not currently handle multi-disk ZIP files. +This module does not handle multipart ZIP files. It can handle ZIP files that use the ZIP64 extensions (that is ZIP files that are more than 4 GiB in size). It supports -decryption of encrypted files in ZIP archives, but it currently cannot +decryption of encrypted files in ZIP archives, but it cannot create an encrypted file. Decryption is extremely slow as it is implemented in native Python rather than C. @@ -175,7 +175,7 @@ The module defines the following items: .. _zipfile-objects: -ZipFile Objects +ZipFile objects --------------- @@ -248,7 +248,7 @@ ZipFile Objects .. note:: *metadata_encoding* is an instance-wide setting for the ZipFile. - It is not currently possible to set this on a per-member basis. + It is not possible to set this on a per-member basis. This attribute is a workaround for legacy implementations which produce archives with names in the current locale encoding or code page (mostly @@ -571,7 +571,7 @@ The following data attributes are also available: .. _path-objects: -Path Objects +Path objects ------------ .. class:: Path(root, at='') @@ -707,7 +707,7 @@ changes. .. _pyzipfile-objects: -PyZipFile Objects +PyZipFile objects ----------------- The :class:`PyZipFile` constructor takes the same parameters as the @@ -784,7 +784,7 @@ The :class:`PyZipFile` constructor takes the same parameters as the .. _zipinfo-objects: -ZipInfo Objects +ZipInfo objects --------------- Instances of the :class:`ZipInfo` class are returned by the :meth:`.getinfo` and @@ -954,7 +954,7 @@ Instances have the following methods and attributes: .. _zipfile-commandline: .. program:: zipfile -Command-Line Interface +Command-line interface ---------------------- The :mod:`zipfile` module provides a simple command-line interface to interact @@ -1029,7 +1029,7 @@ From file itself Decompression may fail due to incorrect password / CRC checksum / ZIP format or unsupported compression method / decryption. -File System limitations +File system limitations ~~~~~~~~~~~~~~~~~~~~~~~ Exceeding limitations on different file systems can cause decompression failed. From cfcd52490d6531f3b0e8ddd4bb2b40fb6baee854 Mon Sep 17 00:00:00 2001 From: Moshe Kaplan Date: Sat, 29 Nov 2025 02:23:34 -0500 Subject: [PATCH 5/5] GH-141963: Clarify argparse documentation (GH-141964) Clarify argparse documentation Tightens the phrasing for several argparse actions. --- Doc/library/argparse.rst | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/Doc/library/argparse.rst b/Doc/library/argparse.rst index 2a39f248651936..71c4f094886546 100644 --- a/Doc/library/argparse.rst +++ b/Doc/library/argparse.rst @@ -767,9 +767,9 @@ how the command-line arguments should be handled. The supplied actions are: Namespace(foo=42) * ``'store_true'`` and ``'store_false'`` - These are special cases of - ``'store_const'`` used for storing the values ``True`` and ``False`` - respectively. In addition, they create default values of ``False`` and - ``True`` respectively:: + ``'store_const'`` that respectively store the values ``True`` and ``False`` + with default values of ``False`` and + ``True``:: >>> parser = argparse.ArgumentParser() >>> parser.add_argument('--foo', action='store_true') @@ -789,8 +789,8 @@ how the command-line arguments should be handled. The supplied actions are: >>> parser.parse_args('--foo 1 --foo 2'.split()) Namespace(foo=['0', '1', '2']) -* ``'append_const'`` - This stores a list, and appends the value specified by - the const_ keyword argument to the list; note that the const_ keyword +* ``'append_const'`` - This appends the value specified by + the const_ keyword argument to a list; note that the const_ keyword argument defaults to ``None``. The ``'append_const'`` action is typically useful when multiple arguments need to store constants to the same list. For example:: @@ -801,8 +801,8 @@ how the command-line arguments should be handled. The supplied actions are: >>> parser.parse_args('--str --int'.split()) Namespace(types=[, ]) -* ``'extend'`` - This stores a list and appends each item from the multi-value - argument list to it. +* ``'extend'`` - This appends each item from a multi-value + argument to a list. The ``'extend'`` action is typically used with the nargs_ keyword argument value ``'+'`` or ``'*'``. Note that when nargs_ is ``None`` (the default) or ``'?'``, each @@ -816,7 +816,7 @@ how the command-line arguments should be handled. The supplied actions are: .. versionadded:: 3.8 -* ``'count'`` - This counts the number of times a keyword argument occurs. For +* ``'count'`` - This counts the number of times an argument occurs. For example, this is useful for increasing verbosity levels:: >>> parser = argparse.ArgumentParser()