From ad68a5e4e936b942d28f53799b460518a1764200 Mon Sep 17 00:00:00 2001 From: Taus Date: Tue, 9 Dec 2025 22:50:51 +0000 Subject: [PATCH 1/4] Python: Add modelling for `zstd.compression` See https://docs.python.org/3/library/compression.zstd.html for information about this library. As far as I can tell, the `zstd` library is not vulnerable to things like ZipSlip, but it _could_ be vulnerable to a decompression bomb attack, so I extended those models accordingly. --- .../python/security/DecompressionBomb.qll | 40 +++++++++++++++++++ 1 file changed, 40 insertions(+) diff --git a/python/ql/src/experimental/semmle/python/security/DecompressionBomb.qll b/python/ql/src/experimental/semmle/python/security/DecompressionBomb.qll index a2e50d0ade5d..87a47ebeb00c 100644 --- a/python/ql/src/experimental/semmle/python/security/DecompressionBomb.qll +++ b/python/ql/src/experimental/semmle/python/security/DecompressionBomb.qll @@ -364,6 +364,46 @@ module Lzma { } } +/** Provides sinks and additional taint steps related to the `zstd` library in Python 3.14+. */ +module Zstd { + private API::Node zstdInstance() { + result = API::moduleImport("compression").getMember("zstd").getMember(["ZstdFile", "open"]) + } + + /** + * The Decompression Sinks of `zstd` library + * + * `zstd.open(sink)` + * `zstd.ZstdFile(sink)` + * + * only read mode is sink + */ + class DecompressionSink extends DecompressionBomb::Sink { + DecompressionSink() { + exists(API::CallNode zstdCall | zstdCall = zstdInstance().getACall() | + this = zstdCall.getParameter(0, "filename").asSink() and + ( + not exists( + zstdCall + .getParameter(1, "mode") + .getAValueReachingSink() + .asExpr() + .(StringLiteral) + .getText() + ) or + zstdCall + .getParameter(1, "mode") + .getAValueReachingSink() + .asExpr() + .(StringLiteral) + .getText() + .matches("%r%") + ) + ) + } + } +} + /** * `io.TextIOWrapper(ip, encoding='utf-8')` like following: * ```python From e6e05012c848e0c617e1a7c83320bc44e839a541 Mon Sep 17 00:00:00 2001 From: Taus Date: Tue, 9 Dec 2025 22:55:40 +0000 Subject: [PATCH 2/4] Python: Add change note --- .../2025-12-09-add-modelling-of-zstd-compression.md | 4 ++++ 1 file changed, 4 insertions(+) create mode 100644 python/ql/lib/change-notes/2025-12-09-add-modelling-of-zstd-compression.md diff --git a/python/ql/lib/change-notes/2025-12-09-add-modelling-of-zstd-compression.md b/python/ql/lib/change-notes/2025-12-09-add-modelling-of-zstd-compression.md new file mode 100644 index 000000000000..8ec42ca0db2e --- /dev/null +++ b/python/ql/lib/change-notes/2025-12-09-add-modelling-of-zstd-compression.md @@ -0,0 +1,4 @@ +--- +category: minorAnalysis +--- +* The `compression.zstd` library (added in Python 3.14) is now supported by the `py/decompression-bomb` query. From 6af9fd816fb6eba9a1513992d98fde314e3e7483 Mon Sep 17 00:00:00 2001 From: Taus Date: Wed, 10 Dec 2025 13:20:19 +0000 Subject: [PATCH 3/4] Python: Make space for new test --- .../CWE-409/DecompressionBombs.expected | 48 +++++++++---------- .../query-tests/Security/CWE-409/test.py | 5 ++ 2 files changed, 29 insertions(+), 24 deletions(-) diff --git a/python/ql/test/experimental/query-tests/Security/CWE-409/DecompressionBombs.expected b/python/ql/test/experimental/query-tests/Security/CWE-409/DecompressionBombs.expected index 87b07df086fa..a8355a5a5fc4 100644 --- a/python/ql/test/experimental/query-tests/Security/CWE-409/DecompressionBombs.expected +++ b/python/ql/test/experimental/query-tests/Security/CWE-409/DecompressionBombs.expected @@ -36,15 +36,15 @@ edges | test.py:28:26:28:34 | ControlFlowNode for file_path | test.py:45:17:45:25 | ControlFlowNode for file_path | provenance | | | test.py:28:26:28:34 | ControlFlowNode for file_path | test.py:49:15:49:23 | ControlFlowNode for file_path | provenance | | | test.py:28:26:28:34 | ControlFlowNode for file_path | test.py:50:19:50:27 | ControlFlowNode for file_path | provenance | | -| test.py:28:26:28:34 | ControlFlowNode for file_path | test.py:54:40:54:48 | ControlFlowNode for file_path | provenance | | -| test.py:28:26:28:34 | ControlFlowNode for file_path | test.py:56:23:56:31 | ControlFlowNode for file_path | provenance | | -| test.py:28:26:28:34 | ControlFlowNode for file_path | test.py:57:21:57:29 | ControlFlowNode for file_path | provenance | | | test.py:28:26:28:34 | ControlFlowNode for file_path | test.py:59:40:59:48 | ControlFlowNode for file_path | provenance | | -| test.py:28:26:28:34 | ControlFlowNode for file_path | test.py:60:22:60:30 | ControlFlowNode for file_path | provenance | | -| test.py:28:26:28:34 | ControlFlowNode for file_path | test.py:61:21:61:29 | ControlFlowNode for file_path | provenance | | -| test.py:28:26:28:34 | ControlFlowNode for file_path | test.py:62:42:62:50 | ControlFlowNode for file_path | provenance | | -| test.py:28:26:28:34 | ControlFlowNode for file_path | test.py:63:23:63:31 | ControlFlowNode for file_path | provenance | | -| test.py:28:26:28:34 | ControlFlowNode for file_path | test.py:64:36:64:44 | ControlFlowNode for file_path | provenance | | +| test.py:28:26:28:34 | ControlFlowNode for file_path | test.py:61:23:61:31 | ControlFlowNode for file_path | provenance | | +| test.py:28:26:28:34 | ControlFlowNode for file_path | test.py:62:21:62:29 | ControlFlowNode for file_path | provenance | | +| test.py:28:26:28:34 | ControlFlowNode for file_path | test.py:64:40:64:48 | ControlFlowNode for file_path | provenance | | +| test.py:28:26:28:34 | ControlFlowNode for file_path | test.py:65:22:65:30 | ControlFlowNode for file_path | provenance | | +| test.py:28:26:28:34 | ControlFlowNode for file_path | test.py:66:21:66:29 | ControlFlowNode for file_path | provenance | | +| test.py:28:26:28:34 | ControlFlowNode for file_path | test.py:67:42:67:50 | ControlFlowNode for file_path | provenance | | +| test.py:28:26:28:34 | ControlFlowNode for file_path | test.py:68:23:68:31 | ControlFlowNode for file_path | provenance | | +| test.py:28:26:28:34 | ControlFlowNode for file_path | test.py:69:36:69:44 | ControlFlowNode for file_path | provenance | | nodes | test.py:10:16:10:24 | ControlFlowNode for file_path | semmle.label | ControlFlowNode for file_path | | test.py:11:5:11:35 | ControlFlowNode for Attribute() | semmle.label | ControlFlowNode for Attribute() | @@ -79,15 +79,15 @@ nodes | test.py:45:17:45:25 | ControlFlowNode for file_path | semmle.label | ControlFlowNode for file_path | | test.py:49:15:49:23 | ControlFlowNode for file_path | semmle.label | ControlFlowNode for file_path | | test.py:50:19:50:27 | ControlFlowNode for file_path | semmle.label | ControlFlowNode for file_path | -| test.py:54:40:54:48 | ControlFlowNode for file_path | semmle.label | ControlFlowNode for file_path | -| test.py:56:23:56:31 | ControlFlowNode for file_path | semmle.label | ControlFlowNode for file_path | -| test.py:57:21:57:29 | ControlFlowNode for file_path | semmle.label | ControlFlowNode for file_path | | test.py:59:40:59:48 | ControlFlowNode for file_path | semmle.label | ControlFlowNode for file_path | -| test.py:60:22:60:30 | ControlFlowNode for file_path | semmle.label | ControlFlowNode for file_path | -| test.py:61:21:61:29 | ControlFlowNode for file_path | semmle.label | ControlFlowNode for file_path | -| test.py:62:42:62:50 | ControlFlowNode for file_path | semmle.label | ControlFlowNode for file_path | -| test.py:63:23:63:31 | ControlFlowNode for file_path | semmle.label | ControlFlowNode for file_path | -| test.py:64:36:64:44 | ControlFlowNode for file_path | semmle.label | ControlFlowNode for file_path | +| test.py:61:23:61:31 | ControlFlowNode for file_path | semmle.label | ControlFlowNode for file_path | +| test.py:62:21:62:29 | ControlFlowNode for file_path | semmle.label | ControlFlowNode for file_path | +| test.py:64:40:64:48 | ControlFlowNode for file_path | semmle.label | ControlFlowNode for file_path | +| test.py:65:22:65:30 | ControlFlowNode for file_path | semmle.label | ControlFlowNode for file_path | +| test.py:66:21:66:29 | ControlFlowNode for file_path | semmle.label | ControlFlowNode for file_path | +| test.py:67:42:67:50 | ControlFlowNode for file_path | semmle.label | ControlFlowNode for file_path | +| test.py:68:23:68:31 | ControlFlowNode for file_path | semmle.label | ControlFlowNode for file_path | +| test.py:69:36:69:44 | ControlFlowNode for file_path | semmle.label | ControlFlowNode for file_path | subpaths #select | test.py:11:5:11:52 | ControlFlowNode for Attribute() | test.py:10:16:10:24 | ControlFlowNode for file_path | test.py:11:5:11:52 | ControlFlowNode for Attribute() | This uncontrolled file extraction is $@. | test.py:10:16:10:24 | ControlFlowNode for file_path | depends on this user controlled data | @@ -107,12 +107,12 @@ subpaths | test.py:45:17:45:25 | ControlFlowNode for file_path | test.py:10:16:10:24 | ControlFlowNode for file_path | test.py:45:17:45:25 | ControlFlowNode for file_path | This uncontrolled file extraction is $@. | test.py:10:16:10:24 | ControlFlowNode for file_path | depends on this user controlled data | | test.py:49:15:49:23 | ControlFlowNode for file_path | test.py:10:16:10:24 | ControlFlowNode for file_path | test.py:49:15:49:23 | ControlFlowNode for file_path | This uncontrolled file extraction is $@. | test.py:10:16:10:24 | ControlFlowNode for file_path | depends on this user controlled data | | test.py:50:19:50:27 | ControlFlowNode for file_path | test.py:10:16:10:24 | ControlFlowNode for file_path | test.py:50:19:50:27 | ControlFlowNode for file_path | This uncontrolled file extraction is $@. | test.py:10:16:10:24 | ControlFlowNode for file_path | depends on this user controlled data | -| test.py:54:40:54:48 | ControlFlowNode for file_path | test.py:10:16:10:24 | ControlFlowNode for file_path | test.py:54:40:54:48 | ControlFlowNode for file_path | This uncontrolled file extraction is $@. | test.py:10:16:10:24 | ControlFlowNode for file_path | depends on this user controlled data | -| test.py:56:23:56:31 | ControlFlowNode for file_path | test.py:10:16:10:24 | ControlFlowNode for file_path | test.py:56:23:56:31 | ControlFlowNode for file_path | This uncontrolled file extraction is $@. | test.py:10:16:10:24 | ControlFlowNode for file_path | depends on this user controlled data | -| test.py:57:21:57:29 | ControlFlowNode for file_path | test.py:10:16:10:24 | ControlFlowNode for file_path | test.py:57:21:57:29 | ControlFlowNode for file_path | This uncontrolled file extraction is $@. | test.py:10:16:10:24 | ControlFlowNode for file_path | depends on this user controlled data | | test.py:59:40:59:48 | ControlFlowNode for file_path | test.py:10:16:10:24 | ControlFlowNode for file_path | test.py:59:40:59:48 | ControlFlowNode for file_path | This uncontrolled file extraction is $@. | test.py:10:16:10:24 | ControlFlowNode for file_path | depends on this user controlled data | -| test.py:60:22:60:30 | ControlFlowNode for file_path | test.py:10:16:10:24 | ControlFlowNode for file_path | test.py:60:22:60:30 | ControlFlowNode for file_path | This uncontrolled file extraction is $@. | test.py:10:16:10:24 | ControlFlowNode for file_path | depends on this user controlled data | -| test.py:61:21:61:29 | ControlFlowNode for file_path | test.py:10:16:10:24 | ControlFlowNode for file_path | test.py:61:21:61:29 | ControlFlowNode for file_path | This uncontrolled file extraction is $@. | test.py:10:16:10:24 | ControlFlowNode for file_path | depends on this user controlled data | -| test.py:62:42:62:50 | ControlFlowNode for file_path | test.py:10:16:10:24 | ControlFlowNode for file_path | test.py:62:42:62:50 | ControlFlowNode for file_path | This uncontrolled file extraction is $@. | test.py:10:16:10:24 | ControlFlowNode for file_path | depends on this user controlled data | -| test.py:63:23:63:31 | ControlFlowNode for file_path | test.py:10:16:10:24 | ControlFlowNode for file_path | test.py:63:23:63:31 | ControlFlowNode for file_path | This uncontrolled file extraction is $@. | test.py:10:16:10:24 | ControlFlowNode for file_path | depends on this user controlled data | -| test.py:64:36:64:44 | ControlFlowNode for file_path | test.py:10:16:10:24 | ControlFlowNode for file_path | test.py:64:36:64:44 | ControlFlowNode for file_path | This uncontrolled file extraction is $@. | test.py:10:16:10:24 | ControlFlowNode for file_path | depends on this user controlled data | +| test.py:61:23:61:31 | ControlFlowNode for file_path | test.py:10:16:10:24 | ControlFlowNode for file_path | test.py:61:23:61:31 | ControlFlowNode for file_path | This uncontrolled file extraction is $@. | test.py:10:16:10:24 | ControlFlowNode for file_path | depends on this user controlled data | +| test.py:62:21:62:29 | ControlFlowNode for file_path | test.py:10:16:10:24 | ControlFlowNode for file_path | test.py:62:21:62:29 | ControlFlowNode for file_path | This uncontrolled file extraction is $@. | test.py:10:16:10:24 | ControlFlowNode for file_path | depends on this user controlled data | +| test.py:64:40:64:48 | ControlFlowNode for file_path | test.py:10:16:10:24 | ControlFlowNode for file_path | test.py:64:40:64:48 | ControlFlowNode for file_path | This uncontrolled file extraction is $@. | test.py:10:16:10:24 | ControlFlowNode for file_path | depends on this user controlled data | +| test.py:65:22:65:30 | ControlFlowNode for file_path | test.py:10:16:10:24 | ControlFlowNode for file_path | test.py:65:22:65:30 | ControlFlowNode for file_path | This uncontrolled file extraction is $@. | test.py:10:16:10:24 | ControlFlowNode for file_path | depends on this user controlled data | +| test.py:66:21:66:29 | ControlFlowNode for file_path | test.py:10:16:10:24 | ControlFlowNode for file_path | test.py:66:21:66:29 | ControlFlowNode for file_path | This uncontrolled file extraction is $@. | test.py:10:16:10:24 | ControlFlowNode for file_path | depends on this user controlled data | +| test.py:67:42:67:50 | ControlFlowNode for file_path | test.py:10:16:10:24 | ControlFlowNode for file_path | test.py:67:42:67:50 | ControlFlowNode for file_path | This uncontrolled file extraction is $@. | test.py:10:16:10:24 | ControlFlowNode for file_path | depends on this user controlled data | +| test.py:68:23:68:31 | ControlFlowNode for file_path | test.py:10:16:10:24 | ControlFlowNode for file_path | test.py:68:23:68:31 | ControlFlowNode for file_path | This uncontrolled file extraction is $@. | test.py:10:16:10:24 | ControlFlowNode for file_path | depends on this user controlled data | +| test.py:69:36:69:44 | ControlFlowNode for file_path | test.py:10:16:10:24 | ControlFlowNode for file_path | test.py:69:36:69:44 | ControlFlowNode for file_path | This uncontrolled file extraction is $@. | test.py:10:16:10:24 | ControlFlowNode for file_path | depends on this user controlled data | diff --git a/python/ql/test/experimental/query-tests/Security/CWE-409/test.py b/python/ql/test/experimental/query-tests/Security/CWE-409/test.py index 06113bf9fe46..352bd1f425f3 100644 --- a/python/ql/test/experimental/query-tests/Security/CWE-409/test.py +++ b/python/ql/test/experimental/query-tests/Security/CWE-409/test.py @@ -49,6 +49,11 @@ async def bomb(file_path): gzip.open(file_path) # $ result=BAD gzip.GzipFile(file_path) # $ result=BAD +# from compression import zstd +# +# zstd.open(file_path) # $ result=BAD +# zstd.ZstdFile(file_path).read() # $ result=BAD +# import pandas pandas.read_csv(filepath_or_buffer=file_path) # $ result=BAD From d2c7147480b297e8fec5ec735230c24d9b943a7a Mon Sep 17 00:00:00 2001 From: Taus Date: Wed, 10 Dec 2025 13:52:13 +0000 Subject: [PATCH 4/4] Python: Add new test --- .../Security/CWE-409/DecompressionBombs.expected | 6 ++++++ .../experimental/query-tests/Security/CWE-409/test.py | 10 +++++----- 2 files changed, 11 insertions(+), 5 deletions(-) diff --git a/python/ql/test/experimental/query-tests/Security/CWE-409/DecompressionBombs.expected b/python/ql/test/experimental/query-tests/Security/CWE-409/DecompressionBombs.expected index a8355a5a5fc4..17c28aa1d95d 100644 --- a/python/ql/test/experimental/query-tests/Security/CWE-409/DecompressionBombs.expected +++ b/python/ql/test/experimental/query-tests/Security/CWE-409/DecompressionBombs.expected @@ -36,6 +36,8 @@ edges | test.py:28:26:28:34 | ControlFlowNode for file_path | test.py:45:17:45:25 | ControlFlowNode for file_path | provenance | | | test.py:28:26:28:34 | ControlFlowNode for file_path | test.py:49:15:49:23 | ControlFlowNode for file_path | provenance | | | test.py:28:26:28:34 | ControlFlowNode for file_path | test.py:50:19:50:27 | ControlFlowNode for file_path | provenance | | +| test.py:28:26:28:34 | ControlFlowNode for file_path | test.py:54:15:54:23 | ControlFlowNode for file_path | provenance | | +| test.py:28:26:28:34 | ControlFlowNode for file_path | test.py:55:19:55:27 | ControlFlowNode for file_path | provenance | | | test.py:28:26:28:34 | ControlFlowNode for file_path | test.py:59:40:59:48 | ControlFlowNode for file_path | provenance | | | test.py:28:26:28:34 | ControlFlowNode for file_path | test.py:61:23:61:31 | ControlFlowNode for file_path | provenance | | | test.py:28:26:28:34 | ControlFlowNode for file_path | test.py:62:21:62:29 | ControlFlowNode for file_path | provenance | | @@ -79,6 +81,8 @@ nodes | test.py:45:17:45:25 | ControlFlowNode for file_path | semmle.label | ControlFlowNode for file_path | | test.py:49:15:49:23 | ControlFlowNode for file_path | semmle.label | ControlFlowNode for file_path | | test.py:50:19:50:27 | ControlFlowNode for file_path | semmle.label | ControlFlowNode for file_path | +| test.py:54:15:54:23 | ControlFlowNode for file_path | semmle.label | ControlFlowNode for file_path | +| test.py:55:19:55:27 | ControlFlowNode for file_path | semmle.label | ControlFlowNode for file_path | | test.py:59:40:59:48 | ControlFlowNode for file_path | semmle.label | ControlFlowNode for file_path | | test.py:61:23:61:31 | ControlFlowNode for file_path | semmle.label | ControlFlowNode for file_path | | test.py:62:21:62:29 | ControlFlowNode for file_path | semmle.label | ControlFlowNode for file_path | @@ -107,6 +111,8 @@ subpaths | test.py:45:17:45:25 | ControlFlowNode for file_path | test.py:10:16:10:24 | ControlFlowNode for file_path | test.py:45:17:45:25 | ControlFlowNode for file_path | This uncontrolled file extraction is $@. | test.py:10:16:10:24 | ControlFlowNode for file_path | depends on this user controlled data | | test.py:49:15:49:23 | ControlFlowNode for file_path | test.py:10:16:10:24 | ControlFlowNode for file_path | test.py:49:15:49:23 | ControlFlowNode for file_path | This uncontrolled file extraction is $@. | test.py:10:16:10:24 | ControlFlowNode for file_path | depends on this user controlled data | | test.py:50:19:50:27 | ControlFlowNode for file_path | test.py:10:16:10:24 | ControlFlowNode for file_path | test.py:50:19:50:27 | ControlFlowNode for file_path | This uncontrolled file extraction is $@. | test.py:10:16:10:24 | ControlFlowNode for file_path | depends on this user controlled data | +| test.py:54:15:54:23 | ControlFlowNode for file_path | test.py:10:16:10:24 | ControlFlowNode for file_path | test.py:54:15:54:23 | ControlFlowNode for file_path | This uncontrolled file extraction is $@. | test.py:10:16:10:24 | ControlFlowNode for file_path | depends on this user controlled data | +| test.py:55:19:55:27 | ControlFlowNode for file_path | test.py:10:16:10:24 | ControlFlowNode for file_path | test.py:55:19:55:27 | ControlFlowNode for file_path | This uncontrolled file extraction is $@. | test.py:10:16:10:24 | ControlFlowNode for file_path | depends on this user controlled data | | test.py:59:40:59:48 | ControlFlowNode for file_path | test.py:10:16:10:24 | ControlFlowNode for file_path | test.py:59:40:59:48 | ControlFlowNode for file_path | This uncontrolled file extraction is $@. | test.py:10:16:10:24 | ControlFlowNode for file_path | depends on this user controlled data | | test.py:61:23:61:31 | ControlFlowNode for file_path | test.py:10:16:10:24 | ControlFlowNode for file_path | test.py:61:23:61:31 | ControlFlowNode for file_path | This uncontrolled file extraction is $@. | test.py:10:16:10:24 | ControlFlowNode for file_path | depends on this user controlled data | | test.py:62:21:62:29 | ControlFlowNode for file_path | test.py:10:16:10:24 | ControlFlowNode for file_path | test.py:62:21:62:29 | ControlFlowNode for file_path | This uncontrolled file extraction is $@. | test.py:10:16:10:24 | ControlFlowNode for file_path | depends on this user controlled data | diff --git a/python/ql/test/experimental/query-tests/Security/CWE-409/test.py b/python/ql/test/experimental/query-tests/Security/CWE-409/test.py index 352bd1f425f3..feb6b1293bb2 100644 --- a/python/ql/test/experimental/query-tests/Security/CWE-409/test.py +++ b/python/ql/test/experimental/query-tests/Security/CWE-409/test.py @@ -49,11 +49,11 @@ async def bomb(file_path): gzip.open(file_path) # $ result=BAD gzip.GzipFile(file_path) # $ result=BAD -# from compression import zstd -# -# zstd.open(file_path) # $ result=BAD -# zstd.ZstdFile(file_path).read() # $ result=BAD -# + from compression import zstd + + zstd.open(file_path) # $ result=BAD + zstd.ZstdFile(file_path).read() # $ result=BAD + import pandas pandas.read_csv(filepath_or_buffer=file_path) # $ result=BAD