diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS index 1086b42620479d..6acc156ebff713 100644 --- a/.github/CODEOWNERS +++ b/.github/CODEOWNERS @@ -126,6 +126,9 @@ Doc/howto/clinic.rst @erlend-aasland @AA-Turner # C Analyser Tools/c-analyzer/ @ericsnowcurrently +# C API Documentation Checks +Tools/check-c-api-docs/ @ZeroIntensity + # Fuzzing Modules/_xxtestfuzz/ @ammaraskar diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 8e15400e4978eb..3d889fa128e261 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -142,6 +142,9 @@ jobs: - name: Check for unsupported C global variables if: github.event_name == 'pull_request' # $GITHUB_EVENT_NAME run: make check-c-globals + - name: Check for undocumented C APIs + run: make check-c-api-docs + build-windows: name: >- diff --git a/Lib/test/test_dict.py b/Lib/test/test_dict.py index 2e6c2bbdf19409..665b3e843dd3a5 100644 --- a/Lib/test/test_dict.py +++ b/Lib/test/test_dict.py @@ -1621,6 +1621,14 @@ def __eq__(self, other): self.assertEqual(len(d), 1) + def test_split_table_update_with_str_subclass(self): + class MyStr(str): pass + class MyClass: pass + obj = MyClass() + obj.attr = 1 + obj.__dict__[MyStr('attr')] = 2 + self.assertEqual(obj.attr, 2) + class CAPITest(unittest.TestCase): diff --git a/Makefile.pre.in b/Makefile.pre.in index 086adbdf262c48..f3086ec1462b6b 100644 --- a/Makefile.pre.in +++ b/Makefile.pre.in @@ -3322,6 +3322,11 @@ check-c-globals: --format summary \ --traceback +# Check for undocumented C APIs. +.PHONY: check-c-api-docs +check-c-api-docs: + $(PYTHON_FOR_REGEN) $(srcdir)/Tools/check-c-api-docs/main.py + # Find files with funny names .PHONY: funny funny: diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2025-12-03-11-03-35.gh-issue-142218.44Fq_J.rst b/Misc/NEWS.d/next/Core_and_Builtins/2025-12-03-11-03-35.gh-issue-142218.44Fq_J.rst new file mode 100644 index 00000000000000..a8ce0fc65267d5 --- /dev/null +++ b/Misc/NEWS.d/next/Core_and_Builtins/2025-12-03-11-03-35.gh-issue-142218.44Fq_J.rst @@ -0,0 +1,2 @@ +Fix crash when inserting into a split table dictionary with a non +:class:`str` key that matches an existing key. diff --git a/Objects/dictobject.c b/Objects/dictobject.c index ee1c173ae4abb0..e0eef7b46df4b2 100644 --- a/Objects/dictobject.c +++ b/Objects/dictobject.c @@ -1914,10 +1914,14 @@ insertdict(PyInterpreterState *interp, PyDictObject *mp, if (old_value != value) { _PyDict_NotifyEvent(interp, PyDict_EVENT_MODIFIED, mp, key, value); assert(old_value != NULL); - assert(!_PyDict_HasSplitTable(mp)); if (DK_IS_UNICODE(mp->ma_keys)) { - PyDictUnicodeEntry *ep = &DK_UNICODE_ENTRIES(mp->ma_keys)[ix]; - STORE_VALUE(ep, value); + if (_PyDict_HasSplitTable(mp)) { + STORE_SPLIT_VALUE(mp, ix, value); + } + else { + PyDictUnicodeEntry *ep = &DK_UNICODE_ENTRIES(mp->ma_keys)[ix]; + STORE_VALUE(ep, value); + } } else { PyDictKeyEntry *ep = &DK_ENTRIES(mp->ma_keys)[ix]; diff --git a/Tools/check-c-api-docs/ignored_c_api.txt b/Tools/check-c-api-docs/ignored_c_api.txt new file mode 100644 index 00000000000000..e81ffd51e193b2 --- /dev/null +++ b/Tools/check-c-api-docs/ignored_c_api.txt @@ -0,0 +1,93 @@ +# pydtrace_probes.h +PyDTrace_AUDIT +PyDTrace_FUNCTION_ENTRY +PyDTrace_FUNCTION_RETURN +PyDTrace_GC_DONE +PyDTrace_GC_START +PyDTrace_IMPORT_FIND_LOAD_DONE +PyDTrace_IMPORT_FIND_LOAD_START +PyDTrace_INSTANCE_DELETE_DONE +PyDTrace_INSTANCE_DELETE_START +PyDTrace_INSTANCE_NEW_DONE +PyDTrace_INSTANCE_NEW_START +PyDTrace_LINE +# fileobject.h +Py_FileSystemDefaultEncodeErrors +Py_FileSystemDefaultEncoding +Py_HasFileSystemDefaultEncoding +Py_UTF8Mode +# pyhash.h +Py_HASH_EXTERNAL +# exports.h +PyAPI_DATA +Py_EXPORTED_SYMBOL +Py_IMPORTED_SYMBOL +Py_LOCAL_SYMBOL +# modsupport.h +PyABIInfo_FREETHREADING_AGNOSTIC +# moduleobject.h +PyModuleDef_Type +# object.h +Py_INVALID_SIZE +Py_TPFLAGS_HAVE_VERSION_TAG +Py_TPFLAGS_INLINE_VALUES +Py_TPFLAGS_IS_ABSTRACT +# pyexpat.h +PyExpat_CAPI_MAGIC +PyExpat_CAPSULE_NAME +# pyport.h +Py_ALIGNED +Py_ARITHMETIC_RIGHT_SHIFT +Py_CAN_START_THREADS +Py_FORCE_EXPANSION +Py_GCC_ATTRIBUTE +Py_LL +Py_SAFE_DOWNCAST +Py_ULL +Py_VA_COPY +# unicodeobject.h +Py_UNICODE_SIZE +# cpython/methodobject.h +PyCFunction_GET_CLASS +# cpython/compile.h +PyCF_ALLOW_INCOMPLETE_INPUT +PyCF_COMPILE_MASK +PyCF_DONT_IMPLY_DEDENT +PyCF_IGNORE_COOKIE +PyCF_MASK +PyCF_MASK_OBSOLETE +PyCF_SOURCE_IS_UTF8 +# cpython/descrobject.h +PyDescr_COMMON +PyDescr_NAME +PyDescr_TYPE +PyWrapperFlag_KEYWORDS +# cpython/fileobject.h +PyFile_NewStdPrinter +PyStdPrinter_Type +Py_UniversalNewlineFgets +# cpython/setobject.h +PySet_MINSIZE +# cpython/ceval.h +PyUnstable_CopyPerfMapFile +PyUnstable_PerfTrampoline_CompileCode +PyUnstable_PerfTrampoline_SetPersistAfterFork +# cpython/genobject.h +PyAsyncGenASend_CheckExact +# cpython/longintrepr.h +PyLong_BASE +PyLong_MASK +PyLong_SHIFT +# cpython/pyerrors.h +PyException_HEAD +# cpython/pyframe.h +PyUnstable_EXECUTABLE_KINDS +PyUnstable_EXECUTABLE_KIND_BUILTIN_FUNCTION +PyUnstable_EXECUTABLE_KIND_METHOD_DESCRIPTOR +PyUnstable_EXECUTABLE_KIND_PY_FUNCTION +PyUnstable_EXECUTABLE_KIND_SKIP +# cpython/pylifecycle.h +Py_FrozenMain +# cpython/unicodeobject.h +PyUnicode_IS_COMPACT +PyUnicode_IS_COMPACT_ASCII diff --git a/Tools/check-c-api-docs/main.py b/Tools/check-c-api-docs/main.py new file mode 100644 index 00000000000000..6bdf80a9ae8985 --- /dev/null +++ b/Tools/check-c-api-docs/main.py @@ -0,0 +1,193 @@ +import re +from pathlib import Path +import sys +import _colorize +import textwrap + +SIMPLE_FUNCTION_REGEX = re.compile(r"PyAPI_FUNC(.+) (\w+)\(") +SIMPLE_MACRO_REGEX = re.compile(r"# *define *(\w+)(\(.+\))? ") +SIMPLE_INLINE_REGEX = re.compile(r"static inline .+( |\n)(\w+)") +SIMPLE_DATA_REGEX = re.compile(r"PyAPI_DATA\(.+\) (\w+)") + +CPYTHON = Path(__file__).parent.parent.parent +INCLUDE = CPYTHON / "Include" +C_API_DOCS = CPYTHON / "Doc" / "c-api" +IGNORED = ( + (CPYTHON / "Tools" / "check-c-api-docs" / "ignored_c_api.txt") + .read_text() + .split("\n") +) + +for index, line in enumerate(IGNORED): + if line.startswith("#"): + IGNORED.pop(index) + +MISTAKE = """ +If this is a mistake and this script should not be failing, create an +issue and tag Peter (@ZeroIntensity) on it.\ +""" + + +def found_undocumented(singular: bool) -> str: + some = "an" if singular else "some" + s = "" if singular else "s" + these = "this" if singular else "these" + them = "it" if singular else "them" + were = "was" if singular else "were" + + return ( + textwrap.dedent( + f""" + Found {some} undocumented C API{s}! + + Python requires documentation on all public C API symbols, macros, and types. + If {these} API{s} {were} not meant to be public, prefix {them} with a + leading underscore (_PySomething_API) or move {them} to the internal C API + (pycore_*.h files). + + In exceptional cases, certain APIs can be ignored by adding them to + Tools/check-c-api-docs/ignored_c_api.txt + """ + ) + + MISTAKE + ) + + +def found_ignored_documented(singular: bool) -> str: + some = "a" if singular else "some" + s = "" if singular else "s" + them = "it" if singular else "them" + were = "was" if singular else "were" + they = "it" if singular else "they" + + return ( + textwrap.dedent( + f""" + Found {some} C API{s} listed in Tools/c-api-docs-check/ignored_c_api.txt, but + {they} {were} found in the documentation. To fix this, remove {them} from + ignored_c_api.txt. + """ + ) + + MISTAKE + ) + + +def is_documented(name: str) -> bool: + """ + Is a name present in the C API documentation? + """ + for path in C_API_DOCS.iterdir(): + if path.is_dir(): + continue + if path.suffix != ".rst": + continue + + text = path.read_text(encoding="utf-8") + if name in text: + return True + + return False + + +def scan_file_for_docs(filename: str, text: str) -> tuple[list[str], list[str]]: + """ + Scan a header file for C API functions. + """ + undocumented: list[str] = [] + documented_ignored: list[str] = [] + colors = _colorize.get_colors() + + def check_for_name(name: str) -> None: + documented = is_documented(name) + if documented and (name in IGNORED): + documented_ignored.append(name) + elif not documented and (name not in IGNORED): + undocumented.append(name) + + for function in SIMPLE_FUNCTION_REGEX.finditer(text): + name = function.group(2) + if not name.startswith("Py"): + continue + + check_for_name(name) + + for macro in SIMPLE_MACRO_REGEX.finditer(text): + name = macro.group(1) + if not name.startswith("Py"): + continue + + if "(" in name: + name = name[: name.index("(")] + + check_for_name(name) + + for inline in SIMPLE_INLINE_REGEX.finditer(text): + name = inline.group(2) + if not name.startswith("Py"): + continue + + check_for_name(name) + + for data in SIMPLE_DATA_REGEX.finditer(text): + name = data.group(1) + if not name.startswith("Py"): + continue + + check_for_name(name) + + # Remove duplicates and sort alphabetically to keep the output deterministic + undocumented = list(set(undocumented)) + undocumented.sort() + + if undocumented or documented_ignored: + print(f"{filename} {colors.RED}BAD{colors.RESET}") + for name in undocumented: + print(f"{colors.BOLD_RED}UNDOCUMENTED:{colors.RESET} {name}") + for name in documented_ignored: + print(f"{colors.BOLD_YELLOW}DOCUMENTED BUT IGNORED:{colors.RESET} {name}") + else: + print(f"{filename} {colors.GREEN}OK{colors.RESET}") + + return undocumented, documented_ignored + + +def main() -> None: + print("Scanning for undocumented C API functions...") + files = [*INCLUDE.iterdir(), *(INCLUDE / "cpython").iterdir()] + all_missing: list[str] = [] + all_found_ignored: list[str] = [] + + for file in files: + if file.is_dir(): + continue + assert file.exists() + text = file.read_text(encoding="utf-8") + missing, ignored = scan_file_for_docs(str(file.relative_to(INCLUDE)), text) + all_found_ignored += ignored + all_missing += missing + + fail = False + to_check = [ + (all_missing, "missing", found_undocumented(len(all_missing) == 1)), + ( + all_found_ignored, + "documented but ignored", + found_ignored_documented(len(all_found_ignored) == 1), + ), + ] + for name_list, what, message in to_check: + if not name_list: + continue + + s = "s" if len(name_list) != 1 else "" + print(f"-- {len(name_list)} {what} C API{s} --") + for name in name_list: + print(f" - {name}") + print(message) + fail = True + + sys.exit(1 if fail else 0) + + +if __name__ == "__main__": + main() diff --git a/Tools/wasm/wasi/__main__.py b/Tools/wasm/wasi/__main__.py index 06903fd25abe44..d95cc99c8ea28b 100644 --- a/Tools/wasm/wasi/__main__.py +++ b/Tools/wasm/wasi/__main__.py @@ -271,7 +271,7 @@ def wasi_sdk_env(context): for env_var, binary_name in list(env.items()): env[env_var] = os.fsdecode(wasi_sdk_path / "bin" / binary_name) - if wasi_sdk_path != pathlib.Path("/opt/wasi-sdk"): + if not wasi_sdk_path.name.startswith("wasi-sdk"): for compiler in ["CC", "CPP", "CXX"]: env[compiler] += f" --sysroot={sysroot}"