Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
20 commits
Select commit Hold shift + click to select a range
c9a7699
feat: extend the struct parser a bit more
MrIndeciso Oct 28, 2024
f8f3408
feat: implement size_of to make nested field chains work
MrIndeciso Oct 28, 2024
dd253da
ci: upgrade test runner to Python 3.13
MrIndeciso Oct 28, 2024
108848f
feat: allow typedefs and multiple structs in definition_to_type
MrIndeciso Oct 28, 2024
6468aba
fix: remove indentation when inline-printing ptrs
MrIndeciso Oct 28, 2024
3811b9b
fix: remove erroneous print and provide owner when inflating struct m…
MrIndeciso Oct 29, 2024
686135d
style: small refactor in struct_impl.py
MrIndeciso Oct 29, 2024
fa987c9
style: fix typing in struct_impl
MrIndeciso Oct 29, 2024
304044a
feat: implement a way to specify offsets in a struct
MrIndeciso Oct 29, 2024
2268862
fix: save last result in PARSED_STRUCTS
MrIndeciso Oct 29, 2024
00bb181
feat: implement iterate_annotation_chain to allow subclassing in type…
MrIndeciso Oct 29, 2024
697c29c
feat: make structs instantiatable, to generate their in-memory repres…
MrIndeciso Nov 4, 2024
a2d2a9a
style: remove unused import
MrIndeciso Nov 4, 2024
470ac9f
fix: import Self from typing_extensions
MrIndeciso Nov 4, 2024
8037e7e
feat: add `inflate` shortcut
MrIndeciso Nov 5, 2024
6c62a57
fix: iterate over the annotation chain in correct order
MrIndeciso Nov 5, 2024
ae9242a
style: autorefactor whole project
MrIndeciso Nov 5, 2024
80a4b82
fix: remove wrongly-committed print
MrIndeciso Nov 5, 2024
7dc804d
fix: don't use .size of struct_impl itself, retrieve size from backin…
MrIndeciso May 18, 2025
461c336
test: ensure size is a valid attribute name in structs and nested str…
MrIndeciso May 18, 2025
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/workflows/test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ jobs:
timeout-minutes: 15
strategy:
matrix:
python-version: ["3.10", "3.12"]
python-version: ["3.10", "3.13"]

steps:
- uses: actions/checkout@v4
Expand Down
11 changes: 7 additions & 4 deletions libdestruct/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,33 +4,36 @@
# Licensed under the MIT license. See LICENSE file in the project root for details.
#

try: # pragma: no cover
try: # pragma: no cover
from rich.traceback import install

install()
except ImportError: # pragma: no cover
except ImportError: # pragma: no cover
pass

from libdestruct.c import c_int, c_long, c_str, c_uint, c_ulong
from libdestruct.common import ptr
from libdestruct.common.array import array, array_of
from libdestruct.common.attributes import offset
from libdestruct.common.enum import enum, enum_of
from libdestruct.common.ptr import ptr
from libdestruct.common.struct import ptr_to, ptr_to_self, struct
from libdestruct.libdestruct import inflater
from libdestruct.libdestruct import inflate, inflater

__all__ = [
"array",
"array_of",
"offset",
"c_int",
"c_long",
"c_str",
"c_uint",
"c_ulong",
"enum",
"enum_of",
"inflate",
"inflater",
"struct",
"ptr",
"ptr_to",
"ptr_to_self",
]

Check failure on line 39 in libdestruct/__init__.py

View workflow job for this annotation

GitHub Actions / lint (3.12)

Ruff (RUF022)

libdestruct/__init__.py:22:11: RUF022 `__all__` is not sorted

Check failure on line 39 in libdestruct/__init__.py

View workflow job for this annotation

GitHub Actions / lint (3.12)

Ruff (RUF022)

libdestruct/__init__.py:22:11: RUF022 `__all__` is not sorted
74 changes: 74 additions & 0 deletions libdestruct/backing/fake_resolver.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,74 @@
#
# This file is part of libdestruct (https://github.com/mrindeciso/libdestruct).
# Copyright (c) 2024 Roberto Alessandro Bertolini. All rights reserved.
# Licensed under the MIT license. See LICENSE file in the project root for details.
#

from __future__ import annotations

from libdestruct.backing.resolver import Resolver


class FakeResolver(Resolver):
    """A class that can resolve elements in a simulated memory storage.

    The storage is a dictionary that maps page-aligned addresses to 4K pages of bytes.
    Pages that are missing from the dictionary are read as if they were filled with zeroes.
    """

    # We store data in the dictionary as 4K pages
    _PAGE_SIZE = 0x1000
    _PAGE_MASK = _PAGE_SIZE - 1

    def __init__(self: FakeResolver, memory: dict | None = None, address: int | None = 0) -> None:
        """Initializes a basic fake resolver.

        Args:
            memory: The page dictionary to operate on. A new empty dictionary is created when omitted.
            address: The absolute address of this resolver, or None if it has to be derived from a parent.
        """
        self.memory = memory if memory is not None else {}
        self.address = address
        self.parent = None
        self.offset = None

    def _load_page(self: FakeResolver, page_address: int) -> bytes:
        """Returns the page stored at page_address, zero-filled up to a full page."""
        # Missing pages and pages shorter than 4K are padded, so that slicing at any in-page offset
        # always yields the requested amount of bytes.
        return self.memory.get(page_address, b"").ljust(self._PAGE_SIZE, b"\x00")

    def resolve_address(self: FakeResolver) -> int:
        """Resolves self's address, mainly used by children to determine their own address."""
        if self.address is not None:
            return self.address

        # No absolute address: we are relative to our parent.
        return self.parent.resolve_address() + self.offset

    def relative_from_own(self: FakeResolver, address_offset: int, _: int) -> FakeResolver:
        """Creates a resolver that references a parent, such that a change in the parent is propagated on the child."""
        new_resolver = FakeResolver(self.memory, None)
        new_resolver.parent = self
        new_resolver.offset = address_offset
        return new_resolver

    def absolute_from_own(self: FakeResolver, address: int) -> FakeResolver:
        """Creates a resolver that has an absolute reference to an object, from the parent's view."""
        return FakeResolver(self.memory, address)

    def resolve(self: FakeResolver, size: int, _: int) -> bytes:
        """Resolves itself, providing the bytes it references for the specified size and index.

        The read may span multiple pages. Unmapped memory is returned as zeroes.
        """
        address = self.resolve_address()
        page_address = address & ~self._PAGE_MASK
        page_offset = address & self._PAGE_MASK

        chunks = []

        while size > 0:
            page = self._load_page(page_address)
            chunk_size = min(size, self._PAGE_SIZE - page_offset)
            chunks.append(page[page_offset : page_offset + chunk_size])
            size -= chunk_size
            # Every page after the first one is read from its beginning.
            page_address += self._PAGE_SIZE
            page_offset = 0

        return b"".join(chunks)

    def modify(self: FakeResolver, size: int, _: int, value: bytes) -> None:
        """Modifies itself in memory.

        At most size bytes of value are written, possibly across multiple pages.
        If value is shorter than size, only the available bytes are written.
        """
        address = self.resolve_address()
        page_address = address & ~self._PAGE_MASK
        page_offset = address & self._PAGE_MASK

        while size > 0:
            chunk_size = min(size, self._PAGE_SIZE - page_offset)
            chunk = value[:chunk_size]

            if not chunk:
                # Nothing left to write, do not touch the remaining pages.
                break

            page = self._load_page(page_address)
            # We replace exactly len(chunk) bytes, so that the page never changes its length.
            self.memory[page_address] = page[:page_offset] + chunk + page[page_offset + len(chunk) :]

            size -= chunk_size
            value = value[chunk_size:]
            page_address += self._PAGE_SIZE
            page_offset = 0
2 changes: 1 addition & 1 deletion libdestruct/backing/memory_resolver.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@

from libdestruct.backing.resolver import Resolver

if TYPE_CHECKING: # pragma: no cover
if TYPE_CHECKING: # pragma: no cover
from collections.abc import MutableSequence


Expand Down
2 changes: 1 addition & 1 deletion libdestruct/c/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
from libdestruct.c.c_integer_types import c_char, c_int, c_long, c_short, c_uchar, c_uint, c_ulong, c_ushort
from libdestruct.c.c_str import c_str

__all__ = ["c_char", "c_uchar", "c_short", "c_ushort", "c_int", "c_uint", "c_long", "c_ulong", "c_str"]

Check failure on line 10 in libdestruct/c/__init__.py

View workflow job for this annotation

GitHub Actions / lint (3.12)

Ruff (RUF022)

libdestruct/c/__init__.py:10:11: RUF022 `__all__` is not sorted

Check failure on line 10 in libdestruct/c/__init__.py

View workflow job for this annotation

GitHub Actions / lint (3.12)

Ruff (RUF022)

libdestruct/c/__init__.py:10:11: RUF022 `__all__` is not sorted

import libdestruct.c.base_type_inflater # noqa: F401
import libdestruct.c.base_type_inflater
import libdestruct.c.ctypes_generic_field # noqa: F401
10 changes: 8 additions & 2 deletions libdestruct/c/ctypes_generic_field.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,8 @@
from libdestruct.c.ctypes_generic import _ctypes_generic
from libdestruct.common.type_registry import TypeRegistry

registry = TypeRegistry()


def ctypes_type_handler(obj_type: type) -> type[_ctypes_generic]:
"""Return the ctypes type handler for the given object type.
Expand All @@ -21,11 +23,15 @@ def ctypes_type_handler(obj_type: type) -> type[_ctypes_generic]:
if not issubclass(obj_type, _SimpleCData):
raise TypeError(f"Unsupported object type: {obj_type}.")

return type(
typ = type(
f"ctypes_{obj_type.__name__}",
(_ctypes_generic,),
{"backing_type": obj_type, "size": sizeof(obj_type)},
)

registry.register_mapping(typ, typ)

return typ


TypeRegistry().register_type_handler(_SimpleCData, ctypes_type_handler)
registry.register_type_handler(_SimpleCData, ctypes_type_handler)
124 changes: 105 additions & 19 deletions libdestruct/c/struct_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,41 +14,58 @@

from pycparser import c_ast, c_parser

from libdestruct.common.array.array_of import array_of
from libdestruct.common.ptr.ptr_factory import ptr_to, ptr_to_self
from libdestruct.common.struct import struct

if TYPE_CHECKING:
from libdestruct.common.obj import obj


PARSED_STRUCTS = {}
"""A cache for parsed struct definitions, indexed by name."""

TYPEDEFS = {}
"""A cache for parsed type definitions, indexed by name."""


def definition_to_type(definition: str) -> type[obj]:
"""Converts a C struct definition to a struct object."""
parser = c_parser.CParser()

# If the definition contains includes, we must expand them.
if "#include" in definition:
definition = cleanup_attributes(expand_includes(definition))
force_more_tops = True
elif "typedef" in definition:
force_more_tops = True
else:
force_more_tops = False

try:
ast = parser.parse(definition)
except c_parser.ParseError as e:
raise ValueError("Invalid definition. Please add the necessary includes if using non-standard type definitions.") from e

if not force_more_tops and len(ast.ext) != 1:
raise ValueError("Definition must contain exactly one top object.")
raise ValueError(
"Invalid definition. Please add the necessary includes if using non-standard type definitions."

Check failure on line 44 in libdestruct/c/struct_parser.py

View workflow job for this annotation

GitHub Actions / lint (3.12)

Ruff (COM812)

libdestruct/c/struct_parser.py:44:108: COM812 Trailing comma missing

Check failure on line 44 in libdestruct/c/struct_parser.py

View workflow job for this annotation

GitHub Actions / lint (3.12)

Ruff (COM812)

libdestruct/c/struct_parser.py:44:108: COM812 Trailing comma missing
) from e

# If force_more_tops is True, we take the last top object.
# This is useful when a struct definition is preceded by typedefs.
root = ast.ext[-1].type if force_more_tops else ast.ext[0].type
# We assume that the root declaration is the last one.
root = ast.ext[-1].type

if not isinstance(root, c_ast.Struct):
raise TypeError("Definition must be a struct.")

return struct_to_type(root)
# We parse each declaration in the definition, except the last one, if it is a struct.
for decl in ast.ext[:-1]:
if isinstance(decl.type, c_ast.Struct):
struct_node = decl.type

if struct_node.name:
PARSED_STRUCTS[struct_node.name] = struct_to_type(struct_node)
elif isinstance(decl, c_ast.Typedef):
name, definition = typedef_to_pair(decl)
TYPEDEFS[name] = definition

result = struct_to_type(root)

PARSED_STRUCTS[root.name] = result

return result


def struct_to_type(struct_node: c_ast.Struct) -> type[struct]:
Expand All @@ -58,21 +75,69 @@

fields = {}

if not struct_node.decls and struct_node.name in PARSED_STRUCTS:
# We can check if the struct is already parsed.
return PARSED_STRUCTS[struct_node.name]
elif not struct_node.decls:
raise ValueError("Struct must have fields.")

for decl in struct_node.decls:
name = decl.name
typ = type_decl_to_type(decl.type)
typ = type_decl_to_type(decl.type, struct_node)
fields[name] = typ

type_name = struct_node.name if struct_node.name else "anon_struct"

return type(type_name, (struct,), {"__annotations__": fields})


def type_decl_to_type(decl: c_ast.TypeDecl) -> type[obj]:
def ptr_to_type(ptr: c_ast.PtrDecl, parent: c_ast.Struct | None = None) -> type[obj]:
    """Converts a C pointer to a type.

    Args:
        ptr: The pointer declaration to convert.
        parent: The struct that contains the pointer, if any. It is used to detect pointers to self.

    Returns:
        A pointer to self when the pointee is named like the parent, a pointer to the converted pointee otherwise.

    Raises:
        TypeError: If ptr is not a pointer declaration, or if it does not point to a type declaration.
    """
    if not isinstance(ptr, c_ast.PtrDecl):
        raise TypeError("Definition must be a pointer.")

    pointee = ptr.type

    if not isinstance(pointee, c_ast.TypeDecl):
        raise TypeError("Definition must be a type declaration.")

    # Special case: this is a pointer to self
    # Note that the pointee can either be a struct or an identifier.
    target = pointee.type
    target_name = target.name if isinstance(target, c_ast.Struct) else target.names[0]

    # An anonymous struct has no name and can never reference itself: without the explicit check,
    # an anonymous pointee inside an anonymous parent would compare None == None and be mistaken for self.
    if parent and target_name is not None and target_name == parent.name:
        return ptr_to_self()

    return ptr_to(type_decl_to_type(pointee))


def arr_to_type(arr: c_ast.ArrayDecl) -> type[obj]:
    """Converts a C array to a type.

    The element type can either be a plain type declaration or a pointer.
    The dimension of the array is read from the value of its dimension node and converted to an integer.

    Raises:
        TypeError: If arr is not an array declaration, or if its elements are neither type declarations nor pointers.
    """
    if not isinstance(arr, c_ast.ArrayDecl):
        raise TypeError("Definition must be an array.")

    element = arr.type

    if isinstance(element, c_ast.PtrDecl):
        element_type = ptr_to_type(element)
    elif isinstance(element, c_ast.TypeDecl):
        element_type = type_decl_to_type(element)
    else:
        raise TypeError("Definition must be a type declaration.")

    length = int(arr.dim.value)

    return array_of(element_type, length)


def type_decl_to_type(decl: c_ast.TypeDecl, parent: c_ast.Struct | None = None) -> type[obj]:
"""Converts a C type declaration to a type."""
if not isinstance(decl, c_ast.TypeDecl):
if (
not isinstance(decl, c_ast.TypeDecl)
and not isinstance(decl, c_ast.PtrDecl)
and not isinstance(decl, c_ast.ArrayDecl)
):
raise TypeError("Definition must be a type declaration.")

if isinstance(decl, c_ast.PtrDecl):
return ptr_to_type(decl, parent)

if isinstance(decl, c_ast.ArrayDecl):
return arr_to_type(decl)

if isinstance(decl.type, c_ast.Struct):
return struct_to_type(decl.type)

Expand All @@ -82,11 +147,25 @@
raise TypeError("Unsupported type.")


def typedef_to_pair(typedef: c_ast.Typedef) -> tuple[str, type[obj]]:
    """Converts a C typedef to a pair of name and definition.

    Returns:
        A tuple with the name introduced by the typedef and the type it stands for.

    Raises:
        TypeError: If typedef is not a typedef node, or if the aliased type is not a type declaration.
    """
    if not isinstance(typedef, c_ast.Typedef):
        raise TypeError("Definition must be a typedef.")

    aliased = typedef.type

    if not isinstance(aliased, c_ast.TypeDecl):
        raise TypeError("Definition must be a type declaration.")

    return "".join(typedef.name), type_decl_to_type(aliased)


def to_uniform_name(name: str) -> str:
"""Converts a name to a uniform name."""
name = name.replace("unsigned", "u")
name = name.replace("_Bool", "bool")
name = name.replace("uchar", "ubyte") # uchar is not a valid ctypes type
name = name.replace("uchar", "ubyte") # uchar is not a valid ctypes type

# We have to convert each intX, uintX, intX_t, uintX_t to the original char, short etc.
name = name.replace("uint8_t", "ubyte")
Expand All @@ -95,6 +174,9 @@
name = name.replace("int32_t", "int")
name = name.replace("int64_t", "longlong")

# We have to convert uintptr_t
name = name.replace("uintptr_t", "ulonglong")

# Only size_t, ssize_t and time_t can end with _t
if not any(x in name for x in ["size", "ssize", "time"]):
name = name.replace("_t", "")
Expand All @@ -104,20 +186,20 @@

def expand_includes(definition: str) -> str:
"""Expands includes in a C definition using the C preprocessor."""
# TODO: cache this result between subsequent runs of the same script

Check failure on line 189 in libdestruct/c/struct_parser.py

View workflow job for this annotation

GitHub Actions / lint (3.12)

Ruff (FIX002)

libdestruct/c/struct_parser.py:189:7: FIX002 Line contains TODO, consider resolving the issue

Check failure on line 189 in libdestruct/c/struct_parser.py

View workflow job for this annotation

GitHub Actions / lint (3.12)

Ruff (FIX002)

libdestruct/c/struct_parser.py:189:7: FIX002 Line contains TODO, consider resolving the issue
with tempfile.NamedTemporaryFile(mode="w", suffix=".c") as f:
f.write(definition)
f.flush()

result = subprocess.run(["cc", "-std=c99", "-E", f.name], capture_output=True, text=True, check=True) # noqa: S607
result = subprocess.run(["cc", "-std=c99", "-E", f.name], capture_output=True, text=True, check=True) # noqa: S607

return result.stdout


def cleanup_attributes(definition: str) -> str:
    """Strips every `__attribute__ ((...))` specifier from a C definition."""
    # The expression tolerates up to two levels of nested parentheses inside the attribute arguments.
    attribute_regex = re.compile(r"__attribute__\s*\(\((?:[^()]+|\((?:[^()]+|\([^()]*\))*\))*\)\)")
    return attribute_regex.sub("", definition)


Expand All @@ -139,4 +221,8 @@
if hasattr(ctypes, ctypes_name):
return getattr(ctypes, ctypes_name)

# Check if we have a typedef to resolve this
if identifier_name in TYPEDEFS:
return TYPEDEFS[identifier_name]

raise ValueError(f"Unsupported identifier: {identifier_name}.")
4 changes: 0 additions & 4 deletions libdestruct/common/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,3 @@
# Copyright (c) 2024 Roberto Alessandro Bertolini. All rights reserved.
# Licensed under the MIT license. See LICENSE file in the project root for details.
#

from libdestruct.common.ptr import ptr

__all__ = ["ptr"]
Loading
Loading