diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..9f11b75 --- /dev/null +++ b/.gitignore @@ -0,0 +1 @@ +.idea/ diff --git a/ida_kernelcache.py b/ida_kernelcache.py index 5ff2efc..e406f43 100644 --- a/ida_kernelcache.py +++ b/ida_kernelcache.py @@ -5,5 +5,4 @@ # A script to import the ida_kernelcache module into IDA. # -import ida_kernelcache -import ida_kernelcache as kc +from __future__ import absolute_import diff --git a/ida_kernelcache/__init__.py b/ida_kernelcache/__init__.py index 972518b..361bfae 100644 --- a/ida_kernelcache/__init__.py +++ b/ida_kernelcache/__init__.py @@ -7,23 +7,25 @@ # This isn't kernelcache-specific, but it's useful to have access to in the interpreter and other # scripts. -import ida_utilities +from __future__ import absolute_import +from __future__ import print_function -import build_struct -import class_struct -import classes -import kernel -import kplist -import metaclass -import offset -import segment -import stub -import tagged_pointers -import vtable +from . import build_struct +from . import class_struct +from . import classes +from . import ida_utilities +from . import kernel +from . import kplist +from . import metaclass +from . import offset +from . import segment +from . import stub +from . import tagged_pointers +from . import vtable +from .classes import (ClassInfo, collect_class_info, class_info) +from .kplist import (kplist_parse) +from .segment import (kernelcache_kext) -from classes import (ClassInfo, collect_class_info, class_info) -from kplist import (kplist_parse) -from segment import (kernelcache_kext) def kernelcache_process(untag_pointers=True): """Process the kernelcache in IDA for the first time. @@ -44,34 +46,34 @@ def kernelcache_process(untag_pointers=True): import idc def autoanalyze(): idc.Wait() + autoanalyze() if (kernel.kernelcache_format == kernel.KC_12_MERGED and untag_pointers and idaapi.IDA_SDK_VERSION < 720): - print 'Processing tagged kernelcache pointers' + print('Processing tagged kernelcache pointers') tagged_pointers.untag_pointers() autoanalyze() segment.initialize_segments() - print 'Initializing data offsets' + print('Initializing data offsets') offset.initialize_data_offsets() autoanalyze() - print 'Initializing vtables' + print('Initializing vtables') vtable.initialize_vtables() autoanalyze() vtable.initialize_vtable_symbols() autoanalyze() metaclass.initialize_metaclass_symbols() if kernel.kernelcache_format == kernel.KC_11_NORMAL: - print 'Creating offset and stub symbols' + print('Creating offset and stub symbols') offset.initialize_offset_symbols() autoanalyze() stub.initialize_stub_symbols() autoanalyze() - print 'Propagating vtable method symbols' + print('Propagating vtable method symbols') vtable.initialize_vtable_method_symbols() - print 'Initializing class structs' + print('Initializing class structs') class_struct.initialize_vtable_structs() class_struct.initialize_class_structs() autoanalyze() - print 'Done' - + print('Done') diff --git a/ida_kernelcache/build_struct.py b/ida_kernelcache/build_struct.py index 34d383d..968daa8 100644 --- a/ida_kernelcache/build_struct.py +++ b/ida_kernelcache/build_struct.py @@ -5,20 +5,20 @@ # A module to build an IDA structure automatically from code accesses. # -import collections +from __future__ import absolute_import import idc -import idautils -import idaapi -import ida_utilities as idau +from . import ida_utilities as idau _log = idau.make_log(3, __name__) + def field_name(offset): """Automatically generated IDA structs have their fields named by their absolute offset.""" return 'field_{:x}'.format(offset) + def create_struct_fields(sid=None, name=None, accesses=None, create=False, base=0): """Create an IDA struct with fields corresponding to the specified access pattern. @@ -67,6 +67,5 @@ def create_struct_fields(sid=None, name=None, accesses=None, create=False, base= else: success = False _log(1, 'Could not add {}.{} for access ({}, {}): {}', name, member, offset, size, - ret) + ret) return success - diff --git a/ida_kernelcache/class_struct.py b/ida_kernelcache/class_struct.py index 0b3da6c..24be016 100644 --- a/ida_kernelcache/class_struct.py +++ b/ida_kernelcache/class_struct.py @@ -116,21 +116,24 @@ class size reported in the kernel may actually be rounded up. However, for the m corresponding method, and set the type of the field accordingly. """ +from __future__ import absolute_import + import collections -import idc -import idautils import idaapi +import idautils +import idc -import ida_utilities as idau -import build_struct -import classes -import data_flow -import symbol -import vtable +from . import build_struct +from . import classes +from . import data_flow +from . import ida_utilities as idau +from . import symbol +from . import vtable _log = idau.make_log(2, __name__) + #### Vtable generation ############################################################################ def _populate_vmethods_struct(sid, classinfo): @@ -145,14 +148,14 @@ def _populate_vmethods_struct(sid, classinfo): if index < super_nmethods: continue # Get the base name of the method (i.e., for Class::method(args), extract method). - sym = idau.get_ea_name(vmethod, user=True) + sym = idau.get_ea_name(vmethod, user=True) base = symbol.method_name(sym) if not base: base = 'method_{}'.format(index) base = symbol.make_ident(base) # We'll try to use the base as our method name, but if it already exists, try appending # "_1", "_2", etc. - name = base + name = base suffix = 0 while name in members: suffix += 1 @@ -166,6 +169,7 @@ def _populate_vmethods_struct(sid, classinfo): return False return True + def _populate_vtable_struct(sid, classinfo): """Populate the ::vtable struct.""" # For each ancestor from root down to us (inclusive), add our ::vmethods struct. @@ -192,6 +196,7 @@ def _populate_vtable_struct(sid, classinfo): return False return True + def _create_vmethods_struct(classinfo): """Create the ::vmethods struct for a C++ class.""" sid = idau.struct_create(classinfo.classname + '::vmethods') @@ -200,6 +205,7 @@ def _create_vmethods_struct(classinfo): return False return _populate_vmethods_struct(sid, classinfo) + def _create_vtable_struct(classinfo): """Create the ::vtable struct for a C++ class.""" sid = idau.struct_create(classinfo.classname + '::vtable') @@ -208,14 +214,16 @@ def _create_vtable_struct(classinfo): return False return _populate_vtable_struct(sid, classinfo) + def initialize_vtable_structs(): """Create IDA structs representing the C++ virtual method tables in the kernel.""" classes.collect_class_info() - for classinfo in classes.class_info.values(): + for classinfo in list(classes.class_info.values()): _create_vmethods_struct(classinfo) - for classinfo in classes.class_info.values(): + for classinfo in list(classes.class_info.values()): _create_vtable_struct(classinfo) + #### Classes based on struct slices ############################################################### def _create_class_structs__slices(classinfo, endmarkers=True): @@ -223,7 +231,7 @@ def _create_class_structs__slices(classinfo, endmarkers=True): classname = classinfo.classname # Open or create the structs. sidf = idau.struct_open(classname + '::fields', create=True) - sid = idau.struct_open(classname, create=True) + sid = idau.struct_open(classname, create=True) if sid is None or sidf is None: _log(0, 'Could not create class structs for {}', classname) return None @@ -244,6 +252,7 @@ def _create_class_structs__slices(classinfo, endmarkers=True): _log(0, 'Could not create {}::end', classname) return sid, sidf, fields_start + def _populate_fields_struct__slices(sid, classinfo, fields_start, accesses): """Fill in the members of the ::fields struct based on the accesses.""" # Sanity check. @@ -252,6 +261,7 @@ def _populate_fields_struct__slices(sid, classinfo, fields_start, accesses): # For each (offset, size) access, add a member to the struct. build_struct.create_struct_fields(sid, accesses=accesses, base=fields_start) + def _populate_wrapper_struct__slices(sid, classinfo): """Fill in the members of the wrapper struct.""" # First add the vtable pointer. @@ -285,24 +295,27 @@ def _populate_wrapper_struct__slices(sid, classinfo): offset += size return True + def _populate_class_structs__slices(classinfo, class_accesses, sid, sidf, fields_start): """Populate the IDA structs for a C++ class.""" _populate_fields_struct__slices(sidf, classinfo, fields_start, - class_accesses[classinfo.classname]) + class_accesses[classinfo.classname]) _populate_wrapper_struct__slices(sid, classinfo) + #### Classes based on unions ###################################################################### def _create_class_structs__unions(classinfo): """Create the IDA structs for a C++ class.""" classname = classinfo.classname sidf = idau.struct_open(classname + '::fields', create=True) - sid = idau.struct_open(classname, union=True, create=True) + sid = idau.struct_open(classname, union=True, create=True) if sid is None or sidf is None: _log(0, 'Could not create class structs for {}', classname) return None return sid, sidf + def _populate_fields_struct__unions(sid, classinfo, accesses): """Fill in the members of the ::fields struct based on the accesses.""" # Sanity check. @@ -311,6 +324,7 @@ def _populate_fields_struct__unions(sid, classinfo, accesses): # For each (offset, size) access, add a member to the struct. build_struct.create_struct_fields(sid, accesses=accesses) + def _populate_wrapper_struct__unions(sid, classinfo): """Fill in the members of the wrapper struct.""" # First add the vtable pointer. @@ -334,11 +348,13 @@ def _populate_wrapper_struct__unions(sid, classinfo): return False return True + def _populate_class_structs__unions(classinfo, class_accesses, sid, sidf): """Populate the IDA structs for a C++ class.""" _populate_fields_struct__unions(sidf, classinfo, class_accesses[classinfo.classname]) _populate_wrapper_struct__unions(sid, classinfo) + #### Class generation ############################################################################# CLASS_SLICES = 'slices' @@ -346,23 +362,27 @@ def _populate_class_structs__unions(classinfo, class_accesses, sid, sidf): DEFAULT_STYLE = CLASS_SLICES + def initialize_class_structs(style=DEFAULT_STYLE): """Create IDA structs representing the C++ classes in the kernel. Depends on initialize_vtable_structs. """ + # A generator that will yield (virtual_method, classname, X0). def virtual_methods(): - for classinfo in classes.class_info.values(): + for classinfo in list(classes.class_info.values()): for _, vmethod, _ in vtable.class_vtable_overrides(classinfo, new=True, methods=True): if not idau.is_function_start(vmethod): _log(3, 'Non-function virtual method {:#x} in class {}', vmethod, - classinfo.classname) + classinfo.classname) continue yield vmethod, classinfo.classname, idautils.procregs.X0.reg + # Do the standard processing. process_functions(virtual_methods(), style=style) + def _collect_all_class_accesses(functions): """Collect all accesses to each class by examining the functions. @@ -370,23 +390,26 @@ def _collect_all_class_accesses(functions): """ all_accesses = collections.defaultdict(lambda: collections.defaultdict(set)) for function, classname, register in functions: - data_flow.pointer_accesses(function=function, initialization={ function: { register: 0 } }, - accesses=all_accesses[classname]) + data_flow.pointer_accesses(function=function, initialization={function: {register: 0}}, + accesses=all_accesses[classname]) return all_accesses + def _classify_class_accesses(all_accesses, style): """Categorize each access by specific class and build a list of operands to convert. Arm64 only. """ - all_classes = set() + all_classes = set() class_accesses = collections.defaultdict(collections.Counter) class_operands = collections.defaultdict(set) + # Helper for logging. def log_addrs(addresses_and_deltas): return ', '.join('{:#x}'.format(ea) for ea, dt in addresses_and_deltas) + # For each class, look at the accesses associated with that class. - for classname, accesses in all_accesses.items(): + for classname, accesses in list(all_accesses.items()): classinfo = classes.class_info.get(classname) if not classinfo: _log(-1, 'Skipping non-existent class {}', classname) @@ -396,7 +419,7 @@ def log_addrs(addresses_and_deltas): # class, that's the class it goes with. ancestors = list(classinfo.ancestors(inclusive=True)) all_classes.update(ancestors) - for offset_and_size, addresses_and_deltas in accesses.items(): + for offset_and_size, addresses_and_deltas in list(accesses.items()): offset, size = offset_and_size # Accesses to offsets 0-8 are actually not considered part of the ::fields struct since # they technically access the vtable. Skip it. @@ -417,14 +440,14 @@ def log_addrs(addresses_and_deltas): superclass_size = ci.superclass.class_size if offset < superclass_size: _log(-1, 'Class {} has spanning access ({}, {}) from addresses {}', - classname, offset, size, log_addrs(addresses_and_deltas)) + classname, offset, size, log_addrs(addresses_and_deltas)) if style != CLASS_UNIONS: break # If the access is unaligned with respect to the size, it's more likely to be # incorrect. Log it, but continue. if offset % size != 0: _log(2, 'Class {} has unaligned access ({}, {}) from addresses {}', - classname, offset, size, log_addrs(addresses_and_deltas)) + classname, offset, size, log_addrs(addresses_and_deltas)) # Looks good, add it to the collection. class_accesses[ci.classname][offset_and_size] += len(addresses_and_deltas) class_operands[classname].update(addresses_and_deltas) @@ -433,12 +456,13 @@ def log_addrs(addresses_and_deltas): # Almost certainly this is caused when the same register is used for two different # classes, but the path that gets this class to this access is impossible to satisfy. _log(-1, 'Class {} has out-of-bounds access ({}, {}) from addresses {}', - classname, offset, size, log_addrs(addresses_and_deltas)) + classname, offset, size, log_addrs(addresses_and_deltas)) return all_classes, class_accesses, class_operands + def _convert_operands_to_struct_offsets(access_addresses): """Convert the operands that generated struct accesses into struct offsets.""" - for classname, addresses_and_deltas in access_addresses.items(): + for classname, addresses_and_deltas in list(access_addresses.items()): sid = idau.struct_open(classname) if sid is not None: for ea, delta in addresses_and_deltas: @@ -450,6 +474,7 @@ def _convert_operands_to_struct_offsets(access_addresses): _log(1, 'Could not convert {:#x} to struct offset for class {} ' 'delta {}', ea, classname, delta) + def _set_class_style(style): """Set the global class style.""" global _style_was_set, _create_class_structs, _populate_class_structs @@ -467,12 +492,13 @@ def _set_class_style(style): raise ValueError('Incompatible style {}', style) # Set the appropriate functions based on the style. if style == CLASS_SLICES: - _create_class_structs = _create_class_structs__slices + _create_class_structs = _create_class_structs__slices _populate_class_structs = _populate_class_structs__slices else: - _create_class_structs = _create_class_structs__unions + _create_class_structs = _create_class_structs__unions _populate_class_structs = _populate_class_structs__unions + def process_functions(functions, style=DEFAULT_STYLE): """Process additional functions. @@ -500,12 +526,13 @@ def process_functions(functions, style=DEFAULT_STYLE): if data is not None: class_structs[classinfo] = data # Populate the class's structs using the access tuples. - for classinfo, data in class_structs.items(): + for classinfo, data in list(class_structs.items()): _populate_class_structs(classinfo, class_accesses, *data) # Finally, convert each operand that generated an access into an appropriately typed struct # offset reference. _convert_operands_to_struct_offsets(class_operands) + #### Vtable type propagation ###################################################################### def _propagate_virtual_method_type_for_method(classinfo, class_vindex, vmethod): @@ -526,15 +553,17 @@ def _propagate_virtual_method_type_for_method(classinfo, class_vindex, vmethod): vmethod_mid = idc.GetMemberId(vmethods_sid, vmethod_offset) if not bool(idc.SetType(vmethod_mid, vmethod_ptr_type)): _log(2, 'Could not set vmethod field type: {:x}, {}, {}', vmethod, classinfo.classname, - class_vindex) + class_vindex) return False return True + def _propagate_virtual_method_types_for_class(classinfo): """Propagate the types of a class's virtual methods to the vtable struct.""" for relative_index, vmethod in enumerate(vtable.class_vtable_methods(classinfo, new=True)): _propagate_virtual_method_type_for_method(classinfo, relative_index, vmethod) + def propagate_virtual_method_types_to_vtable_structs(): """Propagate the types of virtual methods to the corresponding entries in the vtables. @@ -543,6 +572,5 @@ def propagate_virtual_method_types_to_vtable_structs(): By default, IDA will guess a type with an empty argument list for any function whose symbol includes an unknown struct type, which inhibits proper type inference. """ - for classinfo in classes.class_info.values(): + for classinfo in list(classes.class_info.values()): _propagate_virtual_method_types_for_class(classinfo) - diff --git a/ida_kernelcache/classes.py b/ida_kernelcache/classes.py index 5c0555f..1858291 100644 --- a/ida_kernelcache/classes.py +++ b/ida_kernelcache/classes.py @@ -7,9 +7,13 @@ # information about C++ classes and populate global variables with the result. # -import collect_classes -import ida_utilities as idau -import vtable +from __future__ import absolute_import + +from builtins import object + +from . import collect_classes +from . import ida_utilities as idau +from . import vtable class_info = {} """A global map from class names to ClassInfo objects. See collect_class_info().""" @@ -17,18 +21,19 @@ vtables = {} """A global map from the address each virtual method tables in the kernelcache to its length.""" + class ClassInfo(object): """Information about a C++ class in a kernelcache.""" def __init__(self, classname, metaclass, vtable, vtable_length, class_size, superclass_name, - meta_superclass): - self.superclass = None - self.subclasses = set() - self.classname = classname - self.metaclass = metaclass - self.vtable = vtable - self.vtable_length = vtable_length - self.class_size = class_size + meta_superclass): + self.superclass = None + self.subclasses = set() + self.classname = classname + self.metaclass = metaclass + self.vtable = vtable + self.vtable_length = vtable_length + self.class_size = class_size self.superclass_name = superclass_name self.meta_superclass = meta_superclass @@ -37,10 +42,11 @@ def hex(x): if x is None: return repr(None) return '{:#x}'.format(x) + return 'ClassInfo({!r}, {}, {}, {}, {}, {!r}, {})'.format( - self.classname, hex(self.metaclass), hex(self.vtable), - self.vtable_length, self.class_size, self.superclass_name, - hex(self.meta_superclass)) + self.classname, hex(self.metaclass), hex(self.vtable), + self.vtable_length, self.class_size, self.superclass_name, + hex(self.meta_superclass)) @property def vtable_methods(self): @@ -82,6 +88,7 @@ def descendants(self, inclusive=False): for descendant in subclass.descendants(inclusive=True): yield descendant + def collect_class_info(): """Collect information about C++ classes defined in a kernelcache. diff --git a/ida_kernelcache/collect_classes.py b/ida_kernelcache/collect_classes.py index 7200aec..ea065cf 100644 --- a/ida_kernelcache/collect_classes.py +++ b/ida_kernelcache/collect_classes.py @@ -5,36 +5,47 @@ # Collects information about C++ classes in a kernelcache. # +from __future__ import absolute_import + from collections import defaultdict -import idc -import idautils import idaapi - -import ida_utilities as idau -import classes -import segment -import symbol -import vtable +import idautils +import idc +import six +from builtins import next +from builtins import object +from builtins import range +from six.moves import range + +from . import classes +from . import ida_utilities as idau +from . import segment +from . import symbol +from . import vtable _log = idau.make_log(1, __name__) # IDK where IDA defines these. -_MEMOP_PREINDEX = 0x20 +_MEMOP_PREINDEX = 0x20 _MEMOP_POSTINDEX = 0x80 -_MEMOP_WBINDEX = _MEMOP_PREINDEX | _MEMOP_POSTINDEX +_MEMOP_WBINDEX = _MEMOP_PREINDEX | _MEMOP_POSTINDEX + class _Regs(object): """A set of registers for _emulate_arm64.""" - class _Unknown: + class _Unknown(object): """A wrapper class indicating that the value is unknown.""" + def __add__(self, other): return _Regs.Unknown + def __radd__(self, other): return _Regs.Unknown - def __nonzero__(self): + + def __bool__(self): return False _reg_names = idautils.GetRegisterList() @@ -53,7 +64,7 @@ def clear(self, reg): pass def _reg(self, reg): - if isinstance(reg, (int, long)): + if isinstance(reg, six.integer_types): reg = _Regs._reg_names[reg] return reg @@ -69,11 +80,13 @@ def __setitem__(self, reg, value): else: self._regs[self._reg(reg)] = value & 0xffffffffffffffff + def _emulate_arm64(start, end, on_BL=None, on_RET=None): """A very basic partial Arm64 emulator that does just enough to find OSMetaClass information.""" # Super basic emulation. reg = _Regs() + def load(addr, dtyp): if not addr: return None @@ -84,9 +97,11 @@ def load(addr, dtyp): else: return None return idau.read_word(addr, size) + def cleartemps(): for t in ['X{}'.format(i) for i in range(0, 19)]: reg.clear(t) + for insn in idau.Instructions(start, end): _log(11, 'Processing instruction {:#x}', insn.ea) mnem = insn.get_canon_mnem() @@ -124,6 +139,7 @@ def cleartemps(): _log(10, 'Unrecognized instruction at address {:#x}', insn.ea) reg.clearall() + class _OneToOneMapFactory(object): """A factory to extract the largest one-to-one submap.""" @@ -138,7 +154,7 @@ def add_link(self, a, b): def _make_unique_oneway(self, xs_to_ys, ys_to_xs, bad_x=None): """Internal helper to make one direction unique.""" - for x, ys in xs_to_ys.items(): + for x, ys in list(xs_to_ys.items()): if len(ys) != 1: if bad_x: bad_x(x, ys) @@ -149,7 +165,7 @@ def _make_unique_oneway(self, xs_to_ys, ys_to_xs, bad_x=None): def _build_oneway(self, xs_to_ys): """Build a one-way mapping after pruning.""" x_to_y = dict() - for x, ys in xs_to_ys.items(): + for x, ys in list(xs_to_ys.items()): x_to_y[x] = next(iter(ys)) return x_to_y @@ -161,9 +177,11 @@ def build(self, bad_a=None, bad_b=None): self._make_unique_oneway(bs_to_as, as_to_bs, bad_b) return self._build_oneway(as_to_bs) + def _process_mod_init_func_for_metaclasses(func, found_metaclass): """Process a function from the __mod_init_func section for OSMetaClass information.""" _log(4, 'Processing function {}', idc.GetFunctionName(func)) + def on_BL(addr, reg): X0, X1, X3 = reg['X0'], reg['X1'], reg['X3'] if not (X0 and X1 and X3): @@ -173,69 +191,84 @@ def on_BL(addr, reg): if not idc.SegName(X1).endswith("__TEXT.__cstring") or not idc.SegName(X0): return found_metaclass(X0, idc.GetString(X1), X3, reg['X2'] or None) + _emulate_arm64(func, idc.FindFuncEnd(func), on_BL=on_BL) + def _process_mod_init_func_section_for_metaclasses(segstart, found_metaclass): """Process a __mod_init_func section for OSMetaClass information.""" segend = idc.SegEnd(segstart) for func in idau.ReadWords(segstart, segend): _process_mod_init_func_for_metaclasses(func, found_metaclass) + def _should_process_segment(seg, segname): """Check if we should process the specified segment.""" return segname.endswith('__DATA_CONST.__mod_init_func') or \ - segname == '__DATA.__kmod_init' + segname == '__DATA.__kmod_init' + def _collect_metaclasses(): """Collect OSMetaClass information from all kexts in the kernelcache.""" # Collect associations from class names to metaclass instances and vice versa. metaclass_to_classname_builder = _OneToOneMapFactory() - metaclass_to_class_size = dict() + metaclass_to_class_size = dict() metaclass_to_meta_superclass = dict() + def found_metaclass(metaclass, classname, class_size, meta_superclass): metaclass_to_classname_builder.add_link(metaclass, classname) - metaclass_to_class_size[metaclass] = class_size + metaclass_to_class_size[metaclass] = class_size metaclass_to_meta_superclass[metaclass] = meta_superclass + for ea in idautils.Segments(): segname = idc.SegName(ea) if not _should_process_segment(ea, segname): continue _log(2, 'Processing segment {}', segname) _process_mod_init_func_section_for_metaclasses(ea, found_metaclass) + # Filter out any class name (and its associated metaclasses) that has multiple metaclasses. # This can happen when multiple kexts define a class but only one gets loaded. def bad_classname(classname, metaclasses): _log(0, 'Class {} has multiple metaclasses: {}', classname, - ', '.join(['{:#x}'.format(mc) for mc in metaclasses])) + ', '.join(['{:#x}'.format(mc) for mc in metaclasses])) + # Filter out any metaclass (and its associated class names) that has multiple class names. I # have no idea why this would happen. def bad_metaclass(metaclass, classnames): _log(0, 'Metaclass {:#x} has multiple classes: {}', metaclass, - ', '.join(classnames)) + ', '.join(classnames)) + # Return the final dictionary of metaclass info. metaclass_to_classname = metaclass_to_classname_builder.build(bad_metaclass, bad_classname) metaclass_info = dict() - for metaclass, classname in metaclass_to_classname.items(): + for metaclass, classname in list(metaclass_to_classname.items()): meta_superclass = metaclass_to_meta_superclass[metaclass] superclass_name = metaclass_to_classname.get(meta_superclass, None) metaclass_info[metaclass] = classes.ClassInfo(classname, metaclass, None, None, - metaclass_to_class_size[metaclass], superclass_name, meta_superclass) + metaclass_to_class_size[metaclass], superclass_name, + meta_superclass) return metaclass_info -_VTABLE_GETMETACLASS = vtable.VTABLE_OFFSET + 7 + +_VTABLE_GETMETACLASS = vtable.VTABLE_OFFSET + 7 _MAX_GETMETACLASS_INSNS = 3 + def _get_vtable_metaclass(vtable_addr, metaclass_info): """Simulate the getMetaClass method of the vtable and check if it returns an OSMetaClass.""" getMetaClass = idau.read_word(vtable_addr + _VTABLE_GETMETACLASS * idau.WORD_SIZE) + def on_RET(reg): on_RET.ret = reg['X0'] + on_RET.ret = None _emulate_arm64(getMetaClass, getMetaClass + idau.WORD_SIZE * _MAX_GETMETACLASS_INSNS, - on_RET=on_RET) + on_RET=on_RET) if on_RET.ret in metaclass_info: return on_RET.ret + def _process_const_section_for_vtables(segstart, metaclass_info, found_vtable): """Process a __const section to search for virtual method tables.""" segend = idc.SegEnd(segstart) @@ -249,11 +282,13 @@ def _process_const_section_for_vtables(segstart, metaclass_info, found_vtable): found_vtable(metaclass, addr, length) addr += length * idau.WORD_SIZE + def _collect_vtables(metaclass_info): """Use OSMetaClass information to search for virtual method tables.""" # Build a mapping from OSMetaClass instances to virtual method tables. metaclass_to_vtable_builder = _OneToOneMapFactory() vtable_lengths = {} + # Define a callback for when we find a vtable. def found_vtable(metaclass, vtable, length): # Add our vtable length. @@ -271,11 +306,12 @@ def found_vtable(metaclass, vtable, length): vtable_classname = symbol.vtable_symbol_get_class(vtable_symbol) if vtable_classname != classname: _log(2, 'Declining association between metaclass {:x} ({}) and vtable {:x} ({})', - metaclass, classname, vtable, vtable_classname) + metaclass, classname, vtable, vtable_classname) return # Add a link if they are in the same kext. if segment.kernelcache_kext(metaclass) == segment.kernelcache_kext(vtable): metaclass_to_vtable_builder.add_link(metaclass, vtable) + # Process all the segments with found_vtable(). for ea in idautils.Segments(): segname = idc.SegName(ea) @@ -283,6 +319,7 @@ def found_vtable(metaclass, vtable, length): continue _log(2, 'Processing segment {}', segname) _process_const_section_for_vtables(ea, metaclass_info, found_vtable) + # If a metaclass has multiple vtables, that's really weird, unless the metaclass is # OSMetaClass's metaclass. In that case all OSMetaClass subclasses will have their vtables # refer back to OSMetaClass's metaclass. @@ -291,22 +328,24 @@ def bad_metaclass(metaclass, vtables): if metaclass_name != 'OSMetaClass': vtinfo = ['{:#x}'.format(vt) for vt in vtables] _log(0, 'Metaclass {:#x} ({}) has multiple vtables: {}', metaclass, - metaclass_name, ', '.join(vtinfo)) + metaclass_name, ', '.join(vtinfo)) + # If a vtable has multiple metaclasses, that's really weird. def bad_vtable(vtable, metaclasses): mcinfo = ['{:#x} ({})'.format(mc, metaclass_info[mc].classname) for mc in metaclasses] _log(0, 'Vtable {:#x} has multiple metaclasses: {}', vtable, ', '.join(mcinfo)) + metaclass_to_vtable = metaclass_to_vtable_builder.build(bad_metaclass, bad_vtable) # The resulting mapping may have fewer metaclasses than metaclass_info. class_info = dict() - for metaclass, classinfo in metaclass_info.items(): + for metaclass, classinfo in list(metaclass_info.items()): # Add the vtable and its length, which we didn't have earlier. If the current class doesn't # have a vtable, take it from the superclass (recursing if necessary). metaclass_with_vtable = metaclass while metaclass_with_vtable: vtable = metaclass_to_vtable.get(metaclass_with_vtable, None) if vtable: - classinfo.vtable = vtable + classinfo.vtable = vtable classinfo.vtable_length = vtable_lengths[vtable] break classinfo_with_vtable = metaclass_info.get(metaclass_with_vtable, None) @@ -323,10 +362,12 @@ def bad_vtable(vtable, metaclasses): class_info[classinfo.classname] = classinfo return class_info, vtable_lengths + def _check_filetype(filetype): """Checks that the filetype is compatible before trying to process it.""" return 'Mach-O' in filetype and 'ARM64' in filetype + def collect_class_info_internal(): """Collect information about C++ classes defined in a kernelcache. @@ -348,4 +389,3 @@ def collect_class_info_internal(): return None _log(1, 'Done') return class_info, all_vtables - diff --git a/ida_kernelcache/data_flow.py b/ida_kernelcache/data_flow.py index 7c7f411..9021db5 100644 --- a/ida_kernelcache/data_flow.py +++ b/ida_kernelcache/data_flow.py @@ -15,13 +15,16 @@ """ +from __future__ import absolute_import + import collections -import idc -import idautils import idaapi +import idautils +from builtins import range +from six.moves import range -import ida_utilities as idau +from . import ida_utilities as idau _log = idau.make_log(2, __name__) @@ -35,14 +38,15 @@ ] _INSN_OP_DTYP_SZ = { - idaapi.dt_byte: 1, - idaapi.dt_word: 2, + idaapi.dt_byte: 1, + idaapi.dt_word: 2, idaapi.dt_dword: 4, idaapi.dt_qword: 8, } _ARM64_WRITEBACK = 0x20 | 0x80 + def _create_flow(function, bounds): """Create a FlowChart.""" f, b = None, None @@ -55,6 +59,7 @@ def _create_flow(function, bounds): b = (start, end) return idaapi.FlowChart(f=f, bounds=b) + def _add_blocks_to_queue(queue, flow, addresses): for ea in addresses: for bb in flow: @@ -64,6 +69,7 @@ def _add_blocks_to_queue(queue, flow, addresses): else: _log(2, 'Address {:#x} not contained in any basic block', ea) + def _pointer_accesses_process_block(start, end, fix, entry_regs, accesses): """Process a basic block for _pointer_accesses_data_flow. @@ -73,8 +79,9 @@ def _pointer_accesses_process_block(start, end, fix, entry_regs, accesses): # STR X0, [X19,X8] # We try to catch these by keeping track of local constants within a block. RegValue = collections.namedtuple('RegValue', ['type', 'value']) - DELTA = 0 # Pointer delta from start of target memory region. - CONST = 1 # Constant value + DELTA = 0 # Pointer delta from start of target memory region. + CONST = 1 # Constant value + def get_reg(reg, type): rv = regs.get(reg, None) if rv is None or rv.type != type: @@ -82,7 +89,7 @@ def get_reg(reg, type): return rv.value # Initialize our registers and create accessor functions. - regs = { reg: RegValue(DELTA, delta) for reg, delta in entry_regs.items() } + regs = {reg: RegValue(DELTA, delta) for reg, delta in list(entry_regs.items())} # For each instruction in the basic block, see if any new register gets assigned. for insn in idau.Instructions(start, end): @@ -91,7 +98,7 @@ def get_reg(reg, type): # the caller to ensure that this initialization is correct. fixed_regs_and_deltas = fix.get(insn.ea) if fixed_regs_and_deltas: - for reg, delta in fixed_regs_and_deltas.items(): + for reg, delta in list(fixed_regs_and_deltas.items()): _log(6, '\t\t{:x} fix {}={}', insn.ea, reg, delta) regs[reg] = RegValue(DELTA, delta) # If this is an access instruction, record the access. See comment about auxpref below. @@ -115,7 +122,7 @@ def get_reg(reg, type): op_offset = None if op.type == idaapi.o_displ: op_offset = op.addr - else: # op.type == idaapi.o_phrase + else: # op.type == idaapi.o_phrase op_offset_reg = op.specflag1 & 0xff op_offset = get_reg(op_offset_reg, CONST) if op_offset is None: @@ -137,21 +144,21 @@ def get_reg(reg, type): _log(6, '\t\t{:x} add {}={}', insn.ea, insn.Op1.reg, regs[insn.Op2.reg].value) regs[insn.Op1.reg] = regs[insn.Op2.reg] elif (insn.itype == idaapi.ARM_mov - and insn.Op1.type == idaapi.o_reg - and insn.Op2.type == idaapi.o_imm - and insn.Op3.type == idaapi.o_void - and insn.Op1.dtyp in (idaapi.dt_dword, idaapi.dt_qword)): + and insn.Op1.type == idaapi.o_reg + and insn.Op2.type == idaapi.o_imm + and insn.Op3.type == idaapi.o_void + and insn.Op1.dtyp in (idaapi.dt_dword, idaapi.dt_qword)): # MOV Xdst, #imm _log(7, '\t\t{:x} const {}={}', insn.ea, insn.Op1.reg, insn.Op2.value) regs[insn.Op1.reg] = RegValue(CONST, insn.Op2.value) elif (insn.itype == idaapi.ARM_add - and insn.Op1.type == idaapi.o_reg - and insn.Op2.type == idaapi.o_reg - and insn.Op3.type == idaapi.o_imm - and insn.Op4.type == idaapi.o_void - and insn.Op1.dtyp == idaapi.dt_qword - and insn.Op2.dtyp == idaapi.dt_qword - and insn.Op2.reg in regs): + and insn.Op1.type == idaapi.o_reg + and insn.Op2.type == idaapi.o_reg + and insn.Op3.type == idaapi.o_imm + and insn.Op4.type == idaapi.o_void + and insn.Op1.dtyp == idaapi.dt_qword + and insn.Op2.dtyp == idaapi.dt_qword + and insn.Op2.reg in regs): # ADD Xdst, Xsrc, #amt op2 = regs[insn.Op2.reg] _log(6, '\t\t{:x} add {}={}+{}', insn.ea, insn.Op1.reg, op2.value, insn.Op3.value) @@ -161,7 +168,7 @@ def get_reg(reg, type): # does not use the temporary registers after a call, but just to be safe, clear all the # temporary registers. _log(6, '\t\t{:x} clear temps', insn.ea) - for r in xrange(0, 19): + for r in range(0, 19): regs.pop(getattr(idautils.procregs, 'X{}'.format(r)).reg, None) else: # This is an unrecognized instruction. Clear all the registers it modifies. @@ -181,14 +188,15 @@ def get_reg(reg, type): or (insn.auxpref & _ARM64_WRITEBACK and op.type == idaapi.o_displ)): _log(6, '\t\t{:x} clear {}', insn.ea, op.reg) regs.pop(op.reg, None) - return { reg: rv.value for reg, rv in regs.items() if rv.type == DELTA } + return {reg: rv.value for reg, rv in list(regs.items()) if rv.type == DELTA} + def _pointer_accesses_data_flow(flow, initialization, accesses): """Run the data flow for pointer_accesses.""" # bb_regs maps each block id to another map from register ids to corresponding struct offsets # at the start of the block. We don't consider the case where a register could contain more # than one possible offset. - bb_regs = { bb.id: {} for bb in flow } + bb_regs = {bb.id: {} for bb in flow} # We'll start by processing those blocks that have an initial value. queue = collections.deque() _add_blocks_to_queue(queue, flow, initialization) @@ -210,7 +218,7 @@ def _pointer_accesses_data_flow(flow, initialization, accesses): _log(3, 'Basic block {} {:x}-{:x}', bb.id, bb.startEA, bb.endEA) _log(4, '\tregs@entry = {}', entry_regs) exit_regs = _pointer_accesses_process_block(bb.startEA, bb.endEA, initialization, - entry_regs, accesses) + entry_regs, accesses) _log(4, '\tregs@exit = {}', exit_regs) _log(4, '\tsuccs = {}', [s.id for s in bb.succs()]) for succ in bb.succs(): @@ -228,6 +236,7 @@ def _pointer_accesses_data_flow(flow, initialization, accesses): if update: queue.append(succ) + def pointer_accesses(function=None, bounds=None, initialization=None, accesses=None): """Collect the set of accesses to a pointer register. @@ -277,4 +286,3 @@ def pointer_accesses(function=None, bounds=None, initialization=None, accesses=N if create: accesses = dict(accesses) return accesses - diff --git a/ida_kernelcache/ida_utilities.py b/ida_kernelcache/ida_utilities.py index 2b3ee86..1772f9f 100644 --- a/ida_kernelcache/ida_utilities.py +++ b/ida_kernelcache/ida_utilities.py @@ -5,22 +5,33 @@ # Some utility functions to make working with IDA easier. # +from __future__ import absolute_import +from __future__ import print_function + from collections import deque -import idc -import idautils import idaapi +import idautils +import idc +from builtins import next +from builtins import object +from builtins import range +from six.moves import range + def make_log(log_level, module): """Create a logging function.""" + def log(level, *args): if len(args) == 0: return level <= log.level if level <= log.level: - print module + ': ' + args[0].format(*args[1:]) + print(module + ': ' + args[0].format(*args[1:])) + log.level = log_level return log + _log = make_log(1, __name__) WORD_SIZE = 0 @@ -32,6 +43,7 @@ def log(level, *args): LITTLE_ENDIAN = True """Whether the current platform is little-endian. Always the opposite of BIG_ENDIAN.""" + def _initialize(): # https://reverseengineering.stackexchange.com/questions/11396/how-to-get-the-cpu-architecture-via-idapython global WORD_SIZE, LITTLE_ENDIAN, BIG_ENDIAN @@ -48,19 +60,25 @@ def _initialize(): BIG_ENDIAN = info.mf LITTLE_ENDIAN = not BIG_ENDIAN + _initialize() + def iterlen(iterator): """Consume an iterator and return its length.""" return sum(1 for _ in iterator) + class AlignmentError(Exception): """An exception that is thrown if an address with improper alignment is encountered.""" + def __init__(self, address): self.address = address + def __str__(self): return repr(self.address) + def is_mapped(ea, size=1, value=True): """Check if the given address is mapped. @@ -86,6 +104,7 @@ def is_mapped(ea, size=1, value=True): else: return idaapi.getseg(ea) and (size == 1 or idaapi.getseg(ea + size - 1)) + def get_name_ea(name, fromaddr=idc.BADADDR): """Get the address of a name. @@ -106,6 +125,7 @@ def get_name_ea(name, fromaddr=idc.BADADDR): """ return idc.LocByNameEx(fromaddr, name) + def get_ea_name(ea, fromaddr=idc.BADADDR, true=False, user=False): """Get the name of an address. @@ -132,6 +152,7 @@ def get_ea_name(ea, fromaddr=idc.BADADDR, true=False, user=False): else: return idc.NameEx(fromaddr, ea) + def set_ea_name(ea, name, rename=False, auto=False): """Set the name of an address. @@ -155,19 +176,23 @@ def set_ea_name(ea, name, rename=False, auto=False): flags |= idc.SN_AUTO return bool(idc.MakeNameEx(ea, name, flags)) + def _insn_op_stroff_700(insn, n, sid, delta): """A wrapper of idc.OpStroffEx for IDA 7.""" return idc.OpStroffEx(insn, n, sid, delta) + def _insn_op_stroff_695(insn, n, sid, delta): """A wrapper of idc.OpStroffEx for IDA 6.95.""" return idc.OpStroffEx(insn.ea, n, sid, delta) + if idaapi.IDA_SDK_VERSION < 700: insn_op_stroff = _insn_op_stroff_695 else: insn_op_stroff = _insn_op_stroff_700 + def _addresses(start, end, step, partial, aligned): """A generator to iterate over the addresses in an address range.""" addr = start @@ -181,12 +206,13 @@ def _addresses(start, end, step, partial, aligned): if addr < end and partial: yield addr + def _mapped_addresses(addresses, step, partial, allow_unmapped): """Wrap an _addresses generator with a filter that checks whether the addresses are mapped.""" for addr in addresses: start_is_mapped = is_mapped(addr) - end_is_mapped = is_mapped(addr + step - 1) - fully_mapped = start_is_mapped and end_is_mapped + end_is_mapped = is_mapped(addr + step - 1) + fully_mapped = start_is_mapped and end_is_mapped allowed_partial = partial and (start_is_mapped or end_is_mapped) # Yield the value if it's sufficiently mapped. Otherwise, break if we stop at an # unmapped address. @@ -195,8 +221,9 @@ def _mapped_addresses(addresses, step, partial, allow_unmapped): elif not allow_unmapped: break + def Addresses(start, end=None, step=1, length=None, partial=False, aligned=False, - unmapped=True, allow_unmapped=False): + unmapped=True, allow_unmapped=False): """A generator to iterate over the addresses in an address range. Arguments: @@ -226,7 +253,7 @@ def Addresses(start, end=None, step=1, length=None, partial=False, aligned=False end_addr = start + length * step if end is not None and end != end_addr: raise ValueError('Invalid arguments: start={}, end={}, step={}, length={}' - .format(start, end, step, length)) + .format(start, end, step, length)) end = end_addr if end is None: raise ValueError('Invalid arguments: end={}, length={}'.format(end, length)) @@ -238,6 +265,7 @@ def Addresses(start, end=None, step=1, length=None, partial=False, aligned=False else: return _mapped_addresses(addresses, step, partial, allow_unmapped) + def _instructions_by_range(start, end): """A generator to iterate over instructions in a range.""" pc = start @@ -251,15 +279,17 @@ def _instructions_by_range(start, end): yield insn pc = next_pc + def _instructions_by_count(pc, count): """A generator to iterate over a specified number of instructions.""" - for i in xrange(count): + for i in range(count): insn = idautils.DecodeInstruction(pc) if insn is None: break yield insn pc += insn.size + def Instructions(start, end=None, count=None): """A generator to iterate over instructions. @@ -284,18 +314,21 @@ def Instructions(start, end=None, count=None): else: return _instructions_by_count(start, count) + _FF_FLAG_FOR_SIZE = { - 1: idc.FF_BYTE, - 2: idc.FF_WORD, - 4: idc.FF_DWRD, - 8: idc.FF_QWRD, + 1: idc.FF_BYTE, + 2: idc.FF_WORD, + 4: idc.FF_DWRD, + 8: idc.FF_QWRD, 16: idc.FF_OWRD, } + def word_flag(wordsize=WORD_SIZE): """Get the FF_xxxx flag for the given word size.""" return _FF_FLAG_FOR_SIZE.get(wordsize, 0) + def read_word(ea, wordsize=WORD_SIZE): """Get the word at the given address. @@ -314,6 +347,7 @@ def read_word(ea, wordsize=WORD_SIZE): return idc.Qword(ea) raise ValueError('Invalid argument: wordsize={}'.format(wordsize)) + def patch_word(ea, value, wordsize=WORD_SIZE): """Patch the word at the given address. @@ -331,18 +365,23 @@ def patch_word(ea, value, wordsize=WORD_SIZE): else: raise ValueError('Invalid argument: wordsize={}'.format(wordsize)) + class objectview(object): """A class to present an object-like view of a struct.""" + # https://goodcode.io/articles/python-dict-object/ def __init__(self, fields, addr, size): self.__dict__ = fields - self.__addr = addr - self.__size = size + self.__addr = addr + self.__size = size + def __int__(self): return self.__addr + def __len__(self): return self.__size + def _read_struct_member_once(ea, flags, size, member_sid, member_size, asobject): """Read part of a struct member for _read_struct_member.""" if idc.isByte(flags): @@ -366,6 +405,7 @@ def _read_struct_member_once(ea, flags, size, member_sid, member_size, asobject) return value, member_size return None, size + def _read_struct_member(struct, sid, union, ea, offset, name, size, asobject): """Read a member into a struct for read_struct.""" flags = idc.GetMemberFlag(sid, offset) @@ -384,7 +424,7 @@ def _read_struct_member(struct, sid, union, ea, offset, name, size, asobject): processed = 0 while processed < size: value, read = _read_struct_member_once(member + processed, flags, size, member_sid, - member_ssize, asobject) + member_ssize, asobject) assert size % read == 0 array.append(value) processed += read @@ -394,6 +434,7 @@ def _read_struct_member(struct, sid, union, ea, offset, name, size, asobject): value = array struct[name] = value + def read_struct(ea, struct=None, sid=None, members=None, asobject=False): """Read a structure from the given address. @@ -436,10 +477,12 @@ def read_struct(ea, struct=None, sid=None, members=None, asobject=False): struct = objectview(struct, ea, idc.GetStrucSize(sid)) return struct + def null_terminated(string): """Extract the NULL-terminated C string from the given array of bytes.""" return string.split('\0', 1)[0] + def _convert_address_to_function(func): """Convert an address that IDA has classified incorrectly into a proper function.""" # If everything goes wrong, we'll try to restore this function. @@ -449,7 +492,7 @@ def _convert_address_to_function(func): if not is_mapped(func): # Well, that's awkward. return False - item = idc.ItemHead(func) + item = idc.ItemHead(func) itemend = idc.ItemEnd(func) if item != idc.BADADDR: _log(1, 'Undefining item {:#x} - {:#x}', item, itemend) @@ -491,16 +534,19 @@ def _convert_address_to_function(func): idc.MakeFunction(orig) return False + def is_function_start(ea): """Return True if the address is the start of a function.""" return idc.GetFunctionAttr(ea, idc.FUNCATTR_START) == ea + def force_function(addr): """Ensure that the given address is a function type, converting it if necessary.""" if is_function_start(addr): return True return _convert_address_to_function(addr) + def ReadWords(start, end, step=WORD_SIZE, wordsize=WORD_SIZE, addresses=False): """A generator to iterate over the data words in the given address range. @@ -525,6 +571,7 @@ def ReadWords(start, end, step=WORD_SIZE, wordsize=WORD_SIZE, addresses=False): value = (word, addr) if addresses else word yield value + def WindowWords(start, end, window_size, wordsize=WORD_SIZE): """A generator to iterate over a sliding window of data words in the given address range. @@ -541,6 +588,7 @@ def WindowWords(start, end, window_size, wordsize=WORD_SIZE): addr += wordsize yield window, addr + def struct_create(name, union=False): """Create an IDA struct with the given name, returning the SID.""" # AddStrucEx is documented as returning -1 on failure, but in practice it seems to return @@ -551,6 +599,7 @@ def struct_create(name, union=False): return None return sid + def struct_open(name, create=False, union=None): """Get the SID of the IDA struct with the given name, optionally creating it.""" sid = idc.GetStrucIdByName(name) @@ -564,6 +613,7 @@ def struct_open(name, create=False, union=None): return None return sid + def struct_member_offset(sid, name): """A version of IDA's GetMemberOffset() that also works with unions.""" struct = idaapi.get_struc(sid) @@ -574,6 +624,7 @@ def struct_member_offset(sid, name): return None return member.soff + def struct_add_word(sid, name, offset, size, count=1): """Add a word (integer) to a structure. @@ -581,6 +632,7 @@ def struct_add_word(sid, name, offset, size, count=1): """ return idc.AddStrucMember(sid, name, offset, idc.FF_DATA | word_flag(size), -1, size * count) + def struct_add_ptr(sid, name, offset, count=1, type=None): """Add a pointer to a structure. @@ -596,6 +648,7 @@ def struct_add_ptr(sid, name, offset, count=1, type=None): idc.SetType(mid, type) return ret + def struct_add_struct(sid, name, offset, msid, count=1): """Add a structure member to a structure. @@ -603,4 +656,3 @@ def struct_add_struct(sid, name, offset, msid, count=1): """ size = idc.GetStrucSize(msid) return idc.AddStrucMember(sid, name, offset, idc.FF_DATA | idc.FF_STRU, msid, size * count) - diff --git a/ida_kernelcache/internal.py b/ida_kernelcache/internal.py index c88bc23..97c8348 100644 --- a/ida_kernelcache/internal.py +++ b/ida_kernelcache/internal.py @@ -5,25 +5,32 @@ # Miscellaneous internal routines. # +from __future__ import absolute_import + from collections import defaultdict import idc +from builtins import range +from builtins import str +from six.moves import range + +from . import ida_utilities as idau -import ida_utilities as idau def make_name_generator(suffix, max_count=999999): """Create a unique name generator using the specified template factory.""" next_index_dict = defaultdict(lambda: 1) + def get_next(name): assert name, 'Invalid symbol name passed to name generator' assert suffix not in name, 'Symbol name passed to name generator already contains suffix' template = name + suffix - for index in xrange(next_index_dict[name], max_count): + for index in range(next_index_dict[name], max_count): new_name = template + str(index) if idau.get_name_ea(new_name) == idc.BADADDR: next_index_dict[name] = index return new_name new_index_dict[name] = max_count return None - return get_next + return get_next diff --git a/ida_kernelcache/kernel.py b/ida_kernelcache/kernel.py index 5b1e664..4c37238 100644 --- a/ida_kernelcache/kernel.py +++ b/ida_kernelcache/kernel.py @@ -6,22 +6,27 @@ # prior initialization via ida_kernelcache is necessary. # -import idc -import idautils +from __future__ import absolute_import + import idaapi +import idautils +import idc -import ida_utilities as idau -import kplist +from . import ida_utilities as idau +from . import kplist _log = idau.make_log(0, __name__) + def find_kernel_base(): """Find the kernel base.""" return idaapi.get_fileregion_ea(0) + base = find_kernel_base() """The kernel base address (the address of the main kernel Mach-O header).""" + def _find_prelink_info_segments(): """Find all candidate __PRELINK_INFO segments (or sections). @@ -39,9 +44,10 @@ def _find_prelink_info_segments(): _log(0, 'Could not find any __PRELINK_INFO segment candidates') elif len(segments) > 1: _log(1, 'Multiple segment names contain __PRELINK_INFO: {}', - [idc.SegName(seg) for seg in segments]) + [idc.SegName(seg) for seg in segments]) return segments + def parse_prelink_info(): """Find and parse the kernel __PRELINK_INFO dictionary.""" segments = _find_prelink_info_segments() @@ -53,16 +59,18 @@ def parse_prelink_info(): _log(0, 'Could not find __PRELINK_INFO') return None + prelink_info = parse_prelink_info() """The kernel __PRELINK_INFO dictionary.""" KC_11_NORMAL = '11-normal' KC_12_MERGED = '12-merged' + def _get_kernelcache_format(): if '_PrelinkLinkKASLROffsets' in prelink_info: return KC_11_NORMAL return KC_12_MERGED -kernelcache_format = _get_kernelcache_format() +kernelcache_format = _get_kernelcache_format() diff --git a/ida_kernelcache/kplist.py b/ida_kernelcache/kplist.py index 08e072a..36e998d 100644 --- a/ida_kernelcache/kplist.py +++ b/ida_kernelcache/kplist.py @@ -6,11 +6,17 @@ # - https://github.com/python/cpython/blob/3.6/Lib/plistlib.py # +from __future__ import absolute_import + import base64 from xml.etree.ElementTree import XMLTreeBuilder +from builtins import object + + class _KPlistBuilder(object): """A companion class for XMLTreeBuilder to parse a kernel-style property list.""" + # IMPLEMENTATION IDEA: The XMLTreeBuilder calls us at four points: when there's a new start # tag, when there's a new end tag, when there's data from a tag, and when there's no more data. # We build objects incrementally out of these notifications. Each tag type can implement @@ -29,29 +35,29 @@ class _KPlistBuilder(object): def __init__(self): self.collection_stack = [] - self.ids = {} - self.current_data = [] - self.current_id = None - self.current_idref = None - self.current_key = None - self.root = None - self.start_handler = { - 'dict': self.start_dict, - 'array': self.start_array, + self.ids = {} + self.current_data = [] + self.current_id = None + self.current_idref = None + self.current_key = None + self.root = None + self.start_handler = { + 'dict': self.start_dict, + 'array': self.start_array, } - self.end_handler = { - 'dict': self.end_dict, - 'key': self.end_key, - 'true': self.end_true, - 'false': self.end_false, - 'integer': self.end_integer, - 'string': self.end_string, - 'data': self.end_data, + self.end_handler = { + 'dict': self.end_dict, + 'key': self.end_key, + 'true': self.end_true, + 'false': self.end_false, + 'integer': self.end_integer, + 'string': self.end_string, + 'data': self.end_data, } - self.attributes = { - 'integer': ('size',), + self.attributes = { + 'integer': ('size',), } - self.tags = set(self.start_handler.keys()).union(self.end_handler.keys()) + self.tags = set(self.start_handler.keys()).union(list(self.end_handler.keys())) # XMLTreeBuilder calls. @@ -72,7 +78,7 @@ def start(self, tag, attr): original_tag, _ = self.ids[self.current_idref] if tag != original_tag: raise ValueError('tag "{}" has IDREF to element with different tag "{}"' - .format(tag, original_tag)) + .format(tag, original_tag)) if len(attr) > 1: raise ValueError('tag has IDREF and another attribute') return @@ -200,13 +206,13 @@ def end_string(self): def end_data(self): return base64.b64decode(self.get_data()) + def kplist_parse(plist): """Parse a kernel-style property list.""" try: builder = _KPlistBuilder() - parser = XMLTreeBuilder(target=builder) + parser = XMLTreeBuilder(target=builder) parser.feed(plist) return parser.close() except: return None - diff --git a/ida_kernelcache/metaclass.py b/ida_kernelcache/metaclass.py index 44bed63..9b6b014 100644 --- a/ida_kernelcache/metaclass.py +++ b/ida_kernelcache/metaclass.py @@ -5,26 +5,29 @@ # A module for working with OSMetaClass instances in the kernelcache. # -import idc +from __future__ import absolute_import -import ida_utilities as idau -import classes -import symbol +from . import classes +from . import ida_utilities as idau +from . import symbol _log = idau.make_log(0, __name__) + def metaclass_name_for_class(classname): """Return the name of the C++ metaclass for the given class.""" if '::' in classname: return None return classname + '::MetaClass' + def metaclass_instance_name_for_class(classname): """Return the name of the C++ metaclass instance for the given class.""" if '::' in classname: return None return classname + '::gMetaClass' + def metaclass_symbol_for_class(classname): """Get the symbol name for the OSMetaClass instance for the given class name. @@ -39,6 +42,7 @@ def metaclass_symbol_for_class(classname): return None return symbol.global_name(metaclass_instance) + def add_metaclass_symbol(metaclass, classname): """Add a symbol for the OSMetaClass instance at the specified address. @@ -52,10 +56,11 @@ def add_metaclass_symbol(metaclass, classname): metaclass_symbol = metaclass_symbol_for_class(classname) if not idau.set_ea_name(metaclass, metaclass_symbol): _log(0, 'Address {:#x} already has name {} instead of OSMetaClass instance symbol {}' - .format(metaclass, idau.get_ea_name(metaclass), metaclass_symbol)) + .format(metaclass, idau.get_ea_name(metaclass), metaclass_symbol)) return False return True + def initialize_metaclass_symbols(): """Populate IDA with OSMetaClass instance symbols for an iOS kernelcache. @@ -63,12 +68,11 @@ def initialize_metaclass_symbols(): instance. """ classes.collect_class_info() - for classname, classinfo in classes.class_info.items(): + for classname, classinfo in list(classes.class_info.items()): if classinfo.metaclass: _log(1, 'Class {} has OSMetaClass instance at {:#x}', classname, classinfo.metaclass) if not add_metaclass_symbol(classinfo.metaclass, classname): _log(0, 'Could not add metaclass symbol for class {} at address {:#x}', classname, - classinfo.metaclass) + classinfo.metaclass) else: _log(1, 'Class {} has no known OSMetaClass instance', classname) - diff --git a/ida_kernelcache/offset.py b/ida_kernelcache/offset.py index 7c72005..fef77e6 100644 --- a/ida_kernelcache/offset.py +++ b/ida_kernelcache/offset.py @@ -5,18 +5,20 @@ # Functions for converting and symbolicating offsets. # +from __future__ import absolute_import + import re -import idc import idautils +import idc -import ida_utilities as idau -import internal -import kernel -import stub +from . import ida_utilities as idau +from . import internal +from . import stub _log = idau.make_log(1, __name__) + def initialize_data_offsets(): """Convert offsets in data segments into offsets in IDA. @@ -35,12 +37,14 @@ def initialize_data_offsets(): if idau.is_mapped(word, value=False): idc.OpOff(ea, 0, 0) + kernelcache_offset_suffix = '___offset_' """The suffix that gets appended to a symbol to create the offset name, without the offset ID.""" _offset_regex = re.compile(r"^(\S+)" + kernelcache_offset_suffix + r"\d+$") """A regular expression to match and extract the target name from an offset symbol.""" + def offset_name_target(offset_name): """Get the target to which an offset name refers. @@ -51,6 +55,7 @@ def offset_name_target(offset_name): return None return match.group(1) + def _process_offset(offset, ea, next_offset): """Process an offset in a __got section.""" # Convert the address containing the offset into an offset in IDA, but continue if it fails. @@ -65,7 +70,7 @@ def _process_offset(offset, ea, next_offset): # comment in _symbolicate_stub. if stub.symbol_references_stub(name): _log(1, 'Offset at address {:#x} has target {:#x} (name {}) that references a stub', ea, - offset, name) + offset, name) return False # Set the new name for the offset. symbol = next_offset(name) @@ -77,6 +82,7 @@ def _process_offset(offset, ea, next_offset): return False return True + def _process_offsets_section(segstart, next_offset): """Process all the offsets in a __got section.""" for offset, ea in idau.ReadWords(segstart, idc.SegEnd(segstart), addresses=True): @@ -87,6 +93,7 @@ def _process_offsets_section(segstart, next_offset): else: _log(-1, 'Offset {:#x} at address {:#x} is unmapped', offset, ea) + def initialize_offset_symbols(): """Populate IDA with information about the offsets in an iOS kernelcache. @@ -102,4 +109,3 @@ def initialize_offset_symbols(): continue _log(2, 'Processing segment {}', segname) _process_offsets_section(ea, next_offset) - diff --git a/ida_kernelcache/segment.py b/ida_kernelcache/segment.py index d3e60ef..e0fb68c 100644 --- a/ida_kernelcache/segment.py +++ b/ida_kernelcache/segment.py @@ -6,10 +6,15 @@ # necessary. # +from __future__ import absolute_import + import idc +from builtins import range +from builtins import str +from six.moves import range -import ida_utilities as idau -import kernel +from . import ida_utilities as idau +from . import kernel _log = idau.make_log(0, __name__) @@ -20,48 +25,54 @@ _LC_SEGMENT_64 = 0x19 + def _macho_segments_and_sections(ea): """A generator to iterate through a Mach-O file's segments and sections. Each iteration yields a tuple: (segname, segstart, segend, [(sectname, sectstart, sectend), ...]) """ - hdr = idau.read_struct(ea, 'mach_header_64', asobject=True) - nlc = hdr.ncmds - lc = int(hdr) + len(hdr) + hdr = idau.read_struct(ea, 'mach_header_64', asobject=True) + nlc = hdr.ncmds + lc = int(hdr) + len(hdr) lcend = lc + hdr.sizeofcmds while lc < lcend and nlc > 0: loadcmd = idau.read_struct(lc, 'load_command', asobject=True) if loadcmd.cmd == _LC_SEGMENT_64: segcmd = idau.read_struct(lc, 'segment_command_64', asobject=True) - segname = idau.null_terminated(segcmd.segname) + segname = idau.null_terminated(segcmd.segname) segstart = segcmd.vmaddr - segend = segstart + segcmd.vmsize - sects = [] - sc = int(segcmd) + len(segcmd) + segend = segstart + segcmd.vmsize + sects = [] + sc = int(segcmd) + len(segcmd) for i in range(segcmd.nsects): sect = idau.read_struct(sc, 'section_64', asobject=True) - sectname = idau.null_terminated(sect.sectname) + sectname = idau.null_terminated(sect.sectname) sectstart = sect.addr - sectend = sectstart + sect.size + sectend = sectstart + sect.size sects.append((sectname, sectstart, sectend)) sc += len(sect) yield (segname, segstart, segend, sects) - lc += loadcmd.cmdsize + lc += loadcmd.cmdsize nlc -= 1 + def _initialize_segments_in_kext(kext, mach_header, skip=[]): """Rename the segments in the specified kext.""" + def log_seg(segname, segstart, segend): _log(3, '+ segment {: <20} {:x} - {:x} ({:x})', segname, segstart, segend, - segend - segstart) + segend - segstart) + def log_sect(sectname, sectstart, sectend): _log(3, ' section {: <20} {:x} - {:x} ({:x})', sectname, sectstart, sectend, - sectend - sectstart) + sectend - sectstart) + def log_gap(gapno, start, end, mapped): mapped = 'mapped' if mapped else 'unmapped' _log(3, ' gap {: <20} {:x} - {:x} ({:x}, {})', gapno, start, end, - end - start, mapped) + end - start, mapped) + def process_region(segname, name, start, end): assert end >= start if segname in skip: @@ -80,20 +91,22 @@ def process_region(segname, name, start, end): ida_segend = idc.SegEnd(ida_segstart) if start != ida_segstart or end != ida_segend: _log(0, 'IDA thinks segment {} {:x} - {:x} should be {:x} - {:x}', newname, start, end, - ida_segstart, ida_segend) + ida_segstart, ida_segend) return _log(2, 'Rename {:x} - {:x}: {} -> {}', start, end, idc.SegName(start), newname) idc.SegRename(start, newname) + def process_gap(segname, gapno, start, end): mapped = idau.is_mapped(start) log_gap(gapno, start, end, mapped) if mapped: name = 'HEADER' if start == mach_header else '__gap_' + str(gapno) process_region(segname, name, start, end) + for segname, segstart, segend, sects in _macho_segments_and_sections(mach_header): log_seg(segname, segstart, segend) lastend = segstart - gapno = 0 + gapno = 0 for sectname, sectstart, sectend in sects: if lastend < sectstart: process_gap(segname, gapno, lastend, sectstart) @@ -105,6 +118,7 @@ def process_gap(segname, gapno, start, end): process_gap(segname, gapno, lastend, segend) gapno += 1 + def initialize_segments(): """Rename the kernelcache segments in IDA according to the __PRELINK_INFO data. @@ -131,8 +145,10 @@ def initialize_segments(): _log(1, 'Renaming segments in {}', kext) _initialize_segments_in_kext(kext, mach_header) + _kext_regions = [] + def _initialize_kext_regions(): """Get region information for each kext based on iOS 12's __PRELINK_INFO.__kmod_start. @@ -154,8 +170,10 @@ def _initialize_kext_regions(): _log(1, 'Adding module: {:x} - {:x} {}', segstart, segend, kmod_name) _kext_regions.append((segstart, segend, kmod_name)) + _initialize_kext_regions() + def kernelcache_kext(ea): """Return the name of the kext to which the given linear address belongs. @@ -173,4 +191,3 @@ def kernelcache_kext(ea): if start <= ea < end: return kext return None - diff --git a/ida_kernelcache/stub.py b/ida_kernelcache/stub.py index c793dd5..d8cdc1a 100644 --- a/ida_kernelcache/stub.py +++ b/ida_kernelcache/stub.py @@ -5,14 +5,16 @@ # Functions for analyzing stub functions in the kernelcache. # +from __future__ import absolute_import + import re -import idc -import idautils import idaapi +import idautils +import idc -import ida_utilities as idau -import internal +from . import ida_utilities as idau +from . import internal _log = idau.make_log(1, __name__) @@ -22,6 +24,7 @@ _stub_regex = re.compile(r"^(\S+)" + kernelcache_stub_suffix + r"\d+$") """A regular expression to match and extract the target name from a stub symbol.""" + def stub_name_target(stub_name): """Get the target to which a stub name refers. @@ -32,10 +35,12 @@ def stub_name_target(stub_name): return None return match.group(1) + def symbol_references_stub(symbol_name): """Check if the symbol name references a stub.""" return kernelcache_stub_suffix in symbol_name + def _process_stub_template_1(stub): """A template to match the following stub pattern: @@ -55,10 +60,12 @@ def _process_stub_template_1(stub): if target and idau.is_mapped(target): return target + _stub_processors = ( _process_stub_template_1, ) + def stub_target(stub_func): """Find the target function called by a stub. @@ -73,6 +80,7 @@ def stub_target(stub_func): except: pass + def _symbolicate_stub(stub, target, next_stub): """Set a symbol for a stub function.""" name = idau.get_ea_name(target, user=True) @@ -89,7 +97,7 @@ def _symbolicate_stub(stub, target, next_stub): # semantics in the original code, so it's not appropriate for us to cover that up with a stub. if symbol_references_stub(name): _log(2, 'Stub {:#x} has target {:#x} (name {}) that references another stub', stub, target, - name) + name) return False symbol = next_stub(name) if symbol is None: @@ -100,6 +108,7 @@ def _symbolicate_stub(stub, target, next_stub): return False return True + def _process_possible_stub(stub, make_thunk, next_stub): """Try to process a stub function.""" # First, make sure this is a stub format we recognize. @@ -133,6 +142,7 @@ def _process_possible_stub(stub, make_thunk, next_stub): return False return True + def _process_stubs_section(segstart, make_thunk, next_stub): """Process all the functions in a __stubs section.""" segend = idc.SegEnd(segstart) @@ -142,6 +152,7 @@ def _process_stubs_section(segstart, make_thunk, next_stub): if idc.isRef(idc.GetFlags(ea)) and not stub_name_target(idau.get_ea_name(ea)): _process_possible_stub(ea, make_thunk, next_stub) + def initialize_stub_symbols(make_thunk=True): """Populate IDA with information about the stubs in an iOS kernelcache. @@ -160,4 +171,3 @@ def initialize_stub_symbols(make_thunk=True): continue _log(3, 'Processing segment {}', segname) _process_stubs_section(ea, make_thunk, next_stub) - diff --git a/ida_kernelcache/symbol.py b/ida_kernelcache/symbol.py index 4d9230e..edad927 100644 --- a/ida_kernelcache/symbol.py +++ b/ida_kernelcache/symbol.py @@ -10,10 +10,13 @@ strings. """ +from __future__ import absolute_import + import re -import idc import idaapi +import idc + def method_name(symbol): """Get the name of the C++ method from its symbol. @@ -21,27 +24,29 @@ def method_name(symbol): If the symbol demangles to 'Class::method(args)', this function returns 'method'. """ try: - demangled = idc.Demangle(symbol, idc.GetLongPrm(idc.INF_SHORT_DN)) - func = demangled.split('::', 1)[1] - base = func.split('(', 1)[0] + demangled = idc.Demangle(symbol, idc.GetLongPrm(idc.INF_SHORT_DN)) + func = demangled.split('::', 1)[1] + base = func.split('(', 1)[0] return base or None except: return None + def method_arguments_string(symbol): """Get the arguments string of the C++ method from its symbol. If the symbol demangles to 'Class::method(arg1, arg2)', this function returns 'arg1, arg2'. """ try: - demangled = idc.Demangle(symbol, idc.GetLongPrm(idc.INF_LONG_DN)) - func = demangled.split('::', 1)[1] - args = func.split('(', 1)[1] - args = args.rsplit(')', 1)[0].strip() + demangled = idc.Demangle(symbol, idc.GetLongPrm(idc.INF_LONG_DN)) + func = demangled.split('::', 1)[1] + args = func.split('(', 1)[1] + args = args.rsplit(')', 1)[0].strip() return args except: return None + def method_arguments(symbol): """Get the arguments list of the C++ method from its symbol. @@ -72,6 +77,7 @@ def method_arguments(symbol): except: return None + def method_argument_pointer_types(symbol): """Get the base types of pointer types used in the arguments to a C++ method.""" args = method_arguments_string(symbol) @@ -88,9 +94,10 @@ def method_argument_pointer_types(symbol): if re.match(r"[^ ]+ [*][* ]*", argtype): ptrtypes.add(argtype.split(' ', 1)[0]) ptrtypes.difference_update(['void', 'bool', 'char', 'short', 'int', 'long', 'float', 'double', - 'longlong', '__int64']) + 'longlong', '__int64']) return ptrtypes + def method_argument_types(symbol, sign=True): """Get the base types used in the arguments to a C++ method.""" try: @@ -109,6 +116,7 @@ def method_argument_types(symbol, sign=True): except: return None + def convert_function_type_to_function_pointer_type(typestr): """Convert a function type string into a function pointer type string. @@ -121,6 +129,7 @@ def convert_function_type_to_function_pointer_type(typestr): except: return None + def make_ident(name): """Convert a name into a valid identifier, substituting any invalid characters.""" ident = '' @@ -131,6 +140,7 @@ def make_ident(name): ident += '_' return ident + def _mangle_name(scopes): symbol = '' if len(scopes) > 1: @@ -143,6 +153,7 @@ def _mangle_name(scopes): symbol += 'E' return symbol + def vtable_symbol_for_class(classname): """Get the mangled symbol name for the vtable for the given class name. @@ -157,6 +168,7 @@ def vtable_symbol_for_class(classname): return None return '__ZTV' + name + def vtable_symbol_get_class(symbol): """Get the class name for a vtable symbol.""" try: @@ -167,6 +179,7 @@ def vtable_symbol_get_class(symbol): except: return None + def global_name(name): """Get the mangled symbol name for the global name. @@ -180,4 +193,3 @@ def global_name(name): if not mangled: return None return '__Z' + mangled - diff --git a/ida_kernelcache/tagged_pointers.py b/ida_kernelcache/tagged_pointers.py index 723988e..f509d3d 100644 --- a/ida_kernelcache/tagged_pointers.py +++ b/ida_kernelcache/tagged_pointers.py @@ -11,31 +11,39 @@ approach is better because it is closer to what the kernelcache looks like at runtime. """ -import idc +from __future__ import absolute_import + import idautils +import idc -import ida_utilities as idau -import kernel +from . import ida_utilities as idau +from . import kernel _log = idau.make_log(1, __name__) + def tagged_pointer_tag(tp): return (tp >> 48) & 0xffff + def tagged_pointer_untag(tp): return tp | 0xffff000000000000 + def is_tagged_pointer_format(value): return tagged_pointer_tag(value) != 0xffff and \ - (value & 0x0000ffff00000000) == 0x0000fff000000000 + (value & 0x0000ffff00000000) == 0x0000fff000000000 + def is_tagged_pointer(value): return is_tagged_pointer_format(value) and \ - idau.is_mapped(tagged_pointer_untag(value), value=False) + idau.is_mapped(tagged_pointer_untag(value), value=False) + def tagged_pointer_link(tag): return (tag >> 1) & ~0x3 + def tagged_pointer_next(ea, tp, end=None): assert ea # First try to get the offset to the next link. @@ -55,11 +63,13 @@ def tagged_pointer_next(ea, tp, end=None): # If we didn't find any tagged pointers at all, return None. return None + def untag_pointer(ea, tp): _log(4, 'Untagging pointer at {:x}', ea) idau.patch_word(ea, tagged_pointer_untag(tp)) idc.OpOff(ea, 0, 0) + def untag_pointers_in_range(start, end): assert kernel.kernelcache_format == kernel.KC_12_MERGED, 'Wrong kernelcache format' ea, tp = start, None @@ -73,9 +83,9 @@ def untag_pointers_in_range(start, end): break untag_pointer(ea, tp) + def untag_pointers(): _log(2, 'Starting tagged pointer conversion') for seg in idautils.Segments(): untag_pointers_in_range(idc.SegStart(seg), idc.SegEnd(seg)) _log(2, 'Tagged pointer conversion complete') - diff --git a/ida_kernelcache/vtable.py b/ida_kernelcache/vtable.py index 45a5ca9..a1c495c 100644 --- a/ida_kernelcache/vtable.py +++ b/ida_kernelcache/vtable.py @@ -5,25 +5,31 @@ # Functions for analyzing and symbolicating vtables in the kernelcache. # +from __future__ import absolute_import + from itertools import islice, takewhile -import idc import idautils +import idc +from builtins import next +from builtins import range +from six.moves import range -from symbol import vtable_symbol_for_class -import ida_utilities as idau -import classes -import stub +from . import classes +from . import ida_utilities as idau +from . import stub +from .symbol import vtable_symbol_for_class _log = idau.make_log(0, __name__) -VTABLE_OFFSET = 2 +VTABLE_OFFSET = 2 """The first few entries of the virtual method tables in the kernelcache are empty.""" MIN_VTABLE_METHODS = 12 """The minimum number of methods in a virtual method table.""" -MIN_VTABLE_LENGTH = VTABLE_OFFSET + MIN_VTABLE_METHODS +MIN_VTABLE_LENGTH = VTABLE_OFFSET + MIN_VTABLE_METHODS """The minimum length of a virtual method table in words, including the initial empty entries.""" + def vtable_length(ea, end=None, scan=False): """Find the length of a virtual method table. @@ -53,6 +59,7 @@ def vtable_length(ea, end=None, scan=False): definitely not the start of a vtable, then possible is False and length is the number of words that can be skipped when searching for the next vtable. """ + # TODO: This function should be reorganized. The better way of doing it is to count the number # of zero entries, then the number of nonzero entries, then decide based on that. Less # special-casing that way. @@ -62,6 +69,7 @@ def return_value(possible, length): if scan: return possible, length return length if possible else 0 + # Initialize default values. if end is None: end = idc.SegEnd(ea) @@ -101,6 +109,7 @@ def return_value(possible, length): # Now it's simple: We are valid if the length is long enough, invalid if it's too short. return return_value(length >= MIN_VTABLE_LENGTH, length) + def convert_vtable_to_offsets(vtable, length=None): """Convert a vtable into a sequence of offsets. @@ -125,20 +134,23 @@ def convert_vtable_to_offsets(vtable, length=None): successful = False return successful + def _convert_vtable_methods_to_functions(vtable, length): """Convert each virtual method in the vtable into an IDA function.""" for vmethod in vtable_methods(vtable, length=length): if not idau.force_function(vmethod): _log(0, 'Could not convert virtual method {:#x} into a function', vmethod) + def initialize_vtables(): """Convert vtables into offsets and ensure that virtual methods are IDA functions.""" classes.collect_class_info() - for vtable, length in classes.vtables.items(): + for vtable, length in list(classes.vtables.items()): if not convert_vtable_to_offsets(vtable, length): _log(0, 'Could not convert vtable at address {:x} into offsets', vtable) _convert_vtable_methods_to_functions(vtable, length) + def add_vtable_symbol(vtable, classname): """Add a symbol for the virtual method table at the specified address. @@ -152,22 +164,24 @@ def add_vtable_symbol(vtable, classname): vtable_symbol = vtable_symbol_for_class(classname) if not idau.set_ea_name(vtable, vtable_symbol): _log(0, 'Address {:#x} already has name {} instead of vtable symbol {}' - .format(vtable, idau.get_ea_name(vtable), vtable_symbol)) + .format(vtable, idau.get_ea_name(vtable), vtable_symbol)) return False return True + def initialize_vtable_symbols(): """Populate IDA with virtual method table symbols for an iOS kernelcache.""" classes.collect_class_info() - for classname, classinfo in classes.class_info.items(): + for classname, classinfo in list(classes.class_info.items()): if classinfo.vtable: _log(3, 'Class {} has vtable at {:#x}', classname, classinfo.vtable) if not add_vtable_symbol(classinfo.vtable, classname): _log(0, 'Could not add vtable symbol for class {} at address {:#x}', classname, - classinfo.vtable) + classinfo.vtable) else: _log(0, 'Class {} has no known vtable', classname) + def class_vtable_method(classinfo, index): """Get the virtual method for a class by index. @@ -183,6 +197,7 @@ def class_vtable_method(classinfo, index): return None return idau.read_word(methods + index * idau.WORD_SIZE) + def vtable_methods(vtable, start=VTABLE_OFFSET, length=None, nmethods=None): """Get the methods in a virtual method table. @@ -208,9 +223,10 @@ def vtable_methods(vtable, start=VTABLE_OFFSET, length=None, nmethods=None): elif length is None: length = vtable_length(vtable) # Read the methods. - for i in xrange(start, length): + for i in range(start, length): yield idau.read_word(vtable + i * idau.WORD_SIZE) + def class_vtable_methods(classinfo, nmethods=None, new=False): """Get the methods in a virtual method table for a class. @@ -232,10 +248,11 @@ def class_vtable_methods(classinfo, nmethods=None, new=False): else: start = VTABLE_OFFSET return vtable_methods(classinfo.vtable, start=start, length=classinfo.vtable_length, - nmethods=nmethods) + nmethods=nmethods) + def vtable_overrides(class_vtable, super_vtable, class_vlength=None, super_vlength=None, - new=False, methods=False): + new=False, methods=False): """Get the overrides of a virtual method table. A generator that returns the index of each override in the virtual method table. The initial @@ -261,8 +278,8 @@ def vtable_overrides(class_vtable, super_vtable, class_vlength=None, super_vleng super_vlength = vtable_length(super_vtable) assert class_vlength >= super_vlength >= 0 # Skip the first VTABLE_OFFSET entries. - class_vtable += VTABLE_OFFSET * idau.WORD_SIZE - super_vtable += VTABLE_OFFSET * idau.WORD_SIZE + class_vtable += VTABLE_OFFSET * idau.WORD_SIZE + super_vtable += VTABLE_OFFSET * idau.WORD_SIZE class_vlength -= VTABLE_OFFSET super_vlength -= VTABLE_OFFSET # How many methods are we iterating over? @@ -271,7 +288,7 @@ def vtable_overrides(class_vtable, super_vtable, class_vlength=None, super_vleng else: nmethods = super_vlength # Iterate through the methods. - for i in xrange(nmethods): + for i in range(nmethods): # Read the old method. super_method = None if i < super_vlength: @@ -285,6 +302,7 @@ def vtable_overrides(class_vtable, super_vtable, class_vlength=None, super_vleng else: yield i + def class_vtable_overrides(classinfo, superinfo=None, new=False, methods=False): """Get the overrides of a virtual method table for a class. @@ -313,7 +331,7 @@ def class_vtable_overrides(classinfo, superinfo=None, new=False, methods=False): else: if superinfo not in classinfo.ancestors(): raise ValueError('Invalid arguments: classinfo={}, superinfo={}'.format(classinfo, - superinfo)) + superinfo)) # Get the vtable for the class. class_vtable = classinfo.vtable class_vlength = classinfo.vtable_length @@ -327,9 +345,10 @@ def class_vtable_overrides(classinfo, superinfo=None, new=False, methods=False): super_vlength = 0 # Run the generator. for x in vtable_overrides(class_vtable, super_vtable, class_vlength=class_vlength, - super_vlength=super_vlength, new=new, methods=methods): + super_vlength=super_vlength, new=new, methods=methods): yield x + def class_from_vtable_method_symbol(method_symbol): """Get the base class in a vtable method symbol. @@ -343,6 +362,7 @@ def class_from_vtable_method_symbol(method_symbol): return None return classname + def _vtable_method_symbol_substitute_class(method_symbol, new_class, old_class=None): """Create a new method symbol by substituting the class to which the method belongs.""" # TODO: This is wrong when the class name is repeated! @@ -356,20 +376,24 @@ def _vtable_method_symbol_substitute_class(method_symbol, new_class, old_class=N return None return method_symbol.replace(old_class_part, new_class_part, 1) + _ignore_vtable_methods = ( '___cxa_pure_virtual' ) + def _ok_to_rename_method(override, name): """Some method names are ok to rename.""" return (name.startswith('j_') and idau.iterlen(idautils.XrefsTo(override)) == 1) + def _bad_name_dont_use_as_override(name): """Some names shouldn't propagate into vtable symbols.""" # Ignore jumps and stubs and fixed known special values. return (name.startswith('j_') or stub.symbol_references_stub(name) or name in _ignore_vtable_methods) + def _symbolicate_overrides_for_classinfo(classinfo, processed): """A recursive function to symbolicate vtable overrides for a class and its superclasses.""" # If we've already been processed, stop. @@ -392,7 +416,7 @@ def _symbolicate_overrides_for_classinfo(classinfo, processed): new_name = _vtable_method_symbol_substitute_class(original_name, classinfo.classname) if not new_name: _log(0, 'Could not substitute class {} into method symbol {} for override {:#x}', - classinfo.classname, original_name, override) + classinfo.classname, original_name, override) continue # Now that we have the new name, set it. if override_name: @@ -402,6 +426,7 @@ def _symbolicate_overrides_for_classinfo(classinfo, processed): # We're done. processed.add(classinfo) + def initialize_vtable_method_symbols(): """Symbolicate overridden methods in a virtual method table. @@ -409,6 +434,5 @@ def initialize_vtable_method_symbols(): """ processed = set() classes.collect_class_info() - for classinfo in classes.class_info.values(): + for classinfo in list(classes.class_info.values()): _symbolicate_overrides_for_classinfo(classinfo, processed) - diff --git a/ida_kernelcache_reload.py b/ida_kernelcache_reload.py index d2d62bb..8b8120f 100644 --- a/ida_kernelcache_reload.py +++ b/ida_kernelcache_reload.py @@ -6,10 +6,11 @@ # modules. # +from __future__ import absolute_import + import sys -for mod in sys.modules.keys(): + +for mod in list(sys.modules.keys()): if 'ida_kernelcache' in mod: del sys.modules[mod] -import ida_kernelcache -import ida_kernelcache as kc diff --git a/scripts/find_virtual_method_overrides.py b/scripts/find_virtual_method_overrides.py index 78008b6..e3d1d9c 100644 --- a/scripts/find_virtual_method_overrides.py +++ b/scripts/find_virtual_method_overrides.py @@ -5,6 +5,10 @@ # Use ida_kernelcache to find classes that override a virtual method. # +from __future__ import absolute_import +from __future__ import print_function + + def kernelcache_find_virtual_method_overrides(classname=None, method=None): import idc import idaapi @@ -20,8 +24,9 @@ def __init__(self): <#The class#Class :{classname}> <#The virtual method#Method:{method}>""", { 'classname': idaapi.Form.StringInput(tp=idaapi.Form.FT_IDENT, swidth=swidth), - 'method': idaapi.Form.StringInput(tp=idaapi.Form.FT_IDENT, swidth=swidth), + 'method': idaapi.Form.StringInput(tp=idaapi.Form.FT_IDENT, swidth=swidth), }) + def OnFormChange(self, fid): return 1 @@ -31,34 +36,34 @@ def OnFormChange(self, fid): f = MyForm() f.Compile() f.classname.value = classname or '' - f.method.value = method or '' + f.method.value = method or '' ok = f.Execute() if ok != 1: - print 'Cancelled' + print('Cancelled') return False classname = f.classname.value - method = f.method.value + method = f.method.value f.Free() if classname not in kc.class_info: - print 'Not a valid class: {}'.format(classname) + print('Not a valid class: {}'.format(classname)) return False - print 'Subclasses of {} that override {}:'.format(classname, method) + print('Subclasses of {} that override {}:'.format(classname, method)) baseinfo = kc.class_info[classname] found = False for classinfo in baseinfo.descendants(): for _, override, _ in kc.vtable.class_vtable_overrides(classinfo, superinfo=baseinfo, - methods=True): + methods=True): name = idc.NameEx(idc.BADADDR, override) demangled = idc.Demangle(name, idc.GetLongPrm(idc.INF_SHORT_DN)) name = demangled if demangled else name if method in name: - print '{:#x} {}'.format(override, classinfo.classname) + print('{:#x} {}'.format(override, classinfo.classname)) found = True if not found: - print 'No subclass of {} overrides {}'.format(classname, method) + print('No subclass of {} overrides {}'.format(classname, method)) return found -kernelcache_find_virtual_method_overrides() +kernelcache_find_virtual_method_overrides() diff --git a/scripts/populate_struct.py b/scripts/populate_struct.py index ce1efcf..3ac2039 100644 --- a/scripts/populate_struct.py +++ b/scripts/populate_struct.py @@ -5,6 +5,10 @@ # Populate a class or struct using data flow analysis. # +from __future__ import absolute_import +from __future__ import print_function + + def kernelcache_populate_struct(struct=None, address=None, register=None, delta=None): import idc import idautils @@ -23,11 +27,12 @@ def __init__(self): <#The address of the instruction at which the register points to the structure#Address :{address}> <#The register containing the pointer to the structure#Register :{register}> <#The offset of the pointer from the start of the structure#Delta :{delta}>""", { - 'structure': idaapi.Form.StringInput( tp=idaapi.Form.FT_IDENT, swidth=swidth), - 'address': idaapi.Form.NumericInput(tp=idaapi.Form.FT_ADDR, swidth=swidth, width=1000), - 'register': idaapi.Form.StringInput( tp=idaapi.Form.FT_IDENT, swidth=swidth), - 'delta': idaapi.Form.NumericInput(tp=idaapi.Form.FT_INT64, swidth=swidth), + 'structure': idaapi.Form.StringInput(tp=idaapi.Form.FT_IDENT, swidth=swidth), + 'address': idaapi.Form.NumericInput(tp=idaapi.Form.FT_ADDR, swidth=swidth, width=1000), + 'register': idaapi.Form.StringInput(tp=idaapi.Form.FT_IDENT, swidth=swidth), + 'delta': idaapi.Form.NumericInput(tp=idaapi.Form.FT_INT64, swidth=swidth), }) + def OnFormChange(self, fid): return 1 @@ -36,17 +41,17 @@ def OnFormChange(self, fid): f = MyForm() f.Compile() f.structure.value = struct or '' - f.address.value = address or idc.ScreenEA() - f.register.value = register or 'X0' - f.delta.value = delta or 0 + f.address.value = address or idc.ScreenEA() + f.register.value = register or 'X0' + f.delta.value = delta or 0 ok = f.Execute() if ok != 1: - print 'Cancelled' + print('Cancelled') return False - struct = f.structure.value - address = f.address.value + struct = f.structure.value + address = f.address.value register = f.register.value - delta = f.delta.value + delta = f.delta.value f.Free() # Check whether this struct is a class. @@ -56,12 +61,12 @@ def OnFormChange(self, fid): # Open the structure. sid = idau.struct_open(struct, create=True) if sid is None: - print 'Could not open struct {}'.format(struct) + print('Could not open struct {}'.format(struct)) return False # Check that the address is in a function. if not idaapi.get_func(address): - print 'Address {:#x} is not a function'.format(address) + print('Address {:#x} is not a function'.format(address)) return False # Get the register id. @@ -70,22 +75,22 @@ def OnFormChange(self, fid): register_id = idaapi.str2reg(register) elif type(register) is int: register_id = register - register = idaapi.get_reg_name(register_id, 8) + register = idaapi.get_reg_name(register_id, 8) if register_id is None or register_id < 0: - print 'Invalid register {}'.format(register) + print('Invalid register {}'.format(register)) return False # Validate delta. if delta < 0 or delta > 0x1000000: - print 'Invalid delta {}'.format(delta) + print('Invalid delta {}'.format(delta)) return False elif is_class and delta != 0: - print 'Nonzero delta not yet supported' + print('Nonzero delta not yet supported') return False type_name = 'class' if is_class else 'struct' - print '{} = {}, address = {:#x}, register = {}, delta = {:#x}'.format(type_name, struct, - address, register, delta) + print('{} = {}, address = {:#x}, register = {}, delta = {:#x}'.format(type_name, struct, + address, register, delta)) if is_class: # Run the analysis. @@ -93,11 +98,11 @@ def OnFormChange(self, fid): else: # Run the data flow to collect the accesses and then add those fields to the struct. accesses = kc.data_flow.pointer_accesses(function=address, - initialization={ address: { register_id: delta } }) + initialization={address: {register_id: delta}}) kc.build_struct.create_struct_fields(sid, accesses=accesses) # Set the offsets to stroff. - for addresses_and_deltas in accesses.values(): + for addresses_and_deltas in list(accesses.values()): for ea, delta in addresses_and_deltas: insn = idautils.DecodeInstruction(ea) if insn: @@ -106,8 +111,8 @@ def OnFormChange(self, fid): idau.insn_op_stroff(insn, op.n, sid, delta) # All done! :) - print 'Done' + print('Done') return True -kernelcache_populate_struct() +kernelcache_populate_struct() diff --git a/scripts/process_external_methods.py b/scripts/process_external_methods.py index 4ce9146..6655d01 100644 --- a/scripts/process_external_methods.py +++ b/scripts/process_external_methods.py @@ -7,9 +7,12 @@ # { selector, input_scalars_count, input_structure_size, output_scalars_count, output_structure_size } # +from __future__ import absolute_import +from __future__ import print_function + + def kernelcache_process_external_methods(ea=None, struct_type=None, count=None): import idc - import ida_kernelcache as kc import ida_kernelcache.ida_utilities as idau kIOUCVariableStructureSize = 0xffffffff @@ -23,10 +26,10 @@ def kernelcache_process_external_methods(ea=None, struct_type=None, count=None): kIOUCFlags = 0xff IOExternalMethod_types = (kIOUCScalarIScalarO, kIOUCScalarIStructO, kIOUCStructIStructO, - kIOUCScalarIStructI) + kIOUCScalarIStructI) IOExternalMethod_count0_scalar = (kIOUCScalarIScalarO, kIOUCScalarIStructO, - kIOUCScalarIStructI) + kIOUCScalarIStructI) IOExternalMethod_count1_scalar = (kIOUCScalarIScalarO,) @@ -74,9 +77,9 @@ def process_IOExternalMethod(obj): return (isc, iss, osc, oss) TYPE_MAP = { - 'IOExternalMethodDispatch': - (is_IOExternalMethodDispatch, process_IOExternalMethodDispatch), - 'IOExternalMethod': (is_IOExternalMethod, process_IOExternalMethod), + 'IOExternalMethodDispatch': + (is_IOExternalMethodDispatch, process_IOExternalMethodDispatch), + 'IOExternalMethod': (is_IOExternalMethod, process_IOExternalMethod), } # Get the EA. @@ -92,27 +95,27 @@ def process_IOExternalMethod(obj): if check(obj): break else: - print 'Address {:#x} does not look like any known external method struct'.format(ea) + print('Address {:#x} does not look like any known external method struct'.format(ea)) return False else: if struct_type not in TYPE_MAP: - print 'Unknown external method struct type {}'.format(struct_type) + print('Unknown external method struct type {}'.format(struct_type)) return False check, process = TYPE_MAP[struct_type] obj = idau.read_struct(ea, struct=struct_type, asobject=True) if not check(obj): - print 'Address {:#x} does not look like {}'.format(ea, struct_type) + print('Address {:#x} does not look like {}'.format(ea, struct_type)) # Process the external methods. selector = 0; while (count is None and check(obj)) or (selector < count): isc, iss, osc, oss = process(obj) - print '{{ {:3}, {:5}, {:#10x}, {:5}, {:#10x} }}'.format(selector, isc, iss, osc, oss) + print('{{ {:3}, {:5}, {:#10x}, {:5}, {:#10x} }}'.format(selector, isc, iss, osc, oss)) selector += 1 ea += len(obj) obj = idau.read_struct(ea, struct=struct_type, asobject=True) return True -kernelcache_process_external_methods() +kernelcache_process_external_methods()