Skip to content
2 changes: 1 addition & 1 deletion ida_kernelcache/class_struct.py
Original file line number Diff line number Diff line change
Expand Up @@ -446,7 +446,7 @@ def _convert_operands_to_struct_offsets(access_addresses):
if insn:
for op in insn.Operands:
if op.type == idaapi.o_displ:
if not idc.OpStroffEx(ea, op.n, sid, delta):
if not idc.OpStroffEx(insn, op.n, sid, delta):
_log(1, 'Could not convert {:#x} to struct offset for class {} '
'delta {}', ea, classname, delta)

Expand Down
199 changes: 186 additions & 13 deletions ida_kernelcache/collect_classes.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,8 +24,15 @@

_MEMOP_WBINDEX = _MEMOP_PREINDEX | _MEMOP_POSTINDEX

# 64-bit kernelcaches keep constant data in the __DATA_CONST segment (e.g.
# __DATA_CONST.__const), whereas 32-bit ones use plain __DATA (__DATA.__const).
_CONST_SEGNAME = '__DATA' if idau.WORD_SIZE == 4 else '__DATA_CONST'

class _Regs(object):
"""A set of registers for _emulate_arm64."""
"""A set of registers for _emulate_arm64/32."""

class _Unknown:
"""A wrapper class indicating that the value is unknown."""
Expand All @@ -52,8 +59,13 @@ def clear(self, reg):
pass

def _reg(self, reg):
if type(reg) is int:
if isinstance(reg, (int, long)):
reg = _Regs._reg_names[reg]

# Automatically map Rn to Xn
if reg[0] == 'R' and reg[1:].isdigit():
reg = 'X' + reg[1:]

return reg

def __getitem__(self, reg):
Expand All @@ -64,11 +76,11 @@ def __getitem__(self, reg):

def __setitem__(self, reg, value):
if value is None or value is _Regs.Unknown:
self.clear(reg)
self.clear(self._reg(reg))
else:
self._regs[self._reg(reg)] = value & 0xffffffffffffffff

def _emulate_arm64(start, end, on_BL=None, on_RET=None):
def _emulate_arm64(start, end=None, count=None, on_BL=None, on_RET=None):
"""A very basic partial Arm64 emulator that does just enough to find OSMetaClass
information."""
# Super basic emulation.
Expand All @@ -86,8 +98,7 @@ def load(addr, dtyp):
def cleartemps():
for t in ['X{}'.format(i) for i in range(0, 19)]:
reg.clear(t)
for insn in idau.Instructions(start, end):
_log(11, 'Processing instruction {:#x}', insn.ea)
for insn in idau.Instructions(start, end=end, count=count):
mnem = insn.get_canon_mnem()
if mnem == 'ADRP' or mnem == 'ADR':
reg[insn.Op1.reg] = insn.Op2.value
Expand Down Expand Up @@ -123,6 +134,167 @@ def cleartemps():
_log(10, 'Unrecognized instruction at address {:#x}', insn.ea)
reg.clearall()

def _emulate_arm32(start, end=None, count=None, on_BL=None, on_RET=None, reg=None):
    """A very basic partial Arm32 emulator that does just enough to find OSMetaClass
    information.

    Args:
        start: Address of the first instruction. Bit 0 (the Thumb bit) is cleared.
        end: Optional end address forwarded to idau.Instructions. It is rounded up to an
            even address.
        count: Optional instruction count forwarded to idau.Instructions.
        on_BL: Optional callback on_BL(target, reg), invoked for every BL with a near
            target and for a branch that is treated as a tail call.
        on_RET: Optional callback on_RET(reg), invoked when a return is recognized
            (BX LR, POP {..., PC} or a branch treated as a tail call).
        reg: Optional _Regs instance to continue emulating with. It is supplied when a
            stub branch is followed so that the register state carries over; a fresh
            _Regs is created when omitted.
    """
    # Super basic emulation.
    if reg is None:
        reg = _Regs()

    def load(addr, dtyp):
        # Only 32-bit loads from a known, non-zero address are modelled.
        if not addr:
            return None
        if dtyp != idaapi.dt_dword:
            return None
        return idau.read_word(addr, 4)

    def cleartemps():
        for t in ['R{}'.format(i) for i in range(0, 12)]:
            reg.clear(t)

    # Handle thumb stuff
    start = start & ~1
    if end is not None:
        end = (end + 1) & ~1

    # if bl is found, lr is replaced, and marked dirty
    # if pop {... lr ...} is found, lr is assumed to be restored to
    # original, "clean" state
    lr_dirty = False

    # Special registers have special handling
    _SP_REG = 13
    _LR_REG = 14
    _PC_REG = 15

    for insn in idau.Instructions(start, end=end, count=count):
        mnem = insn.get_canon_mnem()
        _log(12, 'Regs: {}', reg._regs)
        _log(11, 'Processing instruction {} at {:#x}', mnem, insn.ea)
        if mnem == 'ADR':
            reg[insn.Op1.reg] = insn.Op2.value
        elif ((mnem == 'ADD' or mnem == 'SUB')
                and insn.Op1.type == insn.Op2.type == idc.o_reg
                and insn.Op1.reg == insn.Op2.reg == _SP_REG):
            # ignore add/sub on SP
            pass
        elif (mnem in ('ADD', 'ORR', 'SUB') and insn.Op2.type == idc.o_reg
                and insn.Op3.type == idc.o_imm):
            # There might be more operations, but in practice
            # add/sub/orr are enough

            # Don't bother computing if the src register is unknown and
            # just mark the dst register as unknown too
            src = reg[insn.Op2.reg]
            if isinstance(src, _Regs._Unknown):
                reg.clear(insn.Op1.reg)
            else:
                if mnem == 'ADD':
                    src += insn.Op3.value
                elif mnem == 'SUB':
                    src -= insn.Op3.value
                else:  # ORR
                    src |= insn.Op3.value
                reg[insn.Op1.reg] = src
        elif mnem == 'ADD' and insn.Op3.type == idaapi.o_void:
            # Two-operand form. Nothing to do if the destination is unknown.
            dst = reg[insn.Op1.reg]
            if not isinstance(dst, _Regs._Unknown):
                if insn.Op2.type == idc.o_imm:
                    # ADD Rx, <imm>
                    reg[insn.Op1.reg] = dst + insn.Op2.value
                elif insn.Op2.type == idc.o_reg and insn.Op2.reg == _PC_REG:
                    # ADD Rx, PC -- special handling
                    # On ARM PC is "address of current instruction + 4"
                    # for historical reasons
                    reg[insn.Op1.reg] = dst + insn.ea + 4
                # Any other source operand leaves the destination untouched.
        elif mnem == 'NOP':
            pass
        elif mnem == 'MOV' and insn.Op2.type == idc.o_imm:
            reg[insn.Op1.reg] = insn.Op2.value
        elif mnem == 'MOV' and insn.Op2.type == idc.o_reg:
            reg[insn.Op1.reg] = reg[insn.Op2.reg]
        elif mnem == 'BX' and insn.Op1.type == idc.o_reg and insn.Op1.reg == _LR_REG:
            # bx lr is often used for ret
            if on_RET:
                on_RET(reg)
            break
        elif mnem == 'POP' and insn.Op1.type in (idc.o_idpspec1, idc.o_reg):
            popped = []

            # Either it's one register pop'ped
            if insn.Op1.type == idc.o_reg:
                popped.append(insn.Op1.reg)

            # Or whole set of them, identified by specval bits
            if insn.Op1.type == idc.o_idpspec1:
                popped.extend(i for i in range(0, 16) if insn.Op1.specval & (1 << i))

            for i in popped:
                reg.clear(i)

            if _PC_REG in popped:
                # pop {...pc...} is another way for ret
                if on_RET:
                    on_RET(reg)
                break
            elif _LR_REG in popped:
                lr_dirty = False
        elif mnem == 'BL' and insn.Op1.type == idc.o_near:
            if on_BL:
                on_BL(insn.Op1.addr, reg)
            cleartemps()
            lr_dirty = True
        elif ((mnem == 'B' and insn.Op1.type == idc.o_near)
                or (mnem in ('CBZ', 'CBNZ') and insn.Op2.type == idc.o_near)):
            dest = insn.Op1.addr if insn.Op2.type == 0 else insn.Op2.addr
            # end is None when the caller bounds the emulation by count; the
            # range test is only meaningful with an explicit end address.
            if end is not None and start <= dest <= end:
                # silently ignoring branch since start<=dest<=end
                # So we check all code not skipping anything because of
                # conditions, and also don't get stuck in a loop
                continue

            if not lr_dirty and insn.ea == start:
                # special case -- when first instruction is branch to
                # another place -- means that current function is stub
                _log(11, 'Following {} at {:#x} (to {:#x})', mnem, insn.ea, dest)
                # Recurse into this emulator directly so the current register
                # state can be handed over through the reg parameter.
                _emulate_arm32(dest, idc.FindFuncEnd(dest), on_BL=on_BL, on_RET=on_RET,
                               reg=reg)
            elif not lr_dirty and on_RET:
                # Consider as bl & ret -- usually happens as a way
                # of optimization, when return func2() in the end of
                # func1 is replaced by "b _func2"
                if on_BL:
                    on_BL(dest, reg)
                cleartemps()
                on_RET(reg)
            else:
                _log(11, 'NOT Following {} at {:#x} (to {:#x}) and not considering as ret', mnem, insn.ea, dest)
            # Control has left the emulated range either way.
            break
        elif mnem == 'LDR' and insn.Op2.type == idc.o_mem:
            # LDR Rx, =ADDR
            reg[insn.Op1.reg] = load(insn.Op2.addr, insn.Op1.dtype)
        elif mnem == 'LDR' and insn.Op2.type == idc.o_displ and insn.Op2.value == 0:
            # LDR Rx, [Ry]
            reg[insn.Op1.reg] = load(reg[insn.Op2.reg], insn.Op1.dtype)
        elif mnem == 'PUSH' or mnem == 'STR':
            # They don't affect registers directly
            pass
        else:
            # silently clear on V instructions -- they're used pretty
            # often but aren't needed for OSMetaClass stuff
            if mnem not in ('VMOV', 'VST1', 'VLD1'):
                _log(6, 'Unrecognized instruction {} at address {:#x}', mnem, insn.ea)
            reg.clearall()

# Emulator entry point matching the word size of the loaded database:
# the Arm32 emulator for 4-byte words, the Arm64 emulator otherwise (8-byte words).
_emulate_arm = _emulate_arm32 if idau.WORD_SIZE == 4 else _emulate_arm64


class _OneToOneMapFactory(object):
"""A factory to extract the largest one-to-one submap."""

Expand Down Expand Up @@ -162,7 +334,7 @@ def build(self, bad_a=None, bad_b=None):

def _process_mod_init_func_for_metaclasses(func, found_metaclass):
"""Process a function from the __mod_init_func section for OSMetaClass information."""
_log(4, 'Processing function {}', idc.GetFunctionName(func))
_log(4, 'Processing function {:#x} ({})', func, idc.GetFunctionName(func))
def on_BL(addr, reg):
X0, X1, X3 = reg['X0'], reg['X1'], reg['X3']
if not (X0 and X1 and X3):
Expand All @@ -172,7 +344,7 @@ def on_BL(addr, reg):
if not idc.SegName(X1).endswith("__TEXT.__cstring") or not idc.SegName(X0):
return
found_metaclass(X0, idc.GetString(X1), X3, reg['X2'] or None)
_emulate_arm64(func, idc.FindFuncEnd(func), on_BL=on_BL)
_emulate_arm(func, idc.FindFuncEnd(func), on_BL=on_BL)

def _process_mod_init_func_section_for_metaclasses(segstart, found_metaclass):
"""Process a __mod_init_func section for OSMetaClass information."""
Expand All @@ -192,7 +364,7 @@ def found_metaclass(metaclass, classname, class_size, meta_superclass):
metaclass_to_meta_superclass[metaclass] = meta_superclass
for ea in idautils.Segments():
segname = idc.SegName(ea)
if not segname.endswith('__DATA_CONST.__mod_init_func'):
if not segname.endswith(_CONST_SEGNAME + '.__mod_init_func'):
continue
_log(2, 'Processing segment {}', segname)
_process_mod_init_func_section_for_metaclasses(ea, found_metaclass)
Expand Down Expand Up @@ -225,8 +397,9 @@ def _get_vtable_metaclass(vtable_addr, metaclass_info):
def on_RET(reg):
on_RET.ret = reg['X0']
on_RET.ret = None
_emulate_arm64(getMetaClass, getMetaClass + idau.WORD_SIZE * _MAX_GETMETACLASS_INSNS,
on_RET=on_RET)

# use count to avoid alignment errors on arm32
_emulate_arm(getMetaClass, count=_MAX_GETMETACLASS_INSNS, on_RET=on_RET)
if on_RET.ret in metaclass_info:
return on_RET.ret

Expand Down Expand Up @@ -254,7 +427,7 @@ def found_vtable(metaclass, vtable, length):
metaclass_to_vtable_builder.add_link(metaclass, vtable)
for ea in idautils.Segments():
segname = idc.SegName(ea)
if not segname.endswith('__DATA_CONST.__const'):
if not segname.endswith(_CONST_SEGNAME + '.__const'):
continue
_log(2, 'Processing segment {}', segname)
_process_const_section_for_vtables(ea, metaclass_info, found_vtable)
Expand Down Expand Up @@ -295,7 +468,7 @@ def bad_vtable(vtable, metaclasses):

def _check_filetype(filetype):
"""Checks that the filetype is compatible before trying to process it."""
return 'Mach-O' in filetype and 'ARM64' in filetype
return 'Mach-O' in filetype and 'ARM' in filetype

def collect_class_info_internal():
"""Collect information about C++ classes defined in a kernelcache.
Expand Down
16 changes: 15 additions & 1 deletion ida_kernelcache/ida_utilities.py
Original file line number Diff line number Diff line change
Expand Up @@ -125,6 +125,11 @@ def get_ea_name(ea, fromaddr=idc.BADADDR, true=False, user=False):
Returns:
The name of the address or "".
"""
if WORD_SIZE == 4:
s = idc.SegName(ea).lower()
if 'text' in s or 'stub' in s:
ea &= ~1

if user and not idc.hasUserName(idc.GetFlags(ea)):
return ""
if true:
Expand All @@ -148,6 +153,8 @@ def set_ea_name(ea, name, rename=False, auto=False):
Returns:
True if the address was successfully named (or renamed).
"""
if WORD_SIZE == 4:
ea &= ~1
if not rename and idc.hasUserName(idc.GetFlags(ea)):
return get_ea_name(ea) == name
flags = idc.SN_CHECK
Expand Down Expand Up @@ -431,7 +438,11 @@ def _convert_address_to_function(func):
idc.AnalyseArea(item, itemend)
else:
# Just try removing the chunk from its current function.
idc.RemoveFchunk(func, func)
# IDA can add it to another function automatically, so make sure
# it's removed from all functions by doing it in loop until it
# fails
while idc.RemoveFchunk(func, func):
pass
# Now try making a function.
if idc.MakeFunction(func) != 0:
return True
Expand Down Expand Up @@ -463,6 +474,9 @@ def is_function_start(ea):

def force_function(addr):
    """Make sure the given address is the start of a function, converting it if needed.

    The lowest bit of the address is cleared first, so addresses carrying the THUMB bit
    are handled as well.

    Returns:
        True if the address already is a function start, otherwise the result of
        _convert_address_to_function.
    """
    # Unset last bit -- so it works with THUMB functions too
    # TODO: Consider setting THUMB/ARM mode too
    entry = addr & ~1
    if not is_function_start(entry):
        return _convert_address_to_function(entry)
    return True
Expand Down
9 changes: 8 additions & 1 deletion ida_kernelcache/kernel.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,14 @@

def find_kernel_base():
    """Find the kernel base.

    Returns:
        The address that file offset 0 maps to. If that lookup yields idc.BADADDR, the
        address that file offset 28 maps to is returned instead; that value may itself
        be idc.BADADDR when neither offset is mapped.
    """
    # sometimes kernelcache is a FAT Mach-O with one arch, in which case the
    # lookup is retried past the FAT headers:
    # sizeof(fat_header) + 1 * sizeof(fat_arch) = 28
    single_arch_fat_offset = 28

    kbase = idaapi.get_fileregion_ea(0)
    if kbase == idc.BADADDR:
        kbase = idaapi.get_fileregion_ea(single_arch_fat_offset)

    return kbase

base = find_kernel_base()
"""The kernel base address (the address of the main kernel Mach-O header)."""
Expand Down
4 changes: 2 additions & 2 deletions ida_kernelcache/offset.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ def initialize_data_offsets():
for seg in idautils.Segments():
name = idc.SegName(seg)
if not (name.endswith('__DATA_CONST.__const') or name.endswith('__got')
or name.endswith('__DATA.__data')):
or name.endswith('__DATA.__data') or name.endswith('__nl_symbol_ptr')):
continue
for word, ea in idau.ReadWords(seg, idc.SegEnd(seg), addresses=True):
if idau.is_mapped(word, value=False):
Expand Down Expand Up @@ -95,7 +95,7 @@ def initialize_offset_symbols():
next_offset = internal.make_name_generator(kernelcache_offset_suffix)
for ea in idautils.Segments():
segname = idc.SegName(ea)
if not segname.endswith('__got'):
if not segname.endswith('__got') and not segname.endswith('__nl_symbol_ptr'):
continue
_log(2, 'Processing segment {}', segname)
_process_offsets_section(ea, next_offset)
Expand Down
Loading