diff --git a/ida_kernelcache/class_struct.py b/ida_kernelcache/class_struct.py index 6d1caff..730a0a8 100644 --- a/ida_kernelcache/class_struct.py +++ b/ida_kernelcache/class_struct.py @@ -446,7 +446,7 @@ def _convert_operands_to_struct_offsets(access_addresses): if insn: for op in insn.Operands: if op.type == idaapi.o_displ: - if not idc.OpStroffEx(ea, op.n, sid, delta): + if not idc.OpStroffEx(insn, op.n, sid, delta): _log(1, 'Could not convert {:#x} to struct offset for class {} ' 'delta {}', ea, classname, delta) diff --git a/ida_kernelcache/collect_classes.py b/ida_kernelcache/collect_classes.py index c486bf9..c65c542 100644 --- a/ida_kernelcache/collect_classes.py +++ b/ida_kernelcache/collect_classes.py @@ -24,8 +24,15 @@ _MEMOP_WBINDEX = _MEMOP_PREINDEX | _MEMOP_POSTINDEX +# on 64bit devices __DATA_CONST segment is used for constant data +# instead of __DATA (eg __DATA_CONST.__const instead of __DATA.__const) +if idau.WORD_SIZE == 4: + _CONST_SEGNAME = '__DATA' +else: + _CONST_SEGNAME = '__DATA_CONST' + class _Regs(object): - """A set of registers for _emulate_arm64.""" + """A set of registers for _emulate_arm64/32.""" class _Unknown: """A wrapper class indicating that the value is unknown.""" @@ -52,8 +59,13 @@ def clear(self, reg): pass def _reg(self, reg): - if type(reg) is int: + if isinstance(reg, (int, long)): reg = _Regs._reg_names[reg] + + # Automatically map Rn to Xn + if reg[0] == 'R' and reg[1:].isdigit(): + reg = 'X' + reg[1:] + return reg def __getitem__(self, reg): @@ -64,11 +76,11 @@ def __getitem__(self, reg): def __setitem__(self, reg, value): if value is None or value is _Regs.Unknown: - self.clear(reg) + self.clear(self._reg(reg)) else: self._regs[self._reg(reg)] = value & 0xffffffffffffffff -def _emulate_arm64(start, end, on_BL=None, on_RET=None): +def _emulate_arm64(start, end=None, count=None, on_BL=None, on_RET=None): """A very basic partial Arm64 emulator that does just enough to find OSMetaClass information.""" # Super basic 
emulation. @@ -86,8 +98,7 @@ def load(addr, dtyp): def cleartemps(): for t in ['X{}'.format(i) for i in range(0, 19)]: reg.clear(t) - for insn in idau.Instructions(start, end): - _log(11, 'Processing instruction {:#x}', insn.ea) + for insn in idau.Instructions(start, end=end, count=count): mnem = insn.get_canon_mnem() if mnem == 'ADRP' or mnem == 'ADR': reg[insn.Op1.reg] = insn.Op2.value @@ -123,6 +134,167 @@ def cleartemps(): _log(10, 'Unrecognized instruction at address {:#x}', insn.ea) reg.clearall() +def _emulate_arm32(start, end=None, count=None, on_BL=None, on_RET=None): + """A very basic partial Arm32 emulator that does just enough to find OSMetaClass + information.""" + # Super basic emulation. + reg = _Regs() + def load(addr, dtyp): + if not addr: + return None + if dtyp == idaapi.dt_dword: + size = 4 + else: + return None + return idau.read_word(addr, size) + def cleartemps(): + for t in ['R{}'.format(i) for i in range(0, 12)]: + reg.clear(t) + + # Handle thumb stuff + start = start & ~1 + if end is not None: + end = (end + 1) & ~1 + + # if bl is found, lr is replaced, and marked dirty + # if pop {... 
lr ...} is found, lr is assumed to be restored to + # original, "clean" state + lr_dirty = False + + # Special registers have special handling + _SP_REG = 13 + _LR_REG = 14 + _PC_REG = 15 + + for insn in idau.Instructions(start, end=end, count=count): + mnem = insn.get_canon_mnem() + _log(12, 'Regs: {}', reg._regs) + _log(11, 'Processing instruction {} at {:#x}', mnem, insn.ea) + if mnem == 'ADR': + reg[insn.Op1.reg] = insn.Op2.value + elif ((mnem == 'ADD' or mnem == 'SUB') + and insn.Op1.type == insn.Op2.type == idc.o_reg + and insn.Op1.reg == insn.Op2.reg == _SP_REG): + # ignore add/sub on SP + pass + elif mnem in ('ADD', 'ORR', 'SUB') and insn.Op2.type == idc.o_reg and insn.Op3.type == idc.o_imm: + # There might be more operations, but in practice + # add/sub/orr are enough + + # Don't bother checking if src register is unknown and + # just mark dst register as unknown too + if isinstance(reg[insn.Op2.reg], _Regs._Unknown): + reg.clear(insn.Op1.reg) + else: + tmp = reg[insn.Op2.reg] + if mnem == 'ADD': + tmp += insn.Op3.value + elif mnem == 'SUB': + tmp -= insn.Op3.value + elif mnem == 'ORR': + tmp |= insn.Op3.value + else: + pass + reg[insn.Op1.reg] = tmp + elif mnem == 'ADD' and insn.Op3.type == idaapi.o_void: + # Don't bother checking if it's unknown + if not isinstance(reg[insn.Op1.reg], _Regs._Unknown): + if insn.Op2.type == idc.o_imm: + # ADD Rx, + reg[insn.Op1.reg] = reg[insn.Op1.reg] + insn.Op2.value + elif insn.Op2.type == idc.o_reg and insn.Op2.reg == _PC_REG: + # ADD Rx, PC -- special handling + # On ARM PC is "address of current instruction + 4" + # for historical reasons + reg[insn.Op1.reg] = reg[insn.Op1.reg] + insn.ea + 4 + elif mnem == 'NOP': + pass + elif mnem == 'MOV' and insn.Op2.type == idc.o_imm: + reg[insn.Op1.reg] = insn.Op2.value + elif mnem == 'MOV' and insn.Op2.type == idc.o_reg: + reg[insn.Op1.reg] = reg[insn.Op2.reg] + elif mnem == 'BX' and insn.Op1.type == idc.o_reg and insn.Op1.reg == _LR_REG: + # bx lr is often used for ret + if 
on_RET: + on_RET(reg) + break + elif mnem == 'POP' and insn.Op1.type in (idc.o_idpspec1, idc.o_reg): + poped = [] + + # Either it's one register pop'ped + if insn.Op1.type == idc.o_reg: + poped.append(insn.Op1.reg) + + # Or whole set of them, identified by specval bits + if insn.Op1.type == idc.o_idpspec1: + for i in range(0, 16): + if insn.Op1.specval & (1< 0: - loadcmd = idau.read_struct(lc, 'load_command', asobject=True) - if loadcmd.cmd == _LC_SEGMENT_64: - segcmd = idau.read_struct(lc, 'segment_command_64', asobject=True) + loadcmd = idau.read_struct(lc, _LOAD_COMMAND, asobject=True) + if loadcmd.cmd == _LC_SEGMENT: + segcmd = idau.read_struct(lc, _SEGMENT_COMMAND, asobject=True) segname = idau.null_terminated(segcmd.segname) segstart = segcmd.vmaddr segend = segstart + segcmd.vmsize sects = [] sc = int(segcmd) + len(segcmd) for i in range(segcmd.nsects): - sect = idau.read_struct(sc, 'section_64', asobject=True) + sect = idau.read_struct(sc, _SECTION, asobject=True) sectname = idau.null_terminated(sect.sectname) sectstart = sect.addr sectend = sectstart + sect.size diff --git a/ida_kernelcache/stub.py b/ida_kernelcache/stub.py index c793dd5..3bd2378 100644 --- a/ida_kernelcache/stub.py +++ b/ida_kernelcache/stub.py @@ -43,6 +43,9 @@ def _process_stub_template_1(stub): LDR X, [X, #@PAGEOFF] BR X """ + if idau.WORD_SIZE != 8: + return None + adrp, ldr, br = idau.Instructions(stub, count=3) if (adrp.itype == idaapi.ARM_adrp and adrp.Op1.type == idaapi.o_reg and adrp.Op2.type == idaapi.o_imm @@ -55,8 +58,35 @@ def _process_stub_template_1(stub): if target and idau.is_mapped(target): return target +def _process_stub_template_2(stub): + """A template to match the following stub pattern: + + MOV R, #( - PC + 2) + ADD R, PC + LDR.W R, [] + BX R + """ + if idau.WORD_SIZE != 4: + return None + + movl, add, ldr, bx = idau.Instructions(stub, count=4) + if (movl.itype == idaapi.ARM_movl and movl.Op1.type == idaapi.o_reg + and movl.Op2.type == idaapi.o_imm + and add.itype 
== idaapi.ARM_add and add.Op1.type == idaapi.o_reg + and add.Op2.type == idaapi.o_reg and add.Op2.reg == 15 # PC + and ldr.itype == idaapi.ARM_ldr and ldr.Op2.type == idaapi.o_displ + and bx.itype == idaapi.ARM_bx and bx.Op1.type == idaapi.o_reg + and movl.Op1.reg == add.Op1.reg == ldr.Op2.reg + and ldr.Op1.reg == bx.Op1.reg): + offset = movl.Op2.value + add.ea + 4 # +4 because fuck arm 32 + target = idau.read_word(offset) + if target and idau.is_mapped(target): + return target & ~1 + + _stub_processors = ( _process_stub_template_1, + _process_stub_template_2, ) def stub_target(stub_func): @@ -142,6 +172,8 @@ def _process_stubs_section(segstart, make_thunk, next_stub): if idc.isRef(idc.GetFlags(ea)) and not stub_name_target(idau.get_ea_name(ea)): _process_possible_stub(ea, make_thunk, next_stub) +STUB_SECT_POSTFIX = '__stubs' if idau.WORD_SIZE == 8 else '__stub' + def initialize_stub_symbols(make_thunk=True): """Populate IDA with information about the stubs in an iOS kernelcache. @@ -156,7 +188,7 @@ def initialize_stub_symbols(make_thunk=True): next_stub = internal.make_name_generator(kernelcache_stub_suffix) for ea in idautils.Segments(): segname = idc.SegName(ea) - if not segname.endswith('__stubs'): + if not segname.endswith(STUB_SECT_POSTFIX): continue _log(3, 'Processing segment {}', segname) _process_stubs_section(ea, make_thunk, next_stub) diff --git a/ida_kernelcache/symbol.py b/ida_kernelcache/symbol.py index a05332b..fba3773 100644 --- a/ida_kernelcache/symbol.py +++ b/ida_kernelcache/symbol.py @@ -91,7 +91,7 @@ def make_ident(name): """Convert a name into a valid identifier, substituting any invalid characters.""" ident = '' for c in name: - if idaapi.is_ident_char(c): + if idaapi.is_ident_char(ord(c)): ident += c else: ident += '_' diff --git a/ida_kernelcache/vtable.py b/ida_kernelcache/vtable.py index d454e54..93c7efb 100644 --- a/ida_kernelcache/vtable.py +++ b/ida_kernelcache/vtable.py @@ -93,13 +93,32 @@ def return_value(possible, length): return 
return_value(False, zeros) # We can skip all but the last VTABLE_OFFSET zeros. return return_value(False, zeros - VTABLE_OFFSET) - # TODO: We should verify that all vtable entries refer to code. # Now we know that we have at least one nonzero value, our job is easier. Get the full length # of the vtable, including the first VTABLE_OFFSET entries and the subsequent nonzero entries, # until either we find a zero word (not included) or run out of words in the stream. - length = VTABLE_OFFSET + 1 + idau.iterlen(takewhile(lambda word: word != 0, words)) - # Now it's simple: We are valid if the length is long enough, invalid if it's too short. - return return_value(length >= MIN_VTABLE_LENGTH, length) + funcs = list(takewhile(lambda word: word != 0, words)) + length = VTABLE_OFFSET + 1 + len(funcs) + + # There's no need to check if found funcs refer to code if they're + # too short + + if length < MIN_VTABLE_LENGTH: + return return_value(False, length) + + # We need to fill funcs with nonzero values, and only then check all + # values and fail if they don't refer to code -- so we can skip + # sequences like "code data zero", which are obviously invalid + for f in [first] + funcs: + # For some reason checking for perms on segment didn't work + # properly for me (it always returned 0), so I decided to check + # segment names instead and see if they contain "text" or "stub" + s = idc.SegName(f).lower() + if 'text' not in s and 'stub' not in s: + _log(5, "element in vtable at {:#x} isn't from __text or __stub but rather from {}", ea, s) + return return_value(False, length) + + # If we're still there then vtable is valid + return return_value(True, length) def convert_vtable_to_offsets(vtable, length=None): """Convert a vtable into a sequence of offsets.