Skip to content

Commit ed8fda2

Browse files
committed
test: add emulator hook tests (and fix related bugs)
1 parent 059ab26 commit ed8fda2

File tree

2 files changed

+70
-12
lines changed

2 files changed

+70
-12
lines changed

ghidralib.py

Lines changed: 50 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -698,6 +698,9 @@ def name(self):
698698
class Varnode(GhidraWrapper):
699699
@property
700700
def has_value(self): # type: () -> bool
701+
"""Return true if this varnode can be converted to an integer value.
702+
703+
In particular, this will return true for Address and Constant varnodes"""
701704
return self.is_address or self.is_constant
702705

703706
@property
@@ -799,11 +802,15 @@ def simple(self): # type: () -> int|str
799802
if self.has_value:
800803
return self.value
801804
elif self.is_register:
802-
return self.as_register
805+
if self.is_named_register:
806+
return self.as_register
807+
return "reg:{:x}:{:x}".format(self.offset, self.size)
803808
elif self.is_unique:
804-
return "uniq:{:x}".format(self.offset)
809+
return "uniq:{:x}:{:x}".format(self.offset, self.size)
805810
elif self.is_hash:
806-
return "hash:{:x}".format(self.offset)
811+
return "hash:{:x}:{:x}".format(self.offset, self.size)
812+
elif self.is_stack:
813+
return "stack:{:x}:{:x}".format(self.offset, self.size)
807814
raise RuntimeError("Unknown varnode type")
808815

809816
@property
@@ -2552,10 +2559,22 @@ class Symbol(GhidraWrapper):
25522559
"""Wraps a Ghidra Symbol object."""
25532560

25542561
@staticmethod
2555-
def __get_thunk_if_it_exists(external_symbol): # type: (JavaObject) -> JavaObject
2562+
def resolve_thunk_if_exists(external_symbol): # type: (JavaObject) -> JavaObject
25562563
"""Returns a function thunk leading to a passed external symbol, if it exists.
25572564
2558-
If there is no function thunk, original symbol is returned."""
2565+
If there is no function thunk, original symbol is returned.
2566+
2567+
Why is this ugly thing here? Well, we want to support external symbols,
2568+
especially external functions. Thunks are much more useful for us when
2569+
thinking in the context of the analysed program - when a Linux program calls
2570+
`printf` it jumps to the appropriate `printf` thunk, not to libc
2571+
directly. So this is the location that we want to patch/hook/trace/etc when
2572+
thinking about printf. But the thing is that Ghidra SymbolTable API will
2573+
not even return thunks! So we trace the external function references, and
2574+
return the first (almost certainly only) Thunk reference.
2575+
2576+
:param external_symbol: Symbol to find thunk for (if it exists).
2577+
"""
25592578
xrefs = list(external_symbol.getReferences())
25602579
for xref in xrefs:
25612580
if xref.getReferenceType() == GhRefType.THUNK:
@@ -2565,6 +2584,27 @@ def __get_thunk_if_it_exists(external_symbol): # type: (JavaObject) -> JavaObje
25652584
return thunk
25662585
return external_symbol
25672586

2587+
@staticmethod
2588+
def resolve_external(external_symbol): # type: (JavaObject) -> int
2589+
"""Resolves an external address to a RAM location, if possible.
2590+
2591+
If the symbol has no RAM location, just return its offset.
2592+
2593+
Why is this ugly thing here? Again, we want to support external symbols, and
2594+
we are interested in their RAM address in the program address space. In some
2595+
cases, Ghidra will give an external address a "location" in the RAM space.
2596+
So, for example, if the current program jumps to that external function (or reads
2597+
that external variable etc), it will read that location as far as Ghidra is
2598+
concerned (for example, Emulator will use it for calls). This is important
2599+
for emulating Windows binaries that use address tables for imports.
2600+
2601+
:param external_symbol: External symbol to resolve."""
2602+
external_manager = Program.current().getExternalManager()
2603+
ram_addr = external_manager.getExternalLocation(external_symbol).getAddress()
2604+
if ram_addr:
2605+
return ram_addr.getOffset()
2606+
return external_symbol.getAddress().getOffset()
2607+
25682608
@staticmethod
25692609
def get(raw_or_name): # type: (JavaObject|str|Addr) -> Symbol|None
25702610
"""Get a symbol with the provided name or at the provided address.
@@ -2582,7 +2622,7 @@ def get(raw_or_name): # type: (JavaObject|str|Addr) -> Symbol|None
25822622
return None
25832623
raw = symbols[0]
25842624
if raw.isExternal():
2585-
raw = Symbol.__get_thunk_if_it_exists(raw)
2625+
raw = Symbol.resolve_thunk_if_exists(raw)
25862626
elif can_resolve(raw_or_name):
25872627
raw = (
25882628
Program.current()
@@ -2625,6 +2665,8 @@ def remove(address, name): # type: (Addr, str) -> None
26252665
@property
26262666
def address(self): # type: () -> int
26272667
"""Get the address of this symbol."""
2668+
if self.is_external:
2669+
return Symbol.resolve_external(self.raw)
26282670
return self.raw.getAddress().getOffset()
26292671

26302672
@property
@@ -2812,11 +2854,11 @@ def add_hook(
28122854
self._hooks[addr] = hook
28132855

28142856
def has_hook_at(self, address): # type: (Addr) -> bool
2815-
addr = resolve(address)
2857+
addr = resolve(address).getOffset()
28162858
return addr in self._hooks
28172859

28182860
def delete_hook_at(self, address): # type: (Addr) -> None
2819-
addr = resolve(address)
2861+
addr = resolve(address).getOffset()
28202862
del self._hooks[addr]
28212863

28222864
@property

tests/ghidralib_test.py

Lines changed: 20 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -382,13 +382,13 @@ def test_function():
382382
assert func.repeatable_comment == ""
383383

384384
assert len(func.parameters) == 0
385-
assert len(func.local_variables) > 3
385+
assert len(func.local_variables) >= 3
386386
assert func.local_variables[0].raw
387-
assert len(func.variables) > 3
387+
assert len(func.variables) >= 3
388388
assert func.variables[0].raw
389-
assert len(func.varnodes) > 3
389+
assert len(func.varnodes) >= 3
390390
assert func.varnodes[0].raw
391-
assert len(func.high_variables) > 3
391+
assert len(func.high_variables) >= 3
392392
assert func.high_variables[0].raw
393393
assert len(func.stack) > 1
394394
assert func.stack[0].raw
@@ -472,6 +472,8 @@ def test_symbol():
472472
assert Symbol.get("bar") is None
473473
assert Symbol.get(0x00403CDE) is None
474474

475+
assert Symbol("wsprintfA").address == 0xB8AA # Resolve external address
476+
475477

476478
###############################################################
477479
# Test DataType
@@ -548,6 +550,20 @@ def test_emulator():
548550
emu.emulate_while(fnc.entrypoint, lambda e: e.pc in fnc.body)
549551
assert emu.read_unicode(emu["eax"]) == "HKEY_CLASSES_ROOT"
550552

553+
mock_executed = [False]
554+
555+
def nullsub(emu):
556+
mock_executed[0] = True
557+
emu.pc = emu.read_u64(emu.sp)
558+
emu.sp += 8
559+
return True
560+
561+
fun = Function(0x406035)
562+
emu = Emulator()
563+
emu.add_hook("lstrcpynW", nullsub)
564+
emu.emulate(fun.entrypoint, fun.exitpoints)
565+
assert mock_executed[0]
566+
551567

552568
###############################################################
553569
# Test Program

0 commit comments

Comments
 (0)