From 4be8c3036abd09dc341018bbba56d01a19303a4f Mon Sep 17 00:00:00 2001 From: Rob Marsal Date: Tue, 4 Nov 2025 14:17:52 +0000 Subject: [PATCH] feat(PRO-1733): allow attaching to matching analyses by hash only Enforce boundaries checks at function level --- .../ui/autounstrip/AutoUnstripDialog.java | 7 +++ .../AbstractFunctionMatchingDialog.java | 2 +- .../RecentAnalysesTableModel.java | 10 ---- .../services/api/GhidraRevengService.java | 15 ++++-- .../ExportFunctionBoundariesService.java | 7 --- .../ExportFunctionBoundariesServiceImpl.java | 54 ------------------- 6 files changed, 18 insertions(+), 77 deletions(-) diff --git a/src/main/java/ai/reveng/toolkit/ghidra/binarysimilarity/ui/autounstrip/AutoUnstripDialog.java b/src/main/java/ai/reveng/toolkit/ghidra/binarysimilarity/ui/autounstrip/AutoUnstripDialog.java index c98dd9e7..df5b121f 100644 --- a/src/main/java/ai/reveng/toolkit/ghidra/binarysimilarity/ui/autounstrip/AutoUnstripDialog.java +++ b/src/main/java/ai/reveng/toolkit/ghidra/binarysimilarity/ui/autounstrip/AutoUnstripDialog.java @@ -4,6 +4,7 @@ import ai.reveng.toolkit.ghidra.core.services.api.GhidraRevengService; import ai.reveng.toolkit.ghidra.core.services.api.types.AnalysisID; import ai.reveng.toolkit.ghidra.core.services.api.types.AutoUnstripResponse; +import ai.reveng.toolkit.ghidra.core.services.api.types.FunctionID; import ai.reveng.toolkit.ghidra.core.types.ProgramWithBinaryID; import ai.reveng.toolkit.ghidra.plugins.ReaiPluginPackage; import ghidra.framework.plugintool.PluginTool; @@ -107,6 +108,9 @@ private void importFunctionNames(AutoUnstripResponse autoUnstripResponse) { // Retrieve the mangled names map once outside the transaction var mangledNameMapOpt = revengService.getFunctionMangledNamesMap(program); + // Retrieve the function ID map once outside the transaction + var functionMap = revengService.getFunctionMap(program); + program.withTransaction("Apply Auto-Unstrip Function Names", () -> { try { var revengMatchNamespace = program.getSymbolTable().getOrCreateNameSpace( @@ -121,6 +125,7 @@ private void importFunctionNames(AutoUnstripResponse autoUnstripResponse) { var revEngMangledName = match.suggested_name(); var revEngDemangledName = match.suggested_demangled_name(); + var functionID = functionMap.get(new FunctionID(match.function_id().value())); if ( func != null && @@ -132,6 +137,8 @@ private void importFunctionNames(AutoUnstripResponse autoUnstripResponse) { // Only accept valid names (no spaces) !revEngMangledName.contains(" ") && !revEngDemangledName.contains(" ") + // Only rename if the function ID is known (boundaries matched) + && functionID != null ) { try { // Capture original name before renaming diff --git a/src/main/java/ai/reveng/toolkit/ghidra/binarysimilarity/ui/functionmatching/AbstractFunctionMatchingDialog.java b/src/main/java/ai/reveng/toolkit/ghidra/binarysimilarity/ui/functionmatching/AbstractFunctionMatchingDialog.java index 42e7ea36..d8fa825e 100644 --- a/src/main/java/ai/reveng/toolkit/ghidra/binarysimilarity/ui/functionmatching/AbstractFunctionMatchingDialog.java +++ b/src/main/java/ai/reveng/toolkit/ghidra/binarysimilarity/ui/functionmatching/AbstractFunctionMatchingDialog.java @@ -135,7 +135,7 @@ protected void processFunctionMatchingResults(FunctionMatchingBatchResponse resp Function localFunction = functionMap.get(new FunctionID(matchResult.getFunctionId())); if (localFunction == null) { - // If we can't find the local function, skip this match + // If we can't find the local function, skip this match (boundaries do not match the remote ones) return; } diff --git a/src/main/java/ai/reveng/toolkit/ghidra/binarysimilarity/ui/recentanalyses/RecentAnalysesTableModel.java b/src/main/java/ai/reveng/toolkit/ghidra/binarysimilarity/ui/recentanalyses/RecentAnalysesTableModel.java index 4d059a11..18fba582 100644 --- a/src/main/java/ai/reveng/toolkit/ghidra/binarysimilarity/ui/recentanalyses/RecentAnalysesTableModel.java +++ b/src/main/java/ai/reveng/toolkit/ghidra/binarysimilarity/ui/recentanalyses/RecentAnalysesTableModel.java @@ -48,16 +48,6 @@ protected void doLoad(Accumulator accumulator, TaskMonitor return; } - // Filter out analyses where the function boundaries hash does not match our program - var functionBoundariesHash = functionBoundariesService.getFunctionBoundariesHash(); - if (!result.function_boundaries_hash().equals(functionBoundariesHash)) { - loggingService.info( - "[RevEng] Skipping analysis for " + result.binary_id() + " as function boundaries hash does" + - " not match. Expected " + functionBoundariesHash + " but got " + - result.function_boundaries_hash()); - return; - } - accumulator.add(result); } ); diff --git a/src/main/java/ai/reveng/toolkit/ghidra/core/services/api/GhidraRevengService.java b/src/main/java/ai/reveng/toolkit/ghidra/core/services/api/GhidraRevengService.java index 1009c547..f47ac640 100644 --- a/src/main/java/ai/reveng/toolkit/ghidra/core/services/api/GhidraRevengService.java +++ b/src/main/java/ai/reveng/toolkit/ghidra/core/services/api/GhidraRevengService.java @@ -188,6 +188,7 @@ private void loadFunctionInfo(Program program, BinaryID binID) throws ApiExcepti StringPropertyMap finalMangledNameMap = mangledNameMap; AtomicInteger ghidraRenamedFunctions = new AtomicInteger(); + AtomicInteger ghidraBoundariesMatchedFunction = new AtomicInteger(); functionInfo.forEach( info -> { var oFunc = getFunctionFor(info, program); @@ -215,9 +216,11 @@ private void loadFunctionInfo(Program program, BinaryID binID) throws ApiExcepti } var funcSize = func.getBody().getNumAddresses(); + // For unclear reasons the func size is off by one if (funcSize - 1 != info.functionSize() && funcSize != info.functionSize()){ Msg.warn(this, "Function size mismatch for function %s: %d vs %d".formatted(ghidraMangledName, funcSize, info.functionSize())); + return; } // Source types: @@ -238,6 +241,8 @@ private void loadFunctionInfo(Program program, BinaryID binID) throws ApiExcepti } finalFunctionIDMap.add(func.getEntryPoint(), info.functionID().value()); finalMangledNameMap.add(func.getEntryPoint(), revEngMangledName); + + ghidraBoundariesMatchedFunction.getAndIncrement(); } ); @@ -256,14 +261,14 @@ private void loadFunctionInfo(Program program, BinaryID binID) throws ApiExcepti program.endTransaction(transactionID, true); // Print summary - Msg.debug( - this, - "Loaded %d functions from RevEng.AI, renamed %d, Ghidra has %d functions".formatted( + Msg.showInfo(this, null, ReaiPluginPackage.WINDOW_PREFIX + "Function loading summary", + ("Found %d functions from RevEng.AI. Renamed %d. Your local Ghidra instance has %d/%d matching function " + + "boundaries. For better results, please start a new analysis from this plugin.").formatted( functionInfo.size(), ghidraRenamedFunctions.get(), + ghidraBoundariesMatchedFunction.get(), ghidraFunctionCount.get() - ) - ); + )); } /** diff --git a/src/main/java/ai/reveng/toolkit/ghidra/core/services/function/export/ExportFunctionBoundariesService.java b/src/main/java/ai/reveng/toolkit/ghidra/core/services/function/export/ExportFunctionBoundariesService.java index 52ec4e56..1752bcab 100644 --- a/src/main/java/ai/reveng/toolkit/ghidra/core/services/function/export/ExportFunctionBoundariesService.java +++ b/src/main/java/ai/reveng/toolkit/ghidra/core/services/function/export/ExportFunctionBoundariesService.java @@ -29,11 +29,4 @@ public interface ExportFunctionBoundariesService { * @return */ public JSONArray getFunctionsArray(); - - /** - * Return a hash of the function boundaries for change detection - * Note that this algorithm must match that used on the API server side! - * @return - */ - public String getFunctionBoundariesHash(); } diff --git a/src/main/java/ai/reveng/toolkit/ghidra/core/services/function/export/ExportFunctionBoundariesServiceImpl.java b/src/main/java/ai/reveng/toolkit/ghidra/core/services/function/export/ExportFunctionBoundariesServiceImpl.java index 51c673b6..97dc3f22 100644 --- a/src/main/java/ai/reveng/toolkit/ghidra/core/services/function/export/ExportFunctionBoundariesServiceImpl.java +++ b/src/main/java/ai/reveng/toolkit/ghidra/core/services/function/export/ExportFunctionBoundariesServiceImpl.java @@ -80,58 +80,4 @@ public JSONArray getFunctionsArray() { } return fArray; } - - @Override - public String getFunctionBoundariesHash() { - if (!isReady) - init(); - - // Collect all function boundaries into a list - List boundaries = new ArrayList<>(); - for (Function f : fm.getFunctions(true)) { - boundaries.add(getFunctionAt(f.getEntryPoint())); - } - - // Sort the boundaries by start address (convert hex string to long for proper sorting) - boundaries.sort(Comparator.comparingLong(b -> Long.parseUnsignedLong( - b.getString("start_addr").substring(2), 16))); - - // Create a formatted string representation of the boundaries - StringBuilder boundariesStr = new StringBuilder(); - for (int i = 0; i < boundaries.size(); i++) { - JSONObject b = boundaries.get(i); - if (i > 0) { - boundariesStr.append(","); - } - - // Convert hex addresses to integer representation - String startAddrHex = b.getString("start_addr"); - String endAddrHex = b.getString("end_addr"); - long startAddrInt = Long.parseUnsignedLong(startAddrHex.substring(2), 16); - long endAddrInt = Long.parseUnsignedLong(endAddrHex.substring(2), 16); - - boundariesStr.append(startAddrInt) - .append("-") - .append(endAddrInt); - } - - // Generate SHA-256 hash of the boundaries string - try { - MessageDigest digest = MessageDigest.getInstance("SHA-256"); - byte[] hashBytes = digest.digest(boundariesStr.toString().getBytes()); - - // Convert to hexadecimal string - StringBuilder hexString = new StringBuilder(); - for (byte b : hashBytes) { - String hex = Integer.toHexString(0xff & b); - if (hex.length() == 1) { - hexString.append('0'); - } - hexString.append(hex); - } - return hexString.toString(); - } catch (NoSuchAlgorithmException e) { - throw new RuntimeException("SHA-256 algorithm not available", e); - } - } }