diff --git a/.gitignore b/.gitignore index 2bdf632f..926efd3e 100644 --- a/.gitignore +++ b/.gitignore @@ -52,3 +52,4 @@ replay_pid* # Third party JAR files from Ghidra lib/*.jar +__pycache__/ diff --git a/README.md b/README.md index b5b92bc0..6112c138 100644 --- a/README.md +++ b/README.md @@ -20,6 +20,11 @@ MCP Server + Ghidra Plugin - Decompile and analyze binaries in Ghidra - Automatically rename methods and data - List methods, classes, imports, and exports +- BSim integration for function similarity matching + - Connect to BSim databases (H2, PostgreSQL) + - Query individual functions for similar matches + - Batch query all functions in a program + - View similarity scores, confidence levels, and executable metadata # Installation @@ -108,6 +113,13 @@ Another MCP client that supports multiple models on the backend is [5ire](https: - `Ghidra/Framework/SoftwareModeling/lib/SoftwareModeling.jar` - `Ghidra/Framework/Utility/lib/Utility.jar` - `Ghidra/Framework/Gui/lib/Gui.jar` +- `Ghidra/Features/BSim/lib/BSim.jar lib/BSim.jar` +- `Ghidra/Features/BSim/lib/commons-dbcp2-2.9.0.jar lib/commons-dbcp2-2.9.0.jar` +- `Ghidra/Features/BSim/lib/commons-logging-1.2.jar lib/commons-logging-1.2.jar` +- `Ghidra/Features/BSim/lib/commons-pool2-2.11.1.jar lib/commons-pool2-2.11.1.jar` +- `Ghidra/Features/BSim/lib/h2-2.2.220.jar lib/h2-2.2.220.jar` +- `Ghidra/Features/BSim/lib/postgresql-42.7.6.jar lib/postgresql-42.7.6.jar` + 2. Build with Maven by running: `mvn clean package assembly:single` diff --git a/bridge_mcp_ghidra.py b/bridge_mcp_ghidra.py index c492562d..704ba63d 100644 --- a/bridge_mcp_ghidra.py +++ b/bridge_mcp_ghidra.py @@ -15,6 +15,7 @@ from mcp.server.fastmcp import FastMCP DEFAULT_GHIDRA_SERVER = "http://127.0.0.1:8080/" +DEFAULT_REQUEST_TIMEOUT = 5 logger = logging.getLogger(__name__) @@ -22,6 +23,8 @@ # Initialize ghidra_server_url with default value ghidra_server_url = DEFAULT_GHIDRA_SERVER +# Initialize ghidra_request_timeout with default value +ghidra_request_timeout = DEFAULT_REQUEST_TIMEOUT def safe_get(endpoint: str, params: dict = None) -> list: """ @@ -33,7 +36,7 @@ def safe_get(endpoint: str, params: dict = None) -> list: url = urljoin(ghidra_server_url, endpoint) try: - response = requests.get(url, params=params, timeout=5) + response = requests.get(url, params=params, timeout=ghidra_request_timeout) response.encoding = 'utf-8' if response.ok: return response.text.splitlines() @@ -46,9 +49,10 @@ def safe_post(endpoint: str, data: dict | str) -> str: try: url = urljoin(ghidra_server_url, endpoint) if isinstance(data, dict): - response = requests.post(url, data=data, timeout=5) + # BSim queries might be a bit slower, using configurable timeout + response = requests.post(url, data=data, timeout=ghidra_request_timeout) else: - response = requests.post(url, data=data.encode("utf-8"), timeout=5) + response = requests.post(url, data=data.encode("utf-8"), timeout=ghidra_request_timeout) response.encoding = 'utf-8' if response.ok: return response.text.strip() @@ -287,6 +291,174 @@ def list_strings(offset: int = 0, limit: int = 2000, filter: str = None) -> list params["filter"] = filter return safe_get("strings", params) +@mcp.tool() +def bsim_select_database(database_path: str) -> str: + """ + Select and connect to a BSim database for function similarity matching. + + Args: + database_path: Path to BSim database file (e.g., "/path/to/database.bsim") + or URL (e.g., "postgresql://host:port/dbname") + + Returns: + Connection status and database information + """ + return safe_post("bsim/select_database", {"database_path": database_path}) + +@mcp.tool() +def bsim_query_function( + function_address: str, + max_matches: int = 10, + similarity_threshold: float = 0.7, + confidence_threshold: float = 0.0, + max_similarity: float | None = None, + max_confidence: float | None = None, + offset: int = 0, + limit: int = 100, +) -> str: + """ + Query a single function against the BSim database to find similar functions. + + Args: + function_address: Address of the function to query (e.g., "0x401000") + max_matches: Maximum number of matches to return (default: 10) + similarity_threshold: Minimum similarity score (inclusive, 0.0-1.0, default: 0.7) + confidence_threshold: Minimum confidence score (inclusive, 0.0-1.0, default: 0.0) + max_similarity: Maximum similarity score (exclusive, 0.0-1.0, default: unbounded) + max_confidence: Maximum confidence score (exclusive, 0.0-1.0, default: unbounded) + offset: Pagination offset (default: 0) + limit: Maximum number of results to return (default: 100) + + Returns: + List of matching functions with similarity scores and metadata + """ + data = { + "function_address": function_address, + "max_matches": str(max_matches), + "similarity_threshold": str(similarity_threshold), + "confidence_threshold": str(confidence_threshold), + "offset": str(offset), + "limit": str(limit), + } + + if max_similarity is not None: + data["max_similarity"] = str(max_similarity) + if max_confidence is not None: + data["max_confidence"] = str(max_confidence) + + return safe_post("bsim/query_function", data) + +@mcp.tool() +def bsim_query_all_functions( + max_matches_per_function: int = 5, + similarity_threshold: float = 0.7, + confidence_threshold: float = 0.0, + max_similarity: float | None = None, + max_confidence: float | None = None, + offset: int = 0, + limit: int = 100, +) -> str: + """ + Query all functions in the current program against the BSim database. + Returns an overview of matches for all functions. + + Args: + max_matches_per_function: Max matches per function (default: 5) + similarity_threshold: Minimum similarity score (inclusive, 0.0-1.0, default: 0.7) + confidence_threshold: Minimum confidence score (inclusive, 0.0-1.0, default: 0.0) + max_similarity: Maximum similarity score (exclusive, 0.0-1.0, default: unbounded) + max_confidence: Maximum confidence score (exclusive, 0.0-1.0, default: unbounded) + offset: Pagination offset (default: 0) + limit: Maximum number of results to return (default: 100) + + Returns: + Summary and detailed results for all matching functions + """ + data = { + "max_matches_per_function": str(max_matches_per_function), + "similarity_threshold": str(similarity_threshold), + "confidence_threshold": str(confidence_threshold), + "offset": str(offset), + "limit": str(limit), + } + + if max_similarity is not None: + data["max_similarity"] = str(max_similarity) + if max_confidence is not None: + data["max_confidence"] = str(max_confidence) + + return safe_post("bsim/query_all_functions", data) + +@mcp.tool() +def bsim_disconnect() -> str: + """ + Disconnect from the current BSim database. + + Returns: + Disconnection status message + """ + return safe_post("bsim/disconnect", {}) + +@mcp.tool() +def bsim_status() -> str: + """ + Get the current BSim database connection status. + + Returns: + Current connection status and database path if connected + """ + return "\n".join(safe_get("bsim/status")) + +@mcp.tool() +def bsim_get_match_disassembly( + executable_path: str, + function_name: str, + function_address: str, +) -> str: + """ + Get the disassembly of a specific BSim match. This requires the matched + executable to be available in the Ghidra project. + + Args: + executable_path: Path to the matched executable (from BSim match result) + function_name: Name of the matched function + function_address: Address of the matched function (e.g., "0x401000") + + Returns: + Function prototype and assembly code for the matched function. + Returns an error message if the program is not found in the project. + """ + return safe_post("bsim/get_match_disassembly", { + "executable_path": executable_path, + "function_name": function_name, + "function_address": function_address, + }) + +@mcp.tool() +def bsim_get_match_decompile( + executable_path: str, + function_name: str, + function_address: str, +) -> str: + """ + Get the decompilation of a specific BSim match. This requires the matched + executable to be available in the Ghidra project. + + Args: + executable_path: Path to the matched executable (from BSim match result) + function_name: Name of the matched function + function_address: Address of the matched function (e.g., "0x401000") + + Returns: + Function prototype and decompiled C code for the matched function. + Returns an error message if the program is not found in the project. + """ + return safe_post("bsim/get_match_decompile", { + "executable_path": executable_path, + "function_name": function_name, + "function_address": function_address, + }) + def main(): parser = argparse.ArgumentParser(description="MCP server for Ghidra") parser.add_argument("--ghidra-server", type=str, default=DEFAULT_GHIDRA_SERVER, @@ -297,13 +469,19 @@ def main(): help="Port to run MCP server on (only used for sse), default: 8081") parser.add_argument("--transport", type=str, default="stdio", choices=["stdio", "sse"], help="Transport protocol for MCP, default: stdio") + parser.add_argument("--ghidra-timeout", type=int, default=DEFAULT_REQUEST_TIMEOUT, + help=f"MCP requests timeout, default: {DEFAULT_REQUEST_TIMEOUT}") args = parser.parse_args() - + # Use the global variable to ensure it's properly updated global ghidra_server_url if args.ghidra_server: ghidra_server_url = args.ghidra_server - + + global ghidra_request_timeout + if args.ghidra_timeout: + ghidra_request_timeout = args.ghidra_timeout + if args.transport == "sse": try: # Set up logging diff --git a/pom.xml b/pom.xml index 6452d96e..f50ed492 100644 --- a/pom.xml +++ b/pom.xml @@ -15,60 +15,104 @@ ghidra Generic - 11.3.2 + 11.4.2 system ${project.basedir}/lib/Generic.jar ghidra SoftwareModeling - 11.3.2 + 11.4.2 system ${project.basedir}/lib/SoftwareModeling.jar ghidra Project - 11.3.2 + 11.4.2 system ${project.basedir}/lib/Project.jar ghidra Docking - 11.3.2 + 11.4.2 system ${project.basedir}/lib/Docking.jar ghidra Decompiler - 11.3.2 + 11.4.2 system ${project.basedir}/lib/Decompiler.jar ghidra Utility - 11.3.2 + 11.4.2 system ${project.basedir}/lib/Utility.jar ghidra Base - 11.3.2 + 11.4.2 system ${project.basedir}/lib/Base.jar ghidra Gui - 11.3.2 + 11.4.2 system ${project.basedir}/lib/Gui.jar + + + ghidra + BSim + 11.4.2 + system + ${project.basedir}/lib/BSim.jar + + + org.apache.commons + commons-dbcp2 + 2.9.0 + system + ${project.basedir}/lib/commons-dbcp2-2.9.0.jar + + + commons-logging + commons-logging + 1.2 + system + ${project.basedir}/lib/commons-logging-1.2.jar + + + org.apache.commons + commons-pool2 + 2.11.1 + system + ${project.basedir}/lib/commons-pool2-2.11.1.jar + + + com.h2database + h2 + 2.2.220 + system + ${project.basedir}/lib/h2-2.2.220.jar + + + org.postgresql + postgresql + 42.7.6 + system + ${project.basedir}/lib/postgresql-42.7.6.jar + + junit @@ -80,10 +124,20 @@ + + org.apache.maven.plugins + maven-compiler-plugin + 3.14.0 + + 21 + 21 + + + maven-jar-plugin - 3.2.2 + 3.4.2 src/main/resources/META-INF/MANIFEST.MF diff --git a/src/main/java/com/lauriewired/GhidraMCPPlugin.java b/src/main/java/com/lauriewired/GhidraMCPPlugin.java index 88583bab..f191ddcd 100644 --- a/src/main/java/com/lauriewired/GhidraMCPPlugin.java +++ b/src/main/java/com/lauriewired/GhidraMCPPlugin.java @@ -1,54 +1,41 @@ package com.lauriewired; -import ghidra.framework.plugintool.Plugin; -import ghidra.framework.plugintool.PluginTool; -import ghidra.program.model.address.Address; -import ghidra.program.model.address.GlobalNamespace; -import ghidra.program.model.listing.*; -import ghidra.program.model.mem.MemoryBlock; -import ghidra.program.model.symbol.*; -import ghidra.program.model.symbol.ReferenceManager; -import ghidra.program.model.symbol.Reference; -import ghidra.program.model.symbol.ReferenceIterator; -import ghidra.program.model.symbol.RefType; -import ghidra.program.model.pcode.HighFunction; -import ghidra.program.model.pcode.HighSymbol; -import ghidra.program.model.pcode.LocalSymbolMap; -import ghidra.program.model.pcode.HighFunctionDBUtil; -import ghidra.program.model.pcode.HighFunctionDBUtil.ReturnCommitOption; +import com.sun.net.httpserver.HttpExchange; +import com.sun.net.httpserver.HttpServer; import ghidra.app.decompiler.DecompInterface; import ghidra.app.decompiler.DecompileResults; import ghidra.app.plugin.PluginCategoryNames; import ghidra.app.services.CodeViewerService; import ghidra.app.services.ProgramManager; -import ghidra.app.util.PseudoDisassembler; -import ghidra.app.cmd.function.SetVariableNameCmd; -import ghidra.program.model.symbol.SourceType; -import ghidra.program.model.listing.LocalVariableImpl; -import ghidra.program.model.listing.ParameterImpl; -import ghidra.util.exception.DuplicateNameException; -import ghidra.util.exception.InvalidInputException; +import ghidra.framework.options.Options; +import ghidra.framework.plugintool.Plugin; import ghidra.framework.plugintool.PluginInfo; +import ghidra.framework.plugintool.PluginTool; import ghidra.framework.plugintool.util.PluginStatus; -import ghidra.program.util.ProgramLocation; -import ghidra.util.Msg; -import ghidra.util.task.ConsoleTaskMonitor; -import ghidra.util.task.TaskMonitor; -import ghidra.program.model.pcode.HighVariable; -import ghidra.program.model.pcode.Varnode; +import ghidra.program.model.address.Address; +import ghidra.program.model.address.GlobalNamespace; import ghidra.program.model.data.DataType; import ghidra.program.model.data.DataTypeManager; import ghidra.program.model.data.PointerDataType; -import ghidra.program.model.data.Undefined1DataType; -import ghidra.program.model.listing.Variable; -import ghidra.app.decompiler.component.DecompilerUtils; -import ghidra.app.decompiler.ClangToken; -import ghidra.framework.options.Options; +import ghidra.program.model.listing.*; +import ghidra.program.model.mem.MemoryBlock; +import ghidra.program.model.pcode.*; +import ghidra.program.model.pcode.HighFunctionDBUtil.ReturnCommitOption; +import ghidra.program.model.symbol.*; +import ghidra.program.util.ProgramLocation; +import ghidra.util.Msg; +import ghidra.util.task.ConsoleTaskMonitor; -import com.sun.net.httpserver.HttpExchange; -import com.sun.net.httpserver.HttpServer; +// BSim imports +import ghidra.features.bsim.query.FunctionDatabase; +import ghidra.features.bsim.query.BSimServerInfo; +import ghidra.features.bsim.query.BSimClientFactory; +import ghidra.features.bsim.query.GenSignatures; +import ghidra.features.bsim.query.description.*; +import ghidra.features.bsim.query.protocol.*; +import generic.lsh.vector.LSHVectorFactory; -import javax.swing.SwingUtilities; +import javax.swing.*; import java.io.IOException; import java.io.OutputStream; import java.lang.reflect.InvocationTargetException; @@ -70,7 +57,15 @@ public class GhidraMCPPlugin extends Plugin { private HttpServer server; private static final String OPTION_CATEGORY_NAME = "GhidraMCP HTTP Server"; private static final String PORT_OPTION_NAME = "Server Port"; + private static final String DECOMPILE_TIMEOUT_OPTION_NAME = "Decompile Timeout"; private static final int DEFAULT_PORT = 8080; + private static final int DEFAULT_DECOMPILE_TIMEOUT = 30; + + private int decompileTimeout; + + // BSim database connection state + private FunctionDatabase bsimDatabase = null; + private String currentBSimDatabasePath = null; public GhidraMCPPlugin(PluginTool tool) { super(tool); @@ -83,6 +78,10 @@ public GhidraMCPPlugin(PluginTool tool) { "The network port number the embedded HTTP server will listen on. " + "Requires Ghidra restart or plugin reload to take effect after changing."); + options.registerOption(DECOMPILE_TIMEOUT_OPTION_NAME, DEFAULT_DECOMPILE_TIMEOUT, + null, + "Decompilation timeout. " + + "Requires Ghidra restart or plugin reload to take effect after changing."); try { startServer(); } @@ -96,6 +95,7 @@ private void startServer() throws IOException { // Read the configured port Options options = tool.getOptions(OPTION_CATEGORY_NAME); int port = options.getInt(PORT_OPTION_NAME, DEFAULT_PORT); + this.decompileTimeout = options.getInt(DECOMPILE_TIMEOUT_OPTION_NAME, DEFAULT_DECOMPILE_TIMEOUT); // Stop existing server if running (e.g., if plugin is reloaded) if (server != null) { @@ -341,6 +341,62 @@ private void startServer() throws IOException { sendResponse(exchange, listDefinedStrings(offset, limit, filter)); }); + // BSim endpoints + server.createContext("/bsim/select_database", exchange -> { + Map params = parsePostParams(exchange); + String dbPath = params.get("database_path"); + sendResponse(exchange, selectBSimDatabase(dbPath)); + }); + + server.createContext("/bsim/query_function", exchange -> { + Map params = parsePostParams(exchange); + String functionAddress = params.get("function_address"); + int maxMatches = parseIntOrDefault(params.get("max_matches"), 10); + double similarityThreshold = parseDoubleOrDefault(params.get("similarity_threshold"), "0.7"); + double confidenceThreshold = parseDoubleOrDefault(params.get("confidence_threshold"), "0.0"); + double maxSimilarity = parseDoubleOrDefault(params.get("max_similarity"), String.valueOf(Double.POSITIVE_INFINITY)); + double maxConfidence = parseDoubleOrDefault(params.get("max_confidence"), String.valueOf(Double.POSITIVE_INFINITY)); + int offset = parseIntOrDefault(params.get("offset"), 0); + int limit = parseIntOrDefault(params.get("limit"), 100); + sendResponse(exchange, queryBSimFunction(functionAddress, maxMatches, similarityThreshold, confidenceThreshold, maxSimilarity, maxConfidence, offset, limit)); + }); + + server.createContext("/bsim/query_all_functions", exchange -> { + Map params = parsePostParams(exchange); + int maxMatchesPerFunction = parseIntOrDefault(params.get("max_matches_per_function"), 5); + double similarityThreshold = parseDoubleOrDefault(params.get("similarity_threshold"), "0.7"); + double confidenceThreshold = parseDoubleOrDefault(params.get("confidence_threshold"), "0.0"); + double maxSimilarity = parseDoubleOrDefault(params.get("max_similarity"), String.valueOf(Double.POSITIVE_INFINITY)); + double maxConfidence = parseDoubleOrDefault(params.get("max_confidence"), String.valueOf(Double.POSITIVE_INFINITY)); + int offset = parseIntOrDefault(params.get("offset"), 0); + int limit = parseIntOrDefault(params.get("limit"), 100); + sendResponse(exchange, queryAllBSimFunctions(maxMatchesPerFunction, similarityThreshold, confidenceThreshold, maxSimilarity, maxConfidence, offset, limit)); + }); + + server.createContext("/bsim/disconnect", exchange -> { + sendResponse(exchange, disconnectBSimDatabase()); + }); + + server.createContext("/bsim/status", exchange -> { + sendResponse(exchange, getBSimStatus()); + }); + + server.createContext("/bsim/get_match_disassembly", exchange -> { + Map params = parsePostParams(exchange); + String executablePath = params.get("executable_path"); + String functionName = params.get("function_name"); + String functionAddress = params.get("function_address"); + sendResponse(exchange, getBSimMatchDisassembly(executablePath, functionName, functionAddress)); + }); + + server.createContext("/bsim/get_match_decompile", exchange -> { + Map params = parsePostParams(exchange); + String executablePath = params.get("executable_path"); + String functionName = params.get("function_name"); + String functionAddress = params.get("function_address"); + sendResponse(exchange, getBSimMatchDecompile(executablePath, functionName, functionAddress)); + }); + server.setExecutor(null); new Thread(() -> { try { @@ -498,7 +554,7 @@ private String decompileFunctionByName(String name) { for (Function func : program.getFunctionManager().getFunctions(true)) { if (func.getName().equals(name)) { DecompileResults result = - decomp.decompileFunction(func, 30, new ConsoleTaskMonitor()); + decomp.decompileFunction(func, this.decompileTimeout, new ConsoleTaskMonitor()); if (result != null && result.decompileCompleted()) { return result.getDecompiledFunction().getC(); } else { @@ -593,7 +649,7 @@ private String renameVariableInFunction(String functionName, String oldVarName, return "Function not found"; } - DecompileResults result = decomp.decompileFunction(func, 30, new ConsoleTaskMonitor()); + DecompileResults result = decomp.decompileFunction(func, this.decompileTimeout, new ConsoleTaskMonitor()); if (result == null || !result.decompileCompleted()) { return "Decompilation failed"; } @@ -804,12 +860,9 @@ private String decompileFunctionByAddress(String addressStr) { Function func = getFunctionForAddress(program, addr); if (func == null) return "No function found at or containing address " + addressStr; - DecompInterface decomp = new DecompInterface(); - decomp.openProgram(program); - DecompileResults result = decomp.decompileFunction(func, 30, new ConsoleTaskMonitor()); - - return (result != null && result.decompileCompleted()) - ? result.getDecompiledFunction().getC() + String decompCode = decompileFunctionInProgram(func, program); + return (decompCode != null && !decompCode.isEmpty()) + ? decompCode : "Decompilation failed"; } catch (Exception e) { return "Error decompiling function: " + e.getMessage(); @@ -829,27 +882,7 @@ private String disassembleFunction(String addressStr) { Function func = getFunctionForAddress(program, addr); if (func == null) return "No function found at or containing address " + addressStr; - StringBuilder result = new StringBuilder(); - Listing listing = program.getListing(); - Address start = func.getEntryPoint(); - Address end = func.getBody().getMaxAddress(); - - InstructionIterator instructions = listing.getInstructions(start, true); - while (instructions.hasNext()) { - Instruction instr = instructions.next(); - if (instr.getAddress().compareTo(end) > 0) { - break; // Stop if we've gone past the end of the function - } - String comment = listing.getComment(CodeUnit.EOL_COMMENT, instr.getAddress()); - comment = (comment != null) ? "; " + comment : ""; - - result.append(String.format("%s: %s %s\n", - instr.getAddress(), - instr.toString(), - comment)); - } - - return result.toString(); + return disassembleFunctionInProgram(func, program); } catch (Exception e) { return "Error disassembling function: " + e.getMessage(); } @@ -858,7 +891,7 @@ private String disassembleFunction(String addressStr) { /** * Set a comment using the specified comment type (PRE_COMMENT or EOL_COMMENT) */ - private boolean setCommentAtAddress(String addressStr, String comment, int commentType, String transactionName) { + private boolean setCommentAtAddress(String addressStr, String comment, CommentType commentType, String transactionName) { Program program = getCurrentProgram(); if (program == null) return false; if (addressStr == null || addressStr.isEmpty() || comment == null) return false; @@ -889,14 +922,14 @@ private boolean setCommentAtAddress(String addressStr, String comment, int comme * Set a comment for a given address in the function pseudocode */ private boolean setDecompilerComment(String addressStr, String comment) { - return setCommentAtAddress(addressStr, comment, CodeUnit.PRE_COMMENT, "Set decompiler comment"); + return setCommentAtAddress(addressStr, comment, CommentType.PRE, "Set decompiler comment"); } /** * Set a comment for a given address in the function disassembly */ private boolean setDisassemblyComment(String addressStr, String comment) { - return setCommentAtAddress(addressStr, comment, CodeUnit.EOL_COMMENT, "Set disassembly comment"); + return setCommentAtAddress(addressStr, comment, CommentType.EOL, "Set disassembly comment"); } /** @@ -1035,9 +1068,9 @@ private void addPrototypeComment(Program program, Function func, String prototyp int txComment = program.startTransaction("Add prototype comment"); try { program.getListing().setComment( - func.getEntryPoint(), - CodeUnit.PLATE_COMMENT, - "Setting prototype: " + prototype + func.getEntryPoint(), + CommentType.PLATE, + "Setting prototype: " + prototype ); } finally { program.endTransaction(txComment, true); @@ -1209,7 +1242,7 @@ private DecompileResults decompileFunction(Function func, Program program) { decomp.setSimplificationStyle("decompile"); // Full decompilation // Decompile the function - DecompileResults results = decomp.decompileFunction(func, 60, new ConsoleTaskMonitor()); + DecompileResults results = decomp.decompileFunction(func, this.decompileTimeout, new ConsoleTaskMonitor()); if (!results.decompileCompleted()) { Msg.error(this, "Could not decompile function: " + results.getErrorMessage()); @@ -1513,18 +1546,21 @@ private DataType findDataTypeByNameInAllCategories(DataTypeManager dtm, String t private DataType searchByNameInAllCategories(DataTypeManager dtm, String name) { // Get all data types from the manager Iterator allTypes = dtm.getAllDataTypes(); + DataType fuzzyCandidate = null; while (allTypes.hasNext()) { DataType dt = allTypes.next(); // Check if the name matches exactly (case-sensitive) if (dt.getName().equals(name)) { return dt; - } - // For case-insensitive, we want an exact match except for case - if (dt.getName().equalsIgnoreCase(name)) { - return dt; + } else if (fuzzyCandidate == null && dt.getName().equalsIgnoreCase(name)) { + // For case-insensitive, we want an exact match except for case + // We want to check ALL types for exact matches, not just the first one + // We want to stop on the very first match for fuzzy matching + fuzzyCandidate = dt; } } - return null; + + return fuzzyCandidate; } // ---------------------------------------------------------------------------------- @@ -1606,6 +1642,24 @@ private int parseIntOrDefault(String val, int defaultValue) { } } + /** + * Parse a double from a string, or return defaultValue if null/invalid. + */ + private double parseDoubleOrDefault(String val, String defaultValue) { + if (val == null) val = defaultValue; + try { + return Double.parseDouble(val); + } + catch (NumberFormatException e) { + try { + return Double.parseDouble(defaultValue); + } + catch (NumberFormatException e2) { + return 0.0; + } + } + } + /** * Escape non-ASCII chars to avoid potential decode issues. */ @@ -1624,6 +1678,711 @@ private String escapeNonAscii(String input) { return sb.toString(); } + // ---------------------------------------------------------------------------------- + // BSim functionality + // ---------------------------------------------------------------------------------- + + /** + * Select and connect to a BSim database + */ + private String selectBSimDatabase(String databasePath) { + if (databasePath == null || databasePath.isEmpty()) { + return "Error: Database path is required"; + } + + try { + // Disconnect from any existing database first + if (bsimDatabase != null) { + disconnectBSimDatabase(); + } + + // Create BSimServerInfo from the path/URL + // Use URL constructor for URLs (postgresql://, file://, etc.) + // Use String constructor only for file paths + BSimServerInfo serverInfo; + if (databasePath.contains("://")) { + // It's a URL - use URL constructor + serverInfo = new BSimServerInfo(new java.net.URL(databasePath)); + } else { + // It's a file path - use String constructor + serverInfo = new BSimServerInfo(databasePath); + } + + // Initialize the database connection + bsimDatabase = BSimClientFactory.buildClient(serverInfo, false); + + if (bsimDatabase == null) { + return "Error: Failed to create BSim database client"; + } + + // Try to initialize the connection + if (!bsimDatabase.initialize()) { + bsimDatabase = null; + return "Error: Failed to initialize BSim database connection"; + } + + currentBSimDatabasePath = databasePath; + return "Successfully connected to BSim database: " + databasePath; + + } catch (Exception e) { + bsimDatabase = null; + currentBSimDatabasePath = null; + return "Error connecting to BSim database: " + e.getMessage(); + } + } + + /** + * Disconnect from the current BSim database + */ + private String disconnectBSimDatabase() { + if (bsimDatabase != null) { + try { + bsimDatabase.close(); + bsimDatabase = null; + String path = currentBSimDatabasePath; + currentBSimDatabasePath = null; + return "Disconnected from BSim database: " + path; + } catch (Exception e) { + return "Error disconnecting from BSim database: " + e.getMessage(); + } + } + return "No BSim database connection to disconnect"; + } + + /** + * Get the current BSim database connection status + */ + private String getBSimStatus() { + if (bsimDatabase != null && currentBSimDatabasePath != null) { + try { + StringBuilder status = new StringBuilder(); + status.append("Connected to: ").append(currentBSimDatabasePath).append("\n"); + status.append("Database info:\n"); + + LSHVectorFactory vectorFactory = bsimDatabase.getLSHVectorFactory(); + if (vectorFactory != null) { + status.append(" Vector Factory: ").append(vectorFactory.getClass().getSimpleName()).append("\n"); + } else { + status.append(" Vector Factory: null (ERROR)\n"); + } + + // Try to get database info + QueryInfo infoQuery = new QueryInfo(); + ResponseInfo infoResponse = infoQuery.execute(bsimDatabase); + if (infoResponse != null && infoResponse.info != null) { + status.append(" Database name: ").append(infoResponse.info.databasename).append("\n"); + } + + return status.toString(); + } catch (Exception e) { + return "Connected to: " + currentBSimDatabasePath + " (Error getting details: " + e.getMessage() + ")"; + } + } + return "Not connected to any BSim database"; + } + + /** + * Query a single function against the BSim database + * + * @param functionAddress Address of the function to query + * @param maxMatches Maximum number of matches to return + * @param similarityThreshold Minimum similarity score (inclusive, 0.0-1.0) + * @param confidenceThreshold Minimum confidence score (inclusive, 0.0-1.0) + * @param maxSimilarity Maximum similarity score (exclusive, 0.0-1.0, default: unbounded/infinity) + * @param maxConfidence Maximum confidence score (exclusive, 0.0-1.0, default: unbounded/infinity) + * @param offset Pagination offset + * @param limit Maximum number of results to return + */ + private String queryBSimFunction(String functionAddress, int maxMatches, + double similarityThreshold, double confidenceThreshold, + double maxSimilarity, double maxConfidence, + int offset, int limit) { + if (bsimDatabase == null) { + return "Error: Not connected to a BSim database. Use bsim_select_database first."; + } + + Program program = getCurrentProgram(); + if (program == null) { + return "Error: No program loaded"; + } + + try { + Address addr = program.getAddressFactory().getAddress(functionAddress); + Function func = program.getFunctionManager().getFunctionAt(addr); + if (func == null) { + func = program.getFunctionManager().getFunctionContaining(addr); + } + if (func == null) { + return "Error: No function found at address " + functionAddress; + } + + // Generate signature for this function + GenSignatures gensig = new GenSignatures(false); + gensig.setVectorFactory(bsimDatabase.getLSHVectorFactory()); + + // Set up the executable record for the current program + String exeName = program.getName(); + String exePath = program.getExecutablePath(); + gensig.openProgram(program, exeName, exePath, null, null, null); + + DescriptionManager descManager = gensig.getDescriptionManager(); + gensig.scanFunction(func); + + if (descManager.numFunctions() == 0) { + return "Error: Failed to generate signature for function"; + } + + // Create and execute query + // Note: We don't set query.max here because we need to filter by max similarity/confidence first, + // then limit to maxMatches. Setting query.max too early might exclude valid matches. + QueryNearest query = new QueryNearest(); + query.manage = descManager; + query.max = Integer.MAX_VALUE; // Get all potential matches + query.thresh = similarityThreshold; + query.signifthresh = confidenceThreshold; + + // Execute query + ResponseNearest response = query.execute(bsimDatabase); + + if (response == null) { + return "Error: Query returned no response"; + } + + // Debug info + Msg.info(this, String.format("Query completed for %s: threshold=%.2f, results=%d", + func.getName(), similarityThreshold, + response.result != null ? response.result.size() : 0)); + + // Filter results by max similarity and max confidence, and limit to maxMatches + filterBSimResults(response, maxSimilarity, maxConfidence, maxMatches); + + // Format results with pagination + return formatBSimResults(response, func.getName(), offset, limit); + + } catch (Exception e) { + return "Error querying BSim database: " + e.getMessage(); + } + } + + /** + * Query all functions in the current program against the BSim database + * + * @param maxMatchesPerFunction Maximum number of matches per function + * @param similarityThreshold Minimum similarity score (inclusive, 0.0-1.0) + * @param confidenceThreshold Minimum confidence score (inclusive, 0.0-1.0) + * @param maxSimilarity Maximum similarity score (exclusive, 0.0-1.0, default: unbounded/infinity) + * @param maxConfidence Maximum confidence score (exclusive, 0.0-1.0, default: unbounded/infinity) + * @param offset Pagination offset + * @param limit Maximum number of results to return + */ + private String queryAllBSimFunctions(int maxMatchesPerFunction, + double similarityThreshold, double confidenceThreshold, + double maxSimilarity, double maxConfidence, + int offset, int limit) { + if (bsimDatabase == null) { + return "Error: Not connected to a BSim database. Use bsim_select_database first."; + } + + Program program = getCurrentProgram(); + if (program == null) { + return "Error: No program loaded"; + } + + try { + StringBuilder results = new StringBuilder(); + FunctionManager funcManager = program.getFunctionManager(); + int totalFunctions = funcManager.getFunctionCount(); + int queriedFunctions = 0; + + results.append("Querying ").append(totalFunctions).append(" functions against BSim database...\n\n"); + + // Generate signatures for all functions + GenSignatures gensig = new GenSignatures(false); + gensig.setVectorFactory(bsimDatabase.getLSHVectorFactory()); + + // Set up the executable record for the current program + String exeName = program.getName(); + String exePath = program.getExecutablePath(); + gensig.openProgram(program, exeName, exePath, null, null, null); + + DescriptionManager descManager = gensig.getDescriptionManager(); + + // Use built-in scanFunctions to scan all at once + try { + gensig.scanFunctions(funcManager.getFunctions(true), 30, new ConsoleTaskMonitor()); + queriedFunctions = descManager.numFunctions(); + } catch (Exception e) { + return "Error: Failed to generate signatures: " + e.getMessage(); + } + + if (queriedFunctions == 0) { + return "Error: No function signatures were generated"; + } + + // Create query + // Note: We don't set query.max here because we need to filter by max similarity/confidence first, + // then limit to maxMatchesPerFunction. Setting query.max too early might exclude valid matches. + QueryNearest query = new QueryNearest(); + query.manage = descManager; + query.max = Integer.MAX_VALUE; // Get all potential matches + query.thresh = similarityThreshold; + query.signifthresh = confidenceThreshold; + + // Execute query + ResponseNearest response = query.execute(bsimDatabase); + + if (response == null) { + return "Error: Query returned no response"; + } + + // Filter results by max similarity and max confidence, and limit to maxMatchesPerFunction + filterBSimResults(response, maxSimilarity, maxConfidence, maxMatchesPerFunction); + + results.append("Successfully queried ").append(queriedFunctions).append(" functions\n"); + + // Format detailed results with pagination + results.append(formatBSimResults(response, null, offset, limit)); + + return results.toString(); + + } catch (Exception e) { + return "Error querying all functions: " + e.getMessage(); + } + } + + /** + * Get detailed information about a BSim match from a program in the Ghidra project. + * Falls back gracefully if the program is not found in the project. + */ + /** + * Get the disassembly of a BSim match from a program in the Ghidra project. + */ + private String getBSimMatchDisassembly(String executablePath, String functionName, String functionAddress) { + return getBSimMatchFunction(executablePath, functionName, functionAddress, true, false); + } + + /** + * Get the decompilation of a BSim match from a program in the Ghidra project. + */ + private String getBSimMatchDecompile(String executablePath, String functionName, String functionAddress) { + return getBSimMatchFunction(executablePath, functionName, functionAddress, false, true); + } + + /** + * Get function details for a BSim match from a program in the Ghidra project. + * Falls back gracefully if the program is not found in the project. + */ + private String getBSimMatchFunction(String executablePath, String functionName, String functionAddress, + boolean includeDisassembly, boolean includeDecompile) { + // Input validation + if (executablePath == null || executablePath.isEmpty()) { + return "Error: Executable path is required"; + } + if (functionName == null || functionName.isEmpty()) { + return "Error: Function name is required"; + } + if (functionAddress == null || functionAddress.isEmpty()) { + return "Error: Function address is required"; + } + + StringBuilder result = new StringBuilder(); + result.append("Match Details\n"); + result.append("=============\n"); + result.append(String.format("Executable: %s\n", executablePath)); + result.append(String.format("Function: %s\n", functionName)); + result.append(String.format("Address: %s\n\n", functionAddress)); + + ProgramManager pm = tool.getService(ProgramManager.class); + if (pm == null) { + result.append("ERROR: ProgramManager service not available\n"); + return result.toString(); + } + + String fileName = new java.io.File(executablePath).getName(); + Program matchedProgram = null; + boolean needsRelease = false; + + // Strategy 1: Check all open programs + Program[] openPrograms = pm.getAllOpenPrograms(); + for (Program program : openPrograms) { + if (executablePath.equals(program.getExecutablePath()) || fileName.equals(program.getName())) { + matchedProgram = program; + needsRelease = false; + break; + } + } + + // Strategy 2: Try to find in project but not currently open + if (matchedProgram == null) { + ghidra.framework.model.Project project = tool.getProject(); + if (project != null) { + ghidra.framework.model.DomainFile domainFile = findDomainFileRecursive( + project.getProjectData().getRootFolder(), fileName); + + if (domainFile != null) { + try { + ghidra.framework.model.DomainObject domainObject = + domainFile.getDomainObject(this, false, false, new ConsoleTaskMonitor()); + if (domainObject instanceof Program) { + matchedProgram = (Program) domainObject; + needsRelease = true; + } + } catch (Exception e) { + Msg.error(this, "Failed to open program from project: " + fileName, e); + } + } + } + } + + if (matchedProgram == null) { + result.append("ERROR: Program not found in Ghidra project\n"); + result.append("The matched executable is not in the current project.\n"); + result.append("\nTo view match details, please import the program into Ghidra:\n"); + result.append(" ").append(executablePath).append("\n"); + return result.toString(); + } + + try { + // Find the function + Address addr = matchedProgram.getAddressFactory().getAddress(functionAddress); + Function func = matchedProgram.getFunctionManager().getFunctionAt(addr); + + if (func == null) { + func = matchedProgram.getFunctionManager().getFunctionContaining(addr); + } + + if (func == null) { + result.append("ERROR: Function not found at address ").append(functionAddress).append("\n"); + return result.toString(); + } + + // Get function prototype + result.append("Function Prototype:\n"); + result.append("-------------------\n"); + result.append(func.getSignature()).append("\n\n"); + + // Get decompilation if requested + if (includeDecompile) { + result.append("Decompilation:\n"); + result.append("--------------\n"); + String decompCode = decompileFunctionInProgram(func, matchedProgram); + if (decompCode != null && !decompCode.isEmpty()) { + result.append(decompCode).append("\n"); + } else { + result.append("(Decompilation not available)\n"); + } + } + + // Get assembly if requested + if (includeDisassembly) { + if (includeDecompile) { + result.append("\n"); + } + result.append("Assembly:\n"); + result.append("---------\n"); + String asmCode = disassembleFunctionInProgram(func, matchedProgram); + if (asmCode != null && !asmCode.isEmpty()) { + result.append(asmCode); + } else { + result.append("(Assembly not available)\n"); + } + } + + return result.toString(); + + } catch (Exception e) { + result.append("ERROR: Exception while processing program: ").append(e.getMessage()).append("\n"); + Msg.error(this, "Error getting BSim match function", e); + return result.toString(); + } finally { + // Release the program if we opened it from the project + if (needsRelease && matchedProgram != null) { + matchedProgram.release(this); + } + } + } + + /** + * Recursively search for a domain file by name in a folder and its subfolders + */ + private ghidra.framework.model.DomainFile findDomainFileRecursive( + ghidra.framework.model.DomainFolder folder, String fileName) { + + // Check files in current folder + for (ghidra.framework.model.DomainFile file : folder.getFiles()) { + if (fileName.equals(file.getName())) { + return file; + } + } + + // Recursively check subfolders + for (ghidra.framework.model.DomainFolder subfolder : folder.getFolders()) { + ghidra.framework.model.DomainFile result = findDomainFileRecursive(subfolder, fileName); + if (result != null) { + return result; + } + } + + return null; + } + + /** + * Decompile a function within a specific program + */ + private String decompileFunctionInProgram(Function func, Program program) { + try { + DecompInterface decomp = new DecompInterface(); + decomp.openProgram(program); + DecompileResults result = decomp.decompileFunction(func, this.decompileTimeout, new ConsoleTaskMonitor()); + + if (result != null && result.decompileCompleted()) { + return result.getDecompiledFunction().getC(); + } + } catch (Exception e) { + Msg.error(this, "Error decompiling function in external program", e); + } + return null; + } + + /** + * Disassemble a function within a specific program + */ + private String disassembleFunctionInProgram(Function func, Program program) { + try { + StringBuilder result = new StringBuilder(); + Listing listing = program.getListing(); + Address start = func.getEntryPoint(); + Address end = func.getBody().getMaxAddress(); + + InstructionIterator instructions = listing.getInstructions(start, true); + while (instructions.hasNext()) { + Instruction instr = instructions.next(); + if (instr.getAddress().compareTo(end) > 0) { + break; + } + String comment = listing.getComment(CommentType.EOL, instr.getAddress()); + comment = (comment != null) ? "; " + comment : ""; + + result.append(String.format("%s: %s %s\n", + instr.getAddress(), + instr.toString(), + comment)); + } + return result.toString(); + } catch (Exception e) { + Msg.error(this, "Error disassembling function in external program", e); + } + return null; + } + + /** + * Filter BSim results by maximum similarity and confidence thresholds, and limit matches. + * Removes matches that exceed the specified maximum values, and limits the number of + * matches per function. Implements early stopping for efficiency. + * + * Note: Maximum thresholds are exclusive (values >= max are filtered out), + * while minimum thresholds (applied in the query) are inclusive. + * + * @param response The BSim query response to filter + * @param maxSimilarity Maximum similarity score (exclusive) - matches >= this value are removed + * @param maxConfidence Maximum confidence score (exclusive) - matches >= this value are removed + * @param maxMatches Maximum number of matches to keep per function (for early stopping) + */ + private void filterBSimResults(ResponseNearest response, double maxSimilarity, double maxConfidence, int maxMatches) { + if (response == null || response.result == null) { + return; + } + + Iterator iter = response.result.iterator(); + while (iter.hasNext()) { + SimilarityResult simResult = iter.next(); + Iterator noteIter = simResult.iterator(); + + int validMatchCount = 0; + + while (noteIter.hasNext()) { + SimilarityNote note = noteIter.next(); + + // Remove matches that meet or exceed max similarity or max confidence (exclusive) + if (note.getSimilarity() >= maxSimilarity || note.getSignificance() >= maxConfidence) { + noteIter.remove(); + } else { + // This is a valid match + validMatchCount++; + + // Early stopping: if we've reached maxMatches valid matches, remove all remaining + if (validMatchCount > maxMatches) { + noteIter.remove(); + } + } + } + } + } + + private Address getAddressFromLong(long address) { + Program program = getCurrentProgram(); + String hexAddr = Long.toHexString(address); + return program.getAddressFactory().getAddress(hexAddr); + } + + /** + * Format BSim query results into a readable string with pagination + */ + private String formatBSimResults(ResponseNearest response, String queryFunctionName, int offset, int limit) { + StringBuilder result = new StringBuilder(); + + if (queryFunctionName != null) { + result.append("Matches for function: ").append(queryFunctionName).append("\n\n"); + } + + Iterator iter = response.result.iterator(); + int totalMatchCount = 0; + int displayedMatchCount = 0; + + while (iter.hasNext()) { + SimilarityResult simResult = iter.next(); + FunctionDescription base = simResult.getBase(); + + if (simResult.size() == 0) { + if (queryFunctionName == null) { + continue; // Skip functions with no matches when querying all + } + result.append("No matches found (all matches filtered out or none available)\n"); + return result.toString(); // Early return for single function with no matches + } + + // For single function query, paginate the matches + // For all functions query, paginate the functions + if (queryFunctionName != null) { + // Single function: paginate through similarity matches + int totalMatches = simResult.size(); + + Iterator noteIter = simResult.iterator(); + int matchIndex = 0; + + while (noteIter.hasNext()) { + SimilarityNote note = noteIter.next(); + + // Skip matches before offset + if (matchIndex < offset) { + matchIndex++; + continue; + } + + // Stop if we've reached the limit + if (displayedMatchCount >= limit) { + break; + } + + FunctionDescription match = note.getFunctionDescription(); + ExecutableRecord exe = match.getExecutableRecord(); + + result.append(String.format("Match %d:\n", matchIndex + 1)); + + if (exe != null) { + result.append(String.format(" Executable: %s\n", exe.getNameExec())); + } + + result.append(String.format(" Function: %s\n", match.getFunctionName())); + result.append(String.format(" Address: %s\n", getAddressFromLong(match.getAddress()))); + result.append(String.format(" Similarity: %.4f\n", note.getSimilarity())); + result.append(String.format(" Confidence: %.4f\n", note.getSignificance())); + + if (exe != null) { + String arch = exe.getArchitecture(); + String compiler = exe.getNameCompiler(); + if (arch != null && !arch.isEmpty()) { + result.append(String.format(" Architecture: %s\n", arch)); + } + if (compiler != null && !compiler.isEmpty()) { + result.append(String.format(" Compiler: %s\n", compiler)); + } + } + + result.append("\n"); + matchIndex++; + displayedMatchCount++; + } + + // Add pagination info for single function + int remaining = totalMatches - offset - displayedMatchCount; + if (remaining < 0) remaining = 0; + + result.append(String.format("Showing matches %d-%d of %d", + offset + 1, offset + displayedMatchCount, totalMatches)); + if (remaining > 0) { + result.append(String.format(" (%d more available)\n", remaining)); + } else { + result.append("\n"); + } + } else { + // All functions query: paginate by function (skip if before offset) + if (totalMatchCount < offset) { + totalMatchCount++; + continue; + } + + result.append("Function: ").append(base.getFunctionName()) + .append(" at ").append(getAddressFromLong(base.getAddress())).append("\n"); + + // Stop if we've reached the limit + if (displayedMatchCount >= limit) { + break; + } + + result.append("Found ").append(simResult.size()).append(" match(es):\n"); + + Iterator noteIter = simResult.iterator(); + int matchNum = 1; + while (noteIter.hasNext()) { + SimilarityNote note = noteIter.next(); + FunctionDescription match = note.getFunctionDescription(); + ExecutableRecord exe = match.getExecutableRecord(); + + result.append(String.format("Match %d:\n", matchNum)); + + result.append(String.format(" Executable: %s\n", exe.getNameExec())); + result.append(String.format(" Function: %s\n", match.getFunctionName())); + result.append(String.format(" Address: %s\n", getAddressFromLong(match.getAddress()))); + result.append(String.format(" Similarity: %.4f\n", note.getSimilarity())); + result.append(String.format(" Confidence: %.4f\n", note.getSignificance())); + + result.append("\n"); + matchNum++; + } + result.append("\n"); + totalMatchCount++; + displayedMatchCount++; + } + } + + // Add pagination info for all functions query + if (queryFunctionName == null) { + // Count remaining functions + int remaining = 0; + while (iter.hasNext()) { + SimilarityResult simResult = iter.next(); + if (simResult.size() > 0) { + remaining++; + } + } + + if (displayedMatchCount == 0) { + result.append("No matches found for any functions\n"); + } else { + result.append(String.format("Showing functions %d-%d of %d+ results", + offset + 1, offset + displayedMatchCount, offset + displayedMatchCount + remaining)); + if (remaining > 0) { + result.append(String.format(" (%d more available)\n", remaining)); + } else { + result.append("\n"); + } + } + } + + return result.toString(); + } + public Program getCurrentProgram() { ProgramManager pm = tool.getService(ProgramManager.class); return pm != null ? pm.getCurrentProgram() : null;