Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 12 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,11 @@ MCP Server + Ghidra Plugin
- Decompile and analyze binaries in Ghidra
- Automatically rename methods and data
- List methods, classes, imports, and exports
- BSim integration for function similarity matching
- Connect to BSim databases (H2, PostgreSQL)
- Query individual functions for similar matches
- Batch query all functions in a program
- View similarity scores, confidence levels, and executable metadata

# Installation

Expand Down Expand Up @@ -108,6 +113,13 @@ Another MCP client that supports multiple models on the backend is [5ire](https:
- `Ghidra/Framework/SoftwareModeling/lib/SoftwareModeling.jar`
- `Ghidra/Framework/Utility/lib/Utility.jar`
- `Ghidra/Framework/Gui/lib/Gui.jar`
- `Ghidra/Features/BSim/lib/BSim.jar` -> `lib/BSim.jar`
- `Ghidra/Features/BSim/lib/commons-dbcp2-2.9.0.jar` -> `lib/commons-dbcp2-2.9.0.jar`
- `Ghidra/Features/BSim/lib/commons-logging-1.2.jar` -> `lib/commons-logging-1.2.jar`
- `Ghidra/Features/BSim/lib/commons-pool2-2.11.1.jar` -> `lib/commons-pool2-2.11.1.jar`
- `Ghidra/Features/BSim/lib/h2-2.2.220.jar` -> `lib/h2-2.2.220.jar`
- `Ghidra/Features/BSim/lib/postgresql-42.7.6.jar` -> `lib/postgresql-42.7.6.jar`

2. Build with Maven by running:

`mvn clean package assembly:single`
Expand Down
173 changes: 171 additions & 2 deletions bridge_mcp_ghidra.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,9 +46,10 @@ def safe_post(endpoint: str, data: dict | str) -> str:
try:
url = urljoin(ghidra_server_url, endpoint)
if isinstance(data, dict):
response = requests.post(url, data=data, timeout=5)
# BSim queries might be a bit slower, hence the long-ish timeout
response = requests.post(url, data=data, timeout=20)
else:
response = requests.post(url, data=data.encode("utf-8"), timeout=5)
response = requests.post(url, data=data.encode("utf-8"), timeout=20)
response.encoding = 'utf-8'
if response.ok:
return response.text.strip()
Expand Down Expand Up @@ -287,6 +288,174 @@ def list_strings(offset: int = 0, limit: int = 2000, filter: str = None) -> list
params["filter"] = filter
return safe_get("strings", params)

@mcp.tool()
def bsim_select_database(database_path: str) -> str:
    """
    Choose the BSim database used for function similarity matching and connect to it.

    Args:
        database_path: Either a path to a BSim database file
            (e.g., "/path/to/database.bsim") or a database URL
            (e.g., "postgresql://host:port/dbname")

    Returns:
        Connection status and database information
    """
    # The plugin receives the location as a single form field.
    form = {"database_path": database_path}
    return safe_post("bsim/select_database", form)

@mcp.tool()
def bsim_query_function(
    function_address: str,
    max_matches: int = 10,
    similarity_threshold: float = 0.7,
    confidence_threshold: float = 0.0,
    max_similarity: float | None = None,
    max_confidence: float | None = None,
    offset: int = 0,
    limit: int = 100,
) -> str:
    """
    Find functions in the BSim database that are similar to one given function.

    Args:
        function_address: Address of the function to query (e.g., "0x401000")
        max_matches: Maximum number of matches to return (default: 10)
        similarity_threshold: Minimum similarity score (inclusive, 0.0-1.0, default: 0.7)
        confidence_threshold: Minimum confidence score (inclusive, 0.0-1.0, default: 0.0)
        max_similarity: Maximum similarity score (exclusive, 0.0-1.0, default: unbounded)
        max_confidence: Maximum confidence score (exclusive, 0.0-1.0, default: unbounded)
        offset: Pagination offset (default: 0)
        limit: Maximum number of results to return (default: 100)

    Returns:
        List of matching functions with similarity scores and metadata
    """
    # The address is already a string; every numeric field is stringified
    # before being placed in the form payload.
    form = {"function_address": function_address}
    always_sent = (
        ("max_matches", max_matches),
        ("similarity_threshold", similarity_threshold),
        ("confidence_threshold", confidence_threshold),
        ("offset", offset),
        ("limit", limit),
    )
    form.update((field, str(value)) for field, value in always_sent)

    # Upper bounds are left out of the request entirely when not supplied.
    upper_bounds = (
        ("max_similarity", max_similarity),
        ("max_confidence", max_confidence),
    )
    form.update(
        (field, str(value)) for field, value in upper_bounds if value is not None
    )

    return safe_post("bsim/query_function", form)

@mcp.tool()
def bsim_query_all_functions(
    max_matches_per_function: int = 5,
    similarity_threshold: float = 0.7,
    confidence_threshold: float = 0.0,
    max_similarity: float | None = None,
    max_confidence: float | None = None,
    offset: int = 0,
    limit: int = 100,
) -> str:
    """
    Run a BSim query for every function in the current program.
    The result gives an overview of the matches found for all functions.

    Args:
        max_matches_per_function: Max matches per function (default: 5)
        similarity_threshold: Minimum similarity score (inclusive, 0.0-1.0, default: 0.7)
        confidence_threshold: Minimum confidence score (inclusive, 0.0-1.0, default: 0.0)
        max_similarity: Maximum similarity score (exclusive, 0.0-1.0, default: unbounded)
        max_confidence: Maximum confidence score (exclusive, 0.0-1.0, default: unbounded)
        offset: Pagination offset (default: 0)
        limit: Maximum number of results to return (default: 100)

    Returns:
        Summary and detailed results for all matching functions
    """
    # Fields that are part of every request, stringified for the form body.
    always_sent = (
        ("max_matches_per_function", max_matches_per_function),
        ("similarity_threshold", similarity_threshold),
        ("confidence_threshold", confidence_threshold),
        ("offset", offset),
        ("limit", limit),
    )
    form = {field: str(value) for field, value in always_sent}

    # Upper bounds are only transmitted when the caller provided them.
    upper_bounds = (
        ("max_similarity", max_similarity),
        ("max_confidence", max_confidence),
    )
    for field, value in upper_bounds:
        if value is None:
            continue
        form[field] = str(value)

    return safe_post("bsim/query_all_functions", form)

@mcp.tool()
def bsim_disconnect() -> str:
    """
    Close the connection to the current BSim database.

    Returns:
        Disconnection status message
    """
    # The endpoint takes no parameters, so an empty form is posted.
    no_fields: dict = {}
    return safe_post("bsim/disconnect", no_fields)

@mcp.tool()
def bsim_status() -> str:
    """
    Report the current BSim database connection status.

    Returns:
        Current connection status and database path if connected
    """
    # The items returned by safe_get are joined into one newline-separated string.
    status_lines = safe_get("bsim/status")
    return "\n".join(status_lines)

@mcp.tool()
def bsim_get_match_disassembly(
    executable_path: str,
    function_name: str,
    function_address: str,
) -> str:
    """
    Fetch the disassembly of a specific BSim match. The matched executable
    must be available in the Ghidra project for this to work.

    Args:
        executable_path: Path to the matched executable (from BSim match result)
        function_name: Name of the matched function
        function_address: Address of the matched function (e.g., "0x401000")

    Returns:
        Function prototype and assembly code for the matched function.
        Returns an error message if the program is not found in the project.
    """
    # The three fields together identify the matched function to the plugin.
    match_ref = dict(
        executable_path=executable_path,
        function_name=function_name,
        function_address=function_address,
    )
    return safe_post("bsim/get_match_disassembly", match_ref)

@mcp.tool()
def bsim_get_match_decompile(
    executable_path: str,
    function_name: str,
    function_address: str,
) -> str:
    """
    Fetch the decompilation of a specific BSim match. The matched executable
    must be available in the Ghidra project for this to work.

    Args:
        executable_path: Path to the matched executable (from BSim match result)
        function_name: Name of the matched function
        function_address: Address of the matched function (e.g., "0x401000")

    Returns:
        Function prototype and decompiled C code for the matched function.
        Returns an error message if the program is not found in the project.
    """
    # The three fields together identify the matched function to the plugin.
    match_ref = dict(
        executable_path=executable_path,
        function_name=function_name,
        function_address=function_address,
    )
    return safe_post("bsim/get_match_decompile", match_ref)

def main():
parser = argparse.ArgumentParser(description="MCP server for Ghidra")
parser.add_argument("--ghidra-server", type=str, default=DEFAULT_GHIDRA_SERVER,
Expand Down
44 changes: 44 additions & 0 deletions pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,50 @@
<systemPath>${project.basedir}/lib/Gui.jar</systemPath>
</dependency>

<!-- BSim dependencies: system-scoped jars resolved from ${project.basedir}/lib -->
<dependency>
<groupId>ghidra</groupId>
<artifactId>BSim</artifactId>
<version>11.3.2</version>
<scope>system</scope>
<systemPath>${project.basedir}/lib/BSim.jar</systemPath>
</dependency>
<dependency>
<groupId>org.apache.commons</groupId>
<artifactId>commons-dbcp2</artifactId>
<version>2.9.0</version>
<scope>system</scope>
<systemPath>${project.basedir}/lib/commons-dbcp2-2.9.0.jar</systemPath>
</dependency>
<dependency>
<groupId>commons-logging</groupId>
<artifactId>commons-logging</artifactId>
<version>1.2</version>
<scope>system</scope>
<systemPath>${project.basedir}/lib/commons-logging-1.2.jar</systemPath>
</dependency>
<dependency>
<groupId>org.apache.commons</groupId>
<artifactId>commons-pool2</artifactId>
<version>2.11.1</version>
<scope>system</scope>
<systemPath>${project.basedir}/lib/commons-pool2-2.11.1.jar</systemPath>
</dependency>
<dependency>
<groupId>com.h2database</groupId>
<artifactId>h2</artifactId>
<version>2.2.220</version>
<scope>system</scope>
<systemPath>${project.basedir}/lib/h2-2.2.220.jar</systemPath>
</dependency>
<dependency>
<groupId>org.postgresql</groupId>
<artifactId>postgresql</artifactId>
<version>42.7.6</version>
<scope>system</scope>
<systemPath>${project.basedir}/lib/postgresql-42.7.6.jar</systemPath>
</dependency>

<!-- JUnit (test only) -->
<dependency>
<groupId>junit</groupId>
Expand Down
Loading