Skip to content

Commit

Permalink
Merge pull request #16 from br0kej/dev
Browse files Browse the repository at this point in the history
Enhanced Tests + PCode CFGs
  • Loading branch information
br0kej authored Jul 29, 2024
2 parents cc3a94c + 26fc8b1 commit bc146e0
Show file tree
Hide file tree
Showing 14 changed files with 5,504 additions and 12 deletions.
23 changes: 23 additions & 0 deletions .github/workflows/test.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
# CI workflow: build and test the crate on every push and pull request.
name: Cargo Build & Test

# Trigger on all pushes and all pull requests, any branch.
on:
  push:
  pull_request:

env:
  # Force coloured cargo output in the CI logs.
  CARGO_TERM_COLOR: always

jobs:
  build_and_test:
    name: Rust project - latest
    runs-on: ubuntu-latest
    strategy:
      # Run the suite on all three release channels.
      matrix:
        toolchain:
          - stable
          - beta
          - nightly
    steps:
      - uses: actions/checkout@v4
      # Install and select the toolchain under test.
      - run: rustup update ${{ matrix.toolchain }} && rustup default ${{ matrix.toolchain }}
      # `cargo test` builds the crate first, so this covers build + test.
      - run: cargo test --verbose
5 changes: 4 additions & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[package]
name = "bin2ml"
version = "0.3.0"
version = "0.3.1"
edition = "2021"

[dependencies]
Expand Down Expand Up @@ -36,6 +36,9 @@ features = ["serde-1"]
inference = ["dep:tch"]
goblin = ["dep:goblin"]

[lints.rust]
unexpected_cfgs = { level = "warn", check-cfg = ['cfg(inference)'] }

#https://github.com/johnthagen/min-sized-rust
[profile.release]
lto = true
Expand Down
2 changes: 2 additions & 0 deletions src/agfj.rs
Original file line number Diff line number Diff line change
Expand Up @@ -376,6 +376,7 @@ impl AGFJFunc {
info!("Invalid Feature Type. Skipping..");
return;
}
FeatureType::Pcode => StringOrF64::String(Vec::new()),
};

let min_offset: u64 = self.offset;
Expand Down Expand Up @@ -412,6 +413,7 @@ impl AGFJFunc {
info!("Invalid Feature Type. Skipping..");
return;
}
_ => {}
};

if !edge_list.is_empty() {
Expand Down
1 change: 1 addition & 0 deletions src/bb.rs
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ pub enum FeatureType {
ModelEmbedded,
Encoded,
Invalid,
Pcode,
}

#[derive(Serialize, Deserialize, Debug, PartialEq, Clone, Copy)]
Expand Down
5 changes: 0 additions & 5 deletions src/combos.rs
Original file line number Diff line number Diff line change
Expand Up @@ -41,8 +41,6 @@ pub enum ComboFileTypes {
#[derive(Debug)]
pub struct ComboJob {
pub combo_type: ComboTypes,
pub file_type_one: ComboFileTypes,
pub file_type_two: ComboFileTypes,
pub input_path: PathBuf,
pub output_path: PathBuf,
}
Expand All @@ -53,11 +51,8 @@ impl ComboJob {
let combo_file_types = combo_type.to_combo_file_types();

if combo_file_types.is_ok() {
let combo_file_types = combo_file_types.unwrap();
Ok(ComboJob {
combo_type,
file_type_one: combo_file_types.0,
file_type_two: combo_file_types.1,
input_path: input_path.to_path_buf(),
output_path: output_path.to_path_buf(),
})
Expand Down
2 changes: 1 addition & 1 deletion src/consts.rs
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ pub const GENERAL_PURPOSE_32_BIT_REGS: [&str; 165] = [
"f21", "f22", "f23", "f24", "f25", "f26", "f27", "f29", "f30", "f31", "v0", "v1", "a0", "a1",
"a2", "a3", "t0", "t1", "t2", "t3", "t4", "t5", "t6", "t7", "t8", "t9", "s0",
];
// MIPS registers are normalised to 32 bit always - This is becuase the name of the registers
// MIPS registers are normalised to 32 bit always - This is because the name of the registers
// is always the same regardless of architecture - Different to x86/x64.
pub const GENERAL_PURPOSE_64_BIT_REGS: [&str; 75] = [
"x0", "x1", "x2", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "x11", "x12", "x13", "x14",
Expand Down
2 changes: 2 additions & 0 deletions src/dedup.rs
Original file line number Diff line number Diff line change
Expand Up @@ -332,6 +332,8 @@ impl CGCorpus {
}
}

filepaths.sort_unstable();

info!("Returning One Hop CG Corpus Struct");
let output_path = output_path.to_owned();

Expand Down
2 changes: 1 addition & 1 deletion src/extract.rs
Original file line number Diff line number Diff line change
Expand Up @@ -286,7 +286,7 @@ pub struct BasicBlockMetadataEntry {
pub size: u64,
pub jump: Option<u64>,
pub fail: Option<u64>,
pub opaddr: f64,
pub opaddr: u64,
pub inputs: u64,
pub outputs: u64,
pub ninstr: u64,
Expand Down
59 changes: 57 additions & 2 deletions src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -114,7 +114,7 @@ enum GenerateSubCommands {
output_path: PathBuf,

/// The type of features to generate per basic block (node)
#[arg(short, long, value_name = "FEATURE_TYPE", value_parser = clap::builder::PossibleValuesParser::new(["gemini", "discovre", "dgis", "tiknib", "disasm", "esil"])
#[arg(short, long, value_name = "FEATURE_TYPE", value_parser = clap::builder::PossibleValuesParser::new(["gemini", "discovre", "dgis", "tiknib", "disasm", "esil", "pcode"])
.map(|s| s.parse::<String>().unwrap()),)]
feature_type: Option<String>,

Expand Down Expand Up @@ -460,6 +460,7 @@ fn main() {
"esil" => FeatureType::Esil,
#[cfg(feature = "inference")]
"embed" => FeatureType::ModelEmbedded,
"pcode" => FeatureType::Pcode,
_ => FeatureType::Invalid,
};

Expand Down Expand Up @@ -493,7 +494,7 @@ fn main() {
WalkDir::new(path).into_iter().filter_map(|file| file.ok())
{
if file.path().to_string_lossy().ends_with(".json") {
validate_input(path, "cfg");
validate_input(file.path(), "cfg");
agfj_graph_statistical_features(
file.path(),
&min_blocks.unwrap(),
Expand Down Expand Up @@ -524,6 +525,60 @@ fn main() {
);
}
}
} else if feature_vec_type == FeatureType::Pcode {
if Path::new(path).is_file() {
validate_input(path, "cfg");
info!("Single file found");
let mut file = PCodeFile {
filename: path.to_owned(),
pcode_obj: None,
output_path: output_path.to_owned(),
min_blocks: *min_blocks,
instruction_pairs: false,
format_type: FormatMode::SingleInstruction,
pcode_file_type: PCodeFileTypes::PCodeJsonFile,
};
let file_ret = file.load_and_deserialize().is_ok();
if file_ret {
let cfg_gen_ret =
file.pcode_json_with_bb_info_generate_cfg().is_ok();
if cfg_gen_ret {
info!("Successfully generated CFG's with PCode features")
} else {
error!("Failed to generate CFG's with PCode features")
}
}
} else {
info!("[L551] Multiple files found. Will parallel process.");
for file in
WalkDir::new(path).into_iter().filter_map(|file| file.ok())
{
if file.path().to_string_lossy().ends_with(".json") {
validate_input(file.path(), "cfg");
let mut file = PCodeFile {
filename: file.path().to_owned(),
pcode_obj: None,
output_path: output_path.to_owned(),
min_blocks: *min_blocks,
instruction_pairs: false,
format_type: FormatMode::SingleInstruction,
pcode_file_type: PCodeFileTypes::PCodeJsonFile,
};
let file_ret = file.load_and_deserialize().is_ok();
if file_ret {
let cfg_gen_ret =
file.pcode_json_with_bb_info_generate_cfg().is_ok();
if cfg_gen_ret {
info!("Successfully generated CFG's with PCode features")
} else {
error!(
"Failed to generate CFG's with PCode features"
)
}
}
}
}
}
}
} else {
error!("--feature-type/-f is required for creating CFG's")
Expand Down
90 changes: 90 additions & 0 deletions src/networkx.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,14 @@ use crate::afij::AFIJFeatureSubset;
use crate::agfj::TikNibFunc;
use crate::bb::{FeatureType, TikNibFeaturesBB};
use crate::combos::FinfoTiknib;
use crate::extract::PCodeJsonWithBBAndFuncName;
use enum_as_inner::EnumAsInner;
use petgraph::prelude::Graph;
use petgraph::visit::EdgeRef;
use serde::{Deserialize, Serialize};
use std::fs::File;
use std::io::Write;
use std::path::Path;

#[derive(Default, Debug, Clone, PartialEq, Eq, Hash, Serialize, Deserialize)]
#[serde(rename_all = "camelCase")]
Expand All @@ -17,6 +21,19 @@ pub struct NetworkxDiGraph<N> {
pub nodes: Vec<N>,
}

impl<N: Serialize> NetworkxDiGraph<N> {
    /// Serialize the graph to JSON and write it to `path`.
    ///
    /// Serialization failures are surfaced as `std::io::Error` (via
    /// `serde_json::Error`'s `From` conversion), as are filesystem errors.
    /// The file is created if missing and truncated if it already exists.
    pub fn save_to_json<P: AsRef<Path>>(&self, path: P) -> std::io::Result<()> {
        // Serialize the struct to a JSON string
        let json = serde_json::to_string(self)?;

        // `fs::write` creates/truncates and writes the whole buffer in one
        // call — replaces the manual File::create + write_all sequence.
        std::fs::write(path, json)?;

        Ok(())
    }
}

#[derive(Default, Debug, Clone, PartialEq, Eq, Hash, Serialize, Deserialize)]
#[serde(rename_all = "camelCase")]
pub struct Adjacency {
Expand All @@ -32,6 +49,7 @@ pub enum NodeType {
Tiknib(TiknibNode),
Disasm(DisasmNode),
Esil(EsilNode),
PCode(PCodeNode),
}

#[derive(Debug, Clone, PartialEq, Hash, Serialize, Deserialize, EnumAsInner)]
Expand Down Expand Up @@ -622,3 +640,75 @@ impl From<NetworkxDiGraph<NodeType>> for NetworkxDiGraph<EsilNode> {
}
}
}

/// A single CFG node carrying the PCode instructions of one basic block.
///
/// Serialized as part of a `NetworkxDiGraph<PCodeNode>` JSON export.
#[derive(Default, Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct PCodeNode {
    /// Node index within the graph (assigned in insertion order).
    pub id: u64,
    /// Address of the first instruction of the basic block.
    pub start_addr: u64,
    /// PCode instruction strings for the block.
    pub features: Vec<String>,
}

impl From<(u64, u64, &Vec<String>)> for PCodeNode {
    /// Build a `PCodeNode` from an `(id, start_addr, features)` triple,
    /// cloning the borrowed feature strings into the node.
    fn from(src: (u64, u64, &Vec<String>)) -> PCodeNode {
        let (id, start_addr, features) = src;
        PCodeNode {
            id,
            start_addr,
            features: features.clone(),
        }
    }
}

impl From<(&Graph<String, u32>, &PCodeJsonWithBBAndFuncName, &Vec<u32>)>
    for NetworkxDiGraph<PCodeNode>
{
    /// Build a networkx-style digraph whose nodes carry PCode features.
    ///
    /// Input tuple:
    /// * `.0` — the petgraph CFG (edge weights are carried over verbatim),
    /// * `.1` — the function's PCode JSON (provides per-block PCode),
    /// * `.2` — block start addresses, in node-index order.
    ///
    /// Addresses in `.2` with no matching PCode block are silently skipped.
    fn from(
        input: (&Graph<String, u32>, &PCodeJsonWithBBAndFuncName, &Vec<u32>),
    ) -> NetworkxDiGraph<PCodeNode> {
        let mut nodes: Vec<NodeType> = vec![];

        // Pair each address with the PCode block that starts at it. Widen the
        // u32 address rather than truncating `block_start_adr` to u32 (the
        // original `as u32` cast would let a block above 4 GiB falsely match
        // a low address).
        for (idx, address) in input.2.iter().enumerate() {
            let pcode_node = input
                .1
                .pcode_blocks
                .iter()
                .find(|ele| ele.block_start_adr == u64::from(*address));
            if let Some(pcode_node) = pcode_node {
                nodes.push(NodeType::PCode(PCodeNode::from((
                    idx as u64,
                    pcode_node.block_start_adr,
                    &pcode_node.pcode,
                ))))
            }
        }

        // Sort edges out: one adjacency list per node, mirroring networkx's
        // JSON adjacency-list format.
        let mut adjacency: Vec<Vec<Adjacency>> = vec![];
        let node_indices = input.0.node_indices();

        for node in node_indices {
            let mut node_adjacency_vec = vec![];
            let node_edges = input.0.edges(node);
            for edge in node_edges {
                let edge_entry = Adjacency {
                    id: edge.target().index(),
                    weight: edge.weight().to_owned(),
                };
                node_adjacency_vec.push(edge_entry)
            }
            adjacency.push(node_adjacency_vec)
        }

        // Unwrap the NodeType enum; every entry pushed above is PCode, so
        // `as_p_code()` cannot be None here.
        let inner_nodes_types: Vec<PCodeNode> = nodes
            .into_iter()
            .map(|el| el.as_p_code().unwrap().clone())
            .collect();

        NetworkxDiGraph {
            adjacency,
            directed: "True".to_string(),
            graph: vec![],
            multigraph: false,
            nodes: inner_nodes_types,
        }
    }
}
Loading

0 comments on commit bc146e0

Please sign in to comment.