diff --git a/benches/compound_processing_benches.rs b/benches/compound_processing_benches.rs new file mode 100644 index 0000000..28eb23e --- /dev/null +++ b/benches/compound_processing_benches.rs @@ -0,0 +1,49 @@ +#![feature(test)] + +use cheminee::search::compound_processing::*; + +extern crate test; +use rdkit::ROMol; +use test::Bencher; + +#[bench] +fn bench_process_cpd(b: &mut Bencher) { + let smiles1 = + "C[S+](CC[C@@H](C(=O)[O-])[NH3+])C[C@@H]1[C@H]([C@H]([C@@H](O1)N2C=NC3=C(N=CN=C32)N)O)O"; + + b.iter(|| { + let _ = process_cpd(smiles1, false); + }); +} + +// running 1 test +// test bench_process_cpd ... bench: 5,527,616.65 ns/iter (+/- 337,547.33) + +#[bench] +fn bench_standardize_mol(b: &mut Bencher) { + let mol = ROMol::from_smiles( + "C[S+](CC[C@@H](C(=O)[O-])[NH3+])C[C@@H]1[C@H]([C@H]([C@@H](O1)N2C=NC3=C(N=CN=C32)N)O)O", + ) + .unwrap(); + + b.iter(|| { + let _ = standardize_mol(&mol); + }); +} + +// running 1 test +// test bench_standardize_mol ... bench: 3,906,768.75 ns/iter (+/- 230,949.77) + +#[bench] +fn bench_get_cpd_properties(b: &mut Bencher) { + let smiles1 = + "C[S+](CC[C@@H](C(=O)[O-])[NH3+])C[C@@H]1[C@H]([C@H]([C@@H](O1)N2C=NC3=C(N=CN=C32)N)O)O"; + let canon_taut = standardize_smiles(smiles1, false).unwrap(); + + b.iter(|| { + let _ = get_cpd_properties(&canon_taut); + }); +} + +// running 1 test +// test bench_get_cpd_properties ... bench: 1,329,194.77 ns/iter (+/- 183,436.78) diff --git a/benches/fingerprint_benches.rs b/benches/fingerprint_benches.rs index 1160dd6..c2e2934 100644 --- a/benches/fingerprint_benches.rs +++ b/benches/fingerprint_benches.rs @@ -18,16 +18,31 @@ fn bench_tanimoto_distance(b: &mut Bencher) { } // running 1 test -// Before: test bench_tanimoto_similarity ... bench: 132,690 ns/iter (+/- 2,681) -// Now: test bench_tanimoto_distance ... bench: 2,653 ns/iter (+/- 790) +// test bench_tanimoto_distance ... 
bench: 2,520.20 ns/iter (+/- 151.54) #[bench] fn bench_fingerprint_generation(b: &mut Bencher) { - let smiles = "[N]Cc1cncc2c(=O)c3cccc(CCC(=O)O)c3[nH]c12"; + let smiles = + "C[S+](CC[C@@H](C(=O)[O-])[NH3+])C[C@@H]1[C@H]([C@H]([C@@H](O1)N2C=NC3=C(N=CN=C32)N)O)O"; let romol = ROMol::from_smiles(smiles).unwrap(); b.iter(|| romol.fingerprint()); } // running 1 test -// test bench_fingerprint_generation ... bench: 1,161,160 ns/iter (+/- 274,184) +// test bench_fingerprint_generation ... bench: 784,123.44 ns/iter (+/- 137,752.50) + +#[bench] +fn bench_fingerprint_exact_match(b: &mut Bencher) { + let mol1 = ROMol::from_smiles( + "C[S+](CC[C@@H](C(=O)[O-])[NH3+])C[C@@H]1[C@H]([C@H]([C@@H](O1)N2C=NC3=C(N=CN=C32)N)O)O", + ) + .unwrap(); + let fp1 = mol1.fingerprint(); + let fp2 = fp1.clone(); + + b.iter(|| fp1.0 == fp2.0); +} + +// running 1 test +// test bench_fingerprint_exact_match ... bench: 372.45 ns/iter (+/- 30.19) diff --git a/benches/indexing_benches.rs b/benches/indexing_benches.rs new file mode 100644 index 0000000..9faf089 --- /dev/null +++ b/benches/indexing_benches.rs @@ -0,0 +1,43 @@ +#![feature(test)] + +extern crate test; + +use cheminee::command_line::indexing::bulk_index::create_tantivy_doc; +use cheminee::indexing::KNOWN_DESCRIPTORS; +use cheminee::schema::LIBRARY; +use serde_json::Value; +use std::collections::HashMap; +use tantivy::schema::Field; +use test::Bencher; + +#[bench] +fn bench_create_tantivy_doc(b: &mut Bencher) { + let schema = LIBRARY.get("descriptor_v1").unwrap().clone(); + let smiles_field = schema.get_field("smiles").unwrap(); + let fingerprint_field = schema.get_field("fingerprint").unwrap(); + let extra_data_field = schema.get_field("extra_data").unwrap(); + let descriptor_fields = KNOWN_DESCRIPTORS + .iter() + .map(|kd| (*kd, schema.get_field(kd).unwrap())) + .collect::<HashMap<&str, Field>>(); + + let smi = + "C[S+](CC[C@@H](C(=O)[O-])[NH3+])C[C@@H]1[C@H]([C@H]([C@@H](O1)N2C=NC3=C(N=CN=C32)N)O)O"; + let mut map = serde_json::Map::new(); + 
map.insert("smiles".to_string(), Value::String(smi.to_string())); + + let record: Value = Value::Object(map); + + b.iter(|| { + let _ = create_tantivy_doc( + record.clone(), + smiles_field, + fingerprint_field, + &descriptor_fields, + extra_data_field, + ); + }); +} + +// running 1 test +// test bench_create_tantivy_doc ... bench: 6,527,306.20 ns/iter (+/- 562,411.21) diff --git a/benches/scaffold_benches.rs b/benches/scaffold_benches.rs index 38b3634..36e8905 100644 --- a/benches/scaffold_benches.rs +++ b/benches/scaffold_benches.rs @@ -13,3 +13,6 @@ fn bench_scaffold_search(b: &mut Bencher) { b.iter(|| scaffold_search(&fp.0, &mol, &PARSED_SCAFFOLDS)); } + +// running 1 test +// test bench_scaffold_search ... bench: 756,170.85 ns/iter (+/- 27,301.91) diff --git a/benches/structure_matching_benches.rs b/benches/structure_matching_benches.rs new file mode 100644 index 0000000..efb18eb --- /dev/null +++ b/benches/structure_matching_benches.rs @@ -0,0 +1,80 @@ +#![feature(test)] + +extern crate test; +use cheminee::search::structure_matching::exact_match; +use rdkit::{substruct_match, ROMol, SubstructMatchParameters}; +use test::Bencher; + +#[bench] +fn bench_exact_match_no_chirality(b: &mut Bencher) { + let romol1 = ROMol::from_smiles( + "C[S+](CC[C@@H](C(=O)[O-])[NH3+])C[C@@H]1[C@H]([C@H]([C@@H](O1)N2C=NC3=C(N=CN=C32)N)O)O", + ) + .unwrap(); + + let romol2 = romol1.clone(); + + b.iter(|| { + let _ = exact_match(&romol1, &romol2, false); + }); +} + +// running 1 test +// test bench_exact_match_no_chirality ... bench: 10,117.06 ns/iter (+/- 423.84) + +#[bench] +fn bench_exact_match_yes_chirality(b: &mut Bencher) { + let romol1 = ROMol::from_smiles( + "C[S+](CC[C@@H](C(=O)[O-])[NH3+])C[C@@H]1[C@H]([C@H]([C@@H](O1)N2C=NC3=C(N=CN=C32)N)O)O", + ) + .unwrap(); + + let romol2 = romol1.clone(); + + b.iter(|| { + let _ = exact_match(&romol1, &romol2, true); + }); +} + +// running 1 test +// test bench_exact_match_yes_chirality ... 
bench: 18,131.02 ns/iter (+/- 790.45) + +#[bench] +fn bench_substructure_match_no_chirality(b: &mut Bencher) { + let super_mol = ROMol::from_smiles( + "C[S+](CC[C@@H](C(=O)[O-])[NH3+])C[C@@H]1[C@H]([C@H]([C@@H](O1)N2C=NC3=C(N=CN=C32)N)O)O", + ) + .unwrap(); + let sub_mol = + ROMol::from_smiles("C[C@@H]1[C@H]([C@H]([C@@H](O1)[N]2C=NC3=C(N=CN=C23)N)O)O").unwrap(); + + let mut params = SubstructMatchParameters::default(); + params.set_use_chirality(false); + + b.iter(|| { + let _ = substruct_match(&super_mol, &sub_mol, &params); + }); +} + +// running 1 test +// test bench_substructure_match_no_chirality ... bench: 4,209.80 ns/iter (+/- 318.14) + +#[bench] +fn bench_substructure_match_yes_chirality(b: &mut Bencher) { + let super_mol = ROMol::from_smiles( + "C[S+](CC[C@@H](C(=O)[O-])[NH3+])C[C@@H]1[C@H]([C@H]([C@@H](O1)N2C=NC3=C(N=CN=C32)N)O)O", + ) + .unwrap(); + let sub_mol = + ROMol::from_smiles("C[C@@H]1[C@H]([C@H]([C@@H](O1)[N]2C=NC3=C(N=CN=C23)N)O)O").unwrap(); + + let mut params = SubstructMatchParameters::default(); + params.set_use_chirality(true); + + b.iter(|| { + let _ = substruct_match(&super_mol, &sub_mol, &params); + }); +} + +// running 1 test +// test bench_substructure_match_yes_chirality ... 
bench: 7,313.00 ns/iter (+/- 532.24) diff --git a/src/command_line/indexing/bulk_index.rs b/src/command_line/indexing/bulk_index.rs index c6c4cd2..46df24a 100644 --- a/src/command_line/indexing/bulk_index.rs +++ b/src/command_line/indexing/bulk_index.rs @@ -100,7 +100,7 @@ pub fn action(matches: &ArgMatches) -> eyre::Result<()> { Ok(()) } -fn create_tantivy_doc( +pub fn create_tantivy_doc( record: serde_json::Value, smiles_field: Field, fingerprint_field: Field, diff --git a/src/search/identity_search.rs b/src/search/identity_search.rs index 0acbcd1..6002182 100644 --- a/src/search/identity_search.rs +++ b/src/search/identity_search.rs @@ -39,7 +39,7 @@ pub fn identity_search( smiles_field, fingerprint_field, extra_data_field, - &searcher, + searcher, &query_mol_mutex.lock().unwrap(), query_fingerprint, use_chirality, diff --git a/src/search/structure_search.rs b/src/search/structure_search.rs index 77b4c4f..8b00c0e 100644 --- a/src/search/structure_search.rs +++ b/src/search/structure_search.rs @@ -68,7 +68,7 @@ pub fn structure_search( smiles_field, fingerprint_field, extra_data_field, - &searcher, + searcher, &query_mol_mutex.lock().unwrap(), query_fingerprint, method,