Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion src/command_line/indexing/bulk_delete.rs
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ pub fn action(matches: &ArgMatches) -> eyre::Result<()> {
let (storage_dir, index_name) = split_path(index_path)?;
let index_manager = IndexManager::new(storage_dir.deref(), false)?;
let index = index_manager.open(index_name.deref())?;
- let mut deleter = index.writer::<tantivy::TantivyDocument>(16 * 1024 * 1024)?;
+ let mut deleter = index.writer::<tantivy::TantivyDocument>(50 * 1024 * 1024)?;
let query_parser = QueryParser::for_index(&index, vec![]);

for smiles in smiles_list {
Expand Down
2 changes: 1 addition & 1 deletion src/command_line/indexing/bulk_index.rs
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ pub fn action(matches: &ArgMatches) -> eyre::Result<()> {
let index_manager = IndexManager::new(storage_dir.deref(), false)?;

let index = index_manager.open(index_name.deref())?;
- let mut writer = index.writer(16 * 1024 * 1024)?;
+ let mut writer = index.writer(50 * 1024 * 1024)?;
let schema = index.schema();

let file = File::open(json_path)?;
Expand Down
36 changes: 26 additions & 10 deletions src/command_line/indexing/index_sdf.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ use crate::command_line::prelude::*;
use rayon::prelude::*;
use rdkit::{MolBlockIter, RWMol};
use std::sync::{Arc, Mutex};
+ use tantivy::directory::MmapDirectory;

pub const NAME: &str = "index-sdf";

Expand Down Expand Up @@ -35,6 +36,12 @@ pub fn command() -> Command {
.short('c')
.num_args(1),
)
+ .arg(
+ Arg::new("create-or-reset-index")
+ .required(false)
+ .long("create-or-reset-index")
+ .num_args(0),
+ )
.arg(
Arg::new("commit")
.required(false)
Expand All @@ -52,6 +59,7 @@ pub fn action(matches: &ArgMatches) -> eyre::Result<()> {
.ok_or(eyre::eyre!("Failed to extract index path"))?;
let limit = matches.get_one::<String>("limit");
let chunksize = matches.get_one::<String>("chunk-size");
+ let reset_index: bool = matches.get_flag("create-or-reset-index");
let commit: bool = matches.get_flag("commit");

let chunksize = if let Some(chunksize) = chunksize {
Expand All @@ -67,14 +75,6 @@ pub fn action(matches: &ArgMatches) -> eyre::Result<()> {
limit
);

- let index_dir_metadata = std::fs::metadata(index_dir);
- if let Ok(metadata) = index_dir_metadata {
- if metadata.is_dir() {
- std::fs::remove_dir_all(index_dir)?;
- }
- }
- std::fs::create_dir(index_dir)?;

let mol_iter = MolBlockIter::from_gz_file(sdf_path, true, true, false)
.map_err(|e| eyre::eyre!("could not read gz file: {:?}", e))?;

Expand All @@ -87,8 +87,23 @@ pub fn action(matches: &ArgMatches) -> eyre::Result<()> {
let schema = crate::schema::LIBRARY
.get("descriptor_v1")
.ok_or(eyre::eyre!("Failed to extract schema"))?;
- let index = create_or_reset_index(index_dir, schema)?;
- let mut index_writer = index.writer_with_num_threads(1, 50 * 1024 * 1024)?;
+
+ let index = if reset_index {
+ let index_dir_metadata = std::fs::metadata(index_dir);
+ if let Ok(metadata) = index_dir_metadata {
+ if metadata.is_dir() {
+ std::fs::remove_dir_all(index_dir)?;
+ }
+ }
+
+ std::fs::create_dir(index_dir)?;
+ create_or_reset_index(index_dir, schema)?
+ } else {
+ let mmap_directory = MmapDirectory::open(index_dir)?;
+ tantivy::Index::open(mmap_directory)?
+ };
+
+ let mut index_writer = index.writer(50 * 1024 * 1024)?;

let mut counter = 0;
let failed_counter: Arc<Mutex<usize>> = Arc::new(Mutex::new(0));
Expand All @@ -97,6 +112,7 @@ pub fn action(matches: &ArgMatches) -> eyre::Result<()> {

for mol in mol_iter {
if mol.is_err() {
+ counter += 1;
let mut num = failed_counter.lock().unwrap();
*num += 1;
continue;
Expand Down
2 changes: 1 addition & 1 deletion src/rest_api/api/indexing/bulk_delete.rs
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ pub async fn v1_delete_index_bulk(
}
};

- let mut deleter = match index.writer::<tantivy::TantivyDocument>(16 * 1024 * 1024) {
+ let mut deleter = match index.writer::<tantivy::TantivyDocument>(50 * 1024 * 1024) {
Ok(deleter) => deleter,
Err(e) => {
return DeleteIndexesBulkDeleteResponse::Err(Json(DeleteIndexBulkResponseError {
Expand Down
2 changes: 1 addition & 1 deletion src/rest_api/api/indexing/bulk_index.rs
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ pub async fn v1_post_index_bulk(
}
};

- let mut writer = match index.writer(16 * 1024 * 1024) {
+ let mut writer = match index.writer(50 * 1024 * 1024) {
Ok(writer) => writer,
Err(e) => {
return PostIndexesBulkIndexResponse::Err(Json(PostIndexBulkResponseError {
Expand Down
6 changes: 4 additions & 2 deletions tests/api_tests.rs
Original file line number Diff line number Diff line change
Expand Up @@ -250,7 +250,7 @@ async fn test_bulk_indexing() -> eyre::Result<()> {
let results = searcher.search(&query, &TopDocs::with_limit(100))?;
assert_eq!(results.len(), 3);

- let docs = results
+ let mut docs = results
.into_iter()
.map(|(_, doc_id)| searcher.doc::<tantivy::TantivyDocument>(doc_id).unwrap())
.map(|td| {
Expand All @@ -261,7 +261,9 @@ async fn test_bulk_indexing() -> eyre::Result<()> {
.to_owned()
})
.collect::<Vec<_>>();
- assert_eq!(&docs, &["CC", "c1ccccc1", "c1ccc(CCc2ccccc2)cc1",]);
+
+ docs.sort();
+ assert_eq!(&docs, &["CC", "c1ccc(CCc2ccccc2)cc1", "c1ccccc1"]);

Ok(())
}
Expand Down
Loading