Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@ To simulate selenium and production integration tests:
One of the key steps in ReDU is updating the database to include the latest identifications for files within ReDU. The steps are as follows:

1. Download the batch template from GNPS at ```https://redu.ucsd.edu/metabatchdump```
1. Run Batch Workflow for Spectral Library Search
1. Run [Batch Workflow for Spectral Library Search](https://github.com/mwang87/ReDU-MS2-GNPS/blob/master/code/search_all_data.py)
1. Get the set of tasks as tsv and save to [here](https://github.com/mwang87/ReDU-MS2-GNPS/blob/master/database/global_tasks.tsv).
1. Remove database [here](https://github.com/mwang87/ReDU-MS2-GNPS/tree/master/database)
1. Remove all untracked files in temp, this will be for the global pca
Expand Down
31 changes: 14 additions & 17 deletions code/search_all_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,29 +7,26 @@
import credentials
from models import *

def parse_metabatch_dump():
    """Submit one GNPS library-search workflow per file in the metabatch dump.

    Reads ``../database/metabatchdump.tsv`` (expected to have at least
    ``filename`` and ``id`` columns — TODO confirm schema against the
    /metabatchdump endpoint), launches one workflow per row via
    ``util.launch_GNPS_librarysearchworkflow``, and writes the collected
    task IDs to ``../database/global_tasks.tsv`` as a one-column TSV.
    """
    metabatch_filename = '../database/metabatchdump.tsv'
    all_filenames = pd.read_table(metabatch_filename)

    task_id_list = []
    # Total number of rows; used only to label each job "N of M".
    parallelism = len(all_filenames)

    # Row index from iterrows() is unused, so discard it explicitly.
    for _, row in all_filenames.iterrows():
        filenames = row.filename
        file_id = row.id  # renamed from `id` to avoid shadowing the builtin

        taskid = util.launch_GNPS_librarysearchworkflow(
            filenames,
            "ReDU-MS2 Global Analysis Populate %d of %d" % (file_id, parallelism),
            credentials.USERNAME,
            credentials.PASSWORD,
            "[email protected]")

        print(taskid)

        task_id_list.append(taskid)

    # Persist all launched task IDs so they can be committed to
    # database/global_tasks.tsv (see README update steps).
    df = pd.DataFrame()
    df["taskid"] = task_id_list
    df.to_csv("../database/global_tasks.tsv", sep="\t", index=False)

if __name__ == '__main__':
    # Script entry point: process the metabatch dump end to end.
    parse_metabatch_dump()
Loading