diff --git a/xicbatch/Makefile b/xicbatch/Makefile
new file mode 100644
index 00000000..35257d08
--- /dev/null
+++ b/xicbatch/Makefile
@@ -0,0 +1,7 @@
+include ../Makefile.credentials
+include ../Makefile.deploytemplate
+
+WORKFLOW_NAME=xicbatch
+TOOL_FOLDER_NAME=xicbatch
+WORKFLOW_VERSION=release_28
+WORKFLOW_DESCRIPTION="xicbatch"
diff --git a/xicbatch/tools/xicbatch/calculate_xic.py b/xicbatch/tools/xicbatch/calculate_xic.py
new file mode 100644
index 00000000..e2887aa0
--- /dev/null
+++ b/xicbatch/tools/xicbatch/calculate_xic.py
@@ -0,0 +1,238 @@
+import os
+import sys
+import numpy as np
+import pandas as pd
+
+import argparse
+import uuid
+import glob
+import shutil
+import subprocess
+from scipy import integrate
+
+# SciPy 1.14 removed integrate.trapz; integrate.trapezoid (SciPy >= 1.6) is
+# its replacement, so prefer it and fall back to trapz on older installs.
+_trapezoid = getattr(integrate, "trapezoid", None) or getattr(integrate, "trapz")
+
+
+class MsaccessOutputError(IndexError):
+    """Raised when msaccess cannot be run or leaves no report file.
+
+    Subclasses IndexError because a missing report used to surface as an
+    IndexError from indexing an empty glob result.
+    """
+
+
+def _run_msaccess(msaccess_path, filename, extractor, filters):
+    """Run msaccess on one file and load the first report it writes.
+
+    Args:
+        msaccess_path: path to the msaccess executable.
+        filename: spectrum file handed to msaccess.
+        extractor: value for the msaccess ``-x`` option.
+        filters: iterable of values, each passed with its own ``--filter``.
+
+    Returns:
+        DataFrame read from the tab-separated report, skipping its first line.
+
+    Raises:
+        MsaccessOutputError: if msaccess cannot be started or the output
+            folder contains no file.
+    """
+    # Fresh uuid-named folder, relative to the working directory.
+    temp_result_folder = str(uuid.uuid4())
+
+    command = [msaccess_path, filename, "-o", temp_result_folder, "-x", extractor]
+    for filter_spec in filters:
+        command.extend(["--filter", filter_spec])
+
+    print(" ".join(command))
+
+    try:
+        try:
+            # Argument list instead of a shell string, so file names with
+            # spaces or shell metacharacters are passed through verbatim.
+            # LC_ALL=C mirrors what the shell command used to export. The
+            # exit status is still ignored; a failed run shows up as a
+            # missing report below.
+            subprocess.run(command, env=dict(os.environ, LC_ALL="C"))
+        except OSError as err:
+            raise MsaccessOutputError(
+                "could not run {}: {}".format(msaccess_path, err)) from err
+
+        result_filenames = glob.glob(os.path.join(temp_result_folder, "*"))
+        if not result_filenames:
+            raise MsaccessOutputError(
+                "msaccess produced no output for {}".format(filename))
+
+        return pd.read_csv(result_filenames[0], sep="\t", skiprows=1)
+    finally:
+        # Remove the temp folder even when msaccess or the parsing failed.
+        shutil.rmtree(temp_result_folder, ignore_errors=True)
+
+
+def calculate_xic(filename, mz, rt, mz_tolerance, rt_min, rt_max, msaccess_path, feature_name):
+    """Extract the MS1 intensity trace around one m/z from one file.
+
+    Args:
+        filename: spectrum file to read.
+        mz: center of the m/z window.
+        rt: unused; kept for call compatibility.
+        mz_tolerance: half-width of the m/z window.
+        rt_min: lower scan-time bound; multiplied by 60 for msaccess
+            (presumably minutes to seconds -- TODO confirm).
+        rt_max: upper scan-time bound, scaled like rt_min.
+        msaccess_path: path to the msaccess executable.
+        feature_name: unused; kept for call compatibility.
+
+    Returns:
+        DataFrame with columns "rt" (report "rt" divided by 60) and "int"
+        (report "sumIntensity").
+    """
+    mz_lower = mz - mz_tolerance
+    mz_upper = mz + mz_tolerance
+
+    result_df = _run_msaccess(
+        msaccess_path,
+        filename,
+        "tic mz={},{} delimiter=tab".format(mz_lower, mz_upper),
+        ["msLevel 1", "scanTime [{},{}]".format(rt_min * 60, rt_max * 60)])
+
+    xic_df = pd.DataFrame()
+    xic_df["rt"] = result_df["rt"] / 60.0
+    xic_df["int"] = result_df["sumIntensity"]
+
+    return xic_df
+
+
+def calculate_ms2(filename, mz, rt, mz_tolerance, rt_min, rt_max, msaccess_path, feature_name):
+    """List the MS2 scans whose precursor matches one m/z in one file.
+
+    Args:
+        filename: spectrum file to read.
+        mz: precursor m/z to match.
+        rt: unused; kept for call compatibility.
+        mz_tolerance: precursor tolerance, passed to msaccess as "mzTol" in Da.
+        rt_min: lower scan-time bound; multiplied by 60 for msaccess.
+        rt_max: upper scan-time bound; multiplied by 60 for msaccess.
+        msaccess_path: path to the msaccess executable.
+        feature_name: unused; kept for call compatibility.
+
+    Returns:
+        DataFrame with columns "scan", "filename", "rt", "tic" and
+        "precursorMZ".
+    """
+    result_df = _run_msaccess(
+        msaccess_path,
+        filename,
+        "spectrum_table delimiter=tab",
+        ["mzPrecursors [{}] mzTol={} Da".format(mz, mz_tolerance),
+         "msLevel 2",
+         "scanTime [{},{}]".format(rt_min * 60, rt_max * 60)])
+
+    formatted_df = pd.DataFrame()
+    # The scan number is the last dot-separated field of the spectrum id.
+    formatted_df["scan"] = result_df["id"].apply(lambda x: x.split(".")[-1])
+    formatted_df["filename"] = os.path.basename(filename)
+    formatted_df["rt"] = result_df["rt"] / 60.0
+    formatted_df["tic"] = result_df["TIC"]
+    formatted_df["precursorMZ"] = result_df["precursorMZ"]
+
+    print(formatted_df)
+
+    return formatted_df
+
+
+def main():
+    """Extract XIC and MS2 tables for one m/z / rt query across a folder.
+
+    Writes three tab-separated files: a per-file summary, the concatenated
+    XIC traces and the concatenated MS2 scan tables.
+    """
+    parser = argparse.ArgumentParser(description='Creating XIC')
+    parser.add_argument('input_folder', help='folder of input spectrum files')
+    parser.add_argument('output_results', help='output_results')
+    parser.add_argument('extraction_results', help='extraction_results')
+    parser.add_argument('ms2_extraction_results', help='ms2_extraction_results')
+    parser.add_argument('msaccess_path', help='msaccess_path')
+    # These were optional with a None default, which only failed later in
+    # float(None); requiring them reports the mistake at parse time.
+    parser.add_argument('--mz', required=True, help='mz')
+    parser.add_argument('--rt', required=True, help='rt')
+    parser.add_argument('--mztol', required=True, help='mztol')
+    parser.add_argument('--rttol', required=True, help='rttol')
+
+    args = parser.parse_args()
+
+    # The query is the same for every file, so convert it once.
+    mz = float(args.mz)
+    rt = float(args.rt)
+    mz_tolerance = float(args.mztol)
+    rt_tolerance = float(args.rttol)
+    rt_min = rt - rt_tolerance
+    rt_max = rt + rt_tolerance
+    query = "{}_{}".format(mz, rt)
+
+    # Sorted so the row order of the outputs is reproducible.
+    all_input_files = sorted(glob.glob(os.path.join(args.input_folder, "*")))
+
+    output_list = []
+    output_full_xic = []
+    output_ms2 = []
+    for filename in all_input_files:
+        xic_df = calculate_xic(filename, mz, rt, mz_tolerance,
+                               rt_min, rt_max, args.msaccess_path, str(args.mz))
+
+        # MS2 extraction stays best effort, but failures are now reported
+        # and KeyboardInterrupt/SystemExit are no longer swallowed.
+        try:
+            formatted_df = calculate_ms2(filename, mz, rt, mz_tolerance,
+                                         rt_min, rt_max, args.msaccess_path, str(args.mz))
+            formatted_df["query"] = query
+            output_ms2.append(formatted_df)
+        except Exception as err:
+            print("MS2 extraction failed for {}: {}".format(filename, err), file=sys.stderr)
+
+        xic_df["query"] = query
+        xic_df["filename"] = os.path.basename(filename)
+
+        # Integrate before the intensity sort below reorders the rows.
+        integration_value = _trapezoid(xic_df["int"], x=xic_df["rt"])
+
+        xic_df = xic_df.sort_values(by=['int'], ascending=False)
+        if not xic_df.empty:
+            max_int_rt = xic_df["rt"].iloc[0]
+            max_int = xic_df["int"].iloc[0]
+        else:
+            # No rows in the trace: still report the file, with no maximum.
+            max_int_rt = float("nan")
+            max_int = float("nan")
+
+        output_list.append({
+            "filename": os.path.basename(filename),
+            "integration_value": integration_value,
+            "mz": mz,
+            "rt": rt,
+            "max_int_rt": max_int_rt,
+            "max_int": max_int,
+            "drawing": "{}_{}.png".format(os.path.basename(filename), query),
+        })
+        output_full_xic.append(xic_df)
+
+    results_df = pd.DataFrame(output_list)
+    results_df.to_csv(args.output_results, sep="\t", index=False)
+
+    if output_full_xic:
+        full_xic_df = pd.concat(output_full_xic)
+    else:
+        # pd.concat raises on an empty list; write a header-only table.
+        full_xic_df = pd.DataFrame(columns=["rt", "int", "query", "filename"])
+    full_xic_df.to_csv(args.extraction_results, sep='\t', index=False)
+
+    # An empty frame is written when no file yielded MS2 scans, as before.
+    ms2_df = pd.concat(output_ms2) if output_ms2 else pd.DataFrame()
+    ms2_df.to_csv(args.ms2_extraction_results, sep='\t', index=False)
+
+
+if __name__ == "__main__":
+    main()
diff --git a/xicbatch/tools/xicbatch/demangle.py b/xicbatch/tools/xicbatch/demangle.py
new file mode 100644
index 00000000..c0fb228b
--- /dev/null
+++ b/xicbatch/tools/xicbatch/demangle.py
@@ -0,0 +1,40 @@
+import os
+import sys
+import pandas as pd
+
+import argparse
+import ming_proteosafe_library
+
+def main():
+    """Add a "full_ccms_path" column mapped from each row's "filename".
+
+    The mapping comes from ming_proteosafe_library.get_mangled_file_mapping
+    applied to the parsed workflow params file.
+
+    Raises:
+        KeyError: if a filename has no entry in the mapping.
+    """
+    parser = argparse.ArgumentParser(description='Creating Demangling')
+    parser.add_argument('input_results', help='input_results')
+    parser.add_argument('output_results', help='output_results')
+    parser.add_argument('params', help='params')
+    args = parser.parse_args()
+
+    # Close the params file once it has been parsed.
+    with open(args.params) as params_file:
+        params_dict = ming_proteosafe_library.parse_xml_file(params_file)
+    mangled_mapping = ming_proteosafe_library.get_mangled_file_mapping(params_dict)
+
+    results_df = pd.read_csv(args.input_results, sep="\t")
+
+    # Assign the column directly instead of round-tripping through a list of
+    # dicts, which dropped the header when the input had no rows. Indexing
+    # the mapping keeps the KeyError for unknown filenames.
+    results_df["full_ccms_path"] = [
+        mangled_mapping[filename] for filename in results_df["filename"]]
+
+    results_df.to_csv(args.output_results, sep="\t", index=False)
+
+
+if __name__ == "__main__":
+    main()
diff --git a/xicbatch/tools/xicbatch/draw_results.py b/xicbatch/tools/xicbatch/draw_results.py
new file mode 100644
index 00000000..e92be6db
--- /dev/null
+++ b/xicbatch/tools/xicbatch/draw_results.py
@@ -0,0 +1,52 @@
+import os
+import sys
+import pandas as pd
+
+import argparse
+import ming_proteosafe_library
+
+from plotnine import *
+
+
+def _xic_line_plot(data, figure_size, color=None):
+    """Build an rt-vs-int line plot, optionally colored by one column."""
+    mapping = aes(x='rt', y='int') if color is None else aes(x='rt', y='int', color=color)
+    return (
+        ggplot(data, mapping)
+        + geom_line()
+        + labs(x='RT', y='Intensity')
+        + theme(figure_size=figure_size)
+    )
+
+
+def main():
+    """Draw the merged XIC plot and one plot per (filename, query) pair.
+
+    Reads the tab-separated extraction table and writes "merged.png" plus
+    "<filename>_<query>.png" files into the output folder.
+    """
+    parser = argparse.ArgumentParser(description='Drawing XIC results')
+    parser.add_argument('extracted_results', help='extracted_results')
+    parser.add_argument('output_folder', help='output_folder')
+
+    args = parser.parse_args()
+
+    extraction_df = pd.read_csv(args.extracted_results, sep="\t")
+
+    os.makedirs(args.output_folder, exist_ok=True)
+
+    merged_plot = _xic_line_plot(extraction_df, (20, 16), color='full_ccms_path')
+    merged_plot.save(os.path.join(args.output_folder, "merged.png"), limitsize=False)
+
+    # Group instead of looping over every filename x query combination:
+    # combinations with no rows are skipped rather than plotted empty.
+    for (filename, query), filtered_df in extraction_df.groupby(["filename", "query"]):
+        output_filename = "{}_{}.png".format(filename, query)
+        print(output_filename, len(filtered_df))
+
+        file_plot = _xic_line_plot(filtered_df, (15, 10))
+        file_plot.save(os.path.join(args.output_folder, output_filename), limitsize=False)
+
+
+if __name__ == "__main__":
+    main()
diff --git a/xicbatch/tools/xicbatch/ming_fileio_library.py b/xicbatch/tools/xicbatch/ming_fileio_library.py
new file mode
120000 index 00000000..64ef908d --- /dev/null +++ b/xicbatch/tools/xicbatch/ming_fileio_library.py @@ -0,0 +1 @@ +../../../shared_code/ming_fileio_library.py \ No newline at end of file diff --git a/xicbatch/tools/xicbatch/ming_proteosafe_library.py b/xicbatch/tools/xicbatch/ming_proteosafe_library.py new file mode 120000 index 00000000..9d4083af --- /dev/null +++ b/xicbatch/tools/xicbatch/ming_proteosafe_library.py @@ -0,0 +1 @@ +../../../shared_code/ming_proteosafe_library.py \ No newline at end of file diff --git a/xicbatch/tools/xicbatch/msaccess b/xicbatch/tools/xicbatch/msaccess new file mode 120000 index 00000000..14358c90 --- /dev/null +++ b/xicbatch/tools/xicbatch/msaccess @@ -0,0 +1 @@ +../../.././molecular-librarysearch-v2/tools/molecularsearch/msaccess \ No newline at end of file diff --git a/xicbatch/xicbatch/binding.xml b/xicbatch/xicbatch/binding.xml new file mode 100644 index 00000000..6efa21b2 --- /dev/null +++ b/xicbatch/xicbatch/binding.xml @@ -0,0 +1,91 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/xicbatch/xicbatch/flow.xml b/xicbatch/xicbatch/flow.xml new file mode 100644 index 00000000..c6a88e8a --- /dev/null +++ b/xicbatch/xicbatch/flow.xml @@ -0,0 +1,62 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/xicbatch/xicbatch/input.xml b/xicbatch/xicbatch/input.xml new file mode 100644 index 00000000..d86a09b7 --- /dev/null +++ b/xicbatch/xicbatch/input.xml @@ -0,0 +1,113 @@ + + + xicbatch + xicbatch + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Spectrum Files + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/xicbatch/xicbatch/result.xml b/xicbatch/xicbatch/result.xml new file mode 100644 index 
00000000..25835e1e --- /dev/null +++ b/xicbatch/xicbatch/result.xml @@ -0,0 +1,100 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/xicbatch/xicbatch/tool.xml b/xicbatch/xicbatch/tool.xml new file mode 100644 index 00000000..3ce8d6ea --- /dev/null +++ b/xicbatch/xicbatch/tool.xml @@ -0,0 +1,75 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +