Add multiple() repeat-unit expansion to the MassQL grammar and parser.

multiple(mz=<factor>, min=<n>, max=<m>) stands for n..m repeats of an m/z
increment. A numerical expression containing it is expanded into a list with
one evaluated value per repeat count; the multiple() term may appear anywhere
in the expression and the surrounding terms are kept in every value.

NOTE: the blob ids on the "index" lines are carried over unchanged and do not
describe the revised post-image of massql/msql_parser.py.

diff --git a/massql/msql.ebnf b/massql/msql.ebnf
index c8b8f2e..e3caa3a 100644
--- a/massql/msql.ebnf
+++ b/massql/msql.ebnf
@@ -142,7 +142,9 @@ factor: floating
     | "aminoaciddelta(" aminoacids ")"
     | peptidefunction
     | "(" numericalexpression ")"
+    | multiplefunction
 peptidefunction: "peptide(" peptide "," "charge=" peptidecharge "," "ion=" peptideion ")"
+multiplefunction: "multiple(mz=" factor "," "min=" multiplenumber "," "max=" multiplenumber ")"
 multiply: "*"
 divide: "/"
 plus: "+"
@@ -154,6 +156,8 @@ peptide: /[A-Z][A-Z]*/
 peptidecharge: /[1-9]/
 peptideion: /[A-Za-z]/
 
+multiplenumber: /[1-9][0-9]*/
+
 // Boiler Plate
 %import common.ESCAPED_STRING -> STRING
 %import common.SIGNED_NUMBER -> NUMBER
diff --git a/massql/msql_parser.py b/massql/msql_parser.py
index 65d2be3..0323ea7 100644
--- a/massql/msql_parser.py
+++ b/massql/msql_parser.py
@@ -116,6 +116,17 @@ def mobilityfunction(self, items):
 
     def mobilityrange(self, items):
         return "mobilityrange"
+
+    def multiplenumber(self, items):
+        return int(items[0])
+
+    def multiplefunction(self, items):
+        # Children arrive in grammar order: the mz factor, then min, then max.
+        mz, min_count, max_count = items[0], items[1], items[2]
+        if min_count > max_count:
+            raise ValueError("multiple(): min must not be greater than max")
+
+        return {"type": "multiple", "mz": mz, "min": min_count, "max": max_count}
 
     def qualifier(self, items):
         if len(items) == 1 and items[0] == "qualifierintensityreference":
@@ -375,12 +386,30 @@ def numericalexpression(self, items):
             return items[0]
 
         has_variable = _has_variable(items)
+        has_multiple = _has_multiple(items)
 
         string_items = [str(item) for item in items]
         full_expression = "".join(string_items)
 
         if has_variable:
             return full_expression
+
+        if has_multiple:
+            # Expand into one value per repeat count so this can become an OR
+            # query. The multiple() term is looked up by type rather than
+            # assumed to be the last item, and each repeat is substituted back
+            # into the full expression so the surrounding terms (for example
+            # a leading formula mass) are kept in every value.
+            # Only the first multiple() term in an expression is expanded.
+            multiple_item = next(item for item in items if _has_multiple([item]))
+
+            all_mz_values = []
+            for i in range(multiple_item["min"], multiple_item["max"] + 1):
+                repeat_mz = str(multiple_item["mz"] * i)
+                expanded = [repeat_mz if item is multiple_item else str(item) for item in items]
+                all_mz_values.append(math_parser.parse("".join(expanded)).evaluate({}))
+
+            return all_mz_values
 
         # Calculating the expression
         calculated_value = math_parser.parse(full_expression).evaluate({})
@@ -429,8 +458,6 @@ def peptidefunction(self, items):
         exact_mass = mass.calculate_mass(sequence=items[0], ion_type=items[2].lower(), charge=int(items[1]))
         return exact_mass
 
-
-
     def string(self, s):
         (s,) = s
         return s[1:-1]
@@ -450,6 +477,10 @@ def _has_variable(items):
 
     return False
 
+def _has_multiple(items):
+    # True when any item is a dict tagged as a multiple() term.
+    return any(isinstance(item, dict) and item.get("type") == "multiple" for item in items)
+
 def _visualize_parse(input_query, path_to_grammar=None, output_filename="parse.png"):
     if path_to_grammar is None:
         path_to_grammar = os.path.join(os.path.dirname(__file__), "msql.ebnf")
diff --git a/tests/test_parse.py b/tests/test_parse.py
index 8e2bfd6..ee47095 100644
--- a/tests/test_parse.py
+++ b/tests/test_parse.py
@@ -229,13 +229,21 @@ def test_or_cardinality():
 
     print(parsed_output)
 
+def test_repeat_parse():
+    query = """
+    QUERY scaninfo(MS2DATA) WHERE MS2PROD=(formula(C3H5O2C2H6) + multiple(mz=formula(CH2),min=2, max=60) )
+    """
+    parsed_output = msql_parser.parse_msql(query)
+    print(parsed_output)
 
 
 
 def main():
+    test_repeat_parse()
+
     #test_ms1_multiple_or_with_variable()
     #test_xrange_parse()
-    test_parse()
+    #test_parse()
     #test_comment_parse()
     #test_number_expression_parse()
     #test_formula_expression_parse()