From 7c41e2031c91fd97b9138f34fe66edbef744092e Mon Sep 17 00:00:00 2001
From: DiazRock
Date: Wed, 26 Feb 2020 15:40:47 -0500
Subject: [PATCH 01/23] First step in development. Switching to another branch

---
 src/coolc.sh | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/coolc.sh b/src/coolc.sh
index 3088de4f..d4f803a5 100755
--- a/src/coolc.sh
+++ b/src/coolc.sh
@@ -5,7 +5,7 @@ OUTPUT_FILE=${INPUT_FILE:0: -2}mips
 
 # If your compiler does not do it already, you can print the contact information here
 echo "LINEA_CON_NOMBRE_Y_VERSION_DEL_COMPILADOR" # TODO: Remember to change these
-echo "Copyright (c) 2019: Nombre1, Nombre2, Nombre3" # TODO: lines to the correct values
+echo "Copyright (c) 2020: Alejandro Díaz Roque, Rafael Horrach" # TODO: lines to the correct values
 
 # Call the compiler
 echo "Compiling $INPUT_FILE into $OUTPUT_FILE"

From 577524643dc5ef5b183af0aa679e5c3989590668 Mon Sep 17 00:00:00 2001
From: DiazRock
Date: Thu, 27 Feb 2020 18:09:27 -0500
Subject: [PATCH 02/23] Added a main.py file. coolc.sh is a wrapper for it.

---
 src/coolc.sh | 11 ++++++++++-
 src/main.py  | 14 ++++++++++++++
 2 files changed, 24 insertions(+), 1 deletion(-)
 create mode 100644 src/main.py

diff --git a/src/coolc.sh b/src/coolc.sh
index d4f803a5..f3613a9e 100755
--- a/src/coolc.sh
+++ b/src/coolc.sh
@@ -1,11 +1,20 @@
 # Include here the instructions needed to run your compiler
+
 INPUT_FILE=$1
 OUTPUT_FILE=${INPUT_FILE:0: -2}mips
+
+# The compiler is going to be a Python program.
+# Here we call the compiler with the values from the standard input.
+
+
+
 # If your compiler does not do it already, you can print the contact information here
 echo "LINEA_CON_NOMBRE_Y_VERSION_DEL_COMPILADOR" # TODO: Remember to change these
-echo "Copyright (c) 2020: Alejandro Díaz Roque, Rafael Horrach" # TODO: lines to the correct values
+echo "CopyLeft (L) 2020: Alejandro Díaz Roque, Rafael Horrach"
 
 # Call the compiler
+python3 main.py $@
 echo "Compiling $INPUT_FILE into $OUTPUT_FILE"
+
diff --git a/src/main.py b/src/main.py
new file mode 100644
index 00000000..3c3f8db3
--- /dev/null
+++ b/src/main.py
@@ -0,0 +1,14 @@
+import sys, fileinput
+from argparse import ArgumentParser
+
+
+parser_input = ArgumentParser(description= 'This is the Diaz-Horrach cool compiler, a school project.\nRead this help and see the official repo')
+parser_input.add_argument('files_for_compile', help = 'The file(s) to be compiled', nargs= '*')
+parser_input.add_argument("--lex", '-l', help = 'Output the lexer for the .cl file', action = 'store_true')
+parser_input.add_argument("--ast", help = 'Output the abstract syntax tree (AST) for the .cl file', action = 'store_true')
+parser_input.add_argument("--outputFile", '-oF', help = 'Put the info of the output options in the specified file.\n If no output option is specified the file is created empty.')
+args = parser_input.parse_args()
+working_input = fileinput.input(files = args.files_for_compile)
+if args.outputFile:
+    fd = open(mode= 'x', file = './output_files' + args.outputFile)
+

From d87bae5105463d6725f2f098da50ad5bc2489d4a Mon Sep 17 00:00:00 2001
From: DiazRock
Date: Thu, 27 Feb 2020 21:38:40 -0500
Subject: [PATCH 03/23] Added the base for the compiler structure

---
 src/compiler/__init__.py                      |  0
 src/compiler/components/__init__.py           |  0
 src/compiler/components/abstract_component.py |  6 ++++++
 src/compiler/components/dummy_component.py    | 13 +++++++++++++
 src/compiler/initialize.py                    | 10 ++++++++++
 src/compiler/utils/__init__.py                |  0
 src/compiler/utils/container.py               | 15 +++++++++++++++
 7 files 
changed, 44 insertions(+) create mode 100644 src/compiler/__init__.py create mode 100644 src/compiler/components/__init__.py create mode 100644 src/compiler/components/abstract_component.py create mode 100644 src/compiler/components/dummy_component.py create mode 100644 src/compiler/initialize.py create mode 100644 src/compiler/utils/__init__.py create mode 100644 src/compiler/utils/container.py diff --git a/src/compiler/__init__.py b/src/compiler/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/src/compiler/components/__init__.py b/src/compiler/components/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/src/compiler/components/abstract_component.py b/src/compiler/components/abstract_component.py new file mode 100644 index 00000000..3c5fe019 --- /dev/null +++ b/src/compiler/components/abstract_component.py @@ -0,0 +1,6 @@ + +class Component: + def __init__(self, input): + self.input = input + + \ No newline at end of file diff --git a/src/compiler/components/dummy_component.py b/src/compiler/components/dummy_component.py new file mode 100644 index 00000000..77d5888a --- /dev/null +++ b/src/compiler/components/dummy_component.py @@ -0,0 +1,13 @@ +from abstract_component import Component + + +class dummy_component (Component): + def __init__(self, input, component_type: str): + super().__init__(input) + self.component_type = component_type + + + def output(self): + return "Sorry, this is just a dummy %s component" %(self.component_type) + + diff --git a/src/compiler/initialize.py b/src/compiler/initialize.py new file mode 100644 index 00000000..c7c3c987 --- /dev/null +++ b/src/compiler/initialize.py @@ -0,0 +1,10 @@ +from .utils.container import component_container + + +class compiler: + def __init__(self, lexer, parser): + self.symbolTable = {} + + self.lexer = lexer + self.parser = parser + pass \ No newline at end of file diff --git a/src/compiler/utils/__init__.py b/src/compiler/utils/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/src/compiler/utils/container.py b/src/compiler/utils/container.py new file mode 100644 index 00000000..01730e0c --- /dev/null +++ b/src/compiler/utils/container.py @@ -0,0 +1,15 @@ +from dependency_injector import containers, providers +from compiler.components.dummy_component import dummy_component + + + +class component_container (containers.DeclarativeContainer): + dummy_lexer = providers.Factory(dummy_component, "Lexer") + dummy_parser = providers.Factory(dummy_component, "Parser") + + + + + + + From 664f89d0ce7a8cf9c0a2e92537b01cf655c18cac Mon Sep 17 00:00:00 2001 From: DiazRock Date: Fri, 28 Feb 2020 13:51:53 -0500 Subject: [PATCH 04/23] Added lexer definitions --- src/compiler/utils/lexer_definitions.py | 47 +++++++++++++++++++++++++ 1 file changed, 47 insertions(+) create mode 100644 src/compiler/utils/lexer_definitions.py diff --git a/src/compiler/utils/lexer_definitions.py b/src/compiler/utils/lexer_definitions.py new file mode 100644 index 00000000..0acb7292 --- /dev/null +++ b/src/compiler/utils/lexer_definitions.py @@ -0,0 +1,47 @@ +tokens_collection = ( + # Identifiers + "ID", "TYPE", + + # Primitive Types + "INTEGER", "STRING", "BOOLEAN", + + # Literals + "LPAREN", "RPAREN", "LBRACE", "RBRACE", "COLON", "COMMA", "DOT", "SEMICOLON", "AT", + + # Operators + "PLUS", "MINUS", "MULTIPLY", "DIVIDE", "EQ", "LT", "LTEQ", "ASSIGN", "INT_COMP", "NOT", + + # Special Operators + "ARROW" +) + +class keyword(str): + def __eq__(self, other: str): + val = str(self) + if val != 'true' and val != 
'false': + return val == other.lower() + return val[0] == other[0] and val[1:] == other.lower()[1:] + +basic_keywords = { + "case": keyword("case"), + "class": keyword("class"), + "else": keyword("else"), + "esac": keyword("esac"), + "fi": keyword("fi"), + "if": keyword("if"), + "in": keyword("in"), + "inherits": keyword("inherits"), + "isvoid": keyword("isvoid"), + "let": keyword("let"), + "loop": keyword("loop"), + "new": keyword("new"), + "of": keyword("of"), + "pool": keyword("pool"), + "self": keyword("self"), + "then": keyword("then"), + "while": keyword("while"), + "true": keyword("true"), + "false": keyword("false") +} + + From fb038dbbd14e5f74f9e1ad9aeadc95ae52db0e33 Mon Sep 17 00:00:00 2001 From: DiazRock Date: Sat, 29 Feb 2020 12:30:48 -0500 Subject: [PATCH 05/23] All the cool lexer analyzer declarations --- src/compiler/utils/lexer_definitions.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/compiler/utils/lexer_definitions.py b/src/compiler/utils/lexer_definitions.py index 0acb7292..ab9f2b69 100644 --- a/src/compiler/utils/lexer_definitions.py +++ b/src/compiler/utils/lexer_definitions.py @@ -41,7 +41,10 @@ def __eq__(self, other: str): "then": keyword("then"), "while": keyword("while"), "true": keyword("true"), - "false": keyword("false") + "false": keyword("false"), + 'not' : keyword('not') } +keywords_for_built_in_types = { +} From 6d7a1b43179511b09cf982c33facc39f512263cb Mon Sep 17 00:00:00 2001 From: DiazRock Date: Sat, 29 Feb 2020 12:32:36 -0500 Subject: [PATCH 06/23] More changes --- src/compiler/components/abstract_component.py | 21 +- src/compiler/components/dummy_component.py | 14 +- src/compiler/components/lexer_analyzer.py | 13 ++ src/compiler/initialize.py | 4 +- src/compiler/utils/compiler_containers.py | 21 ++ src/compiler/utils/lexer_definitions.py | 201 +++++++++++++++++- 6 files changed, 260 insertions(+), 14 deletions(-) create mode 100644 src/compiler/components/lexer_analyzer.py create mode 100644 src/compiler/utils/compiler_containers.py diff --git a/src/compiler/components/abstract_component.py b/src/compiler/components/abstract_component.py index 3c5fe019..30b7ed07 100644 --- a/src/compiler/components/abstract_component.py +++ b/src/compiler/components/abstract_component.py @@ -1,6 +1,19 @@ class Component: - def __init__(self, input): - self.input = input - - \ No newline at end of file + def __init__(self, + input_info, + component_name, + debug_session = False, + error_log_file = None, + build_after_initialize = True): + self.input_info = input_info + self.component_name = component_name + self.debug_session = debug_session + self.error_log_file = error_log_file + if build_after_initialize: self.build_component() + + + def build_component (self): + if self.debug_session: + print('Building %s component' %self.component_name) + diff --git a/src/compiler/components/dummy_component.py b/src/compiler/components/dummy_component.py index 77d5888a..36e261eb 100644 --- a/src/compiler/components/dummy_component.py +++ b/src/compiler/components/dummy_component.py @@ -1,13 +1,15 @@ -from abstract_component import Component +from .abstract_component import Component class dummy_component (Component): - def __init__(self, input, component_type: str): - super().__init__(input) - self.component_type = component_type + def __init__(self, component_name, *args, **kwargs): + component_name = component_name + "dummy_" + super().__init__(*args, component_name = component_name + "dummy_", **kwargs) + - def output(self): - return "Sorry, this is 
just a dummy %s component" %(self.component_type) + def build_component(self): + super().build_component() + return "Sorry, this is just a %s component" %(self.component_name) diff --git a/src/compiler/components/lexer_analyzer.py b/src/compiler/components/lexer_analyzer.py new file mode 100644 index 00000000..1ff485c8 --- /dev/null +++ b/src/compiler/components/lexer_analyzer.py @@ -0,0 +1,13 @@ +from .abstract_component import Component + + +class lexer_analyzer(Component): + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + + @property + def tokens_collection(): + pass + + def build_component(self): + super().build_component() \ No newline at end of file diff --git a/src/compiler/initialize.py b/src/compiler/initialize.py index c7c3c987..317c53dc 100644 --- a/src/compiler/initialize.py +++ b/src/compiler/initialize.py @@ -1,10 +1,10 @@ -from .utils.container import component_container +from .utils.compiler_containers import component_container class compiler: def __init__(self, lexer, parser): self.symbolTable = {} - + self.lexer = lexer self.parser = parser pass \ No newline at end of file diff --git a/src/compiler/utils/compiler_containers.py b/src/compiler/utils/compiler_containers.py new file mode 100644 index 00000000..7fe71041 --- /dev/null +++ b/src/compiler/utils/compiler_containers.py @@ -0,0 +1,21 @@ +from dependency_injector import containers, providers +from compiler.components.dummy_component import dummy_component +import lexer_definitions + + +class component_container (containers.DeclarativeContainer): + dummy_lexer = providers.Factory(dummy_component, "Lexer") + dummy_parser = providers.Factory(dummy_component, "Parser") + + +container_dict = { + 'lexer_options': { 'd': component_container.dummy_lexer }, + 'parser_options': {'d' : component_container.dummy_parser} + } + +class lexer_analyzer_dependency_container (containers.DeclarativeContainer): + reserved_keywords_cool = providers.Callable(lambda value: value, lexer_definitions.basic_keywords) + tokens_collection_cool = providers.Callable(lambda value: value, lexer_definitions.tokens_collection) + simple_rules_cool = providers.Callable(lambda value: value, lexer_definitions.simple_rules_cool) + + diff --git a/src/compiler/utils/lexer_definitions.py b/src/compiler/utils/lexer_definitions.py index ab9f2b69..240ae8fd 100644 --- a/src/compiler/utils/lexer_definitions.py +++ b/src/compiler/utils/lexer_definitions.py @@ -1,3 +1,5 @@ +from ply.lex import TOKEN + tokens_collection = ( # Identifiers "ID", "TYPE", @@ -45,6 +47,201 @@ def __eq__(self, other: str): 'not' : keyword('not') } -keywords_for_built_in_types = { - +#Simple rules for cool +simple_rules = { + "t_LPAREN" : r'\(', + "t_RPAREN" : r'\)', + "t_LBRACE" : r'\{', + "t_RBRACE" : r'\}', + "t_COLON" : r'\:' , + "t_COMMA" : r'\,' , + "t_DOT" : r'\.' , + "t_SEMICOLON" : r'\;', + "t_AT" : r'\@', + "t_PLUS" : r'\+', + "t_MINUS" : r'\-', + "t_MULTIPLY" : r'\*', + "t_DIVIDE" : r'\/', + "t_EQ" : r'\=', + "t_LT" : r'\<', + "t_LTEQ" : r'\<\=', + "t_ASSIGN" : r'\<\-', + "t_INT_COMP" : r'~', + "t_NOT" : r'not', + "t_ignore" : ' \t\r\f' } +""" t_LPAREN = r'\(' # ( +t_RPAREN = r'\)' # ) +t_LBRACE = r'\{' # { +t_RBRACE = r'\}' # } +t_COLON = r'\:' # : +t_COMMA = r'\,' # , +t_DOT = r'\.' # . 
+t_SEMICOLON = r'\;' # ; +t_AT = r'\@' # @ +t_PLUS = r'\+' # + +t_MINUS = r'\-' # - +t_MULTIPLY = r'\*' # * +t_DIVIDE = r'\/' # / +t_EQ = r'\=' # = +t_LT = r'\<' # < +t_LTEQ = r'\<\=' # <= +t_ASSIGN = r'\<\-' # <- +t_INT_COMP = r'~' # ~ +t_NOT = r'not' # not + +#ignore spaces +t_ignore = ' \t\r\f' """ + +#Complex rules for cool + +@TOKEN(r"(true|false)") +def t_BOOLEAN (token): + token.value = True if token.value == basic_keywords['true'] else False + return token + +@TOKEN(r"\d+") +def t_INTEGER(token): + token.value = int(token.value) + return token + +@TOKEN(r"[A-Z][a-zA-Z_0-9]*") +def t_TYPE(token): + token.type = basic_keywords.get(token.value, 'TYPE') + return token + +@TOKEN(r"\n+") +def t_newline(token): + token.lexer.lineno += len(token.value) + + +@TOKEN(r"[a-z][a-zA-Z_0-9]*") +def t_ID(token): + token.type = basic_keywords.get(token.value, 'ID') + return token + + + +#Lexer states +def states(): + return ( + ("STRING", "exclusive"), + ("COMMENT", "exclusive") + ) + +# The string states + +@TOKEN(r"\"") +def t_STRING_start(token): + token.lexer.push_state("STRING") + token.lexer.string_backslashed = False + token.lexer.stringbuf = "" + + +@TOKEN(r"\n") +def t_STRING_newline(token): + token.lexer.lineno += 1 + if not token.lexer.string_backslashed: + print("String newline not escaped") + token.lexer.skip(1) + else: + token.lexer.string_backslashed = False + + +@TOKEN(r"\"") +def t_STRING_end(self, token): + if not token.lexer.string_backslashed: + token.lexer.pop_state() + token.value = token.lexer.stringbuf + token.type = "STRING" + return token + else: + token.lexer.stringbuf += '"' + token.lexer.string_backslashed = False + +@TOKEN(r"[^\n]") +def t_STRING_anything(self, token): + if token.lexer.string_backslashed: + if token.value == 'b': + token.lexer.stringbuf += '\b' + elif token.value == 't': + token.lexer.stringbuf += '\t' + elif token.value == 'n': + token.lexer.stringbuf += '\n' + elif token.value == 'f': + token.lexer.stringbuf += '\f' + elif token.value == '\\': + token.lexer.stringbuf += '\\' + else: + token.lexer.stringbuf += token.value + token.lexer.string_backslashed = False + else: + if token.value != '\\': + token.lexer.stringbuf += token.value + else: + token.lexer.string_backslashed = True + + + +complex_rules = [ + t_BOOLEAN, + t_INTEGER, + t_TYPE, + t_newline, + t_ID, + #---------- + #String states rules + t_STRING_start, + t_STRING_newline, + t_STRING_anything, + t_STRING_end + #---------- + ] + + +# STRING ignored characters +t_STRING_ignore = '' + + +# The comment states +@TOKEN(r"\(\*") +def t_COMMENT_start(self, token): + token.lexer.push_state("COMMENT") + token.lexer.comment_count = 0 + +#Comments can be recursive +@TOKEN(r"\(\*") +def t_COMMENT_startanother(self, t): + t.lexer.comment_count += 1 + +@TOKEN(r"\*\)") +def t_COMMENT_end(self, token): + if token.lexer.comment_count == 0: + token.lexer.pop_state() + else: + token.lexer.comment_count -= 1 + +# COMMENT ignored characters +t_COMMENT_ignore = '' + + + + + + +#Error handlers + +# STRING error handler +def t_STRING_error(self, token): + print("Illegal character! Line: {0}, character: {1}".format(token.lineno, token.value[0])) + token.lexer.skip(1) + + +# COMMENT error handler +def t_COMMENT_error(self, token): + token.lexer.skip(1) + +def t_error(self, token): + print("Illegal character! 
Line: {0}, character: {1}".format(token.lineno, token.value[0])) + token.lexer.skip(1) + From 888a63b7c3e2a8bbff3fb3d9bd85db7e2e819389 Mon Sep 17 00:00:00 2001 From: DiazRock Date: Sat, 29 Feb 2020 15:08:18 -0500 Subject: [PATCH 07/23] Added functionality for lexer analyzer --- src/compiler/components/lexer_analyzer.py | 19 ++++++++++++++----- 1 file changed, 14 insertions(+), 5 deletions(-) diff --git a/src/compiler/components/lexer_analyzer.py b/src/compiler/components/lexer_analyzer.py index 1ff485c8..31362b32 100644 --- a/src/compiler/components/lexer_analyzer.py +++ b/src/compiler/components/lexer_analyzer.py @@ -1,13 +1,22 @@ from .abstract_component import Component - +from ..utils.compiler_containers import lexer_analyzer_dependency_container as injector +from ply import lex class lexer_analyzer(Component): def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) + @property - def tokens_collection(): - pass - + def tokens_collection(self): + return injector.tokens_collection_cool + + @property + def basic_reserved(self): + return injector.reserved_keywords_cool + def build_component(self): - super().build_component() \ No newline at end of file + self.reserved = self.basic_reserved.keys() + self.tokens = self.tokens_collection + tuple(self.basic_reserved.values()) + self.lexer = lex.lex(module = self) + \ No newline at end of file From c6663a7ca0d6c1010eee8d77619650f1d5b8b7d2 Mon Sep 17 00:00:00 2001 From: DiazRock Date: Sat, 29 Feb 2020 20:03:30 -0500 Subject: [PATCH 08/23] I think that the lexer analyzer is ready for test --- src/compiler/components/lexer_analyzer.py | 36 ++++++--- src/compiler/utils/compiler_containers.py | 14 +++- src/compiler/utils/container.py | 15 ---- src/compiler/utils/lexer_definitions.py | 99 ++++++++++++++--------- src/main.py | 12 ++- 5 files changed, 98 insertions(+), 78 deletions(-) delete mode 100644 src/compiler/utils/container.py diff --git a/src/compiler/components/lexer_analyzer.py b/src/compiler/components/lexer_analyzer.py index 31362b32..06b9efc8 100644 --- a/src/compiler/components/lexer_analyzer.py +++ b/src/compiler/components/lexer_analyzer.py @@ -3,20 +3,32 @@ from ply import lex class lexer_analyzer(Component): - def __init__(self, *args, **kwargs): + def __init__(self, + tokens_collection, + basic_keywords, + simple_rules, + complex_rules, + *args, + **kwargs): + self.inject_complex_rules(complex_rules) + self.tokens_collection = tokens_collection + self.basic_keywords = basic_keywords super().__init__(*args, **kwargs) - - @property - def tokens_collection(self): - return injector.tokens_collection_cool - - @property - def basic_reserved(self): - return injector.reserved_keywords_cool + def inject_complex_rules(self, complex_rules): + for x in complex_rules: + super().__setattr__(x.__name__, x) + def build_component(self): - self.reserved = self.basic_reserved.keys() - self.tokens = self.tokens_collection + tuple(self.basic_reserved.values()) + self.reserved = self.basic_keywords.keys() + self.tokens = self.tokens_collection + tuple(self.basic_keywords.values()) self.lexer = lex.lex(module = self) - \ No newline at end of file + + def input_lexer(self, cool_program_source_code): + self.lexer.input(cool_program_source_code) + + + def token(self): + self.last_token = self.lexer.token() + return self.last_token diff --git a/src/compiler/utils/compiler_containers.py b/src/compiler/utils/compiler_containers.py index 7fe71041..a9162f38 100644 --- a/src/compiler/utils/compiler_containers.py +++ 
b/src/compiler/utils/compiler_containers.py @@ -1,10 +1,11 @@ from dependency_injector import containers, providers from compiler.components.dummy_component import dummy_component import lexer_definitions - +from compiler.components.lexer_analyzer import lexer_analyzer class component_container (containers.DeclarativeContainer): dummy_lexer = providers.Factory(dummy_component, "Lexer") + cool_lexer = providers.Factory(lexer_analyzer) dummy_parser = providers.Factory(dummy_component, "Parser") @@ -14,8 +15,13 @@ class component_container (containers.DeclarativeContainer): } class lexer_analyzer_dependency_container (containers.DeclarativeContainer): - reserved_keywords_cool = providers.Callable(lambda value: value, lexer_definitions.basic_keywords) - tokens_collection_cool = providers.Callable(lambda value: value, lexer_definitions.tokens_collection) - simple_rules_cool = providers.Callable(lambda value: value, lexer_definitions.simple_rules_cool) + #This is just readonly properties + reserved_keywords_cool = lexer_definitions.basic_keywords + tokens_collection_cool = lexer_definitions.tokens_collection + simple_rules_cool = lexer_definitions.simple_rules + complex_rules_cool = lexer_definitions.complex_rules + error_handlers_cool = lexer_definitions.error_handlers + #---------------- + diff --git a/src/compiler/utils/container.py b/src/compiler/utils/container.py deleted file mode 100644 index 01730e0c..00000000 --- a/src/compiler/utils/container.py +++ /dev/null @@ -1,15 +0,0 @@ -from dependency_injector import containers, providers -from compiler.components.dummy_component import dummy_component - - - -class component_container (containers.DeclarativeContainer): - dummy_lexer = providers.Factory(dummy_component, "Lexer") - dummy_parser = providers.Factory(dummy_component, "Parser") - - - - - - - diff --git a/src/compiler/utils/lexer_definitions.py b/src/compiler/utils/lexer_definitions.py index 240ae8fd..2b29f0f2 100644 --- a/src/compiler/utils/lexer_definitions.py +++ b/src/compiler/utils/lexer_definitions.py @@ -48,29 +48,7 @@ def __eq__(self, other: str): } #Simple rules for cool -simple_rules = { - "t_LPAREN" : r'\(', - "t_RPAREN" : r'\)', - "t_LBRACE" : r'\{', - "t_RBRACE" : r'\}', - "t_COLON" : r'\:' , - "t_COMMA" : r'\,' , - "t_DOT" : r'\.' 
,
-    "t_SEMICOLON" : r'\;',
-    "t_AT" : r'\@',
-    "t_PLUS" : r'\+',
-    "t_MINUS" : r'\-',
-    "t_MULTIPLY" : r'\*',
-    "t_DIVIDE" : r'\/',
-    "t_EQ" : r'\=',
-    "t_LT" : r'\<',
-    "t_LTEQ" : r'\<\=',
-    "t_ASSIGN" : r'\<\-',
-    "t_INT_COMP" : r'~',
-    "t_NOT" : r'not',
-    "t_ignore" : ' \t\r\f'
-}
-""" t_LPAREN = r'\(' # (
+t_LPAREN = r'\(' # (
 t_RPAREN = r'\)' # )
 t_LBRACE = r'\{' # {
 t_RBRACE = r'\}' # }
 t_COLON = r'\:' # :
 t_COMMA = r'\,' # ,
 t_DOT = r'\.' # .
@@ -91,7 +69,30 @@ def __eq__(self, other: str):
 t_NOT = r'not' # not
 
 #ignore spaces
-t_ignore = ' \t\r\f' """
+t_ignore = ' \t\r\f'
+
+simple_rules = [
+    t_LPAREN,
+    t_RPAREN,
+    t_LBRACE,
+    t_RBRACE,
+    t_COLON,
+    t_COMMA,
+    t_DOT,
+    t_SEMICOLON,
+    t_AT,
+    t_PLUS,
+    t_MINUS,
+    t_MULTIPLY,
+    t_DIVIDE,
+    t_EQ,
+    t_LT,
+    t_LTEQ,
+    t_ASSIGN,
+    t_INT_COMP,
+    t_NOT
+]
+
 #Complex rules for cool
@@ -182,25 +183,12 @@ def t_STRING_anything(self, token):
             token.lexer.string_backslashed = True
 
+# STRING ignored characters
+t_STRING_ignore = ''
+
 
-
-complex_rules = [
-    t_BOOLEAN,
-    t_INTEGER,
-    t_TYPE,
-    t_newline,
-    t_ID,
-    #----------
-    #String states rules
-    t_STRING_start,
-    t_STRING_newline,
-    t_STRING_anything,
-    t_STRING_end
-    #----------
-    ]
-
-
-# STRING ignored characters
-t_STRING_ignore = ''
 
 
 # The comment states
@@ -245,3 +233,34 @@ def t_error(self, token):
     print("Illegal character! Line: {0}, character: {1}".format(token.lineno, token.value[0]))
     token.lexer.skip(1)
 
+
+
+#Complex rules list
+complex_rules = [
+    t_BOOLEAN,
+    t_INTEGER,
+    t_TYPE,
+    t_newline,
+    t_ID,
+    #----------
+    #String states rules
+    t_STRING_start,
+    t_STRING_newline,
+    t_STRING_anything,
+    t_STRING_end,
+    t_STRING_ignore,
+    #----------
+    #Comment states rules
+    t_COMMENT_start,
+    t_COMMENT_startanother,
+    t_COMMENT_end,
+    t_COMMENT_ignore
+    ]
+
+#Error handlers
+error_handlers = [
+    t_STRING_error,
+    t_COMMENT_error,
+    t_error
+]
+
diff --git a/src/main.py b/src/main.py
index 3c3f8db3..7d21f021 100644
--- a/src/main.py
+++ b/src/main.py
@@ -1,14 +1,12 @@
 import sys, fileinput
 from argparse import ArgumentParser
-
+from compiler.utils.compiler_containers import container_dict
 
 parser_input = ArgumentParser(description= 'This is the Diaz-Horrach cool compiler, a school project.\nRead this help and see the official repo')
-parser_input.add_argument('files_for_compile', help = 'The file(s) to be compiled', nargs= '*')
-parser_input.add_argument("--lex", '-l', help = 'Output the lexer for the .cl file', action = 'store_true')
-parser_input.add_argument("--ast", help = 'Output the abstract syntax tree (AST) for the .cl file', action = 'store_true')
-parser_input.add_argument("--outputFile", '-oF', help = 'Put the info of the output options in the specified file.\n If no output option is specified the file is created empty.')
+parser_input.add_argument('files_for_compile', help = 'The file(s) to be compiled', nargs= '+')
+parser_input.add_argument("--lexer", help = 'Select the lexer that you want to use from the available options', choices= container_dict['lexer_options'].keys())
+parser_input.add_argument("--parser", help = 'Select the parser that you want to use from the available options', choices= container_dict['parser_options'].keys())
+parser_input.add_argument("--output", help = 'Put the info of the selected components in the standard output.', choices= ['l','p','t'])
 args = parser_input.parse_args()
 working_input = fileinput.input(files = args.files_for_compile)
-if args.outputFile:
-    fd = open(mode= 'x', file = './output_files' + args.outputFile)
-

From a5b8569903bfc879c31bf2770746aec7b81e9c8e Mon Sep 17 00:00:00 2001
From: DiazRock
Date: Sat, 29 Feb 2020 20:46:39 -0500
Subject: [PATCH 09/23] The moment for testing is near. I can feel it.

---
 src/compiler/components/lexer_analyzer.py | 45 +++++++++++++++++++++--
 src/main.py                               |  8 ++--
 2 files changed, 46 insertions(+), 7 deletions(-)

diff --git a/src/compiler/components/lexer_analyzer.py b/src/compiler/components/lexer_analyzer.py
index 06b9efc8..c95c372a 100644
--- a/src/compiler/components/lexer_analyzer.py
+++ b/src/compiler/components/lexer_analyzer.py
@@ -8,22 +8,25 @@ def __init__(self,
                  basic_keywords,
                  simple_rules,
                  complex_rules,
+                 error_handlers,
                  *args,
                  **kwargs):
-        self.inject_complex_rules(complex_rules)
+        self.inject(complex_rules)
+        self.inject(error_handlers)
         self.tokens_collection = tokens_collection
         self.basic_keywords = basic_keywords
         super().__init__(*args, **kwargs)
 
-    def inject_complex_rules(self, complex_rules):
-        for x in complex_rules:
-            super().__setattr__(x.__name__, x)
+    def inject(self, function_group):
+        for function in function_group:
+            super().__setattr__(function.__name__, function)
 
     def build_component(self):
         self.reserved = self.basic_keywords.keys()
         self.tokens = self.tokens_collection + tuple(self.basic_keywords.values())
         self.lexer = lex.lex(module = self)
 
+
     def input_lexer(self, cool_program_source_code):
         self.lexer.input(cool_program_source_code)
 
@@ -32,3 +35,37 @@ def input_lexer(self, cool_program_source_code):
     def token(self):
         self.last_token = self.lexer.token()
         return self.last_token
+
+
+    # A funny iterator here
+    def __iter__(self):
+        return self
+
+    def __next__(self):
+        t = self.token()
+        if t is None:
+            raise StopIteration
+        return t
+
+    def next(self):
+        return self.__next__()
+    # End of fun
+
+
+
+if __name__ == "__main__":
+    import sys
+
+    input_info = sys.argv[1]
+    with open(input_info, encoding = 'utf-8') as file:
+        cool_program_source_code = file.read()
+
+    lexer = lexer_analyzer(tokens_collection = injector.tokens_collection_cool,
+                           basic_keywords = injector.reserved_keywords_cool,
+                           simple_rules = injector.simple_rules_cool,
+                           complex_rules = injector.complex_rules_cool,
+                           error_handlers = injector.error_handlers_cool)
+    lexer.input_lexer(cool_program_source_code)
+
+    for token in lexer:
+        print(token)
\ No newline at end of file
diff --git a/src/main.py b/src/main.py
index 7d21f021..1afa790c 100644
--- a/src/main.py
+++ b/src/main.py
@@ -4,9 +4,11 @@ parser_input = ArgumentParser(description= 'This is the Diaz-Horrach cool compiler, a school project.\nRead this help and see the official repo')
 parser_input.add_argument('files_for_compile', help = 'The file(s) to be compiled', nargs= '+')
-parser_input.add_argument("--lexer", help = 'Select the lexer that you want to use from the available options', choices= container_dict['lexer_options'].keys())
-parser_input.add_argument("--parser", help = 'Select the parser that you want to use from the available options', choices= container_dict['parser_options'].keys())
-parser_input.add_argument("--output", help = 'Put the info of the selected components in the standard output.', choices= ['l','p','t'])
+parser_input.add_argument("--lexer", help = 'Select the lexer that you want to use from the available options', choices = container_dict['lexer_options'].keys())
+parser_input.add_argument("--parser", help = 'Select the parser that you want to use from the available options', choices = container_dict['parser_options'].keys())
+parser_input.add_argument("--output", help = 'Put the info of the selected components in the standard output.', choices = ['l','p','t'])
+
 args = parser_input.parse_args()
 working_input = 
fileinput.input(files = args.files_for_compile) + From facd09d7fb6fd35d94695848dd55f22aa260b2a2 Mon Sep 17 00:00:00 2001 From: DiazRock Date: Wed, 20 May 2020 16:25:53 -0400 Subject: [PATCH 10/23] 10 of 13 test passed --- src/compiler/components/lexer_analyzer.py | 257 +++++++++++++++++----- src/coolc.sh | 6 +- src/main.py | 22 +- 3 files changed, 218 insertions(+), 67 deletions(-) diff --git a/src/compiler/components/lexer_analyzer.py b/src/compiler/components/lexer_analyzer.py index c95c372a..d8d1b974 100644 --- a/src/compiler/components/lexer_analyzer.py +++ b/src/compiler/components/lexer_analyzer.py @@ -1,71 +1,210 @@ -from .abstract_component import Component -from ..utils.compiler_containers import lexer_analyzer_dependency_container as injector -from ply import lex - -class lexer_analyzer(Component): - def __init__(self, - tokens_collection, - basic_keywords, - simple_rules, - complex_rules, - error_handlers, - *args, - **kwargs): - self.inject(complex_rules) - self.inject(error_handlers) - self.tokens_collection = tokens_collection - self.basic_keywords = basic_keywords - super().__init__(*args, **kwargs) - - def inject(self, function_group): - for function in function_group: - super().__setattr__(function.__name__, function) +import ply.lex as lex +from ply.lex import Token +from ply.lex import TOKEN +from ..utils.errors import lexicographicError - - def build_component(self): - self.reserved = self.basic_keywords.keys() - self.tokens = self.tokens_collection + tuple(self.basic_keywords.values()) - self.lexer = lex.lex(module = self) +tokens = [ + # Identifiers + "ID", "TYPE", + + # Primitive Types + "INTEGER", "STRING", "BOOLEAN", + + # Literals + "LPAREN", "RPAREN", "LBRACE", "RBRACE", "COLON", "COMMA", "DOT", "SEMICOLON", "AT", + + # Operators + "PLUS", "MINUS", "MULTIPLY", "DIVIDE", "EQ", "LT", "LTEQ", "ASSIGN", "INT_COMP", "NOT", + + # Special Operators + "ARROW" +] + +reserved = { + 'new':'NEW', + 'of':'OF', + 'if' : 'IF', + 'let':'LET', + 'in' : 'IN', + 'fi':'FI', + 'else' : 'ELSE', + 'while':'WHILE', + 'case':'CASE', + 'then' : 'THEN', + 'esac':'ESAC', + 'pool':'POOL', + 'class':'CLASS', + 'loop':'LOOP', + 'true':'TRUE', + 'inherits':'INHERITS', + 'isvoid':'ISVOID', + 'false':'FALSE' +} + +tokens += list(reserved.values()) + +#Simple rules +t_PLUS = r'\+' +t_MINUS = r'\-' +t_MULTIPLY = r'\*' +t_DIVIDE = r'\/' +t_LPAREN = r'\(' +t_RPAREN = r'\)' +t_EQ = r'\=' +t_LT = r'\<' +t_LTEQ = r'\<\=' +t_ASSIGN = r'\<\-' +t_INT_COMP = r'~' +t_NOT = r'not' +t_LBRACE = r'\{' +t_RBRACE = r'\}' +t_COLON = r'\:' +t_COMMA = r'\,' +t_DOT = r'\.' 
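+# Note: ply.lex tries string rules in order of decreasing regex length, so the
+# two-character patterns t_LTEQ ('<=') and t_ASSIGN ('<-') match before t_LT ('<').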
+t_SEMICOLON = r'\;' +t_AT = r'\@' +t_ARROW = r'\=\>' +#complex rules + +@TOKEN(r"(true|false)") +def t_BOOLEAN(token): + token.value = True if token.value == "true" else False + return token + +@TOKEN(r"\d+") +def t_INTEGER(token): + token.value = int(token.value) + return token + +@TOKEN(r"[A-Z][A-Za-z0-9_]*") +def t_TYPE(token): + token.type = reserved.get(token.value, 'TYPE') + return token + +@TOKEN(r"[a-z][A-Za-z0-9_]*") +def t_ID(token): + token.type = reserved.get(token.value, "ID") + return token + +# Utility definitions +@TOKEN(r'\n+') +def t_ANY_newline(t): + global readjust_col + readjust_col = t.lexpos + len(t.value) + t.lexer.lineno += len(t.value) - - def input_lexer(self, cool_program_source_code): - self.lexer.input(cool_program_source_code) - +def t_error(token): + errors.append(lexicographicError(row_and_col= (token.lineno, token.lexpos - readjust_col + 1), message='ERROR "%s"' % (token.value[:1]))) + token.lexer.skip(1) - def token(self): - self.last_token = self.lexer.token() - return self.last_token +t_ignore = ' \t' +t_ignore_COMMENTLINE = r"\-\-[^\n]*" - # A funny iterator here - def __iter__(self): - return self +#Global states +states = ( + ("STRING", "exclusive"), + ("COMMENT", "exclusive") +) - def __next__(self): - t = self.token() - if t is None: - raise StopIteration - return t +#The string states +@TOKEN(r'\"') +def t_start_string(token): + token.lexer.push_state("STRING") + token.lexer.string_backslashed = False + token.lexer.stringbuf = "" - def next(self): - return self.__next__() - # End of fun +@TOKEN(r"\n") +def t_STRING_newline(token): + token.lexer.lineno += 1 + if not token.lexer.string_backslashed: + errors.append(lexicographicError(row_and_col= (token.lineno, token.lexpos - readjust_col + 1), message= "Unterminated string constant")) + token.lexer.pop_state() + else: + token.lexer.string_backslashed = False +@TOKEN("\0") +def t_STRING_null(token): + errors.append(lexicographicError(row_and_col= (token.lineno, token.lexpos - readjust_col + 1), message='Null character in string')) + token.lexer.skip(1) +@TOKEN(r"\"") +def t_STRING_end(token): + if not token.lexer.string_backslashed: + token.lexer.pop_state() + token.value = token.lexer.stringbuf + token.type = "STRING" + return token + else: + token.lexer.stringbuf += '"' + token.lexer.string_backslashed = False -if __name__ == "__main__": - import sys +@TOKEN(r"[^\n]") +def t_STRING_anything(token): + if token.lexer.string_backslashed: + if token.value == 'b': + token.lexer.stringbuf += '\b' + elif token.value == 't': + token.lexer.stringbuf += '\t' + elif token.value == 'n': + token.lexer.stringbuf += '\n' + elif token.value == 'f': + token.lexer.stringbuf += '\f' + elif token.value == '\\': + token.lexer.stringbuf += '\\' + else: + token.lexer.stringbuf += token.value + token.lexer.string_backslashed = False + else: + if token.value != '\\': + token.lexer.stringbuf += token.value + else: + token.lexer.string_backslashed = True - input_info = sys.argv[1] - with open(input_info, encoding = 'utf-8') as file: - cool_program_source_code = file.read() +def t_STRING_error(token): + token.lexer.skip(1) + errors.append(lexicographicError( + row_and_col= (token.lineno, token.lexpos - readjust_col + 1), + message= 'ERROR at or near ')) + +t_STRING_ignore = '' + +# The comment state + +@TOKEN(r"\(\*") +def t_start_comment(token): + token.lexer.push_state("COMMENT") + token.lexer.comment_count = 0 + +@TOKEN(r"\(\*") +def t_COMMENT_startanother(token): + token.lexer.comment_count += 1 + +@TOKEN(r"\*\)") +def 
t_COMMENT_end(token):
+    if token.lexer.comment_count == 0:
+        token.lexer.pop_state()
+    else:
+        token.lexer.comment_count -= 1
+
+
+def t_COMMENT_error(token):
+    token.lexer.skip(1)
 
-    lexer = lexer_analyzer(tokens_collection = injector.tokens_collection_cool,
-                           basic_keywords = injector.reserved_keywords_cool,
-                           simple_rules = injector.simple_rules_cool,
-                           complex_rules = injector.complex_rules_cool,
-                           error_handlers = injector.error_handlers_cool)
-    lexer.input_lexer(cool_program_source_code)
-
-    for token in lexer:
-        print(token)
\ No newline at end of file
+def t_COMMENT_eof(token):
+    errors.append(lexicographicError(row_and_col= (token.lineno, token.lexpos - readjust_col + 1), message= "EOF in comment"))
+    token.lexer.pop_state()
+
+t_COMMENT_ignore = ''
+errors = []
+
+
+def tokenizer(stream_input):
+    lexer = lex.lex()
+    lexer.input(stream_input)
+    token_list = []
+    for tok in lexer:
+        token_list.append(tok)
+
+    return errors, token_list
+
diff --git a/src/coolc.sh b/src/coolc.sh
index f3613a9e..b6e7d50d 100755
--- a/src/coolc.sh
+++ b/src/coolc.sh
@@ -1,6 +1,6 @@
 # Include here the instructions needed to run your compiler
-
+ 
 INPUT_FILE=$1
 OUTPUT_FILE=${INPUT_FILE:0: -2}mips
@@ -15,6 +15,6 @@ echo "LINEA_CON_NOMBRE_Y_VERSION_DEL_COMPILADOR" # TODO: Remember to change
 echo "CopyLeft (L) 2020: Alejandro Díaz Roque, Rafael Horrach"
 
 # Call the compiler
-python3 main.py $@
-echo "Compiling $INPUT_FILE into $OUTPUT_FILE"
+python main.py "$@"
+#echo "Compiling $INPUT_FILE into $OUTPUT_FILE"
 
diff --git a/src/main.py b/src/main.py
index 1afa790c..82d313d1 100644
--- a/src/main.py
+++ b/src/main.py
@@ -1,14 +1,26 @@
 import sys, fileinput
 from argparse import ArgumentParser
-from compiler.utils.compiler_containers import container_dict
+from compiler.components.lexer_analyzer import tokenizer
 
 parser_input = ArgumentParser(description= 'This is the Diaz-Horrach cool compiler, a school project.\nRead this help and see the official repo')
 parser_input.add_argument('files_for_compile', help = 'The file(s) to be compiled', nargs= '+')
-parser_input.add_argument("--lexer", help = 'Select the lexer that you want to use from the available options', choices = container_dict['lexer_options'].keys())
-parser_input.add_argument("--parser", help = 'Select the parser that you want to use from the available options', choices = container_dict['parser_options'].keys())
-parser_input.add_argument("--output", help = 'Put the info of the selected components in the standard output.', choices = ['l','p','t'])
-
+""" parser_input.add_argument("--lexer", help = 'Select the lexer that you want to use from the available options', choices = component_injector['lexer_options'].keys(),
+                            default='cool')
+parser_input.add_argument("--parser", help = 'Select the parser that you want to use from the available options', choices = component_injector['parser_options'].keys())
+parser_input.add_argument("--output", help = 'Put the info of the selected components in the standard output.', choices = ['l','p','t'])
+ """
 args = parser_input.parse_args()
-working_input = fileinput.input(files = args.files_for_compile)
+#print(args.files_for_compile)
+file = open(args.files_for_compile[0])
+working_input = file.read()
+
+errors, tokens = tokenizer(working_input)
+
+if errors:
+    for error in errors:
+        print(error)
+    exit(1)
+print(tokens)
+exit(0)

From ab5d06ec5095a999e20d9e39ed19bfabbbf4d862 Mon Sep 17 00:00:00 2001
From: DiazRock
Date: Wed, 20 May 2020 21:48:23 -0400
Subject: [PATCH 11/23] 100% of test cases passed

---
 src/compiler/components/lexer_analyzer.py | 25 +++++++++++++++++------
 1 file changed, 19 insertions(+), 6 deletions(-)

diff --git 
a/src/compiler/components/lexer_analyzer.py b/src/compiler/components/lexer_analyzer.py index d8d1b974..2febfa91 100644 --- a/src/compiler/components/lexer_analyzer.py +++ b/src/compiler/components/lexer_analyzer.py @@ -88,8 +88,8 @@ def t_ID(token): # Utility definitions @TOKEN(r'\n+') -def t_ANY_newline(t): - global readjust_col +def t_newline(t): + global readjust_col readjust_col = t.lexpos + len(t.value) t.lexer.lineno += len(t.value) @@ -114,21 +114,24 @@ def t_start_string(token): token.lexer.string_backslashed = False token.lexer.stringbuf = "" -@TOKEN(r"\n") +@TOKEN(r'\n') def t_STRING_newline(token): + global readjust_col token.lexer.lineno += 1 if not token.lexer.string_backslashed: - errors.append(lexicographicError(row_and_col= (token.lineno, token.lexpos - readjust_col + 1), message= "Unterminated string constant")) + errors.append(lexicographicError(row_and_col= (token.lineno, token.lexpos - readjust_col + 1), + message= "Unterminated string constant")) token.lexer.pop_state() else: token.lexer.string_backslashed = False + readjust_col = token.lexpos + len(token.value) -@TOKEN("\0") +@TOKEN('\0') def t_STRING_null(token): errors.append(lexicographicError(row_and_col= (token.lineno, token.lexpos - readjust_col + 1), message='Null character in string')) token.lexer.skip(1) -@TOKEN(r"\"") +@TOKEN(r'\"') def t_STRING_end(token): if not token.lexer.string_backslashed: token.lexer.pop_state() @@ -167,6 +170,10 @@ def t_STRING_error(token): row_and_col= (token.lineno, token.lexpos - readjust_col + 1), message= 'ERROR at or near ')) +def t_STRING_eof(token): + errors.append(lexicographicError(row_and_col= (token.lineno, token.lexpos - readjust_col + 1), message='EOF in string constant')) + token.lexer.pop_state() + t_STRING_ignore = '' # The comment state @@ -180,6 +187,12 @@ def t_start_comment(token): def t_COMMENT_startanother(token): token.lexer.comment_count += 1 +@TOKEN(r"\n+") +def t_COMMENT_newline(token): + global readjust_col + readjust_col = token.lexpos + len(token.value) + token.lexer.lineno += len(token.value) + @TOKEN(r"\*\)") def t_COMMENT_end(token): if token.lexer.comment_count == 0: From 5fdaea5b43fc9a30977a54df71db5557d8f1b552 Mon Sep 17 00:00:00 2001 From: DiazRock Date: Wed, 27 May 2020 16:03:41 -0400 Subject: [PATCH 12/23] A bug in the error column tracker --- src/compiler/components/syntax_analyzer.py | 377 +++++++++++++++++++++ 1 file changed, 377 insertions(+) create mode 100644 src/compiler/components/syntax_analyzer.py diff --git a/src/compiler/components/syntax_analyzer.py b/src/compiler/components/syntax_analyzer.py new file mode 100644 index 00000000..bfec609a --- /dev/null +++ b/src/compiler/components/syntax_analyzer.py @@ -0,0 +1,377 @@ +import ply.yacc as yacc +from ply.yacc import YaccProduction, YaccSymbol +from ..utils.errors import error +from ..components.lexer_analyzer import lexer + +class pyCoolParser: + def __init__(self, tokens): + self.tokens = tokens + self.errors_parser = [] + self.parser = yacc.yacc(module=self) + self.row_tracker = 0 + self.column_corrector = 0 + + # precedence rules + precedence = ( + ('right', 'ASSIGN'), + ('right', 'NOT'), + ('nonassoc', 'LTEQ', 'LT', 'EQ'), + ('left', 'PLUS', 'MINUS'), + ('left', 'MULTIPLY', 'DIVIDE'), + ('right', 'ISVOID'), + ('right', 'INT_COMP'), + ('left', 'AT'), + ('left', 'DOT') + ) + + + def positionTrack(self, + p : YaccProduction, + functionName, + indexLinespan = 0, + indexLexspan = 0): + """ print('%%%%%%%%%%%%%%%%%') + if type(p.stack[-1]) != YaccSymbol: + if self.row_tracker != 
p.stack[-1].lineno: + print('--------------') + print('Inside the if') + print('before lineno %d' %(self.row_tracker)) + self.column_corrector = p.stack[-1].lexpos + self.row_tracker = p.stack[-1].lineno + print (p.stack[-1]) + print('new lineno %d' %self.row_tracker) + print('and col corrector %d' %self.column_corrector) + print('--------------') + else: + print('--------------') + print('Outside before lineno %s' %self.row_tracker) + print('Outside current lineno %s' %p.lineno(indexLinespan)) + print('--------------') + print('the real col %d' %(p.lexpos(indexLinespan)- self.column_corrector)) + print('the fucked col %d' %(p.lexpos(indexLinespan))) + print(p.stack) + print('In the function %s' %functionName) + print('%%%%%%%%%%%%%%%%%')""" + pass + def p_program(self, p): + """ + program : class_list + """ + self.positionTrack(p, functionName='p_program') + + + def p_class_list(self, p): + """ + class_list : class_list class SEMICOLON + | class SEMICOLON + """ + self.positionTrack(p, functionName='p_class_list') + + def p_class(self, p): + """ + class : CLASS TYPE LBRACE features_list_opt RBRACE + """ + self.positionTrack(p, functionName='p_class') + + def p_class_inherits(self, p): + """ + class : CLASS TYPE INHERITS TYPE LBRACE features_list_opt RBRACE + """ + self.positionTrack(p, functionName='p_class_inherits') + + def p_feature_list_opt(self, p): + """ + features_list_opt : features_list + | empty + """ + self.positionTrack(p, functionName='p_feature_list_opt') + + def p_feature_list(self, p): + """ + features_list : features_list feature SEMICOLON + | feature SEMICOLON + """ + self.positionTrack(p, functionName='p_feature_list') + + def p_feature_method(self, p): + """ + feature : ID LPAREN formal_params_list RPAREN COLON TYPE LBRACE expression RBRACE + """ + self.positionTrack(p, functionName='p_feature_method') + + def p_feature_method_no_formals(self, p): + """ + feature : ID LPAREN RPAREN COLON TYPE LBRACE expression RBRACE + """ + self.positionTrack(p, functionName='p_feature_method_no_formals') + + def p_feature_attr_initialized(self, p): + """ + feature : ID COLON TYPE ASSIGN expression + """ + self.positionTrack(p, functionName='p_feature_attr_initialized') + + def p_feature_attr(self, p): + """ + feature : ID COLON TYPE + """ + self.positionTrack(p, functionName='p_feature_attr') + + def p_formal_list_many(self, p): + """ + formal_params_list : formal_params_list COMMA formal_param + | formal_param + """ + self.positionTrack(p, functionName='p_formal_list_many') + + def p_formal(self, p): + """ + formal_param : ID COLON TYPE + """ + self.positionTrack(p, functionName='p_formal') + + def p_expression_object_identifier(self, p): + """ + expression : ID + """ + self.positionTrack(p, functionName='p_expression_object_identifier') + + def p_expression_integer_constant(self, p): + """ + expression : INTEGER + """ + self.positionTrack(p, functionName='p_expression_integer_constant') + + def p_expression_boolean_constant(self, p): + """ + expression : BOOLEAN + """ + self.positionTrack(p, functionName='p_expression_boolean_constant') + + def p_expression_string_constant(self, p): + """ + expression : STRING + """ + self.positionTrack(p, functionName='p_expression_string_constant') + + def p_expr_self(self, p): + """ + expression : SELF + """ + self.positionTrack(p, functionName='p_expr_self') + + def p_expression_block(self, p): + """ + expression : LBRACE block_list RBRACE + """ + self.positionTrack(p, functionName='p_expression_block') + + def p_block_list(self, p): + """ + 
block_list : block_list expression SEMICOLON + | expression SEMICOLON + """ + self.positionTrack(p, functionName='p_block_list') + + def p_expression_assignment(self, p): + """ + expression : ID ASSIGN expression + """ + self.positionTrack(p, functionName='p_expression_assignment') + +# ######################### UNARY OPERATIONS ####################################### + + def p_expression_new(self, p): + """ + expression : NEW TYPE + """ + self.positionTrack(p, functionName='p_expression_new') + + def p_expression_isvoid(self, p): + """ + expression : ISVOID expression + """ + self.positionTrack(p, functionName='p_expression_isvoid') + + def p_expression_integer_complement(self, p): + """ + expression : INT_COMP expression + """ + self.positionTrack(p, functionName='p_expression_integer_complement') + + def p_expression_boolean_complement(self, p): + """ + expression : NOT expression + """ + self.positionTrack(p, functionName='p_expression_boolean_complement') + + # ######################### PARENTHESIZED, MATH & COMPARISONS ##################### + + def p_expression_math_operations(self, p): + """ + expression : expression PLUS expression + | expression MINUS expression + | expression MULTIPLY expression + | expression DIVIDE expression + """ + self.positionTrack(p, functionName='p_expression_math_operations') + + def p_expression_math_comparisons(self, p): + """ + expression : expression LT expression + | expression LTEQ expression + | expression EQ expression + """ + self.positionTrack(p, functionName='p_expression_math_comparisons') + + def p_expression_with_parenthesis(self, p): + """ + expression : LPAREN expression RPAREN + """ + self.positionTrack(p, functionName='p_expression_with_parenthesis') + + # ######################### CONTROL FLOW EXPRESSIONS ############################## + + def p_expression_if_conditional(self, p): + """ + expression : IF expression THEN expression ELSE expression FI + """ + self.positionTrack(p, functionName='p_expression_if_conditional') + + def p_expression_while_loop(self, p): + """ + expression : WHILE expression LOOP expression POOL + """ + self.positionTrack(p, functionName='p_expression_while_loop') + + ## ######################### LET EXPRESSIONS ######################################## + + def p_expression_let(self, p): + """ + expression : let_expression + """ + self.positionTrack(p, functionName='p_expression_let') + + def p_expression_let_simple(self, p): + """ + let_expression : LET ID COLON TYPE IN expression + | nested_lets COMMA LET ID COLON TYPE + """ + self.positionTrack(p, functionName='p_expression_let_simple') + + def p_expression_let_initialized(self, p): + """ + let_expression : LET ID COLON TYPE ASSIGN expression IN expression + | nested_lets COMMA LET ID COLON TYPE ASSIGN expression + """ + self.positionTrack(p, functionName='p_expression_let_initialized') + + def p_inner_lets_simple(self, p): + """ + nested_lets : ID COLON TYPE IN expression + | nested_lets COMMA ID COLON TYPE + """ + self.positionTrack(p, functionName='p_inner_lets_simple') + + def p_inner_lets_initialized(self, p): + """ + nested_lets : ID COLON TYPE ASSIGN expression IN expression + | nested_lets COMMA ID COLON TYPE ASSIGN expression + """ + self.positionTrack(p, functionName='p_inner_lets_initialized') + + # ######################### CASE EXPRESSION ######################################## + + def p_expression_case(self, p): + """ + expression : CASE expression OF actions_list ESAC + """ + self.positionTrack(p, functionName='p_expression_case') + + def 
p_actions_list(self, p): + """ + actions_list : actions_list action + | action + """ + self.positionTrack(p, functionName='p_actions_list') + + def p_action_expr(self, p): + """ + action : ID COLON TYPE ARROW expression SEMICOLON + """ + self.positionTrack(p, functionName='p_action_expr') + + + # ######################### METHODS DISPATCH ###################################### + + def p_expression_dispatch(self, p): + """ + expression : expression DOT ID LPAREN arguments_list_opt RPAREN + """ + self.positionTrack(p, functionName='p_expression_dispatch') + + def p_arguments_list_opt(self, p): + """ + arguments_list_opt : arguments_list + | empty + """ + self.positionTrack(p, functionName='p_arguments_list_opt') + + + def p_arguments_list(self, p): + """ + arguments_list : arguments_list COMMA expression + | expression + """ + self.positionTrack(p, functionName='p_arguments_list') + + def p_expression_static_dispatch(self, p): + """ + expression : expression AT TYPE DOT ID LPAREN arguments_list_opt RPAREN + """ + self.positionTrack(p, functionName='p_expression_static_dispatch') + + + def p_expression_self_dispatch(self, p): + """ + expression : ID LPAREN arguments_list_opt RPAREN + """ + self.positionTrack(p, functionName='p_expression_self_dispatch') + + + + # ######################### ################## ################################### + + def p_empty(self, p): + """ + empty : + """ + self.positionTrack(p, functionName='p_empty') + + def findColumn(self, trackedRow): + for i in range(len(self.parser.symstack) -1, 1, -1): + if self.parser.symstack[i].lineno != trackedRow: + return self.parser.symstack[i].lexpos + return 0 + + def p_error(self, p): + """ + Error rule for Syntax Errors handling and reporting. + """ + error_message = "EOF in string" if p is None else "Error at or near %s" %p.value + column_corrector = 0 if p is None else self.findColumn(p.lineno) + self.errors_parser.append( + error(message= error_message, + error_type="SyntacticError", + row_and_col= (0,0) if p is None else (p.lineno, p.lexpos - column_corrector - 1))) + print("Aquí ando") + self.parser.errok() + self.parser.token() + + +def run_parser(tokens, source_program): + parserCool = pyCoolParser(tokens) + lexer.lineno = 1 + parserCool.parser.parse(source_program, tracking=True, lexer=lexer) + return parserCool.errors_parser + From 4fb1f7d00e8d813f602e159e78617b3373366b94 Mon Sep 17 00:00:00 2001 From: DiazRock Date: Fri, 5 Jun 2020 13:06:36 -0400 Subject: [PATCH 13/23] Passed more tests --- src/compiler/components/lexer_analyzer.py | 27 +-- src/compiler/components/syntax_analyzer.py | 192 +++++++-------------- 2 files changed, 80 insertions(+), 139 deletions(-) diff --git a/src/compiler/components/lexer_analyzer.py b/src/compiler/components/lexer_analyzer.py index 2febfa91..5fce84c6 100644 --- a/src/compiler/components/lexer_analyzer.py +++ b/src/compiler/components/lexer_analyzer.py @@ -1,7 +1,7 @@ import ply.lex as lex from ply.lex import Token from ply.lex import TOKEN -from ..utils.errors import lexicographicError +from ..utils.errors import error tokens = [ # Identifiers @@ -38,7 +38,8 @@ 'true':'TRUE', 'inherits':'INHERITS', 'isvoid':'ISVOID', - 'false':'FALSE' + 'false':'FALSE', + "self": "SELF", } tokens += list(reserved.values()) @@ -94,7 +95,8 @@ def t_newline(t): t.lexer.lineno += len(t.value) def t_error(token): - errors.append(lexicographicError(row_and_col= (token.lineno, token.lexpos - readjust_col + 1), message='ERROR "%s"' % (token.value[:1]))) + global readjust_col + 
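+    # readjust_col holds the lexpos just past the most recent newline, so
+    # (token.lexpos - readjust_col + 1) converts a lexpos into a 1-based column.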
errors.append(error(error_type="LexicographicError", row_and_col= (token.lineno, token.lexpos - readjust_col + 1), message='ERROR "%s"' % (token.value[:1]))) token.lexer.skip(1) t_ignore = ' \t' @@ -119,7 +121,7 @@ def t_STRING_newline(token): global readjust_col token.lexer.lineno += 1 if not token.lexer.string_backslashed: - errors.append(lexicographicError(row_and_col= (token.lineno, token.lexpos - readjust_col + 1), + errors.append(error(error_type="LexicographicError", row_and_col= (token.lineno, token.lexpos - readjust_col + 1), message= "Unterminated string constant")) token.lexer.pop_state() else: @@ -128,7 +130,7 @@ def t_STRING_newline(token): @TOKEN('\0') def t_STRING_null(token): - errors.append(lexicographicError(row_and_col= (token.lineno, token.lexpos - readjust_col + 1), message='Null character in string')) + errors.append(error(error_type="LexicographicError", row_and_col= (token.lineno, token.lexpos - readjust_col + 1), message='Null character in string')) token.lexer.skip(1) @TOKEN(r'\"') @@ -166,12 +168,12 @@ def t_STRING_anything(token): def t_STRING_error(token): token.lexer.skip(1) - errors.append(lexicographicError( + errors.append(error(error_type="LexicographicError", row_and_col= (token.lineno, token.lexpos - readjust_col + 1), message= 'ERROR at or near ')) def t_STRING_eof(token): - errors.append(lexicographicError(row_and_col= (token.lineno, token.lexpos - readjust_col + 1), message='EOF in string constant')) + errors.append(error(error_type="LexicographicError", row_and_col= (token.lineno, token.lexpos - readjust_col + 1), message='EOF in string constant')) token.lexer.pop_state() t_STRING_ignore = '' @@ -205,19 +207,24 @@ def t_COMMENT_error(token): token.lexer.skip(1) def t_COMMENT_eof(token): - errors.append(lexicographicError(row_and_col= (token.lineno, token.lexpos - readjust_col + 1), message= "EOF in comment")) + global readjust_col + errors.append(error(error_type="LexicographicError", row_and_col= (token.lineno, token.lexpos - readjust_col + 1), message= "EOF in comment")) token.lexer.pop_state() t_COMMENT_ignore = '' errors = [] +lexer = lex.lex() def tokenizer(stream_input): - lexer = lex.lex() + global readjust_col + readjust_col = 0 lexer.input(stream_input) token_list = [] + real_col = {} for tok in lexer: + real_col.update({ str(tok): tok.lexpos - readjust_col + 1 }) token_list.append(tok) - return errors, token_list + return errors, token_list, real_col diff --git a/src/compiler/components/syntax_analyzer.py b/src/compiler/components/syntax_analyzer.py index bfec609a..dfd04f4c 100644 --- a/src/compiler/components/syntax_analyzer.py +++ b/src/compiler/components/syntax_analyzer.py @@ -4,12 +4,13 @@ from ..components.lexer_analyzer import lexer class pyCoolParser: - def __init__(self, tokens): + def __init__(self, tokens, real_col): self.tokens = tokens self.errors_parser = [] self.parser = yacc.yacc(module=self) self.row_tracker = 0 self.column_corrector = 0 + self.real_col = real_col # precedence rules precedence = ( @@ -23,187 +24,134 @@ def __init__(self, tokens): ('left', 'AT'), ('left', 'DOT') ) - - - def positionTrack(self, - p : YaccProduction, - functionName, - indexLinespan = 0, - indexLexspan = 0): - """ print('%%%%%%%%%%%%%%%%%') - if type(p.stack[-1]) != YaccSymbol: - if self.row_tracker != p.stack[-1].lineno: - print('--------------') - print('Inside the if') - print('before lineno %d' %(self.row_tracker)) - self.column_corrector = p.stack[-1].lexpos - self.row_tracker = p.stack[-1].lineno - print (p.stack[-1]) - print('new lineno 
%d' %self.row_tracker) - print('and col corrector %d' %self.column_corrector) - print('--------------') - else: - print('--------------') - print('Outside before lineno %s' %self.row_tracker) - print('Outside current lineno %s' %p.lineno(indexLinespan)) - print('--------------') - print('the real col %d' %(p.lexpos(indexLinespan)- self.column_corrector)) - print('the fucked col %d' %(p.lexpos(indexLinespan))) - print(p.stack) - print('In the function %s' %functionName) - print('%%%%%%%%%%%%%%%%%')""" - pass + def p_program(self, p): """ program : class_list - """ - self.positionTrack(p, functionName='p_program') + """ def p_class_list(self, p): """ class_list : class_list class SEMICOLON | class SEMICOLON - """ - self.positionTrack(p, functionName='p_class_list') + """ def p_class(self, p): """ class : CLASS TYPE LBRACE features_list_opt RBRACE - """ - self.positionTrack(p, functionName='p_class') + """ def p_class_inherits(self, p): """ class : CLASS TYPE INHERITS TYPE LBRACE features_list_opt RBRACE - """ - self.positionTrack(p, functionName='p_class_inherits') + """ def p_feature_list_opt(self, p): """ features_list_opt : features_list | empty - """ - self.positionTrack(p, functionName='p_feature_list_opt') + """ def p_feature_list(self, p): """ features_list : features_list feature SEMICOLON | feature SEMICOLON - """ - self.positionTrack(p, functionName='p_feature_list') + """ def p_feature_method(self, p): """ feature : ID LPAREN formal_params_list RPAREN COLON TYPE LBRACE expression RBRACE - """ - self.positionTrack(p, functionName='p_feature_method') + """ def p_feature_method_no_formals(self, p): """ feature : ID LPAREN RPAREN COLON TYPE LBRACE expression RBRACE - """ - self.positionTrack(p, functionName='p_feature_method_no_formals') + """ def p_feature_attr_initialized(self, p): """ feature : ID COLON TYPE ASSIGN expression - """ - self.positionTrack(p, functionName='p_feature_attr_initialized') + """ def p_feature_attr(self, p): """ feature : ID COLON TYPE - """ - self.positionTrack(p, functionName='p_feature_attr') + """ def p_formal_list_many(self, p): """ formal_params_list : formal_params_list COMMA formal_param | formal_param - """ - self.positionTrack(p, functionName='p_formal_list_many') + """ def p_formal(self, p): """ formal_param : ID COLON TYPE - """ - self.positionTrack(p, functionName='p_formal') + """ def p_expression_object_identifier(self, p): """ expression : ID - """ - self.positionTrack(p, functionName='p_expression_object_identifier') + """ def p_expression_integer_constant(self, p): """ expression : INTEGER - """ - self.positionTrack(p, functionName='p_expression_integer_constant') + """ def p_expression_boolean_constant(self, p): """ expression : BOOLEAN - """ - self.positionTrack(p, functionName='p_expression_boolean_constant') + """ def p_expression_string_constant(self, p): """ expression : STRING - """ - self.positionTrack(p, functionName='p_expression_string_constant') + """ def p_expr_self(self, p): """ expression : SELF - """ - self.positionTrack(p, functionName='p_expr_self') + """ def p_expression_block(self, p): """ expression : LBRACE block_list RBRACE - """ - self.positionTrack(p, functionName='p_expression_block') + """ def p_block_list(self, p): """ block_list : block_list expression SEMICOLON | expression SEMICOLON - """ - self.positionTrack(p, functionName='p_block_list') + """ def p_expression_assignment(self, p): """ expression : ID ASSIGN expression - """ - self.positionTrack(p, functionName='p_expression_assignment') + """ # 
######################### UNARY OPERATIONS ####################################### def p_expression_new(self, p): """ expression : NEW TYPE - """ - self.positionTrack(p, functionName='p_expression_new') + """ def p_expression_isvoid(self, p): """ expression : ISVOID expression - """ - self.positionTrack(p, functionName='p_expression_isvoid') + """ def p_expression_integer_complement(self, p): """ expression : INT_COMP expression - """ - self.positionTrack(p, functionName='p_expression_integer_complement') + """ def p_expression_boolean_complement(self, p): """ expression : NOT expression - """ - self.positionTrack(p, functionName='p_expression_boolean_complement') + """ # ######################### PARENTHESIZED, MATH & COMPARISONS ##################### @@ -213,93 +161,80 @@ def p_expression_math_operations(self, p): | expression MINUS expression | expression MULTIPLY expression | expression DIVIDE expression - """ - self.positionTrack(p, functionName='p_expression_math_operations') + """ def p_expression_math_comparisons(self, p): """ expression : expression LT expression | expression LTEQ expression | expression EQ expression - """ - self.positionTrack(p, functionName='p_expression_math_comparisons') + """ def p_expression_with_parenthesis(self, p): """ expression : LPAREN expression RPAREN - """ - self.positionTrack(p, functionName='p_expression_with_parenthesis') + """ # ######################### CONTROL FLOW EXPRESSIONS ############################## def p_expression_if_conditional(self, p): """ expression : IF expression THEN expression ELSE expression FI - """ - self.positionTrack(p, functionName='p_expression_if_conditional') + """ def p_expression_while_loop(self, p): """ expression : WHILE expression LOOP expression POOL - """ - self.positionTrack(p, functionName='p_expression_while_loop') + """ ## ######################### LET EXPRESSIONS ######################################## def p_expression_let(self, p): """ expression : let_expression - """ - self.positionTrack(p, functionName='p_expression_let') + """ def p_expression_let_simple(self, p): """ let_expression : LET ID COLON TYPE IN expression | nested_lets COMMA LET ID COLON TYPE - """ - self.positionTrack(p, functionName='p_expression_let_simple') + """ def p_expression_let_initialized(self, p): """ let_expression : LET ID COLON TYPE ASSIGN expression IN expression | nested_lets COMMA LET ID COLON TYPE ASSIGN expression - """ - self.positionTrack(p, functionName='p_expression_let_initialized') + """ def p_inner_lets_simple(self, p): """ nested_lets : ID COLON TYPE IN expression | nested_lets COMMA ID COLON TYPE - """ - self.positionTrack(p, functionName='p_inner_lets_simple') + """ def p_inner_lets_initialized(self, p): """ nested_lets : ID COLON TYPE ASSIGN expression IN expression | nested_lets COMMA ID COLON TYPE ASSIGN expression - """ - self.positionTrack(p, functionName='p_inner_lets_initialized') + """ # ######################### CASE EXPRESSION ######################################## def p_expression_case(self, p): """ expression : CASE expression OF actions_list ESAC - """ - self.positionTrack(p, functionName='p_expression_case') + """ def p_actions_list(self, p): """ actions_list : actions_list action | action - """ - self.positionTrack(p, functionName='p_actions_list') + """ def p_action_expr(self, p): """ action : ID COLON TYPE ARROW expression SEMICOLON - """ - self.positionTrack(p, functionName='p_action_expr') + """ # ######################### METHODS DISPATCH ###################################### @@ 
-307,36 +242,31 @@ def p_action_expr(self, p): def p_expression_dispatch(self, p): """ expression : expression DOT ID LPAREN arguments_list_opt RPAREN - """ - self.positionTrack(p, functionName='p_expression_dispatch') + """ def p_arguments_list_opt(self, p): """ arguments_list_opt : arguments_list | empty - """ - self.positionTrack(p, functionName='p_arguments_list_opt') + """ def p_arguments_list(self, p): """ arguments_list : arguments_list COMMA expression | expression - """ - self.positionTrack(p, functionName='p_arguments_list') + """ def p_expression_static_dispatch(self, p): """ expression : expression AT TYPE DOT ID LPAREN arguments_list_opt RPAREN - """ - self.positionTrack(p, functionName='p_expression_static_dispatch') + """ def p_expression_self_dispatch(self, p): """ expression : ID LPAREN arguments_list_opt RPAREN - """ - self.positionTrack(p, functionName='p_expression_self_dispatch') + """ @@ -345,8 +275,7 @@ def p_expression_self_dispatch(self, p): def p_empty(self, p): """ empty : - """ - self.positionTrack(p, functionName='p_empty') + """ def findColumn(self, trackedRow): for i in range(len(self.parser.symstack) -1, 1, -1): @@ -358,20 +287,25 @@ def p_error(self, p): """ Error rule for Syntax Errors handling and reporting. """ - error_message = "EOF in string" if p is None else "Error at or near %s" %p.value - column_corrector = 0 if p is None else self.findColumn(p.lineno) - self.errors_parser.append( - error(message= error_message, - error_type="SyntacticError", - row_and_col= (0,0) if p is None else (p.lineno, p.lexpos - column_corrector - 1))) - print("Aquí ando") - self.parser.errok() - self.parser.token() + if p: + self.errors_parser.append( + error(message= "Error at or near %s" %p.value, + error_type="SyntacticError", + row_and_col= (p.lineno, self.real_col[str(p)] ))) + + else: + self.errors_parser.append( + error(message= "EOF in string", + error_type="SyntacticError", + row_and_col= (0, 0 ))) + + -def run_parser(tokens, source_program): - parserCool = pyCoolParser(tokens) +def run_parser(tokens, source_program, real_col): + #print(source_program) + parserCool = pyCoolParser(tokens, real_col) lexer.lineno = 1 - parserCool.parser.parse(source_program, tracking=True, lexer=lexer) + parserCool.parser.parse(source_program, lexer=lexer) return parserCool.errors_parser From 156089bb4a0bb177fe92c087e06cbf06e2b38b27 Mon Sep 17 00:00:00 2001 From: DiazRock Date: Fri, 5 Jun 2020 21:42:15 -0400 Subject: [PATCH 14/23] three errors and 9 shit/reduce conflicts --- src/compiler/components/syntax_analyzer.py | 28 ++++++++-------------- 1 file changed, 10 insertions(+), 18 deletions(-) diff --git a/src/compiler/components/syntax_analyzer.py b/src/compiler/components/syntax_analyzer.py index dfd04f4c..804919b0 100644 --- a/src/compiler/components/syntax_analyzer.py +++ b/src/compiler/components/syntax_analyzer.py @@ -188,7 +188,6 @@ def p_expression_while_loop(self, p): """ ## ######################### LET EXPRESSIONS ######################################## - def p_expression_let(self, p): """ expression : let_expression @@ -196,28 +195,21 @@ def p_expression_let(self, p): def p_expression_let_simple(self, p): """ - let_expression : LET ID COLON TYPE IN expression - | nested_lets COMMA LET ID COLON TYPE - """ - - def p_expression_let_initialized(self, p): + let_expression : LET nested_lets IN expression """ - let_expression : LET ID COLON TYPE ASSIGN expression IN expression - | nested_lets COMMA LET ID COLON TYPE ASSIGN expression - """ - - def p_inner_lets_simple(self, p): + 
+ def p_nested_lets_simple(self, p): """ - nested_lets : ID COLON TYPE IN expression + nested_lets : ID COLON TYPE | nested_lets COMMA ID COLON TYPE - """ - - def p_inner_lets_initialized(self, p): """ - nested_lets : ID COLON TYPE ASSIGN expression IN expression + + def p_nested_lets_initialize(self, p): + """ + nested_lets : ID COLON TYPE ASSIGN expression | nested_lets COMMA ID COLON TYPE ASSIGN expression - """ - + """ + # ######################### CASE EXPRESSION ######################################## def p_expression_case(self, p): From a1e2abb18a4107481e9356ba1f90b9798cf68a69 Mon Sep 17 00:00:00 2001 From: DiazRock Date: Fri, 5 Jun 2020 23:30:09 -0400 Subject: [PATCH 15/23] The insensitive case in keywords --- src/compiler/components/lexer_analyzer.py | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/src/compiler/components/lexer_analyzer.py b/src/compiler/components/lexer_analyzer.py index 5fce84c6..5f954ac3 100644 --- a/src/compiler/components/lexer_analyzer.py +++ b/src/compiler/components/lexer_analyzer.py @@ -35,10 +35,8 @@ 'pool':'POOL', 'class':'CLASS', 'loop':'LOOP', - 'true':'TRUE', 'inherits':'INHERITS', 'isvoid':'ISVOID', - 'false':'FALSE', "self": "SELF", } @@ -67,9 +65,9 @@ t_ARROW = r'\=\>' #complex rules -@TOKEN(r"(true|false)") +@TOKEN(r"(t[R|r][U|u][e|E]|f[a|A][l|L][s|S][e|E])") def t_BOOLEAN(token): - token.value = True if token.value == "true" else False + token.value = token.value.lower() return token @TOKEN(r"\d+") @@ -79,12 +77,12 @@ def t_INTEGER(token): @TOKEN(r"[A-Z][A-Za-z0-9_]*") def t_TYPE(token): - token.type = reserved.get(token.value, 'TYPE') + token.type = reserved.get(token.value.lower(), 'TYPE') return token @TOKEN(r"[a-z][A-Za-z0-9_]*") def t_ID(token): - token.type = reserved.get(token.value, "ID") + token.type = reserved.get(token.value.lower(), "ID") return token # Utility definitions From 9e23612b2d305dd0650ef55b81b053478a06afad Mon Sep 17 00:00:00 2001 From: DiazRock Date: Fri, 5 Jun 2020 23:33:00 -0400 Subject: [PATCH 16/23] Passed all test :) --- src/compiler/components/lexer_analyzer.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/compiler/components/lexer_analyzer.py b/src/compiler/components/lexer_analyzer.py index 5f954ac3..8ef53622 100644 --- a/src/compiler/components/lexer_analyzer.py +++ b/src/compiler/components/lexer_analyzer.py @@ -38,6 +38,7 @@ 'inherits':'INHERITS', 'isvoid':'ISVOID', "self": "SELF", + "not" : "NOT" } tokens += list(reserved.values()) @@ -54,7 +55,6 @@ t_LTEQ = r'\<\=' t_ASSIGN = r'\<\-' t_INT_COMP = r'~' -t_NOT = r'not' t_LBRACE = r'\{' t_RBRACE = r'\}' t_COLON = r'\:' From 467240d3afa00e51117524abc23a55bd13538c31 Mon Sep 17 00:00:00 2001 From: DiazRock Date: Mon, 8 Jun 2020 09:12:13 -0400 Subject: [PATCH 17/23] Passed all test for 'entrega-parser' --- src/main.py | 20 +++++++++++++++----- 1 file changed, 15 insertions(+), 5 deletions(-) diff --git a/src/main.py b/src/main.py index 82d313d1..0b246ae6 100644 --- a/src/main.py +++ b/src/main.py @@ -1,6 +1,7 @@ import sys, fileinput from argparse import ArgumentParser -from compiler.components.lexer_analyzer import tokenizer +from compiler.components.lexer_analyzer import tokenizer, tokens +from compiler.components.syntax_analyzer import run_parser parser_input = ArgumentParser(description= 'This is the Diaz-Horrach cool compiler, an school project.\nRead this help and see the ofitial repo') parser_input.add_argument('files_for_compile', help = 'The file(s) to be compiled', nargs= '+') @@ -14,13 +15,22 @@ file = 
open(args.files_for_compile[0]) working_input = file.read() -errors, tokens = tokenizer(working_input) +all_errors = [] +token_errors, tokens_for_input, real_col = tokenizer(working_input) +#print(tokens_for_input) -if errors: - for error in errors: +parser_errors = run_parser(tokens, working_input, real_col) + +all_errors += token_errors + parser_errors + +""" print('tokens for _input \n') +print(tokens_for_input) +print('---------------') """ +if all_errors: + for error in all_errors: print(error) exit(1) -print(tokens) + exit(0) From a48d4c9ce30d2870756840489eda8c8a189b12c9 Mon Sep 17 00:00:00 2001 From: DiazRock Date: Mon, 8 Jun 2020 09:15:36 -0400 Subject: [PATCH 18/23] Some definitions for a better code. Useless right now --- src/compiler/components/abstract_component.py | 1 + src/compiler/components/dummy_component.py | 5 +- src/compiler/initialize.py | 4 +- src/compiler/utils/compiler_containers.py | 42 +++---- src/compiler/utils/lexer_definitions.py | 115 +++++++++--------- 5 files changed, 80 insertions(+), 87 deletions(-) diff --git a/src/compiler/components/abstract_component.py b/src/compiler/components/abstract_component.py index 30b7ed07..a5c09476 100644 --- a/src/compiler/components/abstract_component.py +++ b/src/compiler/components/abstract_component.py @@ -10,6 +10,7 @@ def __init__(self, self.component_name = component_name self.debug_session = debug_session self.error_log_file = error_log_file + if build_after_initialize: self.build_component() diff --git a/src/compiler/components/dummy_component.py b/src/compiler/components/dummy_component.py index 36e261eb..d462f321 100644 --- a/src/compiler/components/dummy_component.py +++ b/src/compiler/components/dummy_component.py @@ -3,8 +3,8 @@ class dummy_component (Component): def __init__(self, component_name, *args, **kwargs): - component_name = component_name + "dummy_" - super().__init__(*args, component_name = component_name + "dummy_", **kwargs) + component_name = "dummy_" + component_name + super().__init__(*args, component_name = "dummy_" + component_name, **kwargs) @@ -12,4 +12,3 @@ def build_component(self): super().build_component() return "Sorry, this is just a %s component" %(self.component_name) - diff --git a/src/compiler/initialize.py b/src/compiler/initialize.py index 317c53dc..7ebf2a62 100644 --- a/src/compiler/initialize.py +++ b/src/compiler/initialize.py @@ -1,10 +1,8 @@ -from .utils.compiler_containers import component_container - +from utils import compiler_containers class compiler: def __init__(self, lexer, parser): self.symbolTable = {} - self.lexer = lexer self.parser = parser pass \ No newline at end of file diff --git a/src/compiler/utils/compiler_containers.py b/src/compiler/utils/compiler_containers.py index a9162f38..9484d1a7 100644 --- a/src/compiler/utils/compiler_containers.py +++ b/src/compiler/utils/compiler_containers.py @@ -1,27 +1,21 @@ from dependency_injector import containers, providers -from compiler.components.dummy_component import dummy_component -import lexer_definitions -from compiler.components.lexer_analyzer import lexer_analyzer +from ..components import dummy_component, lexer_analyzer +from ..utils import lexer_definitions -class component_container (containers.DeclarativeContainer): - dummy_lexer = providers.Factory(dummy_component, "Lexer") - cool_lexer = providers.Factory(lexer_analyzer) - dummy_parser = providers.Factory(dummy_component, "Parser") - - -container_dict = { - 'lexer_options': { 'd': component_container.dummy_lexer }, - 'parser_options': {'d' : 
component_container.dummy_parser} - } - -class lexer_analyzer_dependency_container (containers.DeclarativeContainer): - #This is just readonly properties - reserved_keywords_cool = lexer_definitions.basic_keywords - tokens_collection_cool = lexer_definitions.tokens_collection - simple_rules_cool = lexer_definitions.simple_rules - complex_rules_cool = lexer_definitions.complex_rules - error_handlers_cool = lexer_definitions.error_handlers - #---------------- - - +component_injector = { + 'lexer_options': { 'dummy': dummy_component, + 'cool' : lexer_analyzer }, + 'parser_options': {'dummy' : dummy_component} + } +params_for_component = { + 'lexer' : { + 'cool' : { + 'basic_keywords': lexer_definitions.basic_keywords, + 'simple_rules' : lexer_definitions.simple_rules, + 'complex_rules' : lexer_definitions.complex_rules, + 'tokens_collection' : lexer_definitions.tokens_collection, + 'error_handlers' : lexer_definitions.error_handlers + } + } +} \ No newline at end of file diff --git a/src/compiler/utils/lexer_definitions.py b/src/compiler/utils/lexer_definitions.py index 2b29f0f2..77c27e4e 100644 --- a/src/compiler/utils/lexer_definitions.py +++ b/src/compiler/utils/lexer_definitions.py @@ -23,6 +23,8 @@ def __eq__(self, other: str): if val != 'true' and val != 'false': return val == other.lower() return val[0] == other[0] and val[1:] == other.lower()[1:] + def __hash__(self): + return super().__hash__() basic_keywords = { "case": keyword("case"), @@ -71,27 +73,28 @@ def __eq__(self, other: str): #ignore spaces t_ignore = ' \t\r\f' -simple_rules = [ - t_LPAREN, - t_RPAREN, - t_LBRACE, - t_RBRACE, - t_COLON, - t_COMMA, - t_DOT, - t_SEMICOLON, - t_AT, - t_PLUS, - t_MINUS, - t_MULTIPLY, - t_DIVIDE, - t_EQ, - t_LT, - t_LTEQ, - t_ASSIGN, - t_INT_COMP, - t_NOT -] +simple_rules = { + 't_LPAREN': t_LPAREN, + 't_RPAREN': t_RPAREN, + 't_LBRACE': t_LBRACE, + 't_RBRACE': t_RBRACE, + 't_COLON': t_COLON, + 't_COMMA': t_COMMA, + 't_DOT': t_DOT, + 't_SEMICOLON': t_SEMICOLON, + 't_AT': t_AT, + 't_PLUS': t_PLUS, + 't_MINUS': t_MINUS, + 't_MULTIPLY': t_MULTIPLY, + 't_DIVIDE': t_DIVIDE, + 't_EQ': t_EQ, + 't_LT': t_LT, + 't_LTEQ': t_LTEQ, + 't_ASSIGN': t_ASSIGN, + 't_INT_COMP': t_INT_COMP, + 't_NOT': t_NOT, + 't_ignore' : t_ignore +} #Complex rules for cool @@ -150,7 +153,7 @@ def t_STRING_newline(token): @TOKEN(r"\"") -def t_STRING_end(self, token): +def t_STRING_end(token): if not token.lexer.string_backslashed: token.lexer.pop_state() token.value = token.lexer.stringbuf @@ -161,7 +164,7 @@ def t_STRING_end(self, token): token.lexer.string_backslashed = False @TOKEN(r"[^\n]") -def t_STRING_anything(self, token): +def t_STRING_anything( token): if token.lexer.string_backslashed: if token.value == 'b': token.lexer.stringbuf += '\b' @@ -193,74 +196,72 @@ def t_STRING_anything(self, token): # The comment states @TOKEN(r"\(\*") -def t_COMMENT_start(self, token): +def t_COMMENT_start(token): token.lexer.push_state("COMMENT") token.lexer.comment_count = 0 #Comments can be recursive @TOKEN(r"\(\*") -def t_COMMENT_startanother(self, t): +def t_COMMENT_startanother(t): t.lexer.comment_count += 1 @TOKEN(r"\*\)") -def t_COMMENT_end(self, token): +def t_COMMENT_end(token): if token.lexer.comment_count == 0: token.lexer.pop_state() else: token.lexer.comment_count -= 1 -# COMMENT ignored characters -t_COMMENT_ignore = '' +t_ignore_SINGLE_LINE_COMMENT = r"\-\-[^\n]*" #Error handlers - +#(4, 2) - LexicographicError: ERROR "!" # STRING error handler -def t_STRING_error(self, token): - print("Illegal character! 
Line: {0}, character: {1}".format(token.lineno, token.value[0])) +def t_STRING_error( token): + print('({0}, {1}) - LexicographicError: ERROR "{2}"'.format(token.lineno, token.lexpos, token.value[0])) token.lexer.skip(1) - +#(55, 46) - LexicographicError: EOF in comment # COMMENT error handler -def t_COMMENT_error(self, token): +def t_COMMENT_error( token): + print("({0}, {1}) - LexicographicError: EOF in comment".format(token.lineno, token.lexpos)) token.lexer.skip(1) -def t_error(self, token): - print("Illegal character! Line: {0}, character: {1}".format(token.lineno, token.value[0])) + +def t_error( token): + print('({0}, {1}) - LexicographicError: ERROR "{2}"'.format(token.lineno, token.lexpos, token.value[0])) token.lexer.skip(1) #Complex rules list -complex_rules = [ - t_BOOLEAN, - t_INTEGER, - t_TYPE, - t_newline, - t_ID, +complex_rules = { + 't_BOOLEAN': t_BOOLEAN, + 't_INTEGER': t_INTEGER, + 't_TYPE': t_TYPE, + 't_newline': t_newline, + 't_ID': t_ID, #---------- #String states rules - t_STRING_start, - t_STRING_newline, - t_STRING_anything, - t_STRING_end, - t_STRING_ignore, + 't_STRING_start': t_STRING_start, + 't_STRING_newline': t_STRING_newline, + 't_STRING_anything': t_STRING_anything, + 't_STRING_end': t_STRING_end, #---------- #Comment states rules - t_COMMENT_start, - t_COMMENT_startanother, - t_COMMENT_end, - t_COMMENT_ignore - ] - + 't_COMMENT_start': t_COMMENT_start, + 't_COMMENT_startanother': t_COMMENT_startanother, + 't_COMMENT_end': t_COMMENT_end, +} #Error handlers -error_handlers = [ - t_STRING_error, - t_COMMENT_error, - t_error -] +error_handlers = { + 't_STRING_error': t_STRING_error, + 't_COMMENT_error': t_COMMENT_error, + 't_error': t_error +} From c1b0ab2055e8c3211b53108de2d689fab91a1df9 Mon Sep 17 00:00:00 2001 From: DiazRock Date: Tue, 16 Jun 2020 10:31:48 -0400 Subject: [PATCH 19/23] The errors definitions --- src/compiler/utils/errors.py | 10 ++++++++++ 1 file changed, 10 insertions(+) create mode 100644 src/compiler/utils/errors.py diff --git a/src/compiler/utils/errors.py b/src/compiler/utils/errors.py new file mode 100644 index 00000000..fcb6af92 --- /dev/null +++ b/src/compiler/utils/errors.py @@ -0,0 +1,10 @@ +class error: + def __init__(self, error_type, row_and_col, message): + self.error_type = error_type + self.row_and_col = row_and_col + self.message = message + + def __str__(self): + return '(%d, %d) - %s: %s' %(self.row_and_col[0], self.row_and_col[1], self.error_type, self.message) + + __repr__ = __str__ \ No newline at end of file From 0e9c7a67996a9a4ae80f0b2a9d4c807a790fb48a Mon Sep 17 00:00:00 2001 From: DiazRock Date: Tue, 6 Oct 2020 13:09:51 -0400 Subject: [PATCH 20/23] This is the one who works --- src/compiler/components/lexer_analyzer.py | 6 +++--- src/compiler/components/syntax_analyzer.py | 18 +++++++++--------- 2 files changed, 12 insertions(+), 12 deletions(-) diff --git a/src/compiler/components/lexer_analyzer.py b/src/compiler/components/lexer_analyzer.py index 8ef53622..4353af2e 100644 --- a/src/compiler/components/lexer_analyzer.py +++ b/src/compiler/components/lexer_analyzer.py @@ -14,7 +14,7 @@ "LPAREN", "RPAREN", "LBRACE", "RBRACE", "COLON", "COMMA", "DOT", "SEMICOLON", "AT", # Operators - "PLUS", "MINUS", "MULTIPLY", "DIVIDE", "EQ", "LT", "LTEQ", "ASSIGN", "INT_COMP", "NOT", + "PLUS", "MINUS", "MULTIPLY", "DIVIDE", "EQ", "LT", "LTEQ", "ASSIGN", "INT_COMP", # Special Operators "ARROW" @@ -38,7 +38,7 @@ 'inherits':'INHERITS', 'isvoid':'ISVOID', "self": "SELF", - "not" : "NOT" + "not": "NOT" } tokens += 
list(reserved.values()) @@ -88,7 +88,7 @@ def t_ID(token): # Utility definitions @TOKEN(r'\n+') def t_newline(t): - global readjust_col + global readjust_col readjust_col = t.lexpos + len(t.value) t.lexer.lineno += len(t.value) diff --git a/src/compiler/components/syntax_analyzer.py b/src/compiler/components/syntax_analyzer.py index 804919b0..ae22da5e 100644 --- a/src/compiler/components/syntax_analyzer.py +++ b/src/compiler/components/syntax_analyzer.py @@ -24,29 +24,29 @@ def __init__(self, tokens, real_col): ('left', 'AT'), ('left', 'DOT') ) - + def p_program(self, p): """ program : class_list - """ - + """ + def p_class_list(self, p): """ class_list : class_list class SEMICOLON | class SEMICOLON - """ - + """ + def p_class(self, p): """ class : CLASS TYPE LBRACE features_list_opt RBRACE - """ - + """ + def p_class_inherits(self, p): """ class : CLASS TYPE INHERITS TYPE LBRACE features_list_opt RBRACE """ - + def p_feature_list_opt(self, p): """ features_list_opt : features_list @@ -103,7 +103,7 @@ def p_expression_integer_constant(self, p): def p_expression_boolean_constant(self, p): """ expression : BOOLEAN - """ + """ def p_expression_string_constant(self, p): """ From f359a6a9f3ab8c3ef1a6fef39cf2076818d8c26f Mon Sep 17 00:00:00 2001 From: DiazRock Date: Sun, 24 Jan 2021 22:25:22 -0500 Subject: [PATCH 21/23] Selected only the first error --- src/main.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/main.py b/src/main.py index 0b246ae6..48bc0652 100644 --- a/src/main.py +++ b/src/main.py @@ -17,18 +17,18 @@ all_errors = [] token_errors, tokens_for_input, real_col = tokenizer(working_input) +if token_errors: + print(token_errors[0]) + exit(1) #print(tokens_for_input) parser_errors = run_parser(tokens, working_input, real_col) -all_errors += token_errors + parser_errors - """ print('tokens for _input \n') print(tokens_for_input) print('---------------') """ -if all_errors: - for error in all_errors: - print(error) +if parser_errors: + print(parser_errors[0]) exit(1) From 659a8028bc07c33545f9fa13f5aa065160ad9a43 Mon Sep 17 00:00:00 2001 From: DiazRock Date: Sun, 24 Jan 2021 23:10:27 -0500 Subject: [PATCH 22/23] Added the python version to coolc.sh file --- src/coolc.sh | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/src/coolc.sh b/src/coolc.sh index b6e7d50d..002ce0cb 100755 --- a/src/coolc.sh +++ b/src/coolc.sh @@ -1,6 +1,3 @@ -# Incluya aquí las instrucciones necesarias para ejecutar su compilador - - INPUT_FILE=$1 OUTPUT_FILE=${INPUT_FILE:0: -2}mips @@ -11,10 +8,10 @@ OUTPUT_FILE=${INPUT_FILE:0: -2}mips # Si su compilador no lo hace ya, aquí puede imprimir la información de contacto -echo "LINEA_CON_NOMBRE_Y_VERSION_DEL_COMPILADOR" # TODO: Recuerde cambiar estas -echo "CopyLeft (L) 2020: Alejandro Díaz Roque, Rafael Horrach" +echo "DiazRock Compiler" # TODO: Recuerde cambiar estas +echo "CopyLeft (L) 2020: Alejandro Díaz Roque" # Llamar al compilador -python main.py "$@" +python3 main.py "$@" #echo "Compiling $INPUT_FILE into $OUTPUT_FILE" From 709a60c77c2ea61b63eb1ab1e61231742a65ba3c Mon Sep 17 00:00:00 2001 From: DiazRock Date: Sun, 24 Jan 2021 23:19:05 -0500 Subject: [PATCH 23/23] Added the ply requirement --- requirements.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/requirements.txt b/requirements.txt index 9eb0cad1..8cf0ffb8 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,2 +1,3 @@ +ply pytest pytest-ordering