diff --git a/README.md b/README.md index d96dc6b..992ce4d 100644 --- a/README.md +++ b/README.md @@ -36,6 +36,14 @@ # problemas +- [ ] Output colorido em testes +- [ ] Todo ID deve ser checado pelo parser se é reservado? +- [ ] Refactor indispensável no parser: remover erros que não serão usados +- [ ] Refatorar testes: setup e teardown +- [ ] Remover enums erros desnecessarias do parser +- [X] Objeto parser com seu proprio estado (token, file, line) +- [ ] Contador de linha esta funcionando no analisador sintatico? +- [ ] Parser testes unitarios + integracao - [ ] Adicionar especificacao da linguagem `proc` e manter atualizada - [ ] Refactor: remover transition.h e transition.c - [ ] Refactor: organizar melhor as enums... diff --git a/build.sh b/build.sh index 8e4ba10..9a47261 100755 --- a/build.sh +++ b/build.sh @@ -1,9 +1,12 @@ #!/bin/bash if [[ $1 == 'test' ]]; then - gcc -g ./test/*.c ./lexer/*.c -o croc - echo 'built test.' + gcc -g ./test/*.c ./lexer/*.c ./parser/*.c -o croc + echo 'built test.' +elif [[ $1 == 'debug' ]]; then + gcc -g ./debug.c ./lexer/*.c ./parser/*.c -o croc + echo 'built debug.' else - gcc -g ./main.c ./lexer/*.c -o croc - echo 'built main.' + gcc -g ./main.c ./lexer/*.c ./parser/*.c -o croc + echo 'built main.' fi diff --git a/debug.c b/debug.c new file mode 100644 index 0000000..18a9e40 --- /dev/null +++ b/debug.c @@ -0,0 +1,73 @@ +#include "./lexer/lexer.h" +#include "./lexer/printer.h" +#include "./lexer/types.h" +#include "./parser/parser.h" +#include "./parser/syntax_error.h" +#include +#include +#include + +// ANSI escape codes +#define RESET "\033[0m" +#define RED "\033[31m" +#define GREEN "\033[32m" + +int main(int argc, char *argv[]) { + /*if (argc != 2) {*/ + /* fprintf(stderr, "Error. Usage: croc \n");*/ + /* return EXIT_FAILURE;*/ + /*}*/ + + FILE *fd; + int lineCount = 1; + + fd = fopen("./doc/examples/code.proc", "r"); + if (fd == NULL) { + fprintf(stderr, "Error opening file\n"); + return EXIT_FAILURE; + } + + // lexing! + while (true) { + struct Token token = lexerGetNextChar(fd, &lineCount); + if (token.category == END_OF_FILE) { + fclose(fd); + break; + } + // handle malformed manually to + // keep the printing callback simple. + if (token.category == MALFORMED_TOKEN) { + fprintf(stderr, RED "ERROR: MALFORMED TOKEN %s ON LINE %d\n" RESET, token.lexeme, + lineCount); + exit(EXIT_FAILURE); + } + printToken(token); + printf("\n"); + } + + printf(GREEN "Lexing successful!" RESET "\n"); + + int *lc; + int line = 1; + lc = &line; + + fd = fopen("./doc/examples/code.proc", "r"); + if (fd == NULL) { + fprintf(stderr, "Error opening file\n"); + return EXIT_FAILURE; + } + + // for integration tests, prog itself needs a previously initialized parser + // with a token too + struct Token token = lexerGetNextChar(fd, lc); + struct Parser parser = { + .fd = fd, .lineCount = lc, .token = token}; + + enum SYNTAX_ERROR error = prog(&parser); + if (error != NO_ERROR) { + printSyntaxError(error, lc); + exit(EXIT_FAILURE); + } + + printf(GREEN "Parsing successful!" RESET "\n"); +} diff --git a/doc/examples/code.proc b/doc/examples/code.proc index 6dda013..381db29 100644 --- a/doc/examples/code.proc +++ b/doc/examples/code.proc @@ -1,5 +1 @@ -var a = 80 // something -if (a == 80) // something else -for (int i = 0) 'a' '\n' banana '\0' -4.4 -int a[2] = {1, 2} +prot a - diff --git a/doc/flap2.jff b/doc/flap2.jff deleted file mode 100644 index cd3a15a..0000000 --- a/doc/flap2.jff +++ /dev/null @@ -1,512 +0,0 @@ - - fa - - - - 877.0 - 320.0 - - - 755.0 - 186.0 - - - 645.0 - 136.0 - - - 490.0 - 29.0 - - - - - 869.0 - 136.0 - - - 792.0 - 32.0 - - - - - 985.0 - 169.0 - - - 1046.0 - 87.0 - - - 1179.0 - 28.0 - - - - - 1070.0 - 243.0 - - - 1212.0 - 121.0 - - - 1190.0 - 235.0 - - - 1326.0 - 304.0 - - - 1452.0 - 141.0 - - - - - 1122.0 - 383.0 - - - 1348.0 - 417.0 - - - - - 1259.0 - 471.0 - - - 1332.0 - 559.0 - - - 1087.0 - 520.0 - - - 1110.0 - 690.0 - - - - - 1041.0 - 676.0 - - - - - 979.0 - 687.0 - - - - - 1244.0 - 673.0 - - - - - 921.0 - 670.0 - - - - - 867.0 - 694.0 - - - - - 810.0 - 683.0 - - - - - 743.0 - 701.0 - - - - - 665.0 - 702.0 - - - - - 535.0 - 173.0 - - - 419.0 - 48.0 - - - - - 307.0 - 48.0 - - - - - 464.0 - 217.0 - - - 208.0 - 53.0 - - - - - 92.0 - 52.0 - - - - - 386.0 - 256.0 - - - 39.0 - 132.0 - - - - - 468.0 - 346.0 - - - 85.0 - 249.0 - - - - - 93.0 - 352.0 - - - - - 511.0 - 422.0 - - - 81.0 - 434.0 - - - - - 164.0 - 485.0 - - - - - 538.0 - 533.0 - - - 317.0 - 615.0 - - - - - 513.0 - 704.0 - - - - - - 14 - 15 - aspas - - - 0 - 4 - digito - - - 0 - 26 - fechacolchete - - - 1 - 2 - letra - - - 14 - 16 - ch - - - 42 - 43 - = - - - 0 - 28 - = - - - 0 - 36 - < - - - 0 - 34 - | - - - 0 - 0 - tab, espaco, \n - - - 0 - 21 - multiplicacao - - - 17 - 22 - outro* - - - 12 - 13 - apostrofo - - - 28 - 29 - = - - - 31 - 32 - & - - - 11 - 12 - 0, n - - - 18 - 0 - \n - - - 4 - 6 - ponto - - - 9 - 10 - ch - - - 0 - 23 - abreparenteses - - - 28 - 30 - outro* - - - 31 - 33 - outro* - - - 39 - 41 - outro* - - - 0 - 25 - abrecolchete - - - 4 - 5 - outro* - - - 0 - 17 - barra - - - 2 - 2 - letra, digito, sublinha - - - 17 - 18 - barra - - - 0 - 27 - virgula - - - 0 - 19 - mais - - - 1 - 1 - sublinha - - - 10 - 13 - apostrofo - - - 39 - 40 - = - - - 6 - 7 - digito - - - 0 - 1 - sublinha - - - 36 - 38 - outro* - - - 9 - 11 - \ - - - 0 - 39 - > - - - 0 - 20 - menos - - - 0 - 24 - fechaparenteses - - - 0 - 31 - & - - - 42 - 44 - outro* - - - 36 - 37 - = - - - 34 - 35 - | - - - 0 - 9 - apostrofo - - - 16 - 15 - aspas - - - 16 - 16 - ch - - - 7 - 8 - outro* - - - 2 - 3 - outro* - - - 18 - 18 - outro* - - - 0 - 14 - aspas - - - 0 - 2 - letra - - - 0 - 42 - ! - - - 4 - 4 - digito - - - 7 - 7 - digito - - - \ No newline at end of file diff --git a/doc/flap2.png b/doc/flap2.png deleted file mode 100644 index 39803a4..0000000 Binary files a/doc/flap2.png and /dev/null differ diff --git a/doc/spec_proc_152.pdf b/doc/spec_proc_152.pdf new file mode 100644 index 0000000..90bf177 Binary files /dev/null and b/doc/spec_proc_152.pdf differ diff --git a/lexer/printer.c b/lexer/printer.c index 51fe1c6..8505ed7 100644 --- a/lexer/printer.c +++ b/lexer/printer.c @@ -1,10 +1,13 @@ #include "printer.h" +#include "transition.h" #include "types.h" #include #define TOKEN_CATEGORY_QTY 7 #define SIGN_CATEGORY_QTY 22 +#define KEYWORD_QTY 31 + struct TokenCategoryHandler tokenCategoryHandleData[TOKEN_CATEGORY_QTY] = { {RSV, handleRsv}, {ID, handleId}, {CHARCON, handleCharcon}, {STRINGCON, handleStringcon}, @@ -35,7 +38,27 @@ struct SignCategoryHandler signCategoryHandleData[SIGN_CATEGORY_QTY] = { {OPEN_CURLY, "OPEN_CURLY"}, {CLOSE_CURLY, "CLOSE_CURLY"}}; -void handleRsv(struct Token token) { printf("", token.lexeme); } +void handleRsv(struct Token token) { + + struct ReservedWord reservedKeywords[KEYWORD_QTY] = { + {"const", CONST}, {"init", INIT}, {"endp", ENDP}, + {"char", CHAR}, {"int", INT}, {"real", REAL}, + {"bool", BOOL}, {"do", DO}, {"while", WHILE}, + {"endw", ENDW}, {"var", VAR}, {"from", FROM}, + {"to", TO}, {"dt", DT}, {"by", BY}, + {"if", IF}, {"endv", ENDV}, {"elif", ELIF}, + {"else", ELSE}, {"endi", ENDI}, {"getout", GETOUT}, + {"getint", GETINT}, {"getchar", GETCHAR}, {"getreal", GETREAL}, + {"putint", PUTINT}, {"putchar", PUTCHAR}, {"putreal", PUTREAL}, + {"getstr", GETSTR}, {"putstr", PUTSTR}, {"def", DEF}, + {"prot", PROT}}; + + for (int i = 0; i < KEYWORD_QTY; i++) { + if (token.signCode == reservedKeywords[i].reservedCode) { + printf("", reservedKeywords[i].lexeme); + } + } +} void handleId(struct Token token) { printf("", token.lexeme); } diff --git a/lexer/transition.c b/lexer/transition.c index beba8b3..f12cf30 100644 --- a/lexer/transition.c +++ b/lexer/transition.c @@ -71,8 +71,9 @@ bool handleTransitionAndWasTokenBuilt(FILE *fd, char ch, struct Token *token, for (int i = 0; i < KEYWORD_QTY; i++) { if (strcmp(token->lexeme, reservedKeywords[i].lexeme) == 0) { token->category = RSV; - token->signCode = reservedKeywords[i].reservedCode; strcpy(token->lexeme, reservedKeywords[i].lexeme); + token->signCode = reservedKeywords[i].reservedCode; + break; } } } else if (token->category == CHARCON) { diff --git a/main.c b/main.c index 30fd3b8..5ce1558 100644 --- a/main.c +++ b/main.c @@ -1,10 +1,17 @@ #include "./lexer/lexer.h" #include "./lexer/printer.h" #include "./lexer/types.h" +#include "./parser/parser.h" +#include "./parser/syntax_error.h" #include #include #include +// ANSI escape codes +#define RESET "\033[0m" +#define RED "\033[31m" +#define GREEN "\033[32m" + int main(int argc, char *argv[]) { if (argc != 2) { fprintf(stderr, "Error. Usage: croc \n"); @@ -25,16 +32,43 @@ int main(int argc, char *argv[]) { struct Token token = lexerGetNextChar(fd, &lineCount); if (token.category == END_OF_FILE) { fclose(fd); - return EXIT_SUCCESS; + break; } // handle malformed manually to // keep the printing callback simple. - switch (token.category) { - case MALFORMED_TOKEN: - printf("ERROR: MALFORMED TOKEN %s ON LINE %d\n", token.lexeme, lineCount); + if (token.category == MALFORMED_TOKEN) { + fprintf(stderr, RED "ERROR: MALFORMED TOKEN %s ON LINE %d\n" RESET, token.lexeme, + lineCount); exit(EXIT_FAILURE); } printToken(token); printf("\n"); } + + printf(GREEN "Lexing successful!" RESET "\n"); + + int *lc; + int line = 1; + lc = &line; + + // opening the file again... + fd = fopen("./doc/examples/code.proc", "r"); + if (fd == NULL) { + fprintf(stderr, "Error opening file\n"); + return EXIT_FAILURE; + } + + // for integration tests, prog itself needs a previously initialized parser + // with a token too + struct Token token = lexerGetNextChar(fd, lc); + struct Parser parser = { + .fd = fd, .lineCount = lc, .token = token}; + + enum SYNTAX_ERROR error = prog(&parser); + if (error != NO_ERROR) { + printSyntaxError(error, lc); + exit(EXIT_FAILURE); + } + + printf(GREEN "Parsing successful!" RESET "\n"); } diff --git a/parser/parser.c b/parser/parser.c index b49668f..0ac5a5d 100644 --- a/parser/parser.c +++ b/parser/parser.c @@ -1,141 +1,328 @@ #include "parser.h" #include "../lexer/lexer.h" #include "../lexer/transition.h" -#include -#include +#include "syntax_error.h" + +#define MAX_ARRAY_DIMENSIONS 2 + +enum SYNTAX_ERROR op_rel(struct Parser *parser) { + if (parser->token.category != SIGN || + !(parser->token.signCode == COMPARISON || + parser->token.signCode == DIFFERENT || + parser->token.signCode == SMALLER_EQUAL || + parser->token.signCode == SMALLER_EQUAL || + parser->token.signCode == LARGER_EQUAL || + parser->token.signCode == LARGER_THAN)) { + return INVALID_OPERATOR; + } + + return NO_ERROR; +} + +enum SYNTAX_ERROR fator(struct Parser *parser) { + if (!(parser->token.category == ID || parser->token.category == INTCON || + parser->token.category == REALCON || + parser->token.category == CHARCON || + (parser->token.category == SIGN && + parser->token.signCode == OPEN_PAR) || + (parser->token.category == SIGN && + parser->token.signCode == NEGATION))) { + return NO_FACTOR_VALID_START_SYMBOL; + } + + // no need to validate int/real/char? + + // !fator + if (parser->token.category == SIGN && parser->token.signCode == NEGATION) { + parser->token = lexerGetNextChar(parser->fd, parser->lineCount); + enum SYNTAX_ERROR error = fator(parser); + if (error != NO_ERROR) { + return NO_FACTOR_AFTER_BANG; + } + } + + // (expr) + if (parser->token.category == SIGN && parser->token.signCode == OPEN_PAR) { + enum SYNTAX_ERROR error = expr(parser); + if (error != NO_ERROR) { + return error; + } + } + + // id {[expr]} + if (parser->token.category == ID) { + parser->token = lexerGetNextChar(parser->fd, parser->lineCount); + if (parser->token.category == SIGN && + parser->token.signCode == OPEN_BRACK) { + enum SYNTAX_ERROR error = arrayFator(parser); + if (error != NO_ERROR) { + return error; + } + } else if (parser->token.category == SIGN && + parser->token.signCode == CLOSE_BRACK) { + return INVALID_FACTOR_ARRAY_BRACKET_OPEN; + } else { + return NO_ERROR; + } + } + return NO_ERROR; +} + +enum SYNTAX_ERROR arrayFator(struct Parser *parser) { + parser->token = lexerGetNextChar(parser->fd, parser->lineCount); + enum SYNTAX_ERROR error = expr(parser); + if (error != NO_ERROR) { + return error; + } + + parser->token = lexerGetNextChar(parser->fd, parser->lineCount); + if (!(parser->token.category == SIGN && + parser->token.signCode == CLOSE_BRACK)) { + return INVALID_FACTOR_ARRAY_BRACKET_CLOSE; + } + + return NO_ERROR; +} + +enum SYNTAX_ERROR expr(struct Parser *parser) { + // TODO: + return NO_ERROR; +} /** * prog accepts repetitions of declarations of variables (decl_list_var), or * procedures (decl_list_proc). */ -void prog(FILE *fd, int *lineCount) { - struct Token token = lexerGetNextChar(fd, lineCount); - +enum SYNTAX_ERROR prog(struct Parser *parser) { // Both declaration of variables and procedures start with reserved words. - if (token.category == RSV) { + if (parser->token.category == RSV) { // Valid variable declaration start tokens - while (token.signCode == CONST || token.signCode == CHAR || - token.signCode == INT || token.signCode == REAL || - token.signCode == BOOL) { - declListVar(fd, lineCount, token); + while (parser->token.signCode == CONST || parser->token.signCode == CHAR || + parser->token.signCode == INT || parser->token.signCode == REAL || + parser->token.signCode == BOOL) { + enum SYNTAX_ERROR error = declListVar(parser); + if (error != NO_ERROR) { + return error; + } } // Valid procedure declaration/definition tokens - while (token.signCode == DEF || token.signCode == PROT) { - declDefProc(fd, lineCount, token); + while (parser->token.signCode == DEF || parser->token.signCode == PROT) { + enum SYNTAX_ERROR error = declDefProc(parser); + if (error != NO_ERROR) { + return error; + } } + } else { + return INVALID_PROG_START_KEYWORD; } + return NO_ERROR; } -/** - * decl_list_var accepts optionally a `const`, followed by variable type, and - * declaration of one or more variables. - */ -void declListVar(FILE *fd, int *lineCount, struct Token token) { - // if const, read next - if (token.signCode == CONST) { - token = lexerGetNextChar(fd, lineCount); - } - // get remaining parts of variable declaration - tipo(fd, lineCount, token); - declVar(fd, lineCount, token); - // can accept more variable declarations - while (token.category == SIGN && token.signCode == COMMA) { - // consume next - token = lexerGetNextChar(fd, lineCount); - declVar(fd, lineCount, token); +/** decl_list_var accepts optionally a `const`, followed by variable type, + * and +declaration of one or more variables.*/ +enum SYNTAX_ERROR declListVar(struct Parser *parser) { + if (parser->token.signCode == CONST) { + parser->token = lexerGetNextChar(parser->fd, parser->lineCount); } -} - -/** - * type must be valid - */ -void tipo(FILE *fd, int *lineCount, struct Token token) { - if (token.signCode == CHAR || token.signCode == INT || - token.signCode == REAL || token.signCode == BOOL) { - struct Token token = lexerGetNextChar(fd, lineCount); - } else { - // report error if type not detected - fprintf(stderr, "Syntax error: variable type not detected\n"); - exit(EXIT_FAILURE); + if (!(parser->token.category == RSV && + (parser->token.signCode == INT || parser->token.signCode == REAL || + parser->token.signCode == CHAR || parser->token.signCode == BOOL))) { + return INVALID_TYPE; } -} -/** - * We already dealt with type. - * Variable declaration is valid: - * Simply declaring a identifier, or optionally initializing it. - * Can declare as array using curly braces with size given by number or id. Can - * optionally initialize it with square brackets. - */ -void declVar(FILE *fd, int *lineCount, struct Token token) { - // id mandatory. - if (token.category != ID) { - fprintf(stderr, "Syntax error: expected variable ID\n"); - exit(EXIT_FAILURE); - } - // consume next - token = lexerGetNextChar(fd, lineCount); - - // handle simple variable or array - // simple variable - if (token.category == SIGN && token.signCode == ASSIGN) { - // assignment detected, check next token - token = lexerGetNextChar(fd, lineCount); - if (token.category == CHARCON || token.category == STRINGCON || - token.category == INTCON || token.category == REALCON) { - // consume next - token = lexerGetNextChar(fd, lineCount); - } else { - fprintf(stderr, "Syntax error: expected valid variable initialization\n"); - exit(EXIT_FAILURE); + parser->token = lexerGetNextChar(parser->fd, parser->lineCount); + enum SYNTAX_ERROR error = declVar(parser); + if (error != NO_ERROR) { + return error; + } + // TODO: handle multiple variable declarations here + parser->token = lexerGetNextChar(parser->fd, parser->lineCount); + while (parser->token.category == SIGN && parser->token.signCode == COMMA) { + parser->token = lexerGetNextChar(parser->fd, parser->lineCount); + enum SYNTAX_ERROR error = declVar(parser); + if (error != NO_ERROR) { + return error; } - - // array: should deal with multidimensional arrays (declaration) and - // initialization - } else if (token.category == SIGN && token.signCode == OPEN_BRACK) { - arrayDeclaration(fd, lineCount, token); } + return NO_ERROR; } -void arrayDeclaration(FILE *fd, int *lineCount, struct Token token) { - while (token.category == SIGN && token.signCode == OPEN_BRACK) { - // consume next - token = lexerGetNextChar(fd, lineCount); - // valid array initializer - if (token.category != ID && token.category != INTCON) { - fprintf(stderr, "Syntax error: expected valid array size\n"); - exit(EXIT_FAILURE); - } else { - // consume next - token = lexerGetNextChar(fd, lineCount); - if (token.category != SIGN && token.signCode != CLOSE_BRACK) { +enum SYNTAX_ERROR declVar(struct Parser *parser) { + if (parser->token.category != ID) { + return NO_VAR_ID; + } - fprintf(stderr, "Syntax error: expected array bracket closing\n"); - exit(EXIT_FAILURE); + parser->token = lexerGetNextChar(parser->fd, parser->lineCount); + bool isArray = false; + int arrayDimensions = 0; + // is array + if (parser->token.category == SIGN && parser->token.signCode == OPEN_BRACK) { + isArray = true; + while (parser->token.category == SIGN && + parser->token.signCode == OPEN_BRACK) { + arrayDimensions++; + if (arrayDimensions > MAX_ARRAY_DIMENSIONS) { + return INVALID_ARRAY_DIMENSION_DECLARATION; + } + parser->token = lexerGetNextChar(parser->fd, parser->lineCount); + + if (!(parser->token.category == INTCON || parser->token.category == ID)) { + return INVALID_ARRAY_SUBSCRIPT_DEC; + } + + parser->token = lexerGetNextChar(parser->fd, parser->lineCount); + if (!(parser->token.category == SIGN && + parser->token.signCode == CLOSE_BRACK)) { + return INVALID_ARRAY_BRACKET_DEC_CLOSE; } else { - token = lexerGetNextChar(fd, lineCount); + parser->token = lexerGetNextChar(parser->fd, parser->lineCount); + } + } + } + + // assignment + if (parser->token.category == SIGN && parser->token.signCode == ASSIGN) { + if (isArray) { + parser->token = lexerGetNextChar(parser->fd, parser->lineCount); + if (!(parser->token.category == SIGN && + parser->token.signCode == OPEN_CURLY)) { + return INVALID_ARRAY_INIT_CURLY_OPEN; + } + + parser->token = lexerGetNextChar(parser->fd, parser->lineCount); - if (token.category == SIGN && token.signCode == OPEN_BRACK) { - // dealing with multidimensional arrays + while (true) { + if (!(parser->token.category == INTCON || + parser->token.category == REALCON || + parser->token.category == CHARCON)) { + return INVALID_ARRAY_TYPE_INIT; + } + + parser->token = lexerGetNextChar(parser->fd, parser->lineCount); + if (parser->token.category == SIGN && parser->token.signCode == COMMA) { + parser->token = lexerGetNextChar(parser->fd, parser->lineCount); continue; - } else if (token.category == SIGN && token.signCode == ASSIGN) { - // consume next - token = lexerGetNextChar(fd, lineCount); - if (token.category != SIGN && token.signCode != OPEN_CURLY) { - - } - // it's optional in the grammar. - arrayInitialization(fd, lineCount, token); + } else if (parser->token.category == SIGN && + parser->token.signCode == CLOSE_CURLY) { break; } else { - // no array declaration. what to do? + return INVALID_ARRAY_INIT_CURLY_CLOSE; } } + + } else { + parser->token = lexerGetNextChar(parser->fd, parser->lineCount); + if (!(parser->token.category == INTCON || + parser->token.category == REALCON || + parser->token.category == CHARCON)) { + return INVALID_VAR_TYPE_INIT; + } } } + + return NO_ERROR; } -void arrayInitialization(FILE *fd, int *lineCount, struct Token token) {} +enum SYNTAX_ERROR declDefProc(struct Parser *parser) { + bool isDef = false; + if (parser->token.signCode == DEF) { + isDef = true; + } + parser->token = lexerGetNextChar(parser->fd, parser->lineCount); + + if (isDef) { + if (!(parser->token.category == ID || parser->token.signCode == INIT)) { + return NO_DEF_ID; + } + // is prot + } else { + enum SYNTAX_ERROR error = declProt(parser); + if (error != NO_ERROR) { + return error; + } + } + + return NO_ERROR; +} + +enum SYNTAX_ERROR declProt(struct Parser *parser) { + if (!(parser->token.category == ID)) { + return NO_PROTO_ID; + } + + parser->token = lexerGetNextChar(parser->fd, parser->lineCount); + if (!(parser->token.category == SIGN && parser->token.signCode == OPEN_PAR)) { + return INVALID_PROTO_PAREN_OPEN; + } + + parser->token = lexerGetNextChar(parser->fd, parser->lineCount); + enum SYNTAX_ERROR error = declProtParam(parser); + if (error != NO_ERROR) { + return error; + } + return NO_ERROR; +} + +enum SYNTAX_ERROR declProtParam(struct Parser *parser) { + + if (parser->token.category == SIGN && parser->token.signCode == REF) { + parser->token = lexerGetNextChar(parser->fd, parser->lineCount); + } + + if (!(parser->token.signCode == CHAR || parser->token.signCode == INT || + parser->token.signCode == REAL || parser->token.signCode == BOOL)) { + return INVALID_PROTO_PARAM_TYPE; + } + + parser->token = lexerGetNextChar(parser->fd, parser->lineCount); + if (!(parser->token.category == SIGN && + (parser->token.signCode == OPEN_BRACK || + parser->token.signCode == COMMA))) { + return NO_PROTO_VALID_TOKEN_AFTER_TYPE; + } + + if (parser->token.category == SIGN && parser->token.signCode == OPEN_BRACK) { + parser->token = lexerGetNextChar(parser->fd, parser->lineCount); + if (!(parser->token.category == SIGN && + parser->token.signCode == CLOSE_BRACK)) { + return INVALID_ARRAY_PROTO_PARAM_BRACKET_CLOSE; + } + + parser->token = lexerGetNextChar(parser->fd, parser->lineCount); + if (!(parser->token.category == SIGN && + (parser->token.signCode == OPEN_BRACK || + parser->token.signCode == COMMA || + parser->token.signCode == CLOSE_PAR))) { + return NO_PROTO_VALID_TOKEN_AFTER_BRACKET_CLOSE; + } + + if (parser->token.category == SIGN && + parser->token.signCode == OPEN_BRACK) { + parser->token = lexerGetNextChar(parser->fd, parser->lineCount); + if (!(parser->token.category == SIGN && + parser->token.signCode == CLOSE_BRACK)) { + return INVALID_ARRAY_PROTO_PARAM_BRACKET_CLOSE; + } + + parser->token = lexerGetNextChar(parser->fd, parser->lineCount); + if (!(parser->token.category == SIGN && + (parser->token.signCode == COMMA || + parser->token.signCode == CLOSE_PAR))) { + return INVALID_ARRAY_DIMENSION_DECLARATION; + } + } + + if (parser->token.category == SIGN && parser->token.signCode == COMMA) { + parser->token = lexerGetNextChar(parser->fd, parser->lineCount); + enum SYNTAX_ERROR error = declProtParam(parser); + if (error != NO_ERROR) { + return error; + } + } + } + return NO_ERROR; +} diff --git a/parser/parser.h b/parser/parser.h index 1b8f44b..224294b 100644 --- a/parser/parser.h +++ b/parser/parser.h @@ -2,17 +2,36 @@ #define PARSER_H #include "../lexer/types.h" +#include "./syntax_error.h" #include -void prog(FILE *fd, int *lineCount); +struct Parser { + struct Token token; + FILE* fd; + int *lineCount; +}; -void declListVar(FILE *fd, int *lineCount, struct Token token); -void declDefProc(FILE *fd, int *lineCount, struct Token token); +enum SYNTAX_ERROR op_rel(struct Parser *parser); -void tipo(FILE *fd, int *lineCount, struct Token token); -void declVar(FILE *fd, int *lineCount, struct Token token); +enum SYNTAX_ERROR fator(struct Parser *parser); -void arrayDeclaration(FILE *fd, int *lineCount, struct Token token); -void arrayInitialization(FILE *fd, int *lineCount, struct Token token); +enum SYNTAX_ERROR expr(struct Parser *parser); + +enum SYNTAX_ERROR arrayFator(struct Parser *parser); + +enum SYNTAX_ERROR prog(struct Parser *parser); + +enum SYNTAX_ERROR declListVar(struct Parser *parser); +enum SYNTAX_ERROR declDefProc(struct Parser *parser); + +/*void tipo(FILE *fd, int *lineCount, struct Token token);*/ +enum SYNTAX_ERROR declVar(struct Parser *parser); + +enum SYNTAX_ERROR declProt(struct Parser *parser); + +enum SYNTAX_ERROR declProtParam(struct Parser *parser); +/**/ +/*void arrayDeclaration(FILE *fd, int *lineCount, struct Token token);*/ +/*void arrayInitialization(FILE *fd, int *lineCount, struct Token token);*/ #endif diff --git a/parser/syntax_error.c b/parser/syntax_error.c new file mode 100644 index 0000000..ed47194 --- /dev/null +++ b/parser/syntax_error.c @@ -0,0 +1,156 @@ +#include "syntax_error.h" +#include +#include + +#define ERROR_QTY 83 + +// ANSI escape codes +#define RESET "\033[0m" +#define RED "\033[31m" +#define GREEN "\033[32m" + +void printSyntaxError(enum SYNTAX_ERROR error, int *lineCount) { + struct ErrorMessage messages[ERROR_QTY] = { + {NO_ERROR, "No syntax errors"}, + // prog + {INVALID_PROG_START_KEYWORD, "Invalid program start keyword"}, + // decl_list_var + {TYPE_NOT_DETECTED, "Type not detected"}, + {INVALID_TYPE, "Invalid type"}, + {INVALID_VAR_LIST_DEC, "Invalid variable list declaration"}, + // decl_var + {NO_VAR_ID, "No variable ID"}, + {NO_VAR_ASSIGNMENT, "No variable assignment"}, + {INVALID_VAR_TYPE_INIT, "Invalid type in variable initialization"}, + {INVALID_ARRAY_SUBSCRIPT_DEC, "Invalid subscript in array declaration"}, + {INVALID_ARRAY_BRACKET_DEC_OPEN, + "Invalid bracket opening in array declaration"}, + {INVALID_ARRAY_BRACKET_DEC_CLOSE, + "Invalid bracket closing in array declaration"}, + {INVALID_ARRAY_INIT_CURLY_OPEN, + "Invalid curly bracket opening in array initialization"}, + {INVALID_ARRAY_INIT_CURLY_CLOSE, + "Invalid curly bracket closing in array initialization"}, + {INVALID_ARRAY_TYPE_INIT, "Invalid type in array initialization"}, + {INVALID_ARRAY_DIMENSION_DECLARATION, + "Invalid dimension declaration in array"}, + {INVALID_ARRAY_MULTIPLE_ITEM_INIT, + "Invalid multiple item initialization in array"}, + {INVALID_ARRAY_END, + "Invalid array declaration end: expected comma or bracket closing"}, + // decl_def_proc + {INVALID_FUNCTION_KEYWORD, "Invalid function keyword"}, + {NO_FUNCTION_ID, "No valid id for function"}, + {NO_PROTO_ID, "No ID found for function prototype"}, + {INVALID_PROTO_PAREN_OPEN, "No opening paren in function prototype"}, + {INVALID_PROTO_PAREN_CLOSE, "No closing paren in function prototype"}, + {INVALID_PROTO_PARAM_TYPE, + "Invalid parameter type for function prototype"}, + {NO_PROTO_VALID_TOKEN_AFTER_TYPE, + "No valid token found after function prototype, expected bracket open " + "or comma"}, + {INVALID_ARRAY_PROTO_PARAM_BRACKET_OPEN, + "No opening bracket for function prototype array parameter"}, + {INVALID_ARRAY_PROTO_PARAM_BRACKET_CLOSE, + "No closing bracket for function prototype array parameter"}, + {INVALID_PROTO_PARAM_LIST, + "Invalid multiple parameters in function prototype"}, + {NO_PROTO_VALID_TOKEN_AFTER_BRACKET_CLOSE, + "Expected valid token after bracket closing: bracket opening, comma or " + "paren close"}, + {NO_DEF_ID, "No valid ID for function definition"}, + {INVALID_DEF_PARAM_TYPE, + "No valid parameter type for function definition"}, + {NO_DEF_PARAM_ID, "No ID detected for function definition parameter"}, + {INVALID_ARRAY_DEF_PARAM_SUBSCRIPT_TYPE, + "No valid type for subscript in array parameter of function " + "definition"}, + {INVALID_ARRAY_DEF_PARAM_BRACKET_OPEN, + "No opening bracket detected for function definition array parameter"}, + {INVALID_ARRAY_DEF_PARAM_BRACKET_CLOSE, + "No closing bracket detected for function definition array parameter"}, + {INVALID_DEF_PARAM_LIST, + "Invalid parameter list for function definition"}, + {NO_DEF_END_KEYWORD, "End keyword for function definition not detected"}, + // cmd + {INVALID_CMD_CONTENT, "Invalid command keyword or content"}, + // cmd (do) + {INVALID_FUNCTION_CALL_ID, "No valid id detected for function call"}, + {INVALID_FUNCTION_CALL_PAREN_OPEN, + "No paren open detected for function call"}, + {INVALID_FUNCTION_CALL_PAREN_CLOSE, + "No paren close detected for function call"}, + {INVALID_FUNCTION_CALL_ARGUMENT_LIST, + "Invalid argument list for function call"}, + // cmd (while) + {INVALID_WHILE_PAREN_OPEN, "No paren open after while keyword"}, + {INVALID_WHILE_PAREN_CLOSE, "No paren close after while keyword"}, + {NO_WHILE_END_KEYWORD, "Unterminated while keyword"}, + // cmd (var) + {NO_FOR_ID, "No valid id for var loop"}, + {NO_FOR_FROM, "No from keyword detected for var loop"}, + {NO_FOR_EXPR1, "No first expression detected for var loop"}, + {NO_FOR_TO_OR_DT, "No to or dt keyword detected for var loop"}, + {NO_FOR_EXPR2, "No second expression detected for var loop"}, + {INVALID_FOR_BY_INC_OR_DEC_TYPE, + "Invalid type for increment/decrement in the var loop"}, + {NO_FOR_END_KEYWORD, "No end keyword for var loop"}, + // cmd (if) + {INVALID_IF_PAREN_OPEN, "No paren opening in if conditional"}, + {INVALID_IF_PAREN_CLOSE, "No paren closing in if conditional"}, + {INVALID_ELIF_PAREN_OPEN, "No paren opening in elif conditional"}, + {INVALID_ELIF_PAREN_CLOSE, "No paren closing in elif conditional"}, + {NO_IF_END_KEYWORD, "No end keyword for if conditional"}, + // getint + {NO_GETINT_ID, "No ID detected for getint"}, + // getreal + {NO_GETREAL_ID, "NO ID detected for getreal"}, + // getchar + {NO_GETCHAR_ID, "No ID detected for getchar"}, + // getstr + {NO_GETSTR_ID, "No ID detected for getstr"}, + // putint + {INVALID_PUTINT_ELEMENT, "Invalid type for putint element"}, + // putreal + {INVALID_PUTREAL_ELEMENT, "Invalid type for putreal element"}, + // putchar + {INVALID_PUTCHAR_ELEMENT, "Invalid type for putchar element"}, + // putstr + {INVALID_PUTSTR_ELEMENT, "Invalid type for putstr element"}, + // atrib + {NO_ATRIB_ID, "No ID detected for expression assign"}, + {NO_ATRIB_ASSIGN, "No assign symbol for expression assign"}, + {NO_ATRIB_EXPR, "No expression assigned to expression assign"}, + // expr + {NO_EXPR_EXPR_SIMP, "No simple expression for expression"}, + {NO_EXPR_EXPR_SIMP_AFTER_OP_REL, "No simple expression after operation"}, + // expr_simp + {NO_EXPR_SIMP_TERM, "No term detected in simple expression"}, + {NO_EXPR_SIMP_TERM_VALID_SIGN_BEFORE, + "No valid sign detected before simple expression"}, + {NO_EXPR_SIMP_TERM_VALID_SIGN_AFTER, + "No valid sign detected after simple expression"}, + {NO_EXPR_SIMP_TERM_AFTER_VALID_SIGN, + "No valid term after valid sign in simple expression"}, + // termo + {NO_TERM_FACTOR, "No factor detected in term"}, + {NO_TERM_FACTOR_VALID_SIGN_AFTER, "Invalid sign after term"}, + {NO_TERM_FACTOR_AFTER_FACTOR_VALID_SIGN, + "No factor detected after valid sign in term"}, + // fator + {NO_FACTOR_VALID_START_SYMBOL, "No factor valid start symbol"}, + {INVALID_FACTOR_ARRAY_BRACKET_OPEN, "No factor array bracket opening"}, + {INVALID_FACTOR_ARRAY_BRACKET_CLOSE, "No factor array bracket closing"}, + {INVALID_FACTOR_EXPR_PAREN_OPEN, "No factor expression paren opening"}, + {INVALID_FACTOR_EXPR_PAREN_CLOSE, "No factor expression paren closing"}, + {NO_FACTOR_AFTER_BANG, "No factor detected after bang in factor"}, + // op_rel + {INVALID_OPERATOR, "Invalid operator in expression"}}; + + for (int i = 0; i < ERROR_QTY; i++) { + if (error == messages[i].error) { + fprintf(stderr, RED "Syntax error: %s on line %d\n" RESET, messages[i].message, + *lineCount); + } + } +} diff --git a/parser/syntax_error.h b/parser/syntax_error.h new file mode 100644 index 0000000..1055792 --- /dev/null +++ b/parser/syntax_error.h @@ -0,0 +1,123 @@ +#ifndef SYNTAX_ERROR_H +#define SYNTAX_ERROR_H + +#include "../lexer/types.h" +#include + +enum SYNTAX_ERROR { + NO_ERROR, + // prog + INVALID_PROG_START_KEYWORD, + // decl_list_var + TYPE_NOT_DETECTED, + INVALID_TYPE, + INVALID_VAR_LIST_DEC, + // decl_var + NO_VAR_ID, + NO_VAR_ASSIGNMENT, + INVALID_VAR_TYPE_INIT, + INVALID_ARRAY_SUBSCRIPT_DEC, + INVALID_ARRAY_BRACKET_DEC_OPEN, + INVALID_ARRAY_BRACKET_DEC_CLOSE, + INVALID_ARRAY_INIT_CURLY_OPEN, + INVALID_ARRAY_INIT_CURLY_CLOSE, + INVALID_ARRAY_TYPE_INIT, + INVALID_ARRAY_DIMENSION_DECLARATION, + INVALID_ARRAY_MULTIPLE_ITEM_INIT, + INVALID_ARRAY_END, + // decl_def_proc + INVALID_FUNCTION_KEYWORD, + NO_FUNCTION_ID, + NO_PROTO_ID, + INVALID_PROTO_PAREN_OPEN, + INVALID_PROTO_PAREN_CLOSE, + INVALID_PROTO_PARAM_TYPE, + NO_PROTO_VALID_TOKEN_AFTER_TYPE, + INVALID_ARRAY_PROTO_PARAM_BRACKET_OPEN, + INVALID_ARRAY_PROTO_PARAM_BRACKET_CLOSE, + INVALID_PROTO_PARAM_LIST, + NO_PROTO_VALID_TOKEN_AFTER_BRACKET_CLOSE, + NO_DEF_ID, + INVALID_DEF_PARAM_TYPE, + NO_DEF_PARAM_ID, + INVALID_ARRAY_DEF_PARAM_SUBSCRIPT_TYPE, + INVALID_ARRAY_DEF_PARAM_BRACKET_OPEN, + INVALID_ARRAY_DEF_PARAM_BRACKET_CLOSE, + INVALID_DEF_PARAM_LIST, + NO_DEF_END_KEYWORD, + // cmd + INVALID_CMD_CONTENT, + // cmd (do) + INVALID_FUNCTION_CALL_ID, + INVALID_FUNCTION_CALL_PAREN_OPEN, + INVALID_FUNCTION_CALL_PAREN_CLOSE, + INVALID_FUNCTION_CALL_ARGUMENT_LIST, + // cmd (while) + INVALID_WHILE_PAREN_OPEN, + INVALID_WHILE_PAREN_CLOSE, + NO_WHILE_END_KEYWORD, + // cmd (var): to disambiguate "var" from variable, we call it for + NO_FOR_ID, + NO_FOR_FROM, + NO_FOR_EXPR1, + NO_FOR_TO_OR_DT, + NO_FOR_EXPR2, + INVALID_FOR_BY_INC_OR_DEC_TYPE, + NO_FOR_END_KEYWORD, + // cmd (if) + INVALID_IF_PAREN_OPEN, + INVALID_IF_PAREN_CLOSE, + INVALID_ELIF_PAREN_OPEN, + INVALID_ELIF_PAREN_CLOSE, + NO_IF_END_KEYWORD, + // getint + NO_GETINT_ID, + // getreal + NO_GETREAL_ID, + // getchar + NO_GETCHAR_ID, + // getstr + NO_GETSTR_ID, + // putint + INVALID_PUTINT_ELEMENT, + // putreal + INVALID_PUTREAL_ELEMENT, + // putchar + INVALID_PUTCHAR_ELEMENT, + // putstr + INVALID_PUTSTR_ELEMENT, + // atrib + NO_ATRIB_ID, + NO_ATRIB_ASSIGN, + NO_ATRIB_EXPR, + // expr + NO_EXPR_EXPR_SIMP, + NO_EXPR_EXPR_SIMP_AFTER_OP_REL, + // expr_simp + NO_EXPR_SIMP_TERM, + NO_EXPR_SIMP_TERM_VALID_SIGN_BEFORE, + NO_EXPR_SIMP_TERM_VALID_SIGN_AFTER, + NO_EXPR_SIMP_TERM_AFTER_VALID_SIGN, + // termo + NO_TERM_FACTOR, + NO_TERM_FACTOR_VALID_SIGN_AFTER, + NO_TERM_FACTOR_AFTER_FACTOR_VALID_SIGN, + // fator + NO_FACTOR_VALID_START_SYMBOL, + INVALID_FACTOR_ARRAY_BRACKET_OPEN, + INVALID_FACTOR_ARRAY_BRACKET_CLOSE, + INVALID_FACTOR_EXPR_PAREN_OPEN, + INVALID_FACTOR_EXPR_PAREN_CLOSE, + NO_FACTOR_AFTER_BANG, + // op_rel + INVALID_OPERATOR, +}; + +void printSyntaxError(enum SYNTAX_ERROR error, int *lineCount); + +struct ErrorMessage { + enum SYNTAX_ERROR error; + const char* message; +}; + +#endif // !SYNTAX_ERROR_H diff --git a/test/lexer_test.c b/test/lexer_test.c index 667caf4..b4f18ed 100644 --- a/test/lexer_test.c +++ b/test/lexer_test.c @@ -1,6 +1,7 @@ #include "lexer_test.h" #include "../lexer/lexer.h" #include "../lexer/types.h" +#include "../lexer/transition.h" #include #include #include @@ -29,16 +30,16 @@ void lexerTest() { } assert(tokens[0].category == RSV); - assert(strcmp(tokens[0].lexeme, "def") == 0); + assert(tokens[0].signCode == DEF); assert(tokens[1].category == RSV); - assert(strcmp(tokens[1].lexeme, "init") == 0); + assert(tokens[1].signCode == INIT); assert(tokens[2].category == INTCON); assert(tokens[2].intValue == 2); assert(tokens[3].category == RSV); - assert(strcmp(tokens[3].lexeme, "endp") == 0); + assert(tokens[3].signCode == ENDP); } void lexerCharconTest() { diff --git a/test/parser_integration_tests.c b/test/parser_integration_tests.c new file mode 100644 index 0000000..baf99c1 --- /dev/null +++ b/test/parser_integration_tests.c @@ -0,0 +1,498 @@ +#include "parser_integration_tests.h" +#include +#include +#include +#include + +void progStartKeyword() { + const char *mock_data = "==\n"; + FILE *mock_file = fmemopen((void *)mock_data, strlen(mock_data), "r"); + + if (mock_file == NULL) { + fprintf(stderr, "Error opening source file.\n"); + exit(EXIT_FAILURE); + } + + int *lineCount; + int line = 1; + lineCount = &line; + + // for integration tests, prog itself needs a previously initialized parser + // with a token too + struct Token token = lexerGetNextChar(mock_file, lineCount); + struct Parser parser = { + .fd = mock_file, .lineCount = lineCount, .token = token}; + + enum SYNTAX_ERROR error = prog(&parser); + // example debugging: + // printSyntaxError(error); + assert(error == INVALID_PROG_START_KEYWORD); +} + +void declListVarInvalidType() { + const char *mock_data = "const string\n"; + FILE *mock_file = fmemopen((void *)mock_data, strlen(mock_data), "r"); + + if (mock_file == NULL) { + fprintf(stderr, "Error opening source file.\n"); + exit(EXIT_FAILURE); + } + + int *lineCount; + int line = 1; + lineCount = &line; + + struct Token token = lexerGetNextChar(mock_file, lineCount); + struct Parser parser = { + .fd = mock_file, .lineCount = lineCount, .token = token}; + + enum SYNTAX_ERROR error = prog(&parser); + assert(error == INVALID_TYPE); +} + +void declVarNoId() { + const char *mock_data = "const int 5\n"; + FILE *mock_file = fmemopen((void *)mock_data, strlen(mock_data), "r"); + + if (mock_file == NULL) { + fprintf(stderr, "Error opening source file.\n"); + exit(EXIT_FAILURE); + } + + int *lineCount; + int line = 1; + lineCount = &line; + + struct Token token = lexerGetNextChar(mock_file, lineCount); + struct Parser parser = { + .fd = mock_file, .lineCount = lineCount, .token = token}; + + enum SYNTAX_ERROR error = prog(&parser); + assert(error == NO_VAR_ID); +} + +void declVarArrayInvalidSubscript() { + const char *mock_data = "const int i[2.2\n"; + FILE *mock_file = fmemopen((void *)mock_data, strlen(mock_data), "r"); + + if (mock_file == NULL) { + fprintf(stderr, "Error opening source file.\n"); + exit(EXIT_FAILURE); + } + + int *lineCount; + int line = 1; + lineCount = &line; + + struct Token token = lexerGetNextChar(mock_file, lineCount); + struct Parser parser = { + .fd = mock_file, .lineCount = lineCount, .token = token}; + + enum SYNTAX_ERROR error = prog(&parser); + assert(error == INVALID_ARRAY_SUBSCRIPT_DEC); +} + +void declVarArrayDidntClose() { + const char *mock_data = "const int i[2[\n"; + FILE *mock_file = fmemopen((void *)mock_data, strlen(mock_data), "r"); + + if (mock_file == NULL) { + fprintf(stderr, "Error opening source file.\n"); + exit(EXIT_FAILURE); + } + + int *lineCount; + int line = 1; + lineCount = &line; + + struct Token token = lexerGetNextChar(mock_file, lineCount); + struct Parser parser = { + .fd = mock_file, .lineCount = lineCount, .token = token}; + + enum SYNTAX_ERROR error = prog(&parser); + assert(error == INVALID_ARRAY_BRACKET_DEC_CLOSE); +} + +void declVarBadInit() { + const char *mock_data = "const int i = i\n"; + FILE *mock_file = fmemopen((void *)mock_data, strlen(mock_data), "r"); + + if (mock_file == NULL) { + fprintf(stderr, "Error opening source file.\n"); + exit(EXIT_FAILURE); + } + + int *lineCount; + int line = 1; + lineCount = &line; + + struct Token token = lexerGetNextChar(mock_file, lineCount); + struct Parser parser = { + .fd = mock_file, .lineCount = lineCount, .token = token}; + + enum SYNTAX_ERROR error = prog(&parser); + assert(error == INVALID_VAR_TYPE_INIT); +} + +void declListVarMulti() { + const char *mock_data = "const int i = 1, a = 2\n"; + FILE *mock_file = fmemopen((void *)mock_data, strlen(mock_data), "r"); + + if (mock_file == NULL) { + fprintf(stderr, "Error opening source file.\n"); + exit(EXIT_FAILURE); + } + + int *lineCount; + int line = 1; + lineCount = &line; + + struct Token token = lexerGetNextChar(mock_file, lineCount); + struct Parser parser = { + .fd = mock_file, .lineCount = lineCount, .token = token}; + + enum SYNTAX_ERROR error = prog(&parser); + assert(error == NO_ERROR); +} + +void declListVarMultiFail() { + const char *mock_data = "const int i = 1, a = a\n"; + FILE *mock_file = fmemopen((void *)mock_data, strlen(mock_data), "r"); + + if (mock_file == NULL) { + fprintf(stderr, "Error opening source file.\n"); + exit(EXIT_FAILURE); + } + + int *lineCount; + int line = 1; + lineCount = &line; + + struct Token token = lexerGetNextChar(mock_file, lineCount); + struct Parser parser = { + .fd = mock_file, .lineCount = lineCount, .token = token}; + + enum SYNTAX_ERROR error = prog(&parser); + assert(error == INVALID_VAR_TYPE_INIT); +} + +void declVarArrayMultiTooMany() { + const char *mock_data = "int i[2][3][4]\n"; + FILE *mock_file = fmemopen((void *)mock_data, strlen(mock_data), "r"); + + if (mock_file == NULL) { + fprintf(stderr, "Error opening source file.\n"); + exit(EXIT_FAILURE); + } + + int *lineCount; + int line = 1; + lineCount = &line; + + struct Token token = lexerGetNextChar(mock_file, lineCount); + struct Parser parser = { + .fd = mock_file, .lineCount = lineCount, .token = token}; + + enum SYNTAX_ERROR error = prog(&parser); + assert(error == INVALID_ARRAY_DIMENSION_DECLARATION); +} + +void declVarArrayBadInitCurly() { + const char *mock_data = "const int i[2] = [1\n"; + FILE *mock_file = fmemopen((void *)mock_data, strlen(mock_data), "r"); + + if (mock_file == NULL) { + fprintf(stderr, "Error opening source file.\n"); + exit(EXIT_FAILURE); + } + + int *lineCount; + int line = 1; + lineCount = &line; + + struct Token token = lexerGetNextChar(mock_file, lineCount); + struct Parser parser = { + .fd = mock_file, .lineCount = lineCount, .token = token}; + + enum SYNTAX_ERROR error = prog(&parser); + assert(error == INVALID_ARRAY_INIT_CURLY_OPEN); +} + +void declVarArrayInvalidTypeInit() { + const char *mock_data = "const int i[2] = {id\n"; + FILE *mock_file = fmemopen((void *)mock_data, strlen(mock_data), "r"); + + if (mock_file == NULL) { + fprintf(stderr, "Error opening source file.\n"); + exit(EXIT_FAILURE); + } + + int *lineCount; + int line = 1; + lineCount = &line; + + struct Token token = lexerGetNextChar(mock_file, lineCount); + struct Parser parser = { + .fd = mock_file, .lineCount = lineCount, .token = token}; + + enum SYNTAX_ERROR error = prog(&parser); + assert(error == INVALID_ARRAY_TYPE_INIT); +} + +void declVarArrayMultiInit() { + const char *mock_data = "int i[4] = {3, 4, 4, 4}\n"; + FILE *mock_file = fmemopen((void *)mock_data, strlen(mock_data), "r"); + + if (mock_file == NULL) { + fprintf(stderr, "Error opening source file.\n"); + exit(EXIT_FAILURE); + } + + int *lineCount; + int line = 1; + lineCount = &line; + + struct Token token = lexerGetNextChar(mock_file, lineCount); + struct Parser parser = { + .fd = mock_file, .lineCount = lineCount, .token = token}; + + enum SYNTAX_ERROR error = prog(&parser); + assert(error == NO_ERROR); +} + +void declVarArrayBadClose() { + const char *mock_data = "int i[1] = {1{\n"; + FILE *mock_file = fmemopen((void *)mock_data, strlen(mock_data), "r"); + + if (mock_file == NULL) { + fprintf(stderr, "Error opening source file\n"); + exit(EXIT_FAILURE); + } + + int *lineCount; + int line = 1; + lineCount = &line; + + struct Token token = lexerGetNextChar(mock_file, lineCount); + struct Parser parser = { + .fd = mock_file, .lineCount = lineCount, .token = token}; + + enum SYNTAX_ERROR error = prog(&parser); + assert(error == INVALID_ARRAY_INIT_CURLY_CLOSE); +} + +void declDefProcNoId() { + const char *mock_data = "prot 1\n"; + FILE *mock_file = fmemopen((void *)mock_data, strlen(mock_data), "r"); + + if (mock_file == NULL) { + fprintf(stderr, "Error opening source file\n"); + exit(EXIT_FAILURE); + } + + int *lineCount; + int line = 1; + lineCount = &line; + + struct Token token = lexerGetNextChar(mock_file, lineCount); + struct Parser parser = { + .fd = mock_file, .lineCount = lineCount, .token = token}; + + enum SYNTAX_ERROR error = prog(&parser); + assert(error == NO_PROTO_ID); +} + +void declDefProcProtNoOpenParen() { + const char *mock_data = "prot a -\n"; + FILE *mock_file = fmemopen((void *)mock_data, strlen(mock_data), "r"); + + if (mock_file == NULL) { + fprintf(stderr, "Error opening source file\n"); + exit(EXIT_FAILURE); + } + + int *lineCount; + int line = 1; + lineCount = &line; + + struct Token token = lexerGetNextChar(mock_file, lineCount); + struct Parser parser = { + .fd = mock_file, .lineCount = lineCount, .token = token}; + + enum SYNTAX_ERROR error = prog(&parser); + assert(error == INVALID_PROTO_PAREN_OPEN); +} + +void declDefProcProtoInvalidParamType() { + const char *mock_data = "prot a(&null)\n"; + FILE *mock_file = fmemopen((void *)mock_data, strlen(mock_data), "r"); + + if (mock_file == NULL) { + fprintf(stderr, "Error opening source file\n"); + exit(EXIT_FAILURE); + } + + int *lineCount; + int line = 1; + lineCount = &line; + + struct Token token = lexerGetNextChar(mock_file, lineCount); + struct Parser parser = { + .fd = mock_file, .lineCount = lineCount, .token = token}; + + enum SYNTAX_ERROR error = prog(&parser); + assert(error == INVALID_PROTO_PARAM_TYPE); +} + +// this error does not exist! +// changed the error check to correct this. +// good for documentation for the grammar, i guess. +void declDefProcProtoNoParamId() { + const char *mock_data = "prot b(int 1)\n"; + FILE *mock_file = fmemopen((void *)mock_data, strlen(mock_data), "r"); + + if (mock_file == NULL) { + fprintf(stderr, "Error opening source file\n"); + exit(EXIT_FAILURE); + } + + int *lineCount; + int line = 1; + lineCount = &line; + + struct Token token = lexerGetNextChar(mock_file, lineCount); + struct Parser parser = { + .fd = mock_file, .lineCount = lineCount, .token = token}; + + enum SYNTAX_ERROR error = prog(&parser); + assert(error == NO_PROTO_VALID_TOKEN_AFTER_TYPE); +} + +void declDefProcProtoNoValidTokenAfterType() { + const char *mock_data = "prot c(int{\n"; + FILE *mock_file = fmemopen((void *)mock_data, strlen(mock_data), "r"); + + if (mock_file == NULL) { + fprintf(stderr, "Error opening source file\n"); + exit(EXIT_FAILURE); + } + + int *lineCount; + int line = 1; + lineCount = &line; + + struct Token token = lexerGetNextChar(mock_file, lineCount); + struct Parser parser = { + .fd = mock_file, .lineCount = lineCount, .token = token}; + + enum SYNTAX_ERROR error = prog(&parser); + assert(error == NO_PROTO_VALID_TOKEN_AFTER_TYPE); +} + +void declDefProcProtoUnclosedArrayParam() { + const char *mock_data = "prot d(int[[\n"; + FILE *mock_file = fmemopen((void *)mock_data, strlen(mock_data), "r"); + + if (mock_file == NULL) { + fprintf(stderr, "Error opening source file\n"); + exit(EXIT_FAILURE); + } + + int *lineCount; + int line = 1; + lineCount = &line; + + struct Token token = lexerGetNextChar(mock_file, lineCount); + struct Parser parser = { + .fd = mock_file, .lineCount = lineCount, .token = token}; + + enum SYNTAX_ERROR error = prog(&parser); + assert(error == INVALID_ARRAY_PROTO_PARAM_BRACKET_CLOSE); +} + +void declDefProcProtoInvalid2dArrayOpen() { + const char *mock_data = "prot e(int[]])\n"; + FILE *mock_file = fmemopen((void *)mock_data, strlen(mock_data), "r"); + + if (mock_file == NULL) { + fprintf(stderr, "Error opening source file\n"); + exit(EXIT_FAILURE); + } + + int *lineCount; + int line = 1; + lineCount = &line; + + struct Token token = lexerGetNextChar(mock_file, lineCount); + struct Parser parser = { + .fd = mock_file, .lineCount = lineCount, .token = token}; + + enum SYNTAX_ERROR error = prog(&parser); + assert(error == NO_PROTO_VALID_TOKEN_AFTER_BRACKET_CLOSE); +} + +void declDefProcProtoInvalid2dArrayClose() { + const char *mock_data = "prot f(int [][[)\n"; + FILE *mock_file = fmemopen((void *)mock_data, strlen(mock_data), "r"); + + if (mock_file == NULL) { + fprintf(stderr, "Error opening source file\n"); + exit(EXIT_FAILURE); + } + + int *lineCount; + int line = 1; + lineCount = &line; + + struct Token token = lexerGetNextChar(mock_file, lineCount); + struct Parser parser = { + .fd = mock_file, .lineCount = lineCount, .token = token}; + + enum SYNTAX_ERROR error = prog(&parser); + assert(error == INVALID_ARRAY_PROTO_PARAM_BRACKET_CLOSE); +} + +void declDefProcProtoInvalid3dArray() { + const char *mock_data = "prot g(int [][][)\n"; + FILE *mock_file = fmemopen((void*)mock_data, strlen(mock_data), "r"); + + if (mock_file == NULL) { + fprintf(stderr, "Error opening source file\n"); + exit(EXIT_FAILURE); + } + + int *lineCount; + int line = 1; + lineCount = &line; + + struct Token token = lexerGetNextChar(mock_file, lineCount); + struct Parser parser = { + .fd = mock_file, .lineCount = lineCount, .token = token + }; + + enum SYNTAX_ERROR error = prog(&parser); + assert(error == INVALID_ARRAY_DIMENSION_DECLARATION); +} + +void declDefProcProtoMultiParams() { + const char *mock_data = "prot h(int [], int[], int[][][])\n"; + + FILE *mock_file = fmemopen((void*)mock_data, strlen(mock_data), "r"); + + if (mock_file == NULL) { + fprintf(stderr, "Error opening source file\n"); + exit(EXIT_FAILURE); + } + + int *lineCount; + int line = 1; + lineCount = &line; + + struct Token token = lexerGetNextChar(mock_file, lineCount); + struct Parser parser = { + .fd = mock_file, .lineCount = lineCount, .token = token + }; + + enum SYNTAX_ERROR error = prog(&parser); + assert(error == INVALID_ARRAY_DIMENSION_DECLARATION); +} diff --git a/test/parser_integration_tests.h b/test/parser_integration_tests.h new file mode 100644 index 0000000..311dd83 --- /dev/null +++ b/test/parser_integration_tests.h @@ -0,0 +1,36 @@ +#ifndef PARSER_INTEGRATION_TESTS_H +#define PARSER_INTEGRATION_TESTS_H + +#include "../parser/parser.h" +#include "../parser/syntax_error.h" +#include "../lexer/lexer.h" +#include "../lexer/types.h" + +void progStartKeyword(); + +void declListVarInvalidType(); +void declVarNoId(); +void declListVarMulti(); +void declListVarMultiFail(); + +void declVarArrayInvalidSubscript(); +void declVarArrayDidntClose(); +void declVarBadInit(); +void declVarArrayBadInitCurly(); +void declVarArrayMultiTooMany(); +void declVarArrayInvalidTypeInit(); +void declVarArrayMultiInit(); +void declVarArrayBadClose(); + +void declDefProcNoId(); +void declDefProcProtNoOpenParen(); +void declDefProcProtoInvalidParamType(); +void declDefProcProtoNoParamId(); +void declDefProcProtoNoValidTokenAfterType(); +void declDefProcProtoUnclosedArrayParam(); +void declDefProcProtoInvalid2dArrayOpen(); +void declDefProcProtoInvalid2dArrayClose(); +void declDefProcProtoInvalid3dArray(); +void declDefProcProtoMultiParams(); + +#endif // !PARSER_INTEGRATION_TESTS_H diff --git a/test/parser_unit_tests.c b/test/parser_unit_tests.c new file mode 100644 index 0000000..7d3c9cc --- /dev/null +++ b/test/parser_unit_tests.c @@ -0,0 +1,243 @@ +#include "parser_unit_tests.h" +#include +#include +#include +#include + +void opRelTest() { + const char *mock_data = "==\n"; + FILE *mock_file = fmemopen((void *)mock_data, strlen(mock_data), "r"); + + if (mock_file == NULL) { + fprintf(stderr, "Error opening source file.\n"); + exit(EXIT_FAILURE); + } + + int *lineCount; + int line = 1; + lineCount = &line; + + // For unit tests of individual functions like this one, that don't involve + // consuming tokens before, let's manually call the lexer (in the final + // program, tokens will be consumed before the function) + struct Token token = lexerGetNextChar(mock_file, lineCount); + struct Parser parser = { + .fd = mock_file, .lineCount = lineCount, .token = token}; + + enum SYNTAX_ERROR error = op_rel(&parser); + // example debugging: + // printSyntaxError(error); + assert(error == NO_ERROR); +} + +void opRelTest2() { + const char *mock_data = "=!\n"; + FILE *mock_file = fmemopen((void *)mock_data, strlen(mock_data), "r"); + + if (mock_file == NULL) { + fprintf(stderr, "Error opening source file.\n"); + exit(EXIT_FAILURE); + } + + int *lineCount; + int line = 1; + lineCount = &line; + + struct Token token = lexerGetNextChar(mock_file, lineCount); + struct Parser parser = { + .fd = mock_file, .lineCount = lineCount, .token = token}; + + enum SYNTAX_ERROR error = op_rel(&parser); + assert(error == INVALID_OPERATOR); +} + +void fatorConTest() { + const char *mock_data = "2 2.2 'a' +\n"; + FILE *mock_file = fmemopen((void *)mock_data, strlen(mock_data), "r"); + + if (mock_file == NULL) { + fprintf(stderr, "Error opening source file.\n"); + exit(EXIT_FAILURE); + } + + int *lineCount; + int line = 1; + lineCount = &line; + + struct Token token = lexerGetNextChar(mock_file, lineCount); + struct Parser parser = { + .fd = mock_file, .lineCount = lineCount, .token = token}; + + enum SYNTAX_ERROR intcon = fator(&parser); + assert(intcon == NO_ERROR); + + token = lexerGetNextChar(mock_file, lineCount); + parser.token = token; + enum SYNTAX_ERROR realcon = fator(&parser); + assert(intcon == NO_ERROR); + + token = lexerGetNextChar(mock_file, lineCount); + parser.token = token; + enum SYNTAX_ERROR charcon = fator(&parser); + assert(charcon == NO_ERROR); + + token = lexerGetNextChar(mock_file, lineCount); + parser.token = token; + enum SYNTAX_ERROR error = fator(&parser); + assert(error == NO_FACTOR_VALID_START_SYMBOL); +} + +void fatorNegFatorTest() { + const char *mock_data = "2 !2.2 !)\n"; + FILE *mock_file = fmemopen((void *)mock_data, strlen(mock_data), "r"); + + if (mock_file == NULL) { + fprintf(stderr, "Error opening source file.\n"); + exit(EXIT_FAILURE); + } + + int *lineCount; + int line = 1; + lineCount = &line; + + struct Token token = lexerGetNextChar(mock_file, lineCount); + struct Parser parser = { + .fd = mock_file, .lineCount = lineCount, .token = token}; + + enum SYNTAX_ERROR intcon = fator(&parser); + assert(intcon == NO_ERROR); + + token = lexerGetNextChar(mock_file, lineCount); + parser.token = token; + enum SYNTAX_ERROR realcon = fator(&parser); + assert(realcon == NO_ERROR); + + token = lexerGetNextChar(mock_file, lineCount); + parser.token = token; + enum SYNTAX_ERROR error = fator(&parser); + assert(error == NO_FACTOR_AFTER_BANG); +} + +void fatorArrayOutroTest() { + // is this test literally useless? + const char *mock_data = "id]\n"; + FILE *mock_file = fmemopen((void *)mock_data, strlen(mock_data), "r"); + + if (mock_file == NULL) { + fprintf(stderr, "Error opening source file.\n"); + exit(EXIT_FAILURE); + } + + int *lineCount; + int line = 1; + lineCount = &line; + + struct Token token = lexerGetNextChar(mock_file, lineCount); + struct Parser parser = { + .fd = mock_file, .lineCount = lineCount, .token = token}; + + enum SYNTAX_ERROR id = fator(&parser); + assert(id == INVALID_FACTOR_ARRAY_BRACKET_OPEN); +} + +void fatorArrayOutroTest2() { + const char *mock_data = "id[1[\n"; + FILE *mock_file = fmemopen((void *)mock_data, strlen(mock_data), "r"); + + if (mock_file == NULL) { + fprintf(stderr, "Error opening source file.\n"); + exit(EXIT_FAILURE); + } + + int *lineCount; + int line = 1; + lineCount = &line; + + struct Token token = lexerGetNextChar(mock_file, lineCount); + struct Parser parser = { + .fd = mock_file, .lineCount = lineCount, .token = token}; + + enum SYNTAX_ERROR id = fator(&parser); + assert(id == INVALID_FACTOR_ARRAY_BRACKET_CLOSE); +} + +void fatorArrayUniTest() { + const char *mock_data = "id[1] id] id[1[\n"; + FILE *mock_file = fmemopen((void *)mock_data, strlen(mock_data), "r"); + + if (mock_file == NULL) { + fprintf(stderr, "Error opening source file.\n"); + exit(EXIT_FAILURE); + } + + int *lineCount; + int line = 1; + lineCount = &line; + + struct Token token = lexerGetNextChar(mock_file, lineCount); + struct Parser parser = { + .fd = mock_file, .lineCount = lineCount, .token = token}; + + enum SYNTAX_ERROR id = fator(&parser); + assert(id == NO_ERROR); + + token = lexerGetNextChar(mock_file, lineCount); + parser.token = token; + enum SYNTAX_ERROR error = fator(&parser); + assert(error == INVALID_FACTOR_ARRAY_BRACKET_OPEN); + + token = lexerGetNextChar(mock_file, lineCount); + parser.token = token; + enum SYNTAX_ERROR error2 = fator(&parser); + assert(error2 == INVALID_FACTOR_ARRAY_BRACKET_CLOSE); +} + +void fatorSingle() { + const char *mock_data = "fator funciona bem\n"; + FILE *mock_file = fmemopen((void *)mock_data, strlen(mock_data), "r"); + + if (mock_file == NULL) { + fprintf(stderr, "Error opening source file.\n"); + exit(EXIT_FAILURE); + } + + int *lineCount; + int line = 1; + lineCount = &line; + struct Token token = lexerGetNextChar(mock_file, lineCount); + struct Parser parser = { + .fd = mock_file, .lineCount = lineCount, .token = token}; + + enum SYNTAX_ERROR error = fator(&parser); + assert(error == NO_ERROR); + + // no consuming here: already consumed + enum SYNTAX_ERROR error1 = fator(&parser); + assert(error1 == NO_ERROR); + + // no consuming here: already consumed + enum SYNTAX_ERROR error2 = fator(&parser); + assert(error2 == NO_ERROR); +} +/**/ +/*void fatorArrayMultTest() {*/ +/* // TODO: ?*/ +/* const char *mock_data = "id[1][1] id[3]] id[3][4[\n";*/ +/**/ +/* FILE *mock_file = fmemopen((void *)mock_data, strlen(mock_data), "r");*/ +/**/ +/* if (mock_file == NULL) {*/ +/* fprintf(stderr, "Error opening source file.\n");*/ +/* exit(EXIT_FAILURE);*/ +/* }*/ +/**/ +/* int *lineCount;*/ +/* int line = 1;*/ +/* lineCount = &line;*/ +/* struct Token token = lexerGetNextChar(mock_file, lineCount);*/ +/* struct Parser parser = {*/ +/* .fd = mock_file, .lineCount = lineCount, .token = token};*/ +/**/ +/* enum SYNTAX_ERROR error = fator(parser);*/ +/* assert(error == NO_ERROR);*/ +/*}*/ diff --git a/test/parser_unit_tests.h b/test/parser_unit_tests.h new file mode 100644 index 0000000..af5a2d5 --- /dev/null +++ b/test/parser_unit_tests.h @@ -0,0 +1,19 @@ +#ifndef PARSER_UNIT_TESTS_H +#define PARSER_UNIT_TESTS_H + +#include "../parser/parser.h" +#include "../parser/syntax_error.h" +#include "../lexer/lexer.h" +#include "../lexer/types.h" + +void opRelTest(); +void opRelTest2(); +void fatorConTest(); +void fatorNegFatorTest(); +void fatorArrayOutroTest(); +void fatorArrayOutroTest2(); +void fatorArrayUniTest(); +void fatorArrayMultTest(); +void fatorSingle(); + +#endif // !PARSER_UNIT_TESTS_H diff --git a/test/test.c b/test/test.c index b4a375f..dd76bb3 100644 --- a/test/test.c +++ b/test/test.c @@ -1,4 +1,6 @@ #include "lexer_test.h" +#include "parser_integration_tests.h" +#include "parser_unit_tests.h" #include #include @@ -7,7 +9,48 @@ int main(void) { lexerCharconTest(); lexerCharconTest2(); - printf("--- Lexer test passed\n"); + printf("--- Lexer tests passed\n"); + + opRelTest(); + opRelTest2(); + fatorConTest(); + fatorNegFatorTest(); + fatorArrayOutroTest(); + fatorArrayOutroTest2(); + fatorArrayUniTest(); + fatorSingle(); + declVarArrayBadInitCurly(); + // fatorArrayMultTest(); + + printf("--- Parser unit tests passed\n"); + + progStartKeyword(); + + declListVarInvalidType(); + declVarNoId(); + declListVarMulti(); + declListVarMultiFail(); + declVarArrayInvalidSubscript(); + declVarArrayDidntClose(); + declVarBadInit(); + declVarArrayBadInitCurly(); + declVarArrayMultiTooMany(); + declVarArrayInvalidTypeInit(); + declVarArrayMultiInit(); + declVarArrayBadClose(); + + declDefProcNoId(); + declDefProcProtNoOpenParen(); + declDefProcProtoInvalidParamType(); + declDefProcProtoNoParamId(); + declDefProcProtoNoValidTokenAfterType(); + declDefProcProtoUnclosedArrayParam(); + declDefProcProtoInvalid2dArrayOpen(); + declDefProcProtoInvalid2dArrayClose(); + declDefProcProtoInvalid3dArray(); + declDefProcProtoMultiParams(); + + printf("--- Parser integration tests passed\n"); return EXIT_SUCCESS; }