diff --git a/README.md b/README.md index e338acf..d96dc6b 100644 --- a/README.md +++ b/README.md @@ -6,7 +6,7 @@ ![swag](./doc/swag.webp) -![diagram](./doc/flap2.png) +![diagram](./doc/flap3.png) # uso @@ -36,8 +36,10 @@ # problemas -- [ ] Remover transition.h e transition.c -- [ ] Setup GitHub Actions para testes +- [ ] Adicionar especificacao da linguagem `proc` e manter atualizada +- [ ] Refactor: remover transition.h e transition.c +- [ ] Refactor: organizar melhor as enums... +- [X] Setup GitHub Actions para testes - [X] automato nao funciona - [X] funciona corretamente - [X] id diff --git a/doc/examples/code.proc b/doc/examples/code.proc index 2e0ed33..6dda013 100644 --- a/doc/examples/code.proc +++ b/doc/examples/code.proc @@ -2,3 +2,4 @@ var a = 80 // something if (a == 80) // something else for (int i = 0) 'a' '\n' banana '\0' 4.4 +int a[2] = {1, 2} diff --git a/doc/flap3.jff b/doc/flap3.jff new file mode 100644 index 0000000..c6654e9 --- /dev/null +++ b/doc/flap3.jff @@ -0,0 +1,534 @@ + + fa + + + + 877.0 + 320.0 + + + 755.0 + 186.0 + + + 645.0 + 136.0 + + + 490.0 + 29.0 + + + + + 869.0 + 136.0 + + + 792.0 + 32.0 + + + + + 985.0 + 169.0 + + + 1046.0 + 87.0 + + + 1179.0 + 28.0 + + + + + 1070.0 + 243.0 + + + 1212.0 + 121.0 + + + 1190.0 + 235.0 + + + 1326.0 + 304.0 + + + 1452.0 + 141.0 + + + + + 1122.0 + 383.0 + + + 1348.0 + 417.0 + + + + + 1259.0 + 471.0 + + + 1332.0 + 559.0 + + + 1087.0 + 520.0 + + + 1110.0 + 690.0 + + + + + 1041.0 + 676.0 + + + + + 979.0 + 687.0 + + + + + 1244.0 + 673.0 + + + + + 921.0 + 670.0 + + + + + 867.0 + 694.0 + + + + + 810.0 + 683.0 + + + + + 743.0 + 701.0 + + + + + 665.0 + 702.0 + + + + + 535.0 + 173.0 + + + 419.0 + 48.0 + + + + + 307.0 + 48.0 + + + + + 464.0 + 217.0 + + + 208.0 + 53.0 + + + + + 92.0 + 52.0 + + + + + 386.0 + 256.0 + + + 39.0 + 132.0 + + + + + 468.0 + 346.0 + + + 85.0 + 249.0 + + + + + 93.0 + 352.0 + + + + + 511.0 + 422.0 + + + 81.0 + 434.0 + + + + + 164.0 + 485.0 + + + + + 455.0 + 504.0 + + + 257.0 + 537.0 + + + 
+ + 339.0 + 625.0 + + + + + 497.0 + 645.0 + + + + + 570.0 + 691.0 + + + + + + 14 + 16 + ch + + + 14 + 15 + aspas + + + 2 + 3 + outro* + + + 0 + 45 + abrechave + + + 28 + 29 + = + + + 0 + 0 + tab, espaco, \n + + + 0 + 2 + letra + + + 0 + 31 + & + + + 6 + 7 + digito + + + 0 + 27 + virgula + + + 9 + 10 + ch + + + 0 + 21 + multiplicacao + + + 2 + 2 + letra, digito, sublinha + + + 4 + 5 + outro* + + + 18 + 0 + \n + + + 39 + 40 + = + + + 42 + 44 + outro* + + + 0 + 46 + fechachave + + + 0 + 39 + > + + + 1 + 1 + sublinha + + + 0 + 9 + apostrofo + + + 31 + 33 + outro* + + + 0 + 28 + = + + + 9 + 11 + \ + + + 34 + 35 + | + + + 0 + 34 + | + + + 11 + 12 + 0, n + + + 17 + 18 + barra + + + 0 + 1 + sublinha + + + 12 + 13 + apostrofo + + + 0 + 14 + aspas + + + 0 + 26 + fechacolchete + + + 31 + 32 + & + + + 36 + 38 + outro* + + + 0 + 19 + mais + + + 0 + 25 + abrecolchete + + + 16 + 15 + aspas + + + 0 + 24 + fechaparenteses + + + 16 + 16 + ch + + + 0 + 4 + digito + + + 0 + 23 + abreparenteses + + + 10 + 13 + apostrofo + + + 1 + 2 + letra + + + 39 + 41 + outro* + + + 0 + 42 + ! 
+ + + 18 + 18 + outro* + + + 36 + 37 + = + + + 0 + 17 + barra + + + 4 + 6 + ponto + + + 28 + 30 + outro* + + + 17 + 22 + outro* + + + 0 + 20 + menos + + + 42 + 43 + = + + + 7 + 8 + outro* + + + 0 + 36 + < + + + 4 + 4 + digito + + + 7 + 7 + digito + + + \ No newline at end of file diff --git a/doc/flap3.png b/doc/flap3.png new file mode 100644 index 0000000..a95a90d Binary files /dev/null and b/doc/flap3.png differ diff --git a/lexer/char.c b/lexer/char.c index cf3f2ca..da58270 100644 --- a/lexer/char.c +++ b/lexer/char.c @@ -83,3 +83,11 @@ bool isNotDigitNorPeriod(char c) { return isNotDigit(c) && c != '.'; } bool isZeroOrN(char c) { return c == '0' || c == 'n'; } bool isIsPrintButNotBackSlash(char c) { return isprint(c) && c != '\\'; } + +bool isCurlyOpen(char c) { + return c == '{'; +} + +bool isCurlyClose(char c) { + return c == '}'; +} diff --git a/lexer/char.h b/lexer/char.h index 75895dd..85ed975 100644 --- a/lexer/char.h +++ b/lexer/char.h @@ -42,5 +42,7 @@ bool isNotRef(char c); bool isNotDigitNorPeriod(char c); bool isZeroOrN(char c); bool isIsPrintButNotBackSlash(char c); +bool isCurlyOpen(char c); +bool isCurlyClose(char c); #endif // !CHAR_H diff --git a/lexer/lexer.c b/lexer/lexer.c index a09619b..dd50e85 100644 --- a/lexer/lexer.c +++ b/lexer/lexer.c @@ -2,14 +2,14 @@ #include "./char.h" #include "transition.h" #include "types.h" +#include #include #include #include -#include #define MAX_LEXEME_SIZE 50 -#define MAX_STATES 44 -#define MAX_TRANSITIONS 20 +#define MAX_STATES 46 +#define MAX_TRANSITIONS 22 void error(char msg[]) { printf("%s\n", msg); @@ -41,7 +41,9 @@ struct Token lexerGetNextChar(FILE *fd, int *lineCount) { {34, isPipe, NON_ACCEPTING, NOT_OTHER, NOT_SYMBOL}, {36, isSmallerThan, NON_ACCEPTING, NOT_OTHER, NOT_SYMBOL}, {39, isLargerThan, NON_ACCEPTING, NOT_OTHER, NOT_SYMBOL}, - {42, isNeg, NON_ACCEPTING, NOT_OTHER, NOT_SYMBOL}}, + {42, isNeg, NON_ACCEPTING, NOT_OTHER, NOT_SYMBOL}, + {45, isCurlyOpen, SIGN, NOT_OTHER, OPEN_CURLY}, + {46, 
isCurlyClose, SIGN, NOT_OTHER, CLOSE_CURLY}}, // State 1 {{1, isUnderscore, NON_ACCEPTING, NOT_OTHER, NOT_SYMBOL}, {2, isAlpha, NON_ACCEPTING, NOT_OTHER, NOT_SYMBOL}}, diff --git a/lexer/printer.c b/lexer/printer.c index d3aa3c4..51fe1c6 100644 --- a/lexer/printer.c +++ b/lexer/printer.c @@ -3,7 +3,7 @@ #include #define TOKEN_CATEGORY_QTY 7 -#define SIGN_CATEGORY_QTY 20 +#define SIGN_CATEGORY_QTY 22 struct TokenCategoryHandler tokenCategoryHandleData[TOKEN_CATEGORY_QTY] = { {RSV, handleRsv}, {ID, handleId}, @@ -32,7 +32,8 @@ struct SignCategoryHandler signCategoryHandleData[SIGN_CATEGORY_QTY] = { {DIFFERENT, "DIFFERENT"}, {SMALLER_EQUAL, "SMALLER_EQUAL"}, {LARGER_EQUAL, "LARGER_EQUAL"}, -}; + {OPEN_CURLY, "OPEN_CURLY"}, + {CLOSE_CURLY, "CLOSE_CURLY"}}; void handleRsv(struct Token token) { printf("", token.lexeme); } diff --git a/lexer/printer.h b/lexer/printer.h index a98392c..21bd096 100644 --- a/lexer/printer.h +++ b/lexer/printer.h @@ -26,7 +26,7 @@ void handleSign(struct Token token); struct SignCategoryHandler { int signCategory; - const char* signCategoryLabel; + const char *signCategoryLabel; }; void printToken(struct Token token); diff --git a/lexer/transition.c b/lexer/transition.c index 3f87a55..beba8b3 100644 --- a/lexer/transition.c +++ b/lexer/transition.c @@ -1,3 +1,4 @@ +#include "transition.h" #include "./types.h" #include @@ -5,6 +6,8 @@ #include #include +#define KEYWORD_QTY 31 + /* * TOKEN BUILDING * NON ACCEPTING STATE: Increments the token (lexeme or number digits) depending @@ -52,16 +55,24 @@ bool handleTransitionAndWasTokenBuilt(FILE *fd, char ch, struct Token *token, } else if (token->category == ID || token->category == STRINGCON) { strcpy(token->lexeme, lexeme); // look up reserved - const char *reservedKeywords[31] = { - "const", "init", "endp", "char", "int", "real", - "bool", "do", "while", "endw", "var", "from", - "to", "dt", "by", "if", "endv", "elif", - "else", "endi", "getout", "getint", "getchar", "getreal", - "putint", "putchar", 
"putreal", "getstr", "putstr", "def", - "prot"}; - for (int i = 0; i < 28; i++) { - if (strcmp(token->lexeme, reservedKeywords[i]) == 0) { + struct ReservedWord reservedKeywords[KEYWORD_QTY] = { + {"const", CONST}, {"init", INIT}, {"endp", ENDP}, + {"char", CHAR}, {"int", INT}, {"real", REAL}, + {"bool", BOOL}, {"do", DO}, {"while", WHILE}, + {"endw", ENDW}, {"var", VAR}, {"from", FROM}, + {"to", TO}, {"dt", DT}, {"by", BY}, + {"if", IF}, {"endv", ENDV}, {"elif", ELIF}, + {"else", ELSE}, {"endi", ENDI}, {"getout", GETOUT}, + {"getint", GETINT}, {"getchar", GETCHAR}, {"getreal", GETREAL}, + {"putint", PUTINT}, {"putchar", PUTCHAR}, {"putreal", PUTREAL}, + {"getstr", GETSTR}, {"putstr", PUTSTR}, {"def", DEF}, + {"prot", PROT}}; + + for (int i = 0; i < KEYWORD_QTY; i++) { + if (strcmp(token->lexeme, reservedKeywords[i].lexeme) == 0) { token->category = RSV; + token->signCode = reservedKeywords[i].reservedCode; + strcpy(token->lexeme, reservedKeywords[i].lexeme); } } } else if (token->category == CHARCON) { diff --git a/lexer/transition.h b/lexer/transition.h index 88da058..93949e7 100644 --- a/lexer/transition.h +++ b/lexer/transition.h @@ -1,11 +1,52 @@ #ifndef TRANSITION_H #define TRANSITION_H -#include -#include #include "./types.h" +#include +#include + +bool handleTransitionAndWasTokenBuilt(FILE *fd, char ch, struct Token *token, + char *lexeme, int *lexemeSize, + struct Transition transition, + int *lineCount, int state); + +enum RESERVED { + CONST, + INIT, + ENDP, + CHAR, + INT, + REAL, + BOOL, + DO, + WHILE, + ENDW, + VAR, + FROM, + TO, + DT, + BY, + IF, + ENDV, + ELIF, + ELSE, + ENDI, + GETOUT, + GETINT, + GETCHAR, + GETREAL, + PUTINT, + PUTCHAR, + PUTREAL, + GETSTR, + PUTSTR, + DEF, + PROT +}; -bool handleTransitionAndWasTokenBuilt(FILE *fd, char ch, struct Token *token, char *lexeme, - int *lexemeSize, struct Transition transition, int *lineCount, int state); +struct ReservedWord { + const char *lexeme; + enum RESERVED reservedCode; +}; #endif // 
!TRANSITION_H diff --git a/lexer/types.h b/lexer/types.h index 583e6a2..615bb58 100644 --- a/lexer/types.h +++ b/lexer/types.h @@ -42,13 +42,12 @@ enum SIGN { NEGATION, DIFFERENT, SMALLER_EQUAL, - LARGER_EQUAL + LARGER_EQUAL, + OPEN_CURLY, + CLOSE_CURLY }; -enum IS_OTHER { - IS_OTHER, - NOT_OTHER -}; +enum IS_OTHER { IS_OTHER, NOT_OTHER }; struct Token { enum TOKEN_CAT category; diff --git a/main.c b/main.c index e73ee37..30fd3b8 100644 --- a/main.c +++ b/main.c @@ -12,6 +12,7 @@ int main(int argc, char *argv[]) { } FILE *fd; + int lineCount = 1; fd = fopen(argv[1], "r"); if (fd == NULL) { @@ -19,8 +20,6 @@ int main(int argc, char *argv[]) { return EXIT_FAILURE; } - int lineCount = 1; - // lexing! while (true) { struct Token token = lexerGetNextChar(fd, &lineCount); diff --git a/parser/parser.c b/parser/parser.c new file mode 100644 index 0000000..b49668f --- /dev/null +++ b/parser/parser.c @@ -0,0 +1,141 @@ +#include "parser.h" +#include "../lexer/lexer.h" +#include "../lexer/transition.h" +#include +#include + +/** + * prog accepts repetitions of declarations of variables (decl_list_var), or + * procedures (decl_list_proc). + */ +void prog(FILE *fd, int *lineCount) { + struct Token token = lexerGetNextChar(fd, lineCount); + + // Both declaration of variables and procedures start with reserved words. + if (token.category == RSV) { + // Valid variable declaration start tokens + while (token.signCode == CONST || token.signCode == CHAR || + token.signCode == INT || token.signCode == REAL || + token.signCode == BOOL) { + declListVar(fd, lineCount, token); + } + // Valid procedure declaration/definition tokens + while (token.signCode == DEF || token.signCode == PROT) { + declDefProc(fd, lineCount, token); + } + } +} + +/** + * decl_list_var accepts optionally a `const`, followed by variable type, and + * declaration of one or more variables. 
+ */ +void declListVar(FILE *fd, int *lineCount, struct Token token) { + // if const, read next + if (token.signCode == CONST) { + token = lexerGetNextChar(fd, lineCount); + } + // validate the type keyword, then advance to the first declarator + tipo(fd, lineCount, token); + token = lexerGetNextChar(fd, lineCount); + declVar(fd, lineCount, token); + // NOTE(review): declVar takes token by value, so its lookahead is not seen here + while (token.category == SIGN && token.signCode == COMMA) { + // consume next + token = lexerGetNextChar(fd, lineCount); + declVar(fd, lineCount, token); + } +} + +/** + * Checks that token is a type keyword (char, int, real or bool) and exits with + * a syntax error otherwise. Does not read from the lexer: token is passed by + * value, so the caller advances to the next token itself. + */ +void tipo(FILE *fd, int *lineCount, struct Token token) { + if (token.category != RSV || (token.signCode != CHAR && token.signCode != INT && + token.signCode != REAL && token.signCode != BOOL)) { + // report error if type not detected + fprintf(stderr, "Syntax error: variable type not detected\n"); + exit(EXIT_FAILURE); + } +} + +/** + * We already dealt with type. + * Variable declaration is valid: + * Simply declaring an identifier, or optionally initializing it. + * Can declare as array using square brackets with size given by number or id. + * Can optionally initialize it with curly braces. + */ +void declVar(FILE *fd, int *lineCount, struct Token token) { + // id mandatory. 
+ if (token.category != ID) { + fprintf(stderr, "Syntax error: expected variable ID\n"); + exit(EXIT_FAILURE); + } + // consume next + token = lexerGetNextChar(fd, lineCount); + + // handle simple variable or array + // simple variable + if (token.category == SIGN && token.signCode == ASSIGN) { + // assignment detected, check next token + token = lexerGetNextChar(fd, lineCount); + if (token.category == CHARCON || token.category == STRINGCON || + token.category == INTCON || token.category == REALCON) { + // consume next + token = lexerGetNextChar(fd, lineCount); + } else { + fprintf(stderr, "Syntax error: expected valid variable initialization\n"); + exit(EXIT_FAILURE); + } + + // array: should deal with multidimensional arrays (declaration) and + // initialization + } else if (token.category == SIGN && token.signCode == OPEN_BRACK) { + arrayDeclaration(fd, lineCount, token); + } +} + +// Parses one or more `[size]` suffixes (size is an ID or INTCON) followed by an +// optional `= {` initializer start; exits with a syntax error on malformed input. +void arrayDeclaration(FILE *fd, int *lineCount, struct Token token) { + while (token.category == SIGN && token.signCode == OPEN_BRACK) { + // consume next + token = lexerGetNextChar(fd, lineCount); + // valid array size + if (token.category != ID && token.category != INTCON) { + fprintf(stderr, "Syntax error: expected valid array size\n"); + exit(EXIT_FAILURE); + } else { + // the size must be followed by the closing bracket sign + token = lexerGetNextChar(fd, lineCount); + if (token.category != SIGN || token.signCode != CLOSE_BRACK) { + fprintf(stderr, "Syntax error: expected array bracket closing\n"); + exit(EXIT_FAILURE); + } else { + token = lexerGetNextChar(fd, lineCount); + if (token.category == SIGN && token.signCode == OPEN_BRACK) { + // dealing with multidimensional arrays + continue; + } else if (token.category == SIGN && token.signCode == ASSIGN) { + // an initializer must start with an opening curly brace + token = lexerGetNextChar(fd, lineCount); + if (token.category != SIGN || token.signCode != OPEN_CURLY) { + fprintf(stderr, "Syntax error: expected opening curly brace for array initialization\n"); + exit(EXIT_FAILURE); + } + // initializer contents are handled by arrayInitialization (currently a stub) 
+ arrayInitialization(fd, lineCount, token); + break; + + } else { + // end of the array declarator: the loop condition fails and we return + } + } + } + } +} + +void arrayInitialization(FILE *fd, int *lineCount, struct Token token) {} diff --git a/parser/parser.h b/parser/parser.h new file mode 100644 index 0000000..1b8f44b --- /dev/null +++ b/parser/parser.h @@ -0,0 +1,18 @@ +#ifndef PARSER_H +#define PARSER_H + +#include "../lexer/types.h" +#include <stdio.h> + +void prog(FILE *fd, int *lineCount); + +void declListVar(FILE *fd, int *lineCount, struct Token token); +void declDefProc(FILE *fd, int *lineCount, struct Token token); + +void tipo(FILE *fd, int *lineCount, struct Token token); +void declVar(FILE *fd, int *lineCount, struct Token token); + +void arrayDeclaration(FILE *fd, int *lineCount, struct Token token); +void arrayInitialization(FILE *fd, int *lineCount, struct Token token); + +#endif diff --git a/test/lexer_test.c b/test/lexer_test.c index 07e7f34..667caf4 100644 --- a/test/lexer_test.c +++ b/test/lexer_test.c @@ -1,13 +1,13 @@ #include "lexer_test.h" +#include "../lexer/lexer.h" +#include "../lexer/types.h" #include #include #include #include -#include "../lexer/types.h" -#include "../lexer/lexer.h" void lexerTest() { - const char *mock_data = "pr init 2 endp\n"; + const char *mock_data = "def init 2 endp\n"; FILE *mock_file = fmemopen((void *)mock_data, strlen(mock_data), "r"); if (mock_file == NULL) { @@ -29,7 +29,7 @@ void lexerTest() { } assert(tokens[0].category == RSV); - assert(strcmp(tokens[0].lexeme, "pr") == 0); + assert(strcmp(tokens[0].lexeme, "def") == 0); assert(tokens[1].category == RSV); assert(strcmp(tokens[1].lexeme, "init") == 0); diff --git a/test/test.c b/test/test.c index e64924c..b4a375f 100644 --- a/test/test.c +++ b/test/test.c @@ -1,6 +1,6 @@ #include "lexer_test.h" -#include #include +#include int main(void) { lexerTest();