diff --git a/README.md b/README.md index ba670b7..dba667b 100644 --- a/README.md +++ b/README.md @@ -1,14 +1,20 @@ -c4 - C in four functions -======================== - +c5 - C in four functions + AST + back-end code generator +======================================================== An exercise in minimalism. +This branch extends **c4.c** by adding: + + * Abstract Syntax Tree creation + * Back-end code generator function: **gen()** + * Standard ordering of function parameters on stack + * Native x86 version: **c5x86.c** + * Various optimizations + Try the following: - gcc -o c4 c4.c (you may need the -m32 option on 64bit machines) - ./c4 hello.c - ./c4 -s hello.c + gcc -o c5 c5.c (you may need the -m32 option on 64bit machines) + ./c5 hello.c + ./c5 -s hello.c - ./c4 c4.c hello.c - ./c4 c4.c c4.c hello.c - + ./c5 c5.c hello.c + ./c5 c5.c c5.c hello.c diff --git a/c4.c b/c4.c deleted file mode 100644 index bebeec1..0000000 --- a/c4.c +++ /dev/null @@ -1,525 +0,0 @@ -// c4.c - C in four functions - -// char, int, and pointer types -// if, while, return, and expression statements -// just enough features to allow self-compilation and a bit more - -// Written by Robert Swierczek - -#include -#include -#include -#include - -char *p, *lp, // current position in source code - *data; // data/bss pointer - -int *e, *le, // current position in emitted code - *id, // currently parsed identifier - *sym, // symbol table (simple list of identifiers) - tk, // current token - ival, // current token value - ty, // current expression type - loc, // local variable offset - line, // current line number - src, // print source and assembly flag - debug; // print executed instructions - -// tokens and classes (operators last and in precedence order) -enum { - Num = 128, Fun, Sys, Glo, Loc, Id, - Char, Else, Enum, If, Int, Return, Sizeof, While, - Assign, Cond, Lor, Lan, Or, Xor, And, Eq, Ne, Lt, Gt, Le, Ge, Shl, Shr, Add, Sub, Mul, Div, Mod, Inc, Dec, Brak -}; - -// opcodes -enum { LEA ,IMM ,JMP ,JSR ,BZ ,BNZ ,ENT ,ADJ ,LEV ,LI ,LC ,SI ,SC ,PSH , - OR ,XOR ,AND ,EQ ,NE ,LT ,GT ,LE ,GE ,SHL ,SHR ,ADD ,SUB ,MUL ,DIV ,MOD , - OPEN,READ,CLOS,PRTF,MALC,MSET,MCMP,EXIT }; - -// types -enum { CHAR, INT, PTR }; - -// identifier offsets (since we can't create an ident struct) -enum { Tk, Hash, Name, Class, Type, Val, HClass, HType, HVal, Idsz }; - -void next() -{ - char *pp; - - while (tk = *p) { - ++p; - if (tk == '\n') { - if (src) { - printf("%d: %.*s", line, p - lp, lp); - lp = p; - while (le < e) { - printf("%8.4s", &"LEA ,IMM ,JMP ,JSR ,BZ ,BNZ ,ENT ,ADJ ,LEV ,LI ,LC ,SI ,SC ,PSH ," - "OR ,XOR ,AND ,EQ ,NE ,LT ,GT ,LE ,GE ,SHL ,SHR ,ADD ,SUB ,MUL ,DIV ,MOD ," - "OPEN,READ,CLOS,PRTF,MALC,MSET,MCMP,EXIT,"[*++le * 5]); - if (*le <= ADJ) printf(" %d\n", *++le); else printf("\n"); - } - } - ++line; - } - else if (tk == '#') { - while (*p != 0 && *p != '\n') ++p; - } - else if ((tk >= 'a' && tk <= 'z') || (tk >= 'A' && tk <= 'Z') || tk == '_') { - pp = p - 1; - while ((*p >= 'a' && *p <= 'z') || (*p >= 'A' && *p <= 'Z') || (*p >= '0' && *p <= '9') || *p == '_') - tk = tk * 147 + *p++; - tk = (tk << 6) + (p - pp); - id = sym; - while (id[Tk]) { - if (tk == id[Hash] && !memcmp((char *)id[Name], pp, p - pp)) { tk = id[Tk]; return; } - id = id + Idsz; - } - id[Name] = (int)pp; - id[Hash] = tk; - tk = id[Tk] = Id; - return; - } - else if (tk >= '0' && tk <= '9') { - if (ival = tk - '0') { while (*p >= '0' && *p <= '9') ival = ival * 10 + *p++ - '0'; } - else if (*p == 'x' || *p == 'X') { - while ((tk = *++p) && ((tk >= '0' && tk <= '9') || (tk >= 'a' && tk <= 'f') || (tk >= 'A' && tk <= 'F'))) - ival = ival * 16 + (tk & 15) + (tk >= 'A' ? 9 : 0); - } - else { while (*p >= '0' && *p <= '7') ival = ival * 8 + *p++ - '0'; } - tk = Num; - return; - } - else if (tk == '/') { - if (*p == '/') { - ++p; - while (*p != 0 && *p != '\n') ++p; - } - else { - tk = Div; - return; - } - } - else if (tk == '\'' || tk == '"') { - pp = data; - while (*p != 0 && *p != tk) { - if ((ival = *p++) == '\\') { - if ((ival = *p++) == 'n') ival = '\n'; - } - if (tk == '"') *data++ = ival; - } - ++p; - if (tk == '"') ival = (int)pp; else tk = Num; - return; - } - else if (tk == '=') { if (*p == '=') { ++p; tk = Eq; } else tk = Assign; return; } - else if (tk == '+') { if (*p == '+') { ++p; tk = Inc; } else tk = Add; return; } - else if (tk == '-') { if (*p == '-') { ++p; tk = Dec; } else tk = Sub; return; } - else if (tk == '!') { if (*p == '=') { ++p; tk = Ne; } return; } - else if (tk == '<') { if (*p == '=') { ++p; tk = Le; } else if (*p == '<') { ++p; tk = Shl; } else tk = Lt; return; } - else if (tk == '>') { if (*p == '=') { ++p; tk = Ge; } else if (*p == '>') { ++p; tk = Shr; } else tk = Gt; return; } - else if (tk == '|') { if (*p == '|') { ++p; tk = Lor; } else tk = Or; return; } - else if (tk == '&') { if (*p == '&') { ++p; tk = Lan; } else tk = And; return; } - else if (tk == '^') { tk = Xor; return; } - else if (tk == '%') { tk = Mod; return; } - else if (tk == '*') { tk = Mul; return; } - else if (tk == '[') { tk = Brak; return; } - else if (tk == '?') { tk = Cond; return; } - else if (tk == '~' || tk == ';' || tk == '{' || tk == '}' || tk == '(' || tk == ')' || tk == ']' || tk == ',' || tk == ':') return; - } -} - -void expr(int lev) -{ - int t, *d; - - if (!tk) { printf("%d: unexpected eof in expression\n", line); exit(-1); } - else if (tk == Num) { *++e = IMM; *++e = ival; next(); ty = INT; } - else if (tk == '"') { - *++e = IMM; *++e = ival; next(); - while (tk == '"') next(); - data = (char *)((int)data + sizeof(int) & -sizeof(int)); ty = PTR; - } - else if (tk == Sizeof) { - next(); if (tk == '(') next(); else { printf("%d: open paren expected in sizeof\n", line); exit(-1); } - ty = INT; if (tk == Int) next(); else if (tk == Char) { next(); ty = CHAR; } - while (tk == Mul) { next(); ty = ty + PTR; } - if (tk == ')') next(); else { printf("%d: close paren expected in sizeof\n", line); exit(-1); } - *++e = IMM; *++e = (ty == CHAR) ? sizeof(char) : sizeof(int); - ty = INT; - } - else if (tk == Id) { - d = id; next(); - if (tk == '(') { - next(); - t = 0; - while (tk != ')') { expr(Assign); *++e = PSH; ++t; if (tk == ',') next(); } - next(); - if (d[Class] == Sys) *++e = d[Val]; - else if (d[Class] == Fun) { *++e = JSR; *++e = d[Val]; } - else { printf("%d: bad function call\n", line); exit(-1); } - if (t) { *++e = ADJ; *++e = t; } - ty = d[Type]; - } - else if (d[Class] == Num) { *++e = IMM; *++e = d[Val]; ty = INT; } - else { - if (d[Class] == Loc) { *++e = LEA; *++e = loc - d[Val]; } - else if (d[Class] == Glo) { *++e = IMM; *++e = d[Val]; } - else { printf("%d: undefined variable\n", line); exit(-1); } - *++e = ((ty = d[Type]) == CHAR) ? LC : LI; - } - } - else if (tk == '(') { - next(); - if (tk == Int || tk == Char) { - t = (tk == Int) ? INT : CHAR; next(); - while (tk == Mul) { next(); t = t + PTR; } - if (tk == ')') next(); else { printf("%d: bad cast\n", line); exit(-1); } - expr(Inc); - ty = t; - } - else { - expr(Assign); - if (tk == ')') next(); else { printf("%d: close paren expected\n", line); exit(-1); } - } - } - else if (tk == Mul) { - next(); expr(Inc); - if (ty > INT) ty = ty - PTR; else { printf("%d: bad dereference\n", line); exit(-1); } - *++e = (ty == CHAR) ? LC : LI; - } - else if (tk == And) { - next(); expr(Inc); - if (*e == LC || *e == LI) --e; else { printf("%d: bad address-of\n", line); exit(-1); } - ty = ty + PTR; - } - else if (tk == '!') { next(); expr(Inc); *++e = PSH; *++e = IMM; *++e = 0; *++e = EQ; ty = INT; } - else if (tk == '~') { next(); expr(Inc); *++e = PSH; *++e = IMM; *++e = -1; *++e = XOR; ty = INT; } - else if (tk == Add) { next(); expr(Inc); ty = INT; } - else if (tk == Sub) { - next(); *++e = IMM; - if (tk == Num) { *++e = -ival; next(); } else { *++e = -1; *++e = PSH; expr(Inc); *++e = MUL; } - ty = INT; - } - else if (tk == Inc || tk == Dec) { - t = tk; next(); expr(Inc); - if (*e == LC) { *e = PSH; *++e = LC; } - else if (*e == LI) { *e = PSH; *++e = LI; } - else { printf("%d: bad lvalue in pre-increment\n", line); exit(-1); } - *++e = PSH; - *++e = IMM; *++e = (ty > PTR) ? sizeof(int) : sizeof(char); - *++e = (t == Inc) ? ADD : SUB; - *++e = (ty == CHAR) ? SC : SI; - } - else { printf("%d: bad expression\n", line); exit(-1); } - - while (tk >= lev) { // "precedence climbing" or "Top Down Operator Precedence" method - t = ty; - if (tk == Assign) { - next(); - if (*e == LC || *e == LI) *e = PSH; else { printf("%d: bad lvalue in assignment\n", line); exit(-1); } - expr(Assign); *++e = ((ty = t) == CHAR) ? SC : SI; - } - else if (tk == Cond) { - next(); - *++e = BZ; d = ++e; - expr(Assign); - if (tk == ':') next(); else { printf("%d: conditional missing colon\n", line); exit(-1); } - *d = (int)(e + 3); *++e = JMP; d = ++e; - expr(Cond); - *d = (int)(e + 1); - } - else if (tk == Lor) { next(); *++e = BNZ; d = ++e; expr(Lan); *d = (int)(e + 1); ty = INT; } - else if (tk == Lan) { next(); *++e = BZ; d = ++e; expr(Or); *d = (int)(e + 1); ty = INT; } - else if (tk == Or) { next(); *++e = PSH; expr(Xor); *++e = OR; ty = INT; } - else if (tk == Xor) { next(); *++e = PSH; expr(And); *++e = XOR; ty = INT; } - else if (tk == And) { next(); *++e = PSH; expr(Eq); *++e = AND; ty = INT; } - else if (tk == Eq) { next(); *++e = PSH; expr(Lt); *++e = EQ; ty = INT; } - else if (tk == Ne) { next(); *++e = PSH; expr(Lt); *++e = NE; ty = INT; } - else if (tk == Lt) { next(); *++e = PSH; expr(Shl); *++e = LT; ty = INT; } - else if (tk == Gt) { next(); *++e = PSH; expr(Shl); *++e = GT; ty = INT; } - else if (tk == Le) { next(); *++e = PSH; expr(Shl); *++e = LE; ty = INT; } - else if (tk == Ge) { next(); *++e = PSH; expr(Shl); *++e = GE; ty = INT; } - else if (tk == Shl) { next(); *++e = PSH; expr(Add); *++e = SHL; ty = INT; } - else if (tk == Shr) { next(); *++e = PSH; expr(Add); *++e = SHR; ty = INT; } - else if (tk == Add) { - next(); *++e = PSH; expr(Mul); - if ((ty = t) > PTR) { *++e = PSH; *++e = IMM; *++e = sizeof(int); *++e = MUL; } - *++e = ADD; - } - else if (tk == Sub) { - next(); *++e = PSH; expr(Mul); - if (t > PTR && t == ty) { *++e = SUB; *++e = PSH; *++e = IMM; *++e = sizeof(int); *++e = DIV; ty = INT; } - else if ((ty = t) > PTR) { *++e = PSH; *++e = IMM; *++e = sizeof(int); *++e = MUL; *++e = SUB; } - else *++e = SUB; - } - else if (tk == Mul) { next(); *++e = PSH; expr(Inc); *++e = MUL; ty = INT; } - else if (tk == Div) { next(); *++e = PSH; expr(Inc); *++e = DIV; ty = INT; } - else if (tk == Mod) { next(); *++e = PSH; expr(Inc); *++e = MOD; ty = INT; } - else if (tk == Inc || tk == Dec) { - if (*e == LC) { *e = PSH; *++e = LC; } - else if (*e == LI) { *e = PSH; *++e = LI; } - else { printf("%d: bad lvalue in post-increment\n", line); exit(-1); } - *++e = PSH; *++e = IMM; *++e = (ty > PTR) ? sizeof(int) : sizeof(char); - *++e = (tk == Inc) ? ADD : SUB; - *++e = (ty == CHAR) ? SC : SI; - *++e = PSH; *++e = IMM; *++e = (ty > PTR) ? sizeof(int) : sizeof(char); - *++e = (tk == Inc) ? SUB : ADD; - next(); - } - else if (tk == Brak) { - next(); *++e = PSH; expr(Assign); - if (tk == ']') next(); else { printf("%d: close bracket expected\n", line); exit(-1); } - if (t > PTR) { *++e = PSH; *++e = IMM; *++e = sizeof(int); *++e = MUL; } - else if (t < PTR) { printf("%d: pointer type expected\n", line); exit(-1); } - *++e = ADD; - *++e = ((ty = t - PTR) == CHAR) ? LC : LI; - } - else { printf("%d: compiler error tk=%d\n", line, tk); exit(-1); } - } -} - -void stmt() -{ - int *a, *b; - - if (tk == If) { - next(); - if (tk == '(') next(); else { printf("%d: open paren expected\n", line); exit(-1); } - expr(Assign); - if (tk == ')') next(); else { printf("%d: close paren expected\n", line); exit(-1); } - *++e = BZ; b = ++e; - stmt(); - if (tk == Else) { - *b = (int)(e + 3); *++e = JMP; b = ++e; - next(); - stmt(); - } - *b = (int)(e + 1); - } - else if (tk == While) { - next(); - a = e + 1; - if (tk == '(') next(); else { printf("%d: open paren expected\n", line); exit(-1); } - expr(Assign); - if (tk == ')') next(); else { printf("%d: close paren expected\n", line); exit(-1); } - *++e = BZ; b = ++e; - stmt(); - *++e = JMP; *++e = (int)a; - *b = (int)(e + 1); - } - else if (tk == Return) { - next(); - if (tk != ';') expr(Assign); - *++e = LEV; - if (tk == ';') next(); else { printf("%d: semicolon expected\n", line); exit(-1); } - } - else if (tk == '{') { - next(); - while (tk != '}') stmt(); - next(); - } - else if (tk == ';') { - next(); - } - else { - expr(Assign); - if (tk == ';') next(); else { printf("%d: semicolon expected\n", line); exit(-1); } - } -} - -int main(int argc, char **argv) -{ - int fd, bt, ty, poolsz, *idmain; - int *pc, *sp, *bp, a, cycle; // vm registers - int i, *t; // temps - - --argc; ++argv; - if (argc > 0 && **argv == '-' && (*argv)[1] == 's') { src = 1; --argc; ++argv; } - if (argc > 0 && **argv == '-' && (*argv)[1] == 'd') { debug = 1; --argc; ++argv; } - if (argc < 1) { printf("usage: c4 [-s] [-d] file ...\n"); return -1; } - - if ((fd = open(*argv, 0)) < 0) { printf("could not open(%s)\n", *argv); return -1; } - - poolsz = 256*1024; // arbitrary size - if (!(sym = malloc(poolsz))) { printf("could not malloc(%d) symbol area\n", poolsz); return -1; } - if (!(le = e = malloc(poolsz))) { printf("could not malloc(%d) text area\n", poolsz); return -1; } - if (!(data = malloc(poolsz))) { printf("could not malloc(%d) data area\n", poolsz); return -1; } - if (!(sp = malloc(poolsz))) { printf("could not malloc(%d) stack area\n", poolsz); return -1; } - - memset(sym, 0, poolsz); - memset(e, 0, poolsz); - memset(data, 0, poolsz); - - p = "char else enum if int return sizeof while " - "open read close printf malloc memset memcmp exit void main"; - i = Char; while (i <= While) { next(); id[Tk] = i++; } // add keywords to symbol table - i = OPEN; while (i <= EXIT) { next(); id[Class] = Sys; id[Type] = INT; id[Val] = i++; } // add library to symbol table - next(); id[Tk] = Char; // handle void type - next(); idmain = id; // keep track of main - - if (!(lp = p = malloc(poolsz))) { printf("could not malloc(%d) source area\n", poolsz); return -1; } - if ((i = read(fd, p, poolsz-1)) <= 0) { printf("read() returned %d\n", i); return -1; } - p[i] = 0; - close(fd); - - // parse declarations - line = 1; - next(); - while (tk) { - bt = INT; // basetype - if (tk == Int) next(); - else if (tk == Char) { next(); bt = CHAR; } - else if (tk == Enum) { - next(); - if (tk != '{') next(); - if (tk == '{') { - next(); - i = 0; - while (tk != '}') { - if (tk != Id) { printf("%d: bad enum identifier %d\n", line, tk); return -1; } - next(); - if (tk == Assign) { - next(); - if (tk != Num) { printf("%d: bad enum initializer\n", line); return -1; } - i = ival; - next(); - } - id[Class] = Num; id[Type] = INT; id[Val] = i++; - if (tk == ',') next(); - } - next(); - } - } - while (tk != ';' && tk != '}') { - ty = bt; - while (tk == Mul) { next(); ty = ty + PTR; } - if (tk != Id) { printf("%d: bad global declaration\n", line); return -1; } - if (id[Class]) { printf("%d: duplicate global definition\n", line); return -1; } - next(); - id[Type] = ty; - if (tk == '(') { // function - id[Class] = Fun; - id[Val] = (int)(e + 1); - next(); i = 0; - while (tk != ')') { - ty = INT; - if (tk == Int) next(); - else if (tk == Char) { next(); ty = CHAR; } - while (tk == Mul) { next(); ty = ty + PTR; } - if (tk != Id) { printf("%d: bad parameter declaration\n", line); return -1; } - if (id[Class] == Loc) { printf("%d: duplicate parameter definition\n", line); return -1; } - id[HClass] = id[Class]; id[Class] = Loc; - id[HType] = id[Type]; id[Type] = ty; - id[HVal] = id[Val]; id[Val] = i++; - next(); - if (tk == ',') next(); - } - next(); - if (tk != '{') { printf("%d: bad function definition\n", line); return -1; } - loc = ++i; - next(); - while (tk == Int || tk == Char) { - bt = (tk == Int) ? INT : CHAR; - next(); - while (tk != ';') { - ty = bt; - while (tk == Mul) { next(); ty = ty + PTR; } - if (tk != Id) { printf("%d: bad local declaration\n", line); return -1; } - if (id[Class] == Loc) { printf("%d: duplicate local definition\n", line); return -1; } - id[HClass] = id[Class]; id[Class] = Loc; - id[HType] = id[Type]; id[Type] = ty; - id[HVal] = id[Val]; id[Val] = ++i; - next(); - if (tk == ',') next(); - } - next(); - } - *++e = ENT; *++e = i - loc; - while (tk != '}') stmt(); - *++e = LEV; - id = sym; // unwind symbol table locals - while (id[Tk]) { - if (id[Class] == Loc) { - id[Class] = id[HClass]; - id[Type] = id[HType]; - id[Val] = id[HVal]; - } - id = id + Idsz; - } - } - else { - id[Class] = Glo; - id[Val] = (int)data; - data = data + sizeof(int); - } - if (tk == ',') next(); - } - next(); - } - - if (!(pc = (int *)idmain[Val])) { printf("main() not defined\n"); return -1; } - if (src) return 0; - - // setup stack - sp = (int *)((int)sp + poolsz); - *--sp = EXIT; // call exit if main returns - *--sp = PSH; t = sp; - *--sp = argc; - *--sp = (int)argv; - *--sp = (int)t; - - // run... - cycle = 0; - while (1) { - i = *pc++; ++cycle; - if (debug) { - printf("%d> %.4s", cycle, - &"LEA ,IMM ,JMP ,JSR ,BZ ,BNZ ,ENT ,ADJ ,LEV ,LI ,LC ,SI ,SC ,PSH ," - "OR ,XOR ,AND ,EQ ,NE ,LT ,GT ,LE ,GE ,SHL ,SHR ,ADD ,SUB ,MUL ,DIV ,MOD ," - "OPEN,READ,CLOS,PRTF,MALC,MSET,MCMP,EXIT,"[i * 5]); - if (i <= ADJ) printf(" %d\n", *pc); else printf("\n"); - } - if (i == LEA) a = (int)(bp + *pc++); // load local address - else if (i == IMM) a = *pc++; // load global address or immediate - else if (i == JMP) pc = (int *)*pc; // jump - else if (i == JSR) { *--sp = (int)(pc + 1); pc = (int *)*pc; } // jump to subroutine - else if (i == BZ) pc = a ? pc + 1 : (int *)*pc; // branch if zero - else if (i == BNZ) pc = a ? (int *)*pc : pc + 1; // branch if not zero - else if (i == ENT) { *--sp = (int)bp; bp = sp; sp = sp - *pc++; } // enter subroutine - else if (i == ADJ) sp = sp + *pc++; // stack adjust - else if (i == LEV) { sp = bp; bp = (int *)*sp++; pc = (int *)*sp++; } // leave subroutine - else if (i == LI) a = *(int *)a; // load int - else if (i == LC) a = *(char *)a; // load char - else if (i == SI) *(int *)*sp++ = a; // store int - else if (i == SC) a = *(char *)*sp++ = a; // store char - else if (i == PSH) *--sp = a; // push - - else if (i == OR) a = *sp++ | a; - else if (i == XOR) a = *sp++ ^ a; - else if (i == AND) a = *sp++ & a; - else if (i == EQ) a = *sp++ == a; - else if (i == NE) a = *sp++ != a; - else if (i == LT) a = *sp++ < a; - else if (i == GT) a = *sp++ > a; - else if (i == LE) a = *sp++ <= a; - else if (i == GE) a = *sp++ >= a; - else if (i == SHL) a = *sp++ << a; - else if (i == SHR) a = *sp++ >> a; - else if (i == ADD) a = *sp++ + a; - else if (i == SUB) a = *sp++ - a; - else if (i == MUL) a = *sp++ * a; - else if (i == DIV) a = *sp++ / a; - else if (i == MOD) a = *sp++ % a; - - else if (i == OPEN) a = open((char *)sp[1], *sp); - else if (i == READ) a = read(sp[2], (char *)sp[1], *sp); - else if (i == CLOS) a = close(*sp); - else if (i == PRTF) { t = sp + pc[1]; a = printf((char *)t[-1], t[-2], t[-3], t[-4], t[-5], t[-6]); } - else if (i == MALC) a = (int)malloc(*sp); - else if (i == MSET) a = (int)memset((char *)sp[2], sp[1], *sp); - else if (i == MCMP) a = memcmp((char *)sp[2], (char *)sp[1], *sp); - else if (i == EXIT) { printf("exit(%d) cycle = %d\n", *sp, cycle); return *sp; } - else { printf("unknown instruction = %d! cycle = %d\n", i, cycle); return -1; } - } -} diff --git a/c5.c b/c5.c new file mode 100644 index 0000000..e364689 --- /dev/null +++ b/c5.c @@ -0,0 +1,583 @@ +// c5.c - C in five functions + +// c4.c plus +// abstract syntax tree creation +// back-end code generator +// parameters passed in correct order +// various optimizations + +// Written by Robert Swierczek + +#include +#include +#include +#include +#include +#ifdef _WIN32 +#include "w32.h" +#endif + +#define int long + +char *p, *lp, // current position in source code + *data; // data/bss pointer + +int *e, *le, // current position in emitted code + *id, // currently parsed identifier + *n, // current node in abstract syntax tree + *sym, // symbol table (simple list of identifiers) + tk, // current token + ival, // current token value + ty, // current expression type + line, // current line number + src, // print source and assembly flag + debug; // print executed instructions + +// tokens and classes (operators last and in precedence order) +enum { + Num = 128, Fun, Sys, Glo, Loc, Id, Load, Enter, + Char, Else, Enum, If, Int, Return, Sizeof, While, + Assign, Cond, Lor, Lan, Or, Xor, And, Eq, Ne, Lt, Gt, Le, Ge, Shl, Shr, Add, Sub, Mul, Div, Mod, Inc, Dec, Brak +}; + +// opcodes +enum { LEA ,IMM ,JMP ,JSR ,BZ ,BNZ ,ENT ,ADJ ,LEV ,LI ,LC ,SI ,SC ,PSH , + OR ,XOR ,AND ,EQ ,NE ,LT ,GT ,LE ,GE ,SHL ,SHR ,ADD ,SUB ,MUL ,DIV ,MOD , + OPEN,READ,CLOS,PRTF,MALC,MSET,MCMP,MCPY,EXIT }; + +// types +enum { CHAR, INT, PTR }; + +// identifier offsets (since we can't create an ident struct) +enum { Tk, Hash, Name, Class, Type, Val, HClass, HType, HVal, Idsz }; + +void next() +{ + char *pp; + + while (tk = *p) { + ++p; + if (tk == '\n') { + if (src) { + printf("%ld: %.*s", line, p - lp, lp); + lp = p; + while (le < e) { + printf("%8.4s", &"LEA ,IMM ,JMP ,JSR ,BZ ,BNZ ,ENT ,ADJ ,LEV ,LI ,LC ,SI ,SC ,PSH ," + "OR ,XOR ,AND ,EQ ,NE ,LT ,GT ,LE ,GE ,SHL ,SHR ,ADD ,SUB ,MUL ,DIV ,MOD ," + "OPEN,READ,CLOS,PRTF,MALC,MSET,MCMP,MCPY,EXIT,"[*++le * 5]); + if (*le <= ADJ) printf(" %ld\n", *++le); else printf("\n"); + } + } + ++line; + } + else if (tk == '#') { + while (*p != 0 && *p != '\n') ++p; + } + else if ((tk >= 'a' && tk <= 'z') || (tk >= 'A' && tk <= 'Z') || tk == '_') { + pp = p - 1; + while ((*p >= 'a' && *p <= 'z') || (*p >= 'A' && *p <= 'Z') || (*p >= '0' && *p <= '9') || *p == '_') + tk = tk * 147 + *p++; + tk = (tk << 6) + (p - pp); + id = sym; + while (id[Tk]) { + if (tk == id[Hash] && !memcmp((char *)id[Name], pp, p - pp)) { tk = id[Tk]; return; } + id = id + Idsz; + } + id[Name] = (int)pp; + id[Hash] = tk; + tk = id[Tk] = Id; + return; + } + else if (tk >= '0' && tk <= '9') { + if (ival = tk - '0') { while (*p >= '0' && *p <= '9') ival = ival * 10 + *p++ - '0'; } + else if (*p == 'x' || *p == 'X') { + while ((tk = *++p) && ((tk >= '0' && tk <= '9') || (tk >= 'a' && tk <= 'f') || (tk >= 'A' && tk <= 'F'))) + ival = ival * 16 + (tk & 15) + (tk >= 'A' ? 9 : 0); + } + else { while (*p >= '0' && *p <= '7') ival = ival * 8 + *p++ - '0'; } + tk = Num; + return; + } + else if (tk == '/') { + if (*p == '/') { + ++p; + while (*p != 0 && *p != '\n') ++p; + } + else { + tk = Div; + return; + } + } + else if (tk == '\'' || tk == '"') { + pp = data; + while (*p != 0 && *p != tk) { + if ((ival = *p++) == '\\') { + if ((ival = *p++) == 'n') ival = '\n'; + } + if (tk == '"') *data++ = ival; + } + ++p; + if (tk == '"') ival = (int)pp; else tk = Num; + return; + } + else if (tk == '=') { if (*p == '=') { ++p; tk = Eq; } else tk = Assign; return; } + else if (tk == '+') { if (*p == '+') { ++p; tk = Inc; } else tk = Add; return; } + else if (tk == '-') { if (*p == '-') { ++p; tk = Dec; } else tk = Sub; return; } + else if (tk == '!') { if (*p == '=') { ++p; tk = Ne; } return; } + else if (tk == '<') { if (*p == '=') { ++p; tk = Le; } else if (*p == '<') { ++p; tk = Shl; } else tk = Lt; return; } + else if (tk == '>') { if (*p == '=') { ++p; tk = Ge; } else if (*p == '>') { ++p; tk = Shr; } else tk = Gt; return; } + else if (tk == '|') { if (*p == '|') { ++p; tk = Lor; } else tk = Or; return; } + else if (tk == '&') { if (*p == '&') { ++p; tk = Lan; } else tk = And; return; } + else if (tk == '^') { tk = Xor; return; } + else if (tk == '%') { tk = Mod; return; } + else if (tk == '*') { tk = Mul; return; } + else if (tk == '[') { tk = Brak; return; } + else if (tk == '?') { tk = Cond; return; } + else if (tk == '~' || tk == ';' || tk == '{' || tk == '}' || tk == '(' || tk == ')' || tk == ']' || tk == ',' || tk == ':') return; + } +} + +void expr(int lev) +{ + int t, *d, *b; + + if (!tk) { printf("%ld: unexpected eof in expression\n", line); exit(-1); } + else if (tk == Num) { *--n = ival; *--n = Num; next(); ty = INT; } + else if (tk == '"') { + *--n = ival; *--n = Num; next(); + while (tk == '"') next(); + data = (char *)((int)data + sizeof(int) & -sizeof(int)); ty = PTR; + } + else if (tk == Sizeof) { + next(); if (tk == '(') next(); else { printf("%ld: open paren expected in sizeof\n", line); exit(-1); } + ty = INT; if (tk == Int) next(); else if (tk == Char) { next(); ty = CHAR; } + while (tk == Mul) { next(); ty = ty + PTR; } + if (tk == ')') next(); else { printf("%ld: close paren expected in sizeof\n", line); exit(-1); } + *--n = (ty == CHAR) ? sizeof(char) : sizeof(int); *--n = Num; + ty = INT; + } + else if (tk == Id) { + d = id; next(); + if (tk == '(') { + if (d[Class] != Sys && d[Class] != Fun) { printf("%ld: bad function call\n", line); exit(-1); } + next(); + t = 0; b = 0; + while (tk != ')') { expr(Assign); *--n = (int)b; b = n; ++t; if (tk == ',') next(); } + next(); + *--n = t; *--n = d[Val]; *--n = (int)b; *--n = d[Class]; + ty = d[Type]; + } + else if (d[Class] == Num) { *--n = d[Val]; *--n = Num; ty = INT; } + else { + if (d[Class] == Loc) { *--n = d[Val]; *--n = Loc; } + else if (d[Class] == Glo) { *--n = d[Val]; *--n = Num; } + else { printf("%ld: undefined variable\n", line); exit(-1); } + *--n = ty = d[Type]; *--n = Load; + } + } + else if (tk == '(') { + next(); + if (tk == Int || tk == Char) { + t = (tk == Int) ? INT : CHAR; next(); + while (tk == Mul) { next(); t = t + PTR; } + if (tk == ')') next(); else { printf("%ld: bad cast\n", line); exit(-1); } + expr(Inc); + ty = t; + } + else { + expr(Assign); + if (tk == ')') next(); else { printf("%ld: close paren expected\n", line); exit(-1); } + } + } + else if (tk == Mul) { + next(); expr(Inc); + if (ty > INT) ty = ty - PTR; else { printf("%ld: bad dereference\n", line); exit(-1); } + *--n = ty; *--n = Load; + } + else if (tk == And) { + next(); expr(Inc); + if (*n == Load) n = n+2; else { printf("%ld: bad address-of\n", line); exit(-1); } + ty = ty + PTR; + } + else if (tk == '!') { + next(); expr(Inc); + if (*n == Num) n[1] = !n[1]; else { *--n = 0; *--n = Num; --n; *n = (int)(n+3); *--n = Eq; } + ty = INT; + } + else if (tk == '~') { + next(); expr(Inc); + if (*n == Num) n[1] = ~n[1]; else { *--n = -1; *--n = Num; --n; *n = (int)(n+3); *--n = Xor; } + ty = INT; + } + else if (tk == Add) { next(); expr(Inc); ty = INT; } + else if (tk == Sub) { + next(); expr(Inc); + if (*n == Num) n[1] = -n[1]; else { *--n = -1; *--n = Num; --n; *n = (int)(n+3); *--n = Mul; } + ty = INT; + } + else if (tk == Inc || tk == Dec) { + t = tk; next(); expr(Inc); + if (*n == Load) *n = t; else { printf("%ld: bad lvalue in pre-increment\n", line); exit(-1); } + } + else { printf("%ld: bad expression\n", line); exit(-1); } + + while (tk >= lev) { // "precedence climbing" or "Top Down Operator Precedence" method + t = ty; b = n; + if (tk == Assign) { + next(); + if (*n != Load) { printf("%ld: bad lvalue in assignment\n", line); exit(-1); } + expr(Assign); *--n = (int)(b+2); *--n = ty = t; *--n = Assign; + } + else if (tk == Cond) { + next(); + expr(Assign); + if (tk == ':') next(); else { printf("%ld: conditional missing colon\n", line); exit(-1); } + d = n; + expr(Cond); + --n; *n = (int)(n+1); *--n = (int)d; *--n = (int)b; *--n = Cond; + } + else if (tk == Lor) { next(); expr(Lan); if (*n==Num && *b==Num) n[1] = b[1] || n[1]; else { *--n = (int)b; *--n = Lor; } ty = INT; } + else if (tk == Lan) { next(); expr(Or); if (*n==Num && *b==Num) n[1] = b[1] && n[1]; else { *--n = (int)b; *--n = Lan; } ty = INT; } + else if (tk == Or) { next(); expr(Xor); if (*n==Num && *b==Num) n[1] = b[1] | n[1]; else { *--n = (int)b; *--n = Or; } ty = INT; } + else if (tk == Xor) { next(); expr(And); if (*n==Num && *b==Num) n[1] = b[1] ^ n[1]; else { *--n = (int)b; *--n = Xor; } ty = INT; } + else if (tk == And) { next(); expr(Eq); if (*n==Num && *b==Num) n[1] = b[1] & n[1]; else { *--n = (int)b; *--n = And; } ty = INT; } + else if (tk == Eq) { next(); expr(Lt); if (*n==Num && *b==Num) n[1] = b[1] == n[1]; else { *--n = (int)b; *--n = Eq; } ty = INT; } + else if (tk == Ne) { next(); expr(Lt); if (*n==Num && *b==Num) n[1] = b[1] != n[1]; else { *--n = (int)b; *--n = Ne; } ty = INT; } + else if (tk == Lt) { next(); expr(Shl); if (*n==Num && *b==Num) n[1] = b[1] < n[1]; else { *--n = (int)b; *--n = Lt; } ty = INT; } + else if (tk == Gt) { next(); expr(Shl); if (*n==Num && *b==Num) n[1] = b[1] > n[1]; else { *--n = (int)b; *--n = Gt; } ty = INT; } + else if (tk == Le) { next(); expr(Shl); if (*n==Num && *b==Num) n[1] = b[1] <= n[1]; else { *--n = (int)b; *--n = Le; } ty = INT; } + else if (tk == Ge) { next(); expr(Shl); if (*n==Num && *b==Num) n[1] = b[1] >= n[1]; else { *--n = (int)b; *--n = Ge; } ty = INT; } + else if (tk == Shl) { next(); expr(Add); if (*n==Num && *b==Num) n[1] = b[1] << n[1]; else { *--n = (int)b; *--n = Shl; } ty = INT; } + else if (tk == Shr) { next(); expr(Add); if (*n==Num && *b==Num) n[1] = b[1] >> n[1]; else { *--n = (int)b; *--n = Shr; } ty = INT; } + else if (tk == Add) { + next(); expr(Mul); + if ((ty = t) > PTR) { if (*n == Num) n[1] = n[1] * sizeof(int); else { *--n = sizeof(int); *--n = Num; --n; *n = (int)(n+3); *--n = Mul; } } + if (*n == Num && *b == Num) n[1] = b[1] + n[1]; else { *--n = (int)b; *--n = Add; } + } + else if (tk == Sub) { + next(); expr(Mul); + if ((ty = t) > PTR) { if (*n == Num) n[1] = n[1] * sizeof(int); else { *--n = sizeof(int); *--n = Num; --n; *n = (int)(n+3); *--n = Mul; } } + if (*n == Num && *b == Num) n[1] = b[1] - n[1]; else { *--n = (int)b; *--n = Sub; } + } + else if (tk == Mul) { next(); expr(Inc); if (*n==Num && *b==Num) n[1] = b[1] * n[1]; else { *--n = (int)b; *--n = Mul; } ty = INT; } + else if (tk == Div) { next(); expr(Inc); if (*n==Num && *b==Num) n[1] = b[1] / n[1]; else { *--n = (int)b; *--n = Div; } ty = INT; } + else if (tk == Mod) { next(); expr(Inc); if (*n==Num && *b==Num) n[1] = b[1] % n[1]; else { *--n = (int)b; *--n = Mod; } ty = INT; } + else if (tk == Inc || tk == Dec) { + if (*n == Load) *n = tk; else { printf("%ld: bad lvalue in post-increment\n", line); exit(-1); } + *--n = (ty > PTR) ? sizeof(int) : sizeof(char); *--n = Num; + *--n = (int)b; *--n = (tk == Inc) ? Sub : Add; + next(); + } + else if (tk == Brak) { + next(); expr(Assign); + if (tk == ']') next(); else { printf("%ld: close bracket expected\n", line); exit(-1); } + if (t > PTR) { if (*n == Num) n[1] = n[1] * sizeof(int); else { *--n = sizeof(int); *--n = Num; --n; *n = (int)(n+3); *--n = Mul; } } + else if (t < PTR) { printf("%ld: pointer type expected\n", line); exit(-1); } + if (*n == Num && *b == Num) n[1] = b[1] + n[1]; else { *--n = (int)b; *--n = Add; } + *--n = ty = t - PTR; *--n = Load; + } + else { printf("%ld: compiler error tk=%ld\n", line, tk); exit(-1); } + } +} + +void stmt() +{ + int *a, *b, *c; + + if (tk == If) { + next(); + if (tk == '(') next(); else { printf("%ld: open paren expected\n", line); exit(-1); } + expr(Assign); a = n; + if (tk == ')') next(); else { printf("%ld: close paren expected\n", line); exit(-1); } + stmt(); b = n; + if (tk == Else) { next(); stmt(); c = n; } else c = 0; + *--n = (int)c; *--n = (int)b; *--n = (int)a; *--n = Cond; + } + else if (tk == While) { + next(); + if (tk == '(') next(); else { printf("%ld: open paren expected\n", line); exit(-1); } + expr(Assign); a = n; + if (tk == ')') next(); else { printf("%ld: close paren expected\n", line); exit(-1); } + stmt(); + *--n = (int)a; *--n = While; + } + else if (tk == Return) { + next(); + if (tk != ';') { expr(Assign); a = n; } else a = 0; + if (tk == ';') next(); else { printf("%ld: semicolon expected\n", line); exit(-1); } + *--n = (int)a; *--n = Return; + } + else if (tk == '{') { + next(); + *--n = ';'; + while (tk != '}') { a = n; stmt(); *--n = (int)a; *--n = '{'; } + next(); + } + else if (tk == ';') { + next(); *--n = ';'; + } + else { + expr(Assign); + if (tk == ';') next(); else { printf("%ld: semicolon expected\n", line); exit(-1); } + } +} + +void gen(int *n) +{ + int i, *b; + + i = *n; + if (i == Num) { *++e = IMM; *++e = n[1]; } + else if (i == Loc) { *++e = LEA; *++e = n[1]; } + else if (i == Load) { gen(n+2); *++e = (n[1] == CHAR) ? LC : LI; } + else if (i == Assign) { gen((int *)n[2]); *++e = PSH; gen(n+3); *++e = (n[1] == CHAR) ? SC : SI; } + else if (i == Inc || i == Dec) { + gen(n+2); + *++e = PSH; *++e = (n[1] == CHAR) ? LC : LI; *++e = PSH; + *++e = IMM; *++e = (n[1] > PTR) ? sizeof(int) : sizeof(char); + *++e = (i == Inc) ? ADD : SUB; + *++e = (n[1] == CHAR) ? SC : SI; + } + else if (i == Cond) { + gen((int *)n[1]); + *++e = BZ; b = ++e; + gen((int *)n[2]); + if (n[3]) { *b = (int)(e + 3); *++e = JMP; b = ++e; gen((int *)n[3]); } + *b = (int)(e + 1); + } + else if (i == Lor) { gen((int *)n[1]); *++e = BNZ; b = ++e; gen(n+2); *b = (int)(e + 1); } + else if (i == Lan) { gen((int *)n[1]); *++e = BZ; b = ++e; gen(n+2); *b = (int)(e + 1); } + else if (i == Or) { gen((int *)n[1]); *++e = PSH; gen(n+2); *++e = OR; } + else if (i == Xor) { gen((int *)n[1]); *++e = PSH; gen(n+2); *++e = XOR; } + else if (i == And) { gen((int *)n[1]); *++e = PSH; gen(n+2); *++e = AND; } + else if (i == Eq) { gen((int *)n[1]); *++e = PSH; gen(n+2); *++e = EQ; } + else if (i == Ne) { gen((int *)n[1]); *++e = PSH; gen(n+2); *++e = NE; } + else if (i == Lt) { gen((int *)n[1]); *++e = PSH; gen(n+2); *++e = LT; } + else if (i == Gt) { gen((int *)n[1]); *++e = PSH; gen(n+2); *++e = GT; } + else if (i == Le) { gen((int *)n[1]); *++e = PSH; gen(n+2); *++e = LE; } + else if (i == Ge) { gen((int *)n[1]); *++e = PSH; gen(n+2); *++e = GE; } + else if (i == Shl) { gen((int *)n[1]); *++e = PSH; gen(n+2); *++e = SHL; } + else if (i == Shr) { gen((int *)n[1]); *++e = PSH; gen(n+2); *++e = SHR; } + else if (i == Add) { gen((int *)n[1]); *++e = PSH; gen(n+2); *++e = ADD; } + else if (i == Sub) { gen((int *)n[1]); *++e = PSH; gen(n+2); *++e = SUB; } + else if (i == Mul) { gen((int *)n[1]); *++e = PSH; gen(n+2); *++e = MUL; } + else if (i == Div) { gen((int *)n[1]); *++e = PSH; gen(n+2); *++e = DIV; } + else if (i == Mod) { gen((int *)n[1]); *++e = PSH; gen(n+2); *++e = MOD; } + else if (i == Sys || i == Fun) { + b = (int *)n[1]; + while (b) { gen(b+1); *++e = PSH; b = (int *)*b; } + if (i == Fun) *++e = JSR; *++e = n[2]; + if (n[3]) { *++e = ADJ; *++e = n[3]; } + } + else if (i == While) { + *++e = JMP; b = ++e; gen(n+2); *b = (int)(e + 1); + gen((int *)n[1]); + *++e = BNZ; *++e = (int)(b + 1); + } + else if (i == Return) { if (n[1]) gen((int *)n[1]); *++e = LEV; } + else if (i == '{') { gen((int *)n[1]); gen(n+2); } + else if (i == Enter) { *++e = ENT; *++e = n[1]; gen(n+2); *++e = LEV; } + else if (i != ';') { printf("%ld: compiler error gen=%ld\n", line, i); exit(-1); } +} + +#undef int +int main(int argc, char **argv) +#define int long +{ + int fd, bt, ty, poolsz, *idmain, *ast; + int *pc, *sp, *bp, a, cycle; // vm registers + int i, *t; // temps + + --argc; ++argv; + if (argc > 0 && **argv == '-' && (*argv)[1] == 's') { src = 1; --argc; ++argv; } + if (argc > 0 && **argv == '-' && (*argv)[1] == 'd') { debug = 1; --argc; ++argv; } + if (argc < 1) { printf("usage: c5 [-s] [-d] file ...\n"); return -1; } + + if ((fd = open(*argv, 0)) < 0) { printf("could not open(%s)\n", *argv); return -1; } + + poolsz = 256*1024; // arbitrary size + if (!(sym = malloc(poolsz))) { printf("could not malloc(%ld) symbol area\n", poolsz); return -1; } + if (!(le = e = malloc(poolsz))) { printf("could not malloc(%ld) text area\n", poolsz); return -1; } + if (!(data = malloc(poolsz))) { printf("could not malloc(%ld) data area\n", poolsz); return -1; } + if (!(sp = malloc(poolsz))) { printf("could not malloc(%ld) stack area\n", poolsz); return -1; } + if (!(ast = malloc(poolsz))) { printf("could not malloc(%ld) abstract syntax tree area\n", poolsz); return -1; } + ast = (int *)((int)ast + poolsz); // abstract syntax tree is most efficiently built as a stack + + memset(sym, 0, poolsz); + memset(e, 0, poolsz); + memset(data, 0, poolsz); + + p = "char else enum if int return sizeof while " + "open read close printf malloc memset memcmp memcpy exit void main"; + i = Char; while (i <= While) { next(); id[Tk] = i++; } // add keywords to symbol table + i = OPEN; while (i <= EXIT) { next(); id[Class] = Sys; id[Type] = INT; id[Val] = i++; } // add library to symbol table + next(); id[Tk] = Char; // handle void type + next(); idmain = id; // keep track of main + + if (!(lp = p = malloc(poolsz))) { printf("could not malloc(%ld) source area\n", poolsz); return -1; } + if ((i = read(fd, p, poolsz-1)) <= 0) { printf("read() returned %ld\n", i); return -1; } + p[i] = 0; + close(fd); + + // parse declarations + line = 1; + next(); + while (tk) { + bt = INT; // basetype + if (tk == Int) next(); + else if (tk == Char) { next(); bt = CHAR; } + else if (tk == Enum) { + next(); + if (tk != '{') next(); + if (tk == '{') { + next(); + i = 0; + while (tk != '}') { + if (tk != Id) { printf("%ld: bad enum identifier %ld\n", line, tk); return -1; } + next(); + if (tk == Assign) { + next(); + n = ast; expr(Cond); + if (*n != Num) { printf("%ld: bad enum initializer\n", line); return -1; } + i = n[1]; + } + id[Class] = Num; id[Type] = INT; id[Val] = i++; + if (tk == ',') next(); + } + next(); + } + } + while (tk != ';' && tk != '}') { + ty = bt; + while (tk == Mul) { next(); ty = ty + PTR; } + if (tk != Id) { printf("%ld: bad global declaration\n", line); return -1; } + if (id[Class]) { printf("%ld: duplicate global definition\n", line); return -1; } + next(); + id[Type] = ty; + if (tk == '(') { // function + id[Class] = Fun; + id[Val] = (int)(e + 1); + next(); i = 2; + while (tk != ')') { + ty = INT; + if (tk == Int) next(); + else if (tk == Char) { next(); ty = CHAR; } + while (tk == Mul) { next(); ty = ty + PTR; } + if (tk != Id) { printf("%ld: bad parameter declaration\n", line); return -1; } + if (id[Class] == Loc) { printf("%ld: duplicate parameter definition\n", line); return -1; } + id[HClass] = id[Class]; id[Class] = Loc; + id[HType] = id[Type]; id[Type] = ty; + id[HVal] = id[Val]; id[Val] = i++; + next(); + if (tk == ',') next(); + } + next(); + if (tk != '{') { printf("%ld: bad function definition\n", line); return -1; } + i = 0; + next(); + while (tk == Int || tk == Char) { + bt = (tk == Int) ? INT : CHAR; + next(); + while (tk != ';') { + ty = bt; + while (tk == Mul) { next(); ty = ty + PTR; } + if (tk != Id) { printf("%ld: bad local declaration\n", line); return -1; } + if (id[Class] == Loc) { printf("%ld: duplicate local definition\n", line); return -1; } + id[HClass] = id[Class]; id[Class] = Loc; + id[HType] = id[Type]; id[Type] = ty; + id[HVal] = id[Val]; id[Val] = --i; + next(); + if (tk == ',') next(); + } + next(); + } + n = ast; + *--n = ';'; while (tk != '}') { t = n; stmt(); *--n = (int)t; *--n = '{'; } + *--n = -i; *--n = Enter; + gen(n); + id = sym; // unwind symbol table locals + while (id[Tk]) { + if (id[Class] == Loc) { + id[Class] = id[HClass]; + id[Type] = id[HType]; + id[Val] = id[HVal]; + } + id = id + Idsz; + } + } + else { + id[Class] = Glo; + id[Val] = (int)data; + data = data + sizeof(int); + } + if (tk == ',') next(); + } + next(); + } + + if (!(pc = (int *)idmain[Val])) { printf("main() not defined\n"); return -1; } + if (src) return 0; + + // setup stack + sp = (int *)((int)sp + poolsz); + *--sp = EXIT; // call exit if main returns + *--sp = PSH; t = sp; + *--sp = (int)argv; + *--sp = argc; + *--sp = (int)t; + + // run... + cycle = 0; + while (1) { + i = *pc++; ++cycle; + if (debug) { + printf("%ld> %.4s", cycle, + &"LEA ,IMM ,JMP ,JSR ,BZ ,BNZ ,ENT ,ADJ ,LEV ,LI ,LC ,SI ,SC ,PSH ," + "OR ,XOR ,AND ,EQ ,NE ,LT ,GT ,LE ,GE ,SHL ,SHR ,ADD ,SUB ,MUL ,DIV ,MOD ," + "OPEN,READ,CLOS,PRTF,MALC,MSET,MCMP,MCPY,EXIT,"[i * 5]); + if (i <= ADJ) printf(" %ld\n", *pc); else printf("\n"); + } + if (i == LEA) a = (int)(bp + *pc++); // load local address + else if (i == IMM) a = *pc++; // load global address or immediate + else if (i == JMP) pc = (int *)*pc; // jump + else if (i == JSR) { *--sp = (int)(pc + 1); pc = (int *)*pc; } // jump to subroutine + else if (i == BZ) pc = a ? pc + 1 : (int *)*pc; // branch if zero + else if (i == BNZ) pc = a ? (int *)*pc : pc + 1; // branch if not zero + else if (i == ENT) { *--sp = (int)bp; bp = sp; sp = sp - *pc++; } // enter subroutine + else if (i == ADJ) sp = sp + *pc++; // stack adjust + else if (i == LEV) { sp = bp; bp = (int *)*sp++; pc = (int *)*sp++; } // leave subroutine + else if (i == LI) a = *(int *)a; // load int + else if (i == LC) a = *(char *)a; // load char + else if (i == SI) *(int *)*sp++ = a; // store int + else if (i == SC) a = *(char *)*sp++ = a; // store char + else if (i == PSH) *--sp = a; // push + + else if (i == OR) a = *sp++ | a; + else if (i == XOR) a = *sp++ ^ a; + else if (i == AND) a = *sp++ & a; + else if (i == EQ) a = *sp++ == a; + else if (i == NE) a = *sp++ != a; + else if (i == LT) a = *sp++ < a; + else if (i == GT) a = *sp++ > a; + else if (i == LE) a = *sp++ <= a; + else if (i == GE) a = *sp++ >= a; + else if (i == SHL) a = *sp++ << a; + else if (i == SHR) a = *sp++ >> a; + else if (i == ADD) a = *sp++ + a; + else if (i == SUB) a = *sp++ - a; + else if (i == MUL) a = *sp++ * a; + else if (i == DIV) a = *sp++ / a; + else if (i == MOD) a = *sp++ % a; + + else if (i == OPEN) a = open((char *)*sp, sp[1]); + else if (i == READ) a = read(*sp, (char *)sp[1], sp[2]); + else if (i == CLOS) a = close(*sp); + else if (i == PRTF) a = printf((char *)*sp, sp[1], sp[2], sp[3], sp[4], sp[5]); + else if (i == MALC) a = (int)malloc(*sp); + else if (i == MSET) a = (int)memset((char *)*sp, sp[1], sp[2]); + else if (i == MCMP) a = memcmp((char *)*sp, (char *)sp[1], sp[2]); + else if (i == MCPY) a = (int)memcpy((char *)*sp, (char *)sp[1], sp[2]); + else if (i == EXIT) { printf("exit(%ld) cycle = %ld\n", *sp, cycle); return *sp; } + else { printf("unknown instruction = %ld! cycle = %ld\n", i, cycle); return -1; } + } +} diff --git a/c5x86.c b/c5x86.c new file mode 100644 index 0000000..b298146 --- /dev/null +++ b/c5x86.c @@ -0,0 +1,579 @@ +// c5x86.c - C in five functions (native x86 version) + +// c4.c plus +// abstract syntax tree creation +// back-end code generator +// parameters passed in correct order +// various optimizations + +// Written by Robert Swierczek + +#include +#include +#include +#include +#include +#ifdef _WIN32 +#include "w32.h" +#else +#include +#endif + +char *p, *lp, // current position in source code + *e, // current position in emitted code + *data, // data/bss pointer + *dsym; // external function lookup name + +int *id, // currently parsed identifier + *n, // current node in abstract syntax tree + *sym, // symbol table (simple list of identifiers) + tk, // current token + ival, // current token value + ty, // current expression type + line, // current line number + src; // print source and assembly flag + +// tokens and classes (operators last and in precedence order) +enum { + Num = 128, Fun, Glo, Loc, Id, Load, Enter, + Char, Else, Enum, If, Int, Return, Sizeof, While, + Assign, Cond, Lor, Lan, Or, Xor, And, Eq, Ne, Lt, Gt, Le, Ge, Shl, Shr, Add, Sub, Mul, Div, Mod, Inc, Dec, Brak +}; + +// types +enum { CHAR, INT, PTR }; + +// identifier offsets (since we can't create an ident struct) +enum { Tk, Hash, Name, Class, Type, Val, HClass, HType, HVal, Idsz }; + +void next() +{ + char *pp; + + while (tk = *p) { + ++p; + if (tk == '\n') { + if (src) { + printf("%d: %.*s", line, p - lp, lp); + lp = p; + } + ++line; + } + else if (tk == '#') { + while (*p != 0 && *p != '\n') ++p; + } + else if ((tk >= 'a' && tk <= 'z') || (tk >= 'A' && tk <= 'Z') || tk == '_') { + pp = p - 1; + while ((*p >= 'a' && *p <= 'z') || (*p >= 'A' && *p <= 'Z') || (*p >= '0' && *p <= '9') || *p == '_') + tk = tk * 147 + *p++; + tk = (tk << 6) + (p - pp); + id = sym; + while (id[Tk]) { + if (tk == id[Hash] && !memcmp((char *)id[Name], pp, p - pp)) { tk = id[Tk]; return; } + id = id + Idsz; + } + id[Name] = (int)pp; + id[Hash] = tk; + tk = id[Tk] = Id; + return; + } + else if (tk >= '0' && tk <= '9') { + if (ival = tk - '0') { while (*p >= '0' && *p <= '9') ival = ival * 10 + *p++ - '0'; } + else if (*p == 'x' || *p == 'X') { + while ((tk = *++p) && ((tk >= '0' && tk <= '9') || (tk >= 'a' && tk <= 'f') || (tk >= 'A' && tk <= 'F'))) + ival = ival * 16 + (tk & 15) + (tk >= 'A' ? 9 : 0); + } + else { while (*p >= '0' && *p <= '7') ival = ival * 8 + *p++ - '0'; } + tk = Num; + return; + } + else if (tk == '/') { + if (*p == '/') { + ++p; + while (*p != 0 && *p != '\n') ++p; + } + else { + tk = Div; + return; + } + } + else if (tk == '\'' || tk == '"') { + pp = data; + while (*p != 0 && *p != tk) { + if ((ival = *p++) == '\\') { + if ((ival = *p++) == 'n') ival = '\n'; + } + if (tk == '"') *data++ = ival; + } + ++p; + if (tk == '"') ival = (int)pp; else tk = Num; + return; + } + else if (tk == '=') { if (*p == '=') { ++p; tk = Eq; } else tk = Assign; return; } + else if (tk == '+') { if (*p == '+') { ++p; tk = Inc; } else tk = Add; return; } + else if (tk == '-') { if (*p == '-') { ++p; tk = Dec; } else tk = Sub; return; } + else if (tk == '!') { if (*p == '=') { ++p; tk = Ne; } return; } + else if (tk == '<') { if (*p == '=') { ++p; tk = Le; } else if (*p == '<') { ++p; tk = Shl; } else tk = Lt; return; } + else if (tk == '>') { if (*p == '=') { ++p; tk = Ge; } else if (*p == '>') { ++p; tk = Shr; } else tk = Gt; return; } + else if (tk == '|') { if (*p == '|') { ++p; tk = Lor; } else tk = Or; return; } + else if (tk == '&') { if (*p == '&') { ++p; tk = Lan; } else tk = And; return; } + else if (tk == '^') { tk = Xor; return; } + else if (tk == '%') { tk = Mod; return; } + else if (tk == '*') { tk = Mul; return; } + else if (tk == '[') { tk = Brak; return; } + else if (tk == '?') { tk = Cond; return; } + else if (tk == '~' || tk == ';' || tk == '{' || tk == '}' || tk == '(' || tk == ')' || tk == ']' || tk == ',' || tk == ':') return; + } +} + +void expr(int lev) +{ + int t, *d, *b; + + if (!tk) { printf("%d: unexpected eof in expression\n", line); exit(-1); } + else if (tk == Num) { *--n = ival; *--n = Num; next(); ty = INT; } + else if (tk == '"') { + *--n = ival; *--n = Num; next(); + while (tk == '"') next(); + data = (char *)((int)data + sizeof(int) & -sizeof(int)); ty = PTR; + } + else if (tk == Sizeof) { + next(); if (tk == '(') next(); else { printf("%d: open paren expected in sizeof\n", line); exit(-1); } + ty = INT; if (tk == Int) next(); else if (tk == Char) { next(); ty = CHAR; } + while (tk == Mul) { next(); ty = ty + PTR; } + if (tk == ')') next(); else { printf("%d: close paren expected in sizeof\n", line); exit(-1); } + *--n = (ty == CHAR) ? sizeof(char) : sizeof(int); *--n = Num; + ty = INT; + } + else if (tk == Id) { + d = id; next(); + if (tk == '(') { + if (!d[Class]) { + memcpy(dsym, (char *)d[Name], d[Hash] & 63); dsym[d[Hash] & 63] = 0; + if (d[Val] = (int)dlsym(0, dsym)) d[Class] = Fun; + } + if (d[Class] != Fun) { printf("%d: bad function call\n", line); exit(-1); } + next(); + t = 0; b = 0; + while (tk != ')') { expr(Assign); *--n = (int)b; b = n; ++t; if (tk == ',') next(); } + next(); + *--n = t; *--n = d[Val]; *--n = (int)b; *--n = d[Class]; + ty = d[Type]; + } + else if (d[Class] == Num) { *--n = d[Val]; *--n = Num; ty = INT; } + else { + if (d[Class] == Loc) { *--n = d[Val]; *--n = Loc; } + else if (d[Class] == Glo) { *--n = d[Val]; *--n = Num; } + else { printf("%d: undefined variable\n", line); exit(-1); } + *--n = ty = d[Type]; *--n = Load; + } + } + else if (tk == '(') { + next(); + if (tk == Int || tk == Char) { + t = (tk == Int) ? INT : CHAR; next(); + while (tk == Mul) { next(); t = t + PTR; } + if (tk == ')') next(); else { printf("%d: bad cast\n", line); exit(-1); } + expr(Inc); + ty = t; + } + else { + expr(Assign); + if (tk == ')') next(); else { printf("%d: close paren expected\n", line); exit(-1); } + } + } + else if (tk == Mul) { + next(); expr(Inc); + if (ty > INT) ty = ty - PTR; else { printf("%d: bad dereference\n", line); exit(-1); } + *--n = ty; *--n = Load; + } + else if (tk == And) { + next(); expr(Inc); + if (*n == Load) n = n+2; else { printf("%d: bad address-of\n", line); exit(-1); } + ty = ty + PTR; + } + else if (tk == '!') { + next(); expr(Inc); + if (*n == Num) n[1] = !n[1]; else { *--n = 0; *--n = Num; --n; *n = (int)(n+3); *--n = Eq; } + ty = INT; + } + else if (tk == '~') { + next(); expr(Inc); + if (*n == Num) n[1] = ~n[1]; else { *--n = -1; *--n = Num; --n; *n = (int)(n+3); *--n = Xor; } + ty = INT; + } + else if (tk == Add) { next(); expr(Inc); ty = INT; } + else if (tk == Sub) { + next(); expr(Inc); + if (*n == Num) n[1] = -n[1]; else { *--n = -1; *--n = Num; --n; *n = (int)(n+3); *--n = Mul; } + ty = INT; + } + else if (tk == Inc || tk == Dec) { + t = tk; next(); expr(Inc); + if (*n == Load) *n = t; else { printf("%d: bad lvalue in pre-increment\n", line); exit(-1); } + } + else { printf("%d: bad expression\n", line); exit(-1); } + + while (tk >= lev) { // "precedence climbing" or "Top Down Operator Precedence" method + t = ty; b = n; + if (tk == Assign) { + next(); + if (*n != Load) { printf("%d: bad lvalue in assignment\n", line); exit(-1); } + expr(Assign); *--n = (int)(b+2); *--n = ty = t; *--n = Assign; + } + else if (tk == Cond) { + next(); + expr(Assign); + if (tk == ':') next(); else { printf("%d: conditional missing colon\n", line); exit(-1); } + d = n; + expr(Cond); + --n; *n = (int)(n+1); *--n = (int)d; *--n = (int)b; *--n = Cond; + } + else if (tk == Lor) { next(); expr(Lan); if (*n==Num && *b==Num) n[1] = b[1] || n[1]; else { *--n = (int)b; *--n = Lor; } ty = INT; } + else if (tk == Lan) { next(); expr(Or); if (*n==Num && *b==Num) n[1] = b[1] && n[1]; else { *--n = (int)b; *--n = Lan; } ty = INT; } + else if (tk == Or) { next(); expr(Xor); if (*n==Num && *b==Num) n[1] = b[1] | n[1]; else { *--n = (int)b; *--n = Or; } ty = INT; } + else if (tk == Xor) { next(); expr(And); if (*n==Num && *b==Num) n[1] = b[1] ^ n[1]; else { *--n = (int)b; *--n = Xor; } ty = INT; } + else if (tk == And) { next(); expr(Eq); if (*n==Num && *b==Num) n[1] = b[1] & n[1]; else { *--n = (int)b; *--n = And; } ty = INT; } + else if (tk == Eq) { next(); expr(Lt); if (*n==Num && *b==Num) n[1] = b[1] == n[1]; else { *--n = (int)b; *--n = Eq; } ty = INT; } + else if (tk == Ne) { next(); expr(Lt); if (*n==Num && *b==Num) n[1] = b[1] != n[1]; else { *--n = (int)b; *--n = Ne; } ty = INT; } + else if (tk == Lt) { next(); expr(Shl); if (*n==Num && *b==Num) n[1] = b[1] < n[1]; else { *--n = (int)b; *--n = Lt; } ty = INT; } + else if (tk == Gt) { next(); expr(Shl); if (*n==Num && *b==Num) n[1] = b[1] > n[1]; else { *--n = (int)b; *--n = Gt; } ty = INT; } + else if (tk == Le) { next(); expr(Shl); if (*n==Num && *b==Num) n[1] = b[1] <= n[1]; else { *--n = (int)b; *--n = Le; } ty = INT; } + else if (tk == Ge) { next(); expr(Shl); if (*n==Num && *b==Num) n[1] = b[1] >= n[1]; else { *--n = (int)b; *--n = Ge; } ty = INT; } + else if (tk == Shl) { next(); expr(Add); if (*n==Num && *b==Num) n[1] = b[1] << n[1]; else { *--n = (int)b; *--n = Shl; } ty = INT; } + else if (tk == Shr) { next(); expr(Add); if (*n==Num && *b==Num) n[1] = b[1] >> n[1]; else { *--n = (int)b; *--n = Shr; } ty = INT; } + else if (tk == Add) { + next(); expr(Mul); + if ((ty = t) > PTR) { if (*n == Num) n[1] = n[1] * sizeof(int); else { *--n = sizeof(int); *--n = Num; --n; *n = (int)(n+3); *--n = Mul; } } + if (*n == Num && *b == Num) n[1] = b[1] + n[1]; else { *--n = (int)b; *--n = Add; } + } + else if (tk == Sub) { + next(); expr(Mul); + if ((ty = t) > PTR) { if (*n == Num) n[1] = n[1] * sizeof(int); else { *--n = sizeof(int); *--n = Num; --n; *n = (int)(n+3); *--n = Mul; } } + if (*n == Num && *b == Num) n[1] = b[1] - n[1]; else { *--n = (int)b; *--n = Sub; } + } + else if (tk == Mul) { next(); expr(Inc); if (*n==Num && *b==Num) n[1] = b[1] * n[1]; else { *--n = (int)b; *--n = Mul; } ty = INT; } + else if (tk == Div) { next(); expr(Inc); if (*n==Num && *b==Num) n[1] = b[1] / n[1]; else { *--n = (int)b; *--n = Div; } ty = INT; } + else if (tk == Mod) { next(); expr(Inc); if (*n==Num && *b==Num) n[1] = b[1] % n[1]; else { *--n = (int)b; *--n = Mod; } ty = INT; } + else if (tk == Inc || tk == Dec) { + if (*n == Load) *n = tk; else { printf("%d: bad lvalue in post-increment\n", line); exit(-1); } + *--n = (ty > PTR) ? sizeof(int) : sizeof(char); *--n = Num; + *--n = (int)b; *--n = (tk == Inc) ? Sub : Add; + next(); + } + else if (tk == Brak) { + next(); expr(Assign); + if (tk == ']') next(); else { printf("%d: close bracket expected\n", line); exit(-1); } + if (t > PTR) { if (*n == Num) n[1] = n[1] * sizeof(int); else { *--n = sizeof(int); *--n = Num; --n; *n = (int)(n+3); *--n = Mul; } } + else if (t < PTR) { printf("%d: pointer type expected\n", line); exit(-1); } + if (*n == Num && *b == Num) n[1] = b[1] + n[1]; else { *--n = (int)b; *--n = Add; } + *--n = ty = t - PTR; *--n = Load; + } + else { printf("%d: compiler error tk=%d\n", line, tk); exit(-1); } + } +} + +void stmt() +{ + int *a, *b, *c; + + if (tk == If) { + next(); + if (tk == '(') next(); else { printf("%d: open paren expected\n", line); exit(-1); } + expr(Assign); a = n; + if (tk == ')') next(); else { printf("%d: close paren expected\n", line); exit(-1); } + stmt(); b = n; + if (tk == Else) { next(); stmt(); c = n; } else c = 0; + *--n = (int)c; *--n = (int)b; *--n = (int)a; *--n = Cond; + } + else if (tk == While) { + next(); + if (tk == '(') next(); else { printf("%d: open paren expected\n", line); exit(-1); } + expr(Assign); a = n; + if (tk == ')') next(); else { printf("%d: close paren expected\n", line); exit(-1); } + stmt(); + *--n = (int)a; *--n = While; + } + else if (tk == Return) { + next(); + if (tk != ';') { expr(Assign); a = n; } else a = 0; + if (tk == ';') next(); else { printf("%d: semicolon expected\n", line); exit(-1); } + *--n = (int)a; *--n = Return; + } + else if (tk == '{') { + next(); + *--n = ';'; + while (tk != '}') { a = n; stmt(); *--n = (int)a; *--n = '{'; } + next(); + } + else if (tk == ';') { + next(); *--n = ';'; + } + else { + expr(Assign); + if (tk == ';') next(); else { printf("%d: semicolon expected\n", line); exit(-1); } + } +} + +void gen(int *n) +{ + int i; char *b; + + i = *n; + if (i == Num) { + *e++ = 0xb8; *(int *)e = n[1]; e = e+4; if (src) printf(" movl $%d, %%eax\n",n[1]); + } + else if (i == Loc) { + if (n[1] < -32 || n[1] > 32) { printf("%d: gen(lea) out of bounds\n", line); exit(-1); } + *(int *)e = 0x458d + (n[1] << 18); e = e+3; if (src) printf(" leal $%d(%%ebp), %%eax\n", n[1]*4); + } + else if (i == Load) { + gen(n+2); + if (n[1] == CHAR) { *(int *)e = 0x00be0f; e = e+3; if (src) printf(" movsbl (%%eax), %%eax\n"); } + else { *(int *)e = 0x008b; e = e+2; if (src) printf(" movl (%%eax), %%eax\n"); } + } + else if (i == Assign) { + gen((int *)n[2]); *e++ = 0x50; if (src) printf(" push %%eax\n"); + gen(n+3); *e++ = 0x59; if (src) printf(" pop %%ecx\n"); + if (n[1] == CHAR) { *(int *)e = 0x0188; e = e+2; if (src) printf(" movb %%al, (%%ecx)\n"); } + else { *(int *)e = 0x0189; e = e+2; if (src) printf(" movl %%eax, (%%ecx)\n"); } + } + else if (i == Inc || i == Dec) { + gen(n+2); *e++ = 0x50; if (src) printf(" push %%eax\n"); + if (n[1] == CHAR) { *(int *)e = 0x00be0f; e = e+3; if (src) printf(" movsbl (%%eax), %%eax\n"); } + else { *(int *)e = 0x008b; e = e+2; if (src) printf(" movl (%%eax), %%eax\n"); } + i = ((i == Inc) ? 1 : -1) * ((n[1] > PTR) ? sizeof(int) : sizeof(char)); + *e++ = 0xb9; *(int *)e = i; e = e+4; if (src) printf(" movl $%d, %%ecx\n", i); + *(int *)e = 0xc801; e = e+2; if (src) printf(" addl %%ecx, %%eax\n"); + *e++ = 0x59; if (src) printf(" pop %%ecx\n"); + if (n[1] == CHAR) { *(int *)e = 0x0188; e = e+2; if (src) printf(" movb %%al, (%%ecx)\n"); } + else { *(int *)e = 0x0189; e = e+2; if (src) printf(" movl %%eax, (%%ecx)\n"); } + } + else if (i == Cond) { + gen((int *)n[1]); + *(int *)e = 0x840fc085; e = e+4; b = e; e = e+4; if (src) printf(" test %%eax, %%eax\n jeq \n"); + gen((int *)n[2]); + if (n[3]) { + *(int *)b = e+5 - b - 4; + *e++ = 0xe9; b = e; e = e + 4; if (src) printf(" jmp \n"); + gen((int *)n[3]); + } + *(int *)b = e - b - 4; + } + else if (i == Lor) { + gen((int *)n[1]); + *(int *)e = 0x850fc085; e = e+4; b = e; e = e+4; if (src) printf(" test %%eax, %%eax\n jne \n"); + gen(n+2); + *(int *)b = e - b - 4; + } + else if (i == Lan) { + gen((int *)n[1]); + *(int *)e = 0x840fc085; e = e+4; b = e; e = e+4; if (src) printf(" test %%eax, %%eax\n jeq \n"); + gen(n+2); + *(int *)b = e - b - 4; + } + else if (i >= Or && i <= Mod) { + gen(n+2); *e++ = 0x50; if (src) printf(" push %%eax\n"); + gen((int *)n[1]); *e++ = 0x59; if (src) printf(" pop %%ecx\n"); + if (i == Or) { *(int *)e = 0xc809; e = e+2; if (src) printf(" orl %%ecx, %%eax\n"); } + else if (i == Xor) { *(int *)e = 0xc831; e = e+2; if (src) printf(" xorl %%ecx, %%eax\n"); } + else if (i == And) { *(int *)e = 0xc821; e = e+2; if (src) printf(" andl %%ecx, %%eax\n"); } + else if (i >= Eq && i <= Ge) { + *(int *)e = 0xc839; e = e+2; if (src) printf(" cmp %%eax, %%ecx\n"); + *e++ = 0xb8; *(int *)e = 0; e = e+4; if (src) printf(" mov $0, %%eax\n"); + if (i == Eq) { *(int *)e = 0xc0940f; if (src) printf(" sete %%al\n"); } + else if (i == Ne) { *(int *)e = 0xc0950f; if (src) printf(" setne %%al\n"); } + else if (i == Lt) { *(int *)e = 0xc09c0f; if (src) printf(" setl %%al\n"); } + else if (i == Gt) { *(int *)e = 0xc09f0f; if (src) printf(" setg %%al\n"); } + else if (i == Le) { *(int *)e = 0xc09e0f; if (src) printf(" setle %%al\n"); } + else { *(int *)e = 0xc09d0f; if (src) printf(" setge %%al\n"); } + e = e+3; + } + else if (i == Shl) { *(int *)e = 0xe0d3; e = e+2; if (src) printf(" shl %%cl, %%eax\n"); } + else if (i == Shr) { *(int *)e = 0xf8d3; e = e+2; if (src) printf(" sar %%cl, %%eax\n"); } + else if (i == Add) { *(int *)e = 0xc801; e = e+2; if (src) printf(" addl %%ecx, %%eax\n"); } + else if (i == Sub) { *(int *)e = 0xc829; e = e+2; if (src) printf(" subl %%ecx, %%eax\n"); } + else if (i == Mul) { *(int *)e = 0xc1af0f; e = e+3; if (src) printf(" imul %%ecx, %%eax\n"); } + else if (i == Div) { *(int *)e = 0xf9f799; e = e+3; if (src) printf(" cltd\n idiv %%ecx, %%eax\n"); } + else if (i == Mod) { *(int *)e = 0x92f9f799; e=e+4; if (src) printf(" cltd\n idiv %%ecx, %%eax\n xchg %%edx, %%eax\n"); } + } + else if (i == Fun) { + i = n[1]; + while (i) { + gen(((int *)i)+1); *e++ = 0x50; i = *(int *)i; if (src) printf(" push %%eax\n"); + } + *e++ = 0xe8; *(int *)e = n[2]-(int)e-4; e = e+4; if (src) printf(" call \n"); + if (n[3]) { + *(int *)e = 0xc481; e = e+2; + *(int *)e = n[3]*4; e = e+4; if (src) printf(" add $%d, %%esp\n", n[3]*4); + } + } + else if (i == While) { + *e++ = 0xe9; b = e; e = e+4; if (src) printf(" jmp \n"); + gen(n+2); + *(int *)b = e - b - 4; + gen((int *)n[1]); + *(int *)e = 0x850fc085; e = e+4; if (src) printf(" test %%eax, %%eax\n"); + *(int *)e = b - e; e = e+4; if (src) printf(" jne $%d\n", b - e); + } + else if (i == Return) { + if (n[1]) gen((int *)n[1]); if (src) printf(" mov %%ebp, %%esp\n"); + *(int *)e = 0xc35dec89; e = e+4; if (src) printf(" pop %%ebp\n ret\n"); + } + else if (i == '{') { + gen((int *)n[1]); gen(n+2); + } + else if (i == Enter) { + *(int *)e = 0xe58955; e = e+3; if (src) printf(" push %%ebp;\n mov %%esp, %%ebp\n"); + if (n[1]) { + *(int *)e = 0xec81; e = e+2; + *(int *)e = n[1]*4; e = e+4; if (src) printf(" subl $%d, %%esp\n", n[1]*4); + } + gen(n+2); if (src) printf(" mov %%ebp, %%esp\n"); + *(int *)e = 0xc35dec89; e = e+4; if (src) printf(" pop %%ebp\n ret\n"); + } + else if (i != ';') { printf("%d: compiler error gen=%d\n", line, i); exit(-1); } +} + +int main(int argc, char **argv) +{ + int fd, bt, ty, poolsz, *idmain, *ast; + int i, *t; // temps + + --argc; ++argv; + if (argc > 0 && **argv == '-' && (*argv)[1] == 's') { src = 1; --argc; ++argv; } + if (argc < 1) { printf("usage: c5x86 [-s] file ...\n"); return -1; } + + if ((fd = open(*argv, 0)) < 0) { printf("could not open(%s)\n", *argv); return -1; } + + poolsz = 256*1024; // arbitrary size + if (!(sym = malloc(poolsz))) { printf("could not malloc(%d) symbol area\n", poolsz); return -1; } + if (!(data = malloc(poolsz))) { printf("could not malloc(%d) data area\n", poolsz); return -1; } + if (!(dsym = malloc(64))) { printf("could not malloc(64) dsym\n"); return -1; } + if (!(ast = malloc(poolsz))) { printf("could not malloc(%d) abstract syntax tree area\n", poolsz); return -1; } + ast = (int *)((int)ast + poolsz); // abstract syntax tree is most efficiently built as a stack + + memset(sym, 0, poolsz); + memset(data, 0, poolsz); + + if (!(e = mmap(0, poolsz, 7, 0x22, -1, 0))) { printf("could not mmap() executable memory\n"); return -1; } + + p = "char else enum if int return sizeof while void main"; + i = Char; while (i <= While) { next(); id[Tk] = i++; } // add keywords to symbol table + next(); id[Tk] = Char; // handle void type + next(); idmain = id; // keep track of main + + if (!(lp = p = malloc(poolsz))) { printf("could not malloc(%d) source area\n", poolsz); return -1; } + if ((i = read(fd, p, poolsz-1)) <= 0) { printf("read() returned %d\n", i); return -1; } + p[i] = 0; + close(fd); + + // parse declarations + line = 1; + next(); + while (tk) { + bt = INT; // basetype + if (tk == Int) next(); + else if (tk == Char) { next(); bt = CHAR; } + else if (tk == Enum) { + next(); + if (tk != '{') next(); + if (tk == '{') { + next(); + i = 0; + while (tk != '}') { + if (tk != Id) { printf("%d: bad enum identifier %d\n", line, tk); return -1; } + next(); + if (tk == Assign) { + next(); + n = ast; expr(Cond); + if (*n != Num) { printf("%d: bad enum initializer\n", line); return -1; } + i = n[1]; + } + id[Class] = Num; id[Type] = INT; id[Val] = i++; + if (tk == ',') next(); + } + next(); + } + } + while (tk != ';' && tk != '}') { + ty = bt; + while (tk == Mul) { next(); ty = ty + PTR; } + if (tk != Id) { printf("%d: bad global declaration\n", line); return -1; } + if (id[Class]) { printf("%d: duplicate global definition\n", line); return -1; } + next(); + id[Type] = ty; + if (tk == '(') { // function + id[Class] = Fun; + id[Val] = (int)e; + next(); i = 2; + while (tk != ')') { + ty = INT; + if (tk == Int) next(); + else if (tk == Char) { next(); ty = CHAR; } + while (tk == Mul) { next(); ty = ty + PTR; } + if (tk != Id) { printf("%d: bad parameter declaration\n", line); return -1; } + if (id[Class] == Loc) { printf("%d: duplicate parameter definition\n", line); return -1; } + id[HClass] = id[Class]; id[Class] = Loc; + id[HType] = id[Type]; id[Type] = ty; + id[HVal] = id[Val]; id[Val] = i++; + next(); + if (tk == ',') next(); + } + next(); + if (tk != '{') { printf("%d: bad function definition\n", line); return -1; } + i = 0; + next(); + while (tk == Int || tk == Char) { + bt = (tk == Int) ? INT : CHAR; + next(); + while (tk != ';') { + ty = bt; + while (tk == Mul) { next(); ty = ty + PTR; } + if (tk != Id) { printf("%d: bad local declaration\n", line); return -1; } + if (id[Class] == Loc) { printf("%d: duplicate local definition\n", line); return -1; } + id[HClass] = id[Class]; id[Class] = Loc; + id[HType] = id[Type]; id[Type] = ty; + id[HVal] = id[Val]; id[Val] = --i; + next(); + if (tk == ',') next(); + } + next(); + } + n = ast; + *--n = ';'; while (tk != '}') { t = n; stmt(); *--n = (int)t; *--n = '{'; } + *--n = -i; *--n = Enter; + gen(n); + id = sym; // unwind symbol table locals + while (id[Tk]) { + if (id[Class] == Loc) { + id[Class] = id[HClass]; + id[Type] = id[HType]; + id[Val] = id[HVal]; + } + id = id + Idsz; + } + } + else { + id[Class] = Glo; + id[Val] = (int)data; + data = data + sizeof(int); + } + if (tk == ',') next(); + } + next(); + } + + if (!idmain[Val]) { printf("main() not defined\n"); return -1; } + if (!src) { + t = (int *)e; + *e++ = 0xb8; *(char ***)e = argv; e = e+4; *e++ = 0x50; // movl $argv, %eax; push %eax + *e++ = 0xb8; *(int *) e = argc; e = e+4; *e++ = 0x50; // movl $argc, %eax; push %eax + *e++ = 0xe8; *(int *)e = idmain[Val] - (int)e - 4; e = e+4; // call main + *e++ = 0x81; *e++ = 0xc4; *(int *)e = 8; e = e+4; // add $8, %esp + *e++ = 0xc3; // ret + qsort(dsym, 2, 1, (void *)t); // hack to call a function pointer + printf("exit(0) from c5x86\n"); + } + return 0; +} diff --git a/w32.h b/w32.h new file mode 100644 index 0000000..548a0ce --- /dev/null +++ b/w32.h @@ -0,0 +1,32 @@ +#include +void *mmap(void *addr, size_t len, int prot, int flags, int fildes, off_t off) +{ + HANDLE fm, h; + void *map; + const off_t maxSize = off + (off_t)len; + + h = (HANDLE)_get_osfhandle(fildes); + fm = CreateFileMapping(h, NULL, PAGE_EXECUTE_READWRITE, 0, maxSize, NULL); + map = MapViewOfFile(fm, FILE_MAP_READ | FILE_MAP_WRITE | FILE_MAP_EXECUTE, 0, off, len); + CloseHandle(fm); + return map; +} + +void *dlsym(void *handle, char *name) +{ + if (!strcmp(name, "open" )) return &open; + if (!strcmp(name, "read" )) return &read; + if (!strcmp(name, "close" )) return &close; + if (!strcmp(name, "printf")) return &printf; + if (!strcmp(name, "malloc")) return &malloc; + if (!strcmp(name, "memset")) return &memset; + if (!strcmp(name, "memcmp")) return &memcmp; + if (!strcmp(name, "memcpy")) return &memcpy; + if (!strcmp(name, "mmap" )) return &mmap; + if (!strcmp(name, "dlsym" )) return &dlsym; + if (!strcmp(name, "qsort" )) return &qsort; + if (!strcmp(name, "exit" )) return &exit; + return 0; +} +#define CHAR TYCHAR +#define INT TYINT