diff --git a/.gitignore b/.gitignore index 6d1ee607..c5cdce22 100644 --- a/.gitignore +++ b/.gitignore @@ -5,4 +5,5 @@ __pycache__/ *.o *.dot *.svg +*.ppm porth \ No newline at end of file diff --git a/.vscode/settings.json b/.vscode/settings.json new file mode 100644 index 00000000..32a3e0f3 --- /dev/null +++ b/.vscode/settings.json @@ -0,0 +1,6 @@ +{ + "cSpell.enabled": true, + "cSpell.words": [ + "O_WRONLY" + ] +} \ No newline at end of file diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 1414840a..1d1d86ca 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -1,5 +1,9 @@ No contributions are accepted right now since the language is not -stablized yet and I'm still actively experimenting with the design. +stabilized yet and I'm actively experimenting with the +design. Contributions will be opened later when I feel that the +language is stable enough. Feel free to do whatever you want with the +source code itself according to the MIT license though. -Feel free to do whatever you want with the code itself according to -the MIT license though. +If you have any questions about the language or want to report bugs +(NO FEATURE REQUESTS) please send them to reximkut@gmail.com or +tsodingbiz@gmail.com diff --git a/README.md b/README.md index 585e89dd..d37c3726 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,6 @@ # Porth -**WARNING! THE DEVELOPMENT IS MOVED TO GITLAB: https://gitlab.com/tsoding/porth** +**WARNING! THIS LANGUAGE IS A WORK IN PROGRESS! ANYTHING CAN CHANGE AT ANY MOMENT WITHOUT ANY NOTICE! USE THIS LANGUAGE AT YOUR OWN RISK!** It's like [Forth](https://en.wikipedia.org/wiki/Forth_(programming_language)) but written in [Python](https://www.python.org/). But I don't actually know for sure since I never programmed in Forth, I only heard that it's some sort of stack-based programming language. Porth is also stack-based programming language. Which makes it just like Forth am I rite? 
@@ -9,16 +9,21 @@ Porth is planned to be - [x] Native - [x] Stack-based (just like Forth) - [x] [Turing-complete](./examples/rule110.porth) -- [x] Statically typed (the type checking is probably gonna be similar to the [WASM validation](https://binji.github.io/posts/webassembly-type-checking/)) -- [ ] Self-hosted (Python is used only as an initial bootstrap, once the language is mature enough we gonna rewrite it in itself) +- [x] Statically typed (the type checking is similar to [WASM validation](https://binji.github.io/posts/webassembly-type-checking/)) +- [ ] Self-hosted (See [./porth.porth](./porth.porth) for the current progress) +- [ ] Optimized (these are not the selling points, but rather milestones of the development) +## The Use Case for The Language + +Porth is a Computer [Programming Language](https://en.wikipedia.org/wiki/Programming_language). It's designed to write programs for [Computers](https://en.wikipedia.org/wiki/Computer). + ## Examples Hello, World: -```pascal +```porth include "std.porth" "Hello, World\n" puts @@ -26,7 +31,7 @@ include "std.porth" Simple program that prints numbers from 0 to 99 in an ascending order: -```pascal +```porth include "std.porth" 100 0 while 2dup > do @@ -112,7 +117,7 @@ By default the compiler searches files to include in `./` and `./std/`. You can This is what the language supports so far. **Since the language is a work in progress everything in this section is the subject to change.** -### Data Types +### Literals #### Integer @@ -120,7 +125,7 @@ Currently an integer is anything that is parsable by [int](https://docs.python.o Example: -```pascal +```porth 10 20 + ``` @@ -139,7 +144,7 @@ Those, a single string pushes two values onto the data stack: the size and the p Example: -``` +```porth include "std.porth" "Hello, World" puts ``` @@ -154,13 +159,13 @@ The size and the pointer are provided by the string `"Hello, World"`. 
It's like a regular string but it does not push its size on the stack and implicitly ends with [NULL-terminator](https://en.wikipedia.org/wiki/Null-terminated_string). Designed specifically to interact with C code or any other kind of code that expects NULL-terminated strings. -``` +```porth include "std.porth" -O_RDONLY "input.txt"c AT_FDCWD openat -// ^ -// | -// postfix that indicates a C-style string +0 O_RDONLY "input.txt"c AT_FDCWD openat +// ^ +// | +// postfix that indicates a C-style string if dup 0 < do "ERROR: could not open the file\n" eputs @@ -182,195 +187,77 @@ When compiler encounters a character it pushes its value as an integer onto the Example: -``` +```porth 'E' print ``` This program pushes integer `69` onto the stack (since the ASCII code of letter `E` is `69`) and prints it with the `print` operation. -### Built-in Words +### Intrinsics (Built-in Words) #### Stack Manipulation -- `dup` - duplicate an element on top of the stack. -``` -a = pop() -push(a) -push(a) -``` -- `swap` - swap 2 elements on the top of the stack. -``` -a = pop() -b = pop() -push(a) -push(b) -``` -- `drop` - drops the top element of the stack. -``` -pop() -``` -- `print` - print the element on top of the stack in a free form to stdout and remove it from the stack. -``` -a = pop() -print(a) -``` -- `over` -``` -a = pop() -b = pop() -push(b) -push(a) -push(b) -``` -- `rot` - rotate the top three stack elements. -``` -a = pop() -b = pop() -c = pop() -push(b) -push(a) -push(c) -``` +| Name | Signature | Description | +| --- | --- | --- | +| `dup` | `a -- a a` | duplicate an element on top of the stack. | +| `swap` | `a b -- b a` | swap 2 elements on the top of the stack. | +| `drop` | `a b -- a` | drops the top element of the stack. | +| `print` | `a b -- a` | print the element on top of the stack in a free form to stdout and remove it from the stack. 
| +| `over` | `a b -- a b a` | copy the element below the top of the stack | +| `rot` | `a b c -- b c a` | rotate the top three stack elements. | #### Comparison -- `=` - checks if two elements on top of the stack are equal. Removes the elements from the stack and pushes `1` if they are equal and `0` if they are not. -``` -a = pop() -b = pop() -push(int(a == b)) -``` -- `!=` - checks if two elements on top of the stack are not equal. -``` -a = pop() -b = pop() -push(int(a != b)) -``` -- `>` - checks if the element below the top greater than the top. -``` -b = pop() -a = pop() -push(int(a > b)) -``` -- `<` - checks if the element below the top less than the top. -``` -b = pop() -a = pop() -push(int(a < b)) -``` -- `>=` -``` -b = pop() -a = pop() -push(int(a >= b)) -``` -- `<=` -``` -b = pop() -a = pop() -push(int(a >= b)) -``` +| Name | Signature | Description | +| --- | --- | --- | +| `= ` | `[a: int] [b: int] -- [a == b : bool]` | checks if two elements on top of the stack are equal. | +| `!=` | `[a: int] [b: int] -- [a != b : bool]` | checks if two elements on top of the stack are not equal. | +| `> ` | `[a: int] [b: int] -- [a > b : bool]` | applies the greater comparison on top two elements. | +| `< ` | `[a: int] [b: int] -- [a < b : bool]` | applies the less comparison on top two elements. | +| `>=` | `[a: int] [b: int] -- [a >= b : bool]` | applies the greater or equal comparison on top two elements. | +| `<=` | `[a: int] [b: int] -- [a <= b : bool]` | applies the less or equal comparison on top two elements. | #### Arithmetic -- `+` - sums up two elements on the top of the stack. -``` -a = pop() -b = pop() -push(a + b) -``` -- `-` - subtracts the top of the stack from the element below. 
-``` -a = pop() -b = pop() -push(b - a) -``` -- `*` - multiples the top of the stack with the element below the top of the stack -``` -a = pop() -b = pop() -push(b * a) -``` -- `divmod` -``` -a = pop() -b = pop() -push(b // a) -push(b % a) -``` +| Name | Signature | Description | +| --- | --- | --- | +| `+` | `[a: int] [b: int] -- [a + b: int]` | sums up two elements on the top of the stack. | +| `-` | `[a: int] [b: int] -- [a - b: int]` | subtracts two elements on the top of the stack | +| `*` | `[a: int] [b: int] -- [a * b: int]` | multiplies two elements on top of the stack | +| `divmod` | `[a: int] [b: int] -- [a / b: int] [a % b: int]` | perform [Euclidean division](https://en.wikipedia.org/wiki/Euclidean_division) between two elements on top of the stack. | #### Bitwise -- `shr` -``` -a = pop() -b = pop() -push(b >> a) -``` -- `shl` -``` -a = pop() -b = pop() -push(b << a) -``` -- `or` -``` -a = pop() -b = pop() -push(b | a) -``` -- `and` -``` -a = pop() -b = pop() -push(b & a) -``` -- `not` -``` -a = pop() -push(~a) -``` - -#### Control Flow - -- `if <condition> do <then-branch> else <else-branch> end` - pops the element on top of the stack and if the element is not `0` executes the `<then-branch>`, otherwise `<else-branch>`. -- `while <condition> do <body> end` - keeps executing both `<condition>` and `<body>` until `<condition>` produces `0` at the top of the stack. Checking the result of the `<condition>` removes it from the stack. +| Name | Signature | Description | +| --- | --- | --- | +| `shr` | `[a: int] [b: int] -- [a >> b: int]` | right **unsigned** bit shift. | +| `shl` | `[a: int] [b: int] -- [a << b: int]` | left bit shift. | +| `or` | `[a: int] [b: int] -- [a \| b: int]` | bit `or`. | +| `and` | `[a: int] [b: int] -- [a & b: int]` | bit `and`. | +| `not` | `[a: int] -- [~a: int]` | bit `not`. | #### Memory -- `mem` - pushes the address of the beginning of the memory where you can read and write onto the stack. -``` -push(mem_addr) -``` -- `.` - store a given byte at the address on the stack. 
-``` -byte = pop() -addr = pop() -store(addr, byte) -``` -- `,` - load a byte from the address on the stack. -``` -addr = pop() -byte = load(addr) -push(byte) -``` -- `.64` - store an 8-byte word at the address on the stack. -``` -word = pop() -addr = pop() -store(addr, word) -``` -- `,64` - load an 8-byte word from the address on the stack. -``` -word = pop() -byte = load(word) -push(byte) -``` +| Name | Signature | Description | +| --- | --- | --- | +| `mem` | `-- [mem: ptr]` | pushes the address of the beginning of the memory where you can read and write onto the stack. | +| `!8` | `[byte: int] [place: ptr] -- ` | store a given byte at the address on the stack. | +| `@8` | `[place: ptr] -- [byte: int]` | load a byte from the address on the stack. | +| `!16` | `[byte: int] [place: ptr] --` | store a 2-byte word at the address on the stack. | +| `@16` | `[place: ptr] -- [byte: int]` | load a 2-byte word from the address on the stack. | +| `!32` | `[byte: int] [place: ptr] --` | store a 4-byte word at the address on the stack. | +| `@32` | `[place: ptr] -- [byte: int]` | load a 4-byte word from the address on the stack. | +| `!64` | `[byte: int] [place: ptr] --` | store an 8-byte word at the address on the stack. | +| `@64` | `[place: ptr] -- [byte: int]` | load an 8-byte word from the address on the stack. | +| `cast(int)` | `[a: any] -- [a: int]` | cast the element on top of the stack to `int` | +| `cast(bool)` | `[a: any] -- [a: bool]` | cast the element on top of the stack to `bool` | +| `cast(ptr)` | `[a: any] -- [a: ptr]` | cast the element on top of the stack to `ptr` | #### System - `syscall` - perform a syscall with n arguments where n is in range `[0..6]`. 
(`syscall1`, `syscall2`, etc) -``` + +```porth syscall_number = pop() for i in range(n): @@ -379,11 +266,58 @@ for i in range(n): ``` +#### Misc + +- `here (-- [len: int] [str: ptr])` - pushes a string `"<file-path>:<row>:<col>"` where `<file-path>` is the path to the file where `here` is located, `<row>` is the row on which `here` is located and `<col>` is the column from which `here` starts. It is useful for reporting developer errors: + +```porth +include "std.porth" + +here puts ": TODO: not implemented\n" puts 1 exit +``` + +- `argc (-- [argc: int])` +- `argv (-- [argv: ptr])` + +### std.porth + +TBD + + + +### Control Flow + +#### if-condition + + + +```porth +<condition> if + <body> +else <condition> if* + <body> +else <condition> if* + <body> +else + <body> +end +``` + +#### while-loop + + + +```porth +while <condition> do + <body> +end +``` + ### Macros Define a new word `write` that expands into a sequence of tokens `stdout SYS_write syscall3` during the compilation. -``` +```porth macro write stdout SYS_write syscall3 end @@ -393,16 +327,114 @@ end Include tokens of file `file.porth` -``` +```porth include "file.porth" ``` -### Misc +### Procedures + + + +```porth +proc seq // n -- + while dup 0 > do + dup print + 1 - + end drop +end +``` + +### Constants + + + +```porth +const N 69 end +const M 420 end +const K M N / end +``` + +### Memory -- `here` - pushes a string `"<file-path>:<row>:<col>"` where `<file-path>` is the path to the file where `here` is located, `<row>` is the row on which `here` is located and `<col>` is the column from which `here` starts. 
It is useful for reporting developer errors: + -```pascal +#### Global Memory + +```porth include "std.porth" -here puts ": TODO: not implemented\n" puts 1 exit +const N 26 end +memory buffer N end + +0 while dup N < do + dup 'a' + + over buffer + + !8 + + 1 + +end drop + +N buffer puts +``` + +#### Local Memory + +```porth +include "std.porth" + +proc fib // n -- + memory a sizeof(u64) end + memory b sizeof(u64) end + + dup 1 > if + dup 1 - fib a !64 + dup 2 - fib b !64 + a @64 b @64 + + end +end ``` + +### offset/reset + + + +#### Enums + +```porth +include "std.porth" + +const MON 1 offset end +const TUE 1 offset end +const WED 1 offset end +const THU 1 offset end +const FRI 1 offset end +const SAT 1 offset end +const SUN 1 offset end +const WEEK_DAYS reset end + +"There is " puts WEEK_DAYS putd " days in a week\n" puts +``` + +#### Structs + +```porth +include "std.porth" + +const offsetof(Str.count) sizeof(u64) offset end +const offsetof(Str.data) sizeof(ptr) offset end +const sizeof(Str) reset end +``` + +### Type Checking + +TBD + + + +#### Types of Porth + +- `int` - 64 bit integer +- `bool` - boolean +- `ptr` - pointer + +TBD diff --git a/cspell.json b/cspell.json new file mode 100644 index 00000000..c68fc563 --- /dev/null +++ b/cspell.json @@ -0,0 +1,285 @@ +{ + "version": "0.2", + "ignorePaths": [], + "dictionaryDefinitions": [], + "dictionaries": [], + "words": [ + "ABSTIME", + "adjtime", + "adjtimex", + "Alexey", + "alist", + "atim", + "autoload", + "blksize", + "capget", + "capset", + "chdir", + "cmove", + "cmovg", + "cmovge", + "cmovl", + "cmovle", + "cmovne", + "consts", + "creat", + "cstr", + "cstreq", + "cstrlen", + "ctim", + "curr", + "dcookie", + "defconst", + "delim", + "dirfd", + "divmod", + "elif", + "ENOENT", + "envp", + "epoll", + "eputd", + "eputs", + "eval", + "eventfd", + "execve", + "faccessat", + "fadvise", + "fallocate", + "fanotify", + "fchdir", + "fchmod", + "fchmodat", + "fchown", + "fchownat", + "fcntl", + "fdatasync", + "FDCWD", 
+ "felf", + "fgetxattr", + "finit", + "flistxattr", + "fputd", + "fputs", + "fputu", + "fremovexattr", + "fsetxattr", + "fstat", + "fstatfs", + "ftruncate", + "futex", + "futimesat", + "getaffinity", + "getcpu", + "getcwd", + "getdents", + "getegid", + "geteuid", + "getevents", + "getgid", + "getgroups", + "getitimer", + "getoverrun", + "getparam", + "getpeername", + "getpgid", + "getpgrp", + "getpid", + "getpmsg", + "getppid", + "getpriority", + "getres", + "getresgid", + "getresuid", + "getrlimit", + "getrusage", + "getscheduler", + "getsetattr", + "getsid", + "getsockname", + "getsockopt", + "gettid", + "gettime", + "gettimeofday", + "getuid", + "getxattr", + "inotify", + "Intrin", + "Intrinsics", + "INTRINSICS", + "ioperm", + "iopl", + "ioprio", + "isdigit", + "kcmp", + "kexec", + "keyctl", + "Kutepov", + "lchown", + "lgetxattr", + "linkat", + "listxattr", + "llistxattr", + "lnot", + "lremovexattr", + "lseek", + "lsetxattr", + "madvise", + "malloc", + "MALLOC", + "mbind", + "memcpy", + "mempolicy", + "ment", + "mincore", + "mkdir", + "mkdirat", + "mknod", + "mknodat", + "mlock", + "mlockall", + "mmap", + "mprotect", + "mremap", + "msgctl", + "msgget", + "msgrcv", + "msgsnd", + "msync", + "mtim", + "munlock", + "munlockall", + "munmap", + "nanosleep", + "nasm", + "newfstatat", + "nfsservctl", + "nlink", + "offsetof", + "openat", + "porth", + "ppoll", + "prctl", + "pread", + "preadv", + "prlimit", + "procs", + "pselect", + "pstr", + "ptrace", + "ptrs", + "putd", + "PUTD", + "putpmsg", + "putu", + "pwait", + "pwrite", + "pwritev", + "quotactl", + "rdev", + "RDONLY", + "RDWR", + "readahead", + "readlink", + "readlinkat", + "readv", + "recvfrom", + "recvmmsg", + "recvmsg", + "removexattr", + "renameat", + "resb", + "resq", + "returncode", + "rtokens", + "sched", + "semctl", + "semget", + "semop", + "semtimedop", + "sendfile", + "sendmmsg", + "sendmsg", + "sendto", + "setaffinity", + "setdomainname", + "setfsgid", + "setfsuid", + "setgid", + "setgroups", + 
"sethostname", + "setitimer", + "setns", + "setparam", + "setpgid", + "setpriority", + "setq", + "setregid", + "setresgid", + "setresuid", + "setreuid", + "setrlimit", + "setscheduler", + "setsid", + "setsockopt", + "settime", + "settimeofday", + "setuid", + "setxattr", + "shmat", + "shmctl", + "shmdt", + "shmget", + "sigaction", + "sigaltstack", + "signalfd", + "sigpending", + "sigprocmask", + "sigqueueinfo", + "sigreturn", + "sigsuspend", + "sigtimedwait", + "socketpair", + "statbuf", + "statfs", + "streq", + "strs", + "struct", + "Structs", + "swapoff", + "swapon", + "symlinkat", + "syms", + "syncfs", + "syscall", + "syscalls", + "Syscalls", + "sysfs", + "sysinfo", + "tgkill", + "tgsigqueueinfo", + "timedreceive", + "timedsend", + "timerfd", + "timespec", + "tkill", + "TRUNC", + "Tsvg", + "tuxcall", + "unlinkat", + "uselib", + "ustat", + "utime", + "utimensat", + "utimes", + "vfork", + "vhangup", + "vmsplice", + "vserver", + "waitid", + "waitpid", + "writev", + "wstatus" + ], + "ignoreWords": [], + "import": [] +} diff --git a/editor/porth-mode.el b/editor/porth-mode.el index 558c3b4a..df586b8a 100644 --- a/editor/porth-mode.el +++ b/editor/porth-mode.el @@ -42,7 +42,7 @@ (eval-and-compile (defconst porth-keywords - '("if" "elif" "else" "end" "while" "do" "macro" "include"))) + '("if" "else" "while" "do" "macro" "include" "memory" "proc" "const" "end" "offset" "reset"))) (defconst porth-highlights `((,(regexp-opt porth-keywords 'symbols) . 
font-lock-keyword-face))) diff --git a/editor/porth.vim b/editor/porth.vim index 201c08dd..8336aa04 100644 --- a/editor/porth.vim +++ b/editor/porth.vim @@ -13,7 +13,7 @@ endif syntax keyword porthTodos TODO XXX FIXME NOTE " Language keywords -syntax keyword porthKeywords if elif else end while do macro include +syntax keyword porthKeywords if else while do macro include memory proc const end offset reset " Comments syntax region porthCommentLine start="//" end="$" contains=porthTodos @@ -24,7 +24,7 @@ syntax region porthString start=/\v'/ skip=/\v\\./ end=/\v'/ " Set highlights highlight default link porthTodos Todo -highlight default link porthKeywords Identifier +highlight default link porthKeywords Keyword highlight default link porthCommentLine Comment highlight default link porthString String diff --git a/euler/.gitignore b/euler/.gitignore index 9735f282..a46c8bea 100644 --- a/euler/.gitignore +++ b/euler/.gitignore @@ -4,4 +4,7 @@ problem03 problem04 problem05 problem06 -problem07 \ No newline at end of file +problem07 +problem08 +problem09 +problem10 \ No newline at end of file diff --git a/euler/problem01.porth b/euler/problem01.porth index 9d6c6e1c..1a2a4d80 100644 --- a/euler/problem01.porth +++ b/euler/problem01.porth @@ -1,10 +1,9 @@ include "std.porth" 0 3 while dup 1000 < do - if dup 3 mod 0 = - over 5 mod 0 = - or - do + dup 3 mod 0 = + over 5 mod 0 = + or if swap over + swap end diff --git a/euler/problem02.porth b/euler/problem02.porth index 3f501a17..e1df4b37 100644 --- a/euler/problem02.porth +++ b/euler/problem02.porth @@ -1,12 +1,12 @@ include "std.porth" -macro acc mem end +memory acc sizeof(u64) end 1 2 while over 4000000 < do - if over 2 mod 0 = do - over acc ,64 + acc swap .64 + over 2 mod 0 = if + over acc @64 + acc !64 end swap over + end 2drop -acc ,64 print +acc @64 print diff --git a/euler/problem03.porth b/euler/problem03.porth index 63370070..76da647f 100644 --- a/euler/problem03.porth +++ b/euler/problem03.porth @@ -1,7 +1,7 @@ 
include "std.porth" 600851475143 2 while over 1 > do - if 2dup mod 0 = do + 2dup mod 0 = if swap over / swap else 1 + diff --git a/euler/problem04.porth b/euler/problem04.porth index 30ebcc90..064fd327 100644 --- a/euler/problem04.porth +++ b/euler/problem04.porth @@ -1,7 +1,7 @@ // WARNING! This problem is extremely slow in Simulation Mode with CPython! Use PyPy or Compilation Mode! include "std.porth" -macro ans mem end +memory ans sizeof(u64) end 100 while dup 1000 < do 100 while dup 1000 < do @@ -16,9 +16,9 @@ macro ans mem end swap drop // a b - if 2dup = do - if dup ans ,64 > do - ans over .64 + 2dup = if + dup ans @64 > if + ans over swap !64 end end @@ -29,4 +29,4 @@ macro ans mem end 1 + end drop -ans ,64 print +ans @64 print diff --git a/euler/problem05.porth b/euler/problem05.porth index 72839c86..9ecbf831 100644 --- a/euler/problem05.porth +++ b/euler/problem05.porth @@ -2,22 +2,21 @@ include "std.porth" macro N 20 end -// |acc |tmp |ans| -macro acc mem end -macro tmp mem 8 N * + end -macro ans tmp 8 N * + end +memory acc sizeof(u64) N * end +memory tmp sizeof(u64) N * end +memory ans sizeof(u64) end 2 while dup N <= do // clean up the tmp table 0 while dup N < do - tmp over 8 * + 0 .64 + tmp over 8 * + 0 swap !64 1 + end drop // factorize dup 2 while over 1 > do - if 2dup mod 0 = do + 2dup mod 0 = if dup 8 * tmp + inc64 swap over / swap else @@ -26,13 +25,12 @@ macro ans tmp 8 N * + end end 2drop 0 while dup N < do - if dup 8 * acc + ,64 - over 8 * tmp + ,64 - < - do + dup 8 * acc + @64 + over 8 * tmp + @64 + < if dup 8 * acc + - over 8 * tmp + ,64 - .64 + over 8 * tmp + @64 + swap !64 end 1 + @@ -41,15 +39,15 @@ macro ans tmp 8 N * + end 1 + end drop -ans 1 .64 +1 ans !64 0 while dup N < do - acc over 8 * + ,64 while dup 0 > do - over ans ,64 * ans swap .64 + acc over 8 * + @64 while dup 0 > do + over ans @64 * ans !64 1 - end drop 1 + end drop -ans ,64 print +ans @64 print diff --git a/euler/problem07.porth b/euler/problem07.porth index 
ebbd77e2..00c0d27c 100644 --- a/euler/problem07.porth +++ b/euler/problem07.porth @@ -2,34 +2,34 @@ include "std.porth" macro N 10001 end -macro primes-count mem end -macro primes primes-count 8 + end +memory primes-count sizeof(u64) end +memory primes sizeof(u64) N * 10 + end macro is-prime 0 while - if 2dup 8 * primes + ,64 dup * >= do - 2dup 8 * primes + ,64 mod 0 != + 2dup 8 * primes + @64 dup * >= if + 2dup 8 * primes + @64 mod 0 != else false end do 1 + end - 8 * primes + ,64 dup * < + 8 * primes + @64 dup * < end macro add-prime - primes primes-count ,64 8 * + swap .64 + primes primes-count @64 8 * + !64 primes-count inc64 end 2 add-prime -3 while primes-count ,64 N < do - if dup is-prime do +3 while primes-count @64 N < do + dup is-prime if dup add-prime end 1 + end drop -primes N 1 - 8 * + ,64 print +primes N 1 - 8 * + @64 print diff --git a/euler/problem08.porth b/euler/problem08.porth new file mode 100644 index 00000000..ec835cf8 --- /dev/null +++ b/euler/problem08.porth @@ -0,0 +1,29 @@ +include "std.porth" + +macro N 13 end + +memory str sizeof(Str) end +memory acc sizeof(u64) end +memory ans sizeof(u64) end + 
+"7316717653133062491922511967442657474235534919493496983520312774506326239578318016984801869478851843858615607891129494954595017379583319528532088055111254069874715852386305071569329096329522744304355766896648950445244523161731856403098711121722383113622298934233803081353362766142828064444866452387493035890729629049156044077239071381051585930796086670172427121883998797908792274921901699720888093776657273330010533678812202354218097512545405947522435258490771167055601360483958644670632441572215539753697817977846174064955149290862569321978468622482839722413756570560574902614079729686524145351004748216637048440319989000889524345065854122758866688116427171479924442928230863465674813919123162824586178664583591245665294765456828489128831426076900422421902267105562632111110937054421750694165896040807198403850962455444362981230987879927244284909188845801561660979191338754992005240636899125607176060588611646710940507754100225698315520005593572972571636269561882670428252483600823257530420752963450" str !Str + +0 ans !64 + +0 while dup str @Str.count N - 1 + < do + 1 acc !64 + + 0 while dup N < do + 2dup + str @Str.data cast(ptr) + @8 '0' - + acc @64 * acc !64 + 1 + + end drop + + acc @64 ans @64 > if + acc @64 ans !64 + end + + 1 + +end drop + +ans @64 print diff --git a/euler/problem08.txt b/euler/problem08.txt new file mode 100644 index 00000000..42b0fc54 --- /dev/null +++ b/euler/problem08.txt @@ -0,0 +1,9 @@ +:i argc 0 +:b stdin 0 + +:i returncode 0 +:b stdout 12 +23514624000 + +:b stderr 0 + diff --git a/euler/problem09.porth b/euler/problem09.porth new file mode 100644 index 00000000..570e2c46 --- /dev/null +++ b/euler/problem09.porth @@ -0,0 +1,24 @@ +include "std.porth" + +memory a sizeof(u64) end +memory b sizeof(u64) end +memory c sizeof(u64) end + +1 while dup 1000 < do + dup a !64 + 1 while dup a @64 + 1000 < do + dup b !64 + 1000 a @64 - b @64 - c !64 + + a @64 dup * b @64 dup * + c @64 dup * = if + a @64 b @64 * c @64 * print + 0 exit + end + + 1 + + end drop + 
1 + +end drop + +here eputs ": unreachable\n" eputs +1 exit diff --git a/euler/problem09.txt b/euler/problem09.txt new file mode 100644 index 00000000..05b8a609 --- /dev/null +++ b/euler/problem09.txt @@ -0,0 +1,9 @@ +:i argc 0 +:b stdin 0 + +:i returncode 0 +:b stdout 9 +31875000 + +:b stderr 0 + diff --git a/euler/problem10.porth b/euler/problem10.porth new file mode 100644 index 00000000..ed4a9391 --- /dev/null +++ b/euler/problem10.porth @@ -0,0 +1,38 @@ +include "std.porth" + +// TODO: this solution is too slow for simulation mode even with pypy +// It takes ~1min with `pypy ./test.py run ./euler/problem10.porth` on my machine +// It's actually relatively slow even in the compilation mode (~1sec). +// Maybe we could come up with a faster method of computing prime numbers? +// We could precompute them and save them to a file I guess. + +memory ans sizeof(u64) end +memory primes-count sizeof(u64) end +memory primes sizeof(u32) 1000000 * end + +macro push-prime // value -- + primes-count @32 sizeof(u32) * primes + !32 + primes-count inc32 +end + +macro is-prime // [value: int] -> [ret: bool] + 0 while + 2dup sizeof(u32) * primes + @32 dup * >= if + 2dup sizeof(u32) * primes + @32 mod 0 != + else false end + do 1 + end + sizeof(u32) * primes + @32 dup * < +end + +2 push-prime +2 ans !64 + +3 while dup 2000000 < do + dup is-prime if + dup push-prime + dup ans @64 + ans !64 + end + 1 + +end drop + +ans @64 print diff --git a/examples/.gitignore b/examples/.gitignore index 314b2462..b0c42d1b 100644 --- a/examples/.gitignore +++ b/examples/.gitignore @@ -8,3 +8,4 @@ seq fib gol fizz-buzz +checker \ No newline at end of file diff --git a/examples/cat.porth b/examples/cat.porth index f089d802..50d21d5c 100644 --- a/examples/cat.porth +++ b/examples/cat.porth @@ -1,33 +1,40 @@ include "std.porth" +memory fd sizeof(u64) end +macro @fd fd @64 end +macro !fd fd !64 end + macro BUFFER_CAP 1024 end +memory buffer BUFFER_CAP end -// memory layout -macro fd mem end -macro buffer 
fd 8 + end +memory file_path_cstr sizeof(ptr) end +macro @file_path_cstr file_path_cstr @64 cast(ptr) end +macro !file_path_cstr file_path_cstr !64 end macro cat_fd - while BUFFER_CAP buffer fd ,64 read dup 0 > do + while BUFFER_CAP buffer @fd read dup 0 > do buffer puts end drop end -if argc 2 < do - fd stdin .64 +argc 2 < if + stdin !fd cat_fd else 1 while dup argc < do - O_RDONLY over nth_argv AT_FDCWD openat + dup nth_argv !file_path_cstr + + 0 O_RDONLY @file_path_cstr AT_FDCWD openat - if dup 0 < do + dup 0 < if "ERROR: could not open file " eputs - over nth_argv dup cstrlen swap eputs + @file_path_cstr cstr-to-str eputs "\n" eputs drop else - fd swap .64 + fd !64 cat_fd - fd ,64 close drop + fd @64 close drop end 1 + diff --git a/examples/cat.txt b/examples/cat.txt index b773db37..dcdad2a6 100644 --- a/examples/cat.txt +++ b/examples/cat.txt @@ -8,37 +8,44 @@ foo :b stdin 0 :i returncode 0 -:b stdout 590 +:b stdout 786 include "std.porth" +memory fd sizeof(u64) end +macro @fd fd @64 end +macro !fd fd !64 end + macro BUFFER_CAP 1024 end +memory buffer BUFFER_CAP end -// memory layout -macro fd mem end -macro buffer fd 8 + end +memory file_path_cstr sizeof(ptr) end +macro @file_path_cstr file_path_cstr @64 cast(ptr) end +macro !file_path_cstr file_path_cstr !64 end macro cat_fd - while BUFFER_CAP buffer fd ,64 read dup 0 > do + while BUFFER_CAP buffer @fd read dup 0 > do buffer puts end drop end -if argc 2 < do - fd stdin .64 +argc 2 < if + stdin !fd cat_fd else 1 while dup argc < do - O_RDONLY over nth_argv AT_FDCWD openat + dup nth_argv !file_path_cstr + + 0 O_RDONLY @file_path_cstr AT_FDCWD openat - if dup 0 < do + dup 0 < if "ERROR: could not open file " eputs - over nth_argv dup cstrlen swap eputs + @file_path_cstr cstr-to-str eputs "\n" eputs drop else - fd swap .64 + fd !64 cat_fd - fd ,64 close drop + fd @64 close drop end 1 + diff --git a/examples/checker.porth b/examples/checker.porth new file mode 100644 index 00000000..b57f0033 --- /dev/null +++ 
b/examples/checker.porth @@ -0,0 +1,73 @@ +// $ ./porth.py com ./checker.porth +// $ ./checker output.ppm +// $ feh ./output.ppm + +include "std.porth" + +argc 2 < if + "Usage: ./checker \n" eputs + "[ERROR] no output file path is provided\n" eputs + 1 exit +end + +macro WIDTH 512 end +macro HEIGHT 512 end +macro CELL_WIDTH 64 end +macro CELL_HEIGHT 64 end +macro sizeof(pixel) 3 end + +memory canvas sizeof(pixel) WIDTH * HEIGHT * end + +0 while dup HEIGHT < do + 0 while dup WIDTH < do + 2dup CELL_WIDTH / + swap CELL_HEIGHT / + + + 2 % + 0 = if + 2dup swap WIDTH * + sizeof(pixel) * canvas + + dup 255 swap !8 1 + + dup 0 swap !8 1 + + dup 255 swap !8 drop + else + 2dup swap WIDTH * + sizeof(pixel) * canvas + + dup 0 swap !8 1 + + dup 0 swap !8 1 + + dup 0 swap !8 drop + end + 1 + + end drop + 1 + +end drop + +memory file_path_cstr sizeof(ptr) end +macro @file_path_cstr file_path_cstr @64 cast(ptr) end +macro @file_path @file_path_cstr cstr-to-str end +1 nth_argv file_path_cstr !64 + +memory fd sizeof(u64) end +macro @fd fd @64 end +macro !fd fd !64 end + +"[INFO] Generating " puts @file_path puts "\n" puts + +420 +O_CREAT O_WRONLY or +@file_path_cstr +AT_FDCWD +openat +!fd + +@fd 0 < if + "[ERROR] could not open file `" eputs + @file_path eputs + "`\n" eputs + 1 exit +end + +"P6\n" @fd fputs +WIDTH @fd fputu +" " @fd fputs +HEIGHT @fd fputu +" 255\n" @fd fputs +WIDTH HEIGHT * sizeof(pixel) * canvas @fd fputs diff --git a/examples/fizz-buzz.porth b/examples/fizz-buzz.porth index 96e9d07d..f205d1d2 100644 --- a/examples/fizz-buzz.porth +++ b/examples/fizz-buzz.porth @@ -1,11 +1,11 @@ include "std.porth" 1 while dup 100 < do - if dup 15 mod 0 = do + dup 15 mod 0 = if "FizzBuzz\n" puts - elif dup 3 mod 0 = do + else dup 3 mod 0 = if* "Fizz\n" puts - elif dup 5 mod 0 = do + else dup 5 mod 0 = if* "Buzz\n" puts else dup print diff --git a/examples/gol.porth b/examples/gol.porth index badc6c54..fff5b3c7 100644 --- a/examples/gol.porth +++ b/examples/gol.porth @@ -4,61 +4,42 
@@ include "std.porth" macro ROWS 10 end macro COLS 20 end macro BOARD_SIZE ROWS COLS * end -macro PUTD_BUFFER_CAP 32 end // memory layout -macro putd_buffer mem end -macro delta_time putd_buffer PUTD_BUFFER_CAP + end -macro board_current_index delta_time 16 + end -macro nbors board_current_index 8 + end -macro value nbors 8 + end -macro board_base value 8 + end -macro display BOARD_SIZE 2 * board_base + end - -macro putd - if dup 0 = do - "0" puts - else - putd_buffer PUTD_BUFFER_CAP + - while over 0 > do - 1 - dup rot - 10 divmod - rot swap '0' + . swap - end - - dup - putd_buffer PUTD_BUFFER_CAP + swap - swap puts - end - drop -end - -macro board_current - board_base board_current_index ,64 BOARD_SIZE * + +memory delta_time sizeof(timespec) end +memory board_current_index sizeof(u64) end +memory nbors sizeof(u64) end +memory value sizeof(u64) end +memory board_base BOARD_SIZE 2 * end +memory display COLS 1 + end + +proc board_current + board_base board_current_index @64 BOARD_SIZE * + end -macro board_next - board_base 1 board_current_index ,64 - BOARD_SIZE * + +proc board_next + board_base 1 board_current_index @64 - BOARD_SIZE * + end -macro swap_boards - board_current_index 1 board_current_index ,64 - .64 +proc swap_boards + 1 board_current_index @64 - board_current_index !64 end -macro display_row +proc display_row 0 while dup COLS < do - if 2dup + , 0 = do - display over + '.' . + 2dup + @8 0 = if + display over + '.' swap !8 else - display over + '#' . + display over + '#' swap !8 end 1 + end drop - COLS display + '\n' . 
+ COLS display + '\n' swap !8 COLS 1 + display puts drop end -macro display_board +proc display_board 0 while dup ROWS < do 2dup COLS * + display_row 1 + @@ -66,79 +47,79 @@ macro display_board drop end -macro display_current_board +proc display_current_board board_current display_board end -macro get_current_cell - swap COLS * + board_current + , +proc get_current_cell + swap COLS * + board_current + @8 end -macro set_next_cell - value swap .64 +proc set_next_cell + value !64 swap COLS * + board_next + - value ,64 - . + value @64 + swap !8 end -macro in_bounds +proc in_bounds dup 0 >= swap COLS < and swap dup 0 >= swap ROWS < and and end -macro count_current_nbors - nbors 0 .64 +proc count_current_nbors + 0 nbors !64 - if 2dup 1 - swap 1 - swap 2dup in_bounds - rot rot swap COLS * + board_current + , 1 = - and do nbors inc64 end + 2dup 1 - swap 1 - swap 2dup in_bounds + rot rot swap COLS * + board_current + @8 1 = + and if nbors inc64 end - if 2dup 1 - 2dup in_bounds - rot rot swap COLS * + board_current + , 1 = - and do nbors inc64 end + 2dup 1 - 2dup in_bounds + rot rot swap COLS * + board_current + @8 1 = + and if nbors inc64 end - if 2dup 1 - swap 1 + swap 2dup in_bounds - rot rot swap COLS * + board_current + , 1 = - and do nbors inc64 end + 2dup 1 - swap 1 + swap 2dup in_bounds + rot rot swap COLS * + board_current + @8 1 = + and if nbors inc64 end - if 2dup swap 1 - swap 2dup in_bounds - rot rot swap COLS * + board_current + , 1 = - and do nbors inc64 end + 2dup swap 1 - swap 2dup in_bounds + rot rot swap COLS * + board_current + @8 1 = + and if nbors inc64 end - if 2dup swap 1 + swap 2dup in_bounds - rot rot swap COLS * + board_current + , 1 = - and do nbors inc64 end + 2dup swap 1 + swap 2dup in_bounds + rot rot swap COLS * + board_current + @8 1 = + and if nbors inc64 end - if 2dup 1 + swap 1 - swap 2dup in_bounds - rot rot swap COLS * + board_current + , 1 = - and do nbors inc64 end + 2dup 1 + swap 1 - swap 2dup in_bounds + rot rot swap COLS * + 
board_current + @8 1 = + and if nbors inc64 end - if 2dup 1 + 2dup in_bounds - rot rot swap COLS * + board_current + , 1 = - and do nbors inc64 end + 2dup 1 + 2dup in_bounds + rot rot swap COLS * + board_current + @8 1 = + and if nbors inc64 end - if 2dup 1 + swap 1 + swap 2dup in_bounds - rot rot swap COLS * + board_current + , 1 = - and do nbors inc64 end + 2dup 1 + swap 1 + swap 2dup in_bounds + rot rot swap COLS * + board_current + @8 1 = + and if nbors inc64 end 2drop - nbors ,64 + nbors @64 end -macro compute_next_board +proc compute_next_board 0 while dup ROWS < do 0 while dup COLS < do - if 2dup get_current_cell 1 = do + 2dup get_current_cell 1 = if 2dup count_current_nbors - if dup 2 = swap 3 = or do + dup 2 = swap 3 = or if 2dup 1 set_next_cell else 2dup 0 set_next_cell end else - if 2dup count_current_nbors 3 = do + 2dup count_current_nbors 3 = if 2dup 1 set_next_cell else 2dup 0 set_next_cell @@ -154,17 +135,17 @@ end // .*. // ..* // *** -macro put_glider - dup 0 COLS * 1 + + 1 . - dup 1 COLS * 2 + + 1 . - dup 2 COLS * 0 + + 1 . - dup 2 COLS * 1 + + 1 . - dup 2 COLS * 2 + + 1 . +proc put_glider + dup 0 COLS * 1 + + 1 swap !8 + dup 1 COLS * 2 + + 1 swap !8 + dup 2 COLS * 0 + + 1 swap !8 + dup 2 COLS * 1 + + 1 swap !8 + dup 2 COLS * 2 + + 1 swap !8 drop end -macro main - delta_time 8 + 100000000 .64 +proc main + 100000000 delta_time 8 + !64 board_current put_glider @@ -174,8 +155,8 @@ macro main swap_boards NULL delta_time 0 CLOCK_MONOTONIC clock_nanosleep drop - "\033[" puts ROWS putd "A" puts - "\033[" puts COLS putd "D" puts + "\033[" puts ROWS putu "A" puts + "\033[" puts COLS putu "D" puts end end diff --git a/examples/name.porth b/examples/name.porth index d7c16b26..6f9b9a3b 100644 --- a/examples/name.porth +++ b/examples/name.porth @@ -1,17 +1,17 @@ include "std.porth" macro NAME_CAPACITY 256 end -macro name mem end +memory name NAME_CAPACITY end "What is your name? 
" puts NAME_CAPACITY name stdin read -if dup 0 <= do +dup 0 <= if "ERROR: could not read your name, sorry ( ._.)\n" eputs 1 exit end -if name over + 1 - , '\n' = do +name over + 1 - @8 '\n' = if 1 - end diff --git a/examples/reverse-linked-list.porth b/examples/reverse-linked-list.porth index e21dcf57..db5b2de5 100644 --- a/examples/reverse-linked-list.porth +++ b/examples/reverse-linked-list.porth @@ -1,30 +1,33 @@ include "./std.porth" -macro ,node/value ,64 end -macro ,node/prev 8 + ,64 end -macro .node/value .64 end -macro .node/prev swap 8 + swap .64 end -macro sizeof(node) 16 end +macro sizeof(Node) 16 end +macro Node.value 0 + end +macro Node.prev 8 + end +macro @Node.value Node.value @64 end +macro @Node.prev Node.prev @64 end +macro !Node.value Node.value !64 end +macro !Node.prev Node.prev !64 end -macro list_a mem end -macro list_b list_a 8 + end +memory list_a sizeof(ptr) end +memory list_b sizeof(ptr) end -macro nodes_count list_b 8 + end -macro nodes nodes_count 8 + end +macro NODES_CAP 1024 end +memory nodes_count sizeof(u64) end +memory nodes sizeof(Node) NODES_CAP * end macro alloc_node - nodes_count ,64 sizeof(node) * nodes + - nodes_count dup ,64 1 + .64 + nodes_count @64 sizeof(Node) * nodes + + nodes_count dup @64 1 + swap !64 end macro push_node - over alloc_node 2dup swap ,64 .node/prev .64 - swap ,64 cast(ptr) swap .node/value + over alloc_node 2dup swap @64 swap !Node.prev swap !64 + swap @64 cast(ptr) !Node.value end macro pop_node - dup ,64 cast(ptr) ,node/value swap - dup ,64 cast(ptr) ,node/prev .64 + dup @64 cast(ptr) @Node.value swap + dup @64 cast(ptr) @Node.prev swap !64 end // initialize list_a @@ -34,11 +37,11 @@ end end drop // reverse list_a into list_b -while list_a ,64 0 != do +while list_a @64 0 != do list_a pop_node list_b swap push_node end // print list_b -while list_b ,64 0 != do +while list_b @64 0 != do list_b pop_node print end diff --git a/examples/rot13.porth b/examples/rot13.porth index d3e1e837..1edde239 100644 --- 
a/examples/rot13.porth +++ b/examples/rot13.porth @@ -1,20 +1,20 @@ include "std.porth" macro BUFFER_CAP 1024 end -macro buffer mem end +memory buffer BUFFER_CAP end while BUFFER_CAP buffer stdin read dup 0 > do 0 while 2dup > do - dup buffer + , + dup buffer + @8 - if dup 'a' >= over 'z' <= and do + dup 'a' >= over 'z' <= and if 2dup 'a' - 13 + 26 mod 'a' + - swap buffer + swap . + swap buffer + !8 end - if dup 'A' >= over 'Z' <= and do + dup 'A' >= over 'Z' <= and if 2dup 'A' - 13 + 26 mod 'A' + - swap buffer + swap . + swap buffer + !8 end drop diff --git a/examples/rule110.porth b/examples/rule110.porth index b1e1eb5d..6034a946 100644 --- a/examples/rule110.porth +++ b/examples/rule110.porth @@ -4,33 +4,33 @@ include "std.porth" macro N 100 end // Memory layout -macro row mem end -macro display row N + end +memory row N end +memory display N 1 + end -row N 2 - + 1 . -display N + 10 . +row N 2 - + 1 swap !8 +display N + 10 swap !8 0 while dup N 2 - < do 0 while dup N < do - if dup row + , 1 = do - dup display + '*' . + dup row + @8 1 = if + dup display + '*' swap !8 else - dup display + ' ' . + dup display + ' ' swap !8 end 1 + end drop N 1 + display puts - row , 1 shl - row 1 + , + row @8 1 shl + row 1 + @8 or 1 while dup N 2 - < do swap 1 shl 7 and - over row + 1 + , or + over row + 1 + @8 or 2dup 110 swap shr 1 and - swap row + swap . 
+ swap row + !8 swap 1 + diff --git a/examples/seq.porth b/examples/seq.porth index ba953111..9ae27e58 100644 --- a/examples/seq.porth +++ b/examples/seq.porth @@ -1,31 +1,31 @@ include "std.porth" -macro limit mem end +memory limit sizeof(u64) end -if argc 2 < do +argc 2 < if "Usage: seq \n" eputs "ERROR: no limit is provided\n" eputs 1 exit end 1 nth_argv -while dup , 0 != do - if dup , '0' < over , '9' > or do +while dup @8 0 != do + dup @8 '0' < over @8 '9' > or if "ERROR: `" eputs 1 nth_argv cstrlen 1 nth_argv eputs "` is not a correct integer\n" eputs 1 exit end - limit ,64 10 * - over , '0' - + limit @64 10 * + over @8 '0' - + - limit swap .64 + limit !64 1 + end drop -0 while dup limit ,64 < do +0 while dup limit @64 < do dup print 1 + end drop diff --git a/porth.porth b/porth.porth index 3bd18a5f..d65c5f5f 100644 --- a/porth.porth +++ b/porth.porth @@ -1,140 +1,533 @@ +// In progress rewrite of ./porth.py in Porth +// compile-ops writes output.asm, runs nasm and ld on it and executes the result; simulate-ops interprets the ops directly include "std.porth" -macro PUTD_BUFFER_CAP 32 end -macro MEM_CAPACITY 640000 end -macro SIM_STACK_CAP 1024 end - -macro OP_PUSH_INT 0 end -macro OP_PLUS 1 end -macro OP_PRINT 2 end - -// struct Op { -// type: u64, -// operand: u64, -// } -macro Op.type nop end -macro Op.operand 8 + end -macro sizeof(Op) 16 end - -// Memory Layout -macro putd-buffer mem end -macro sim-stack-count putd-buffer PUTD_BUFFER_CAP + end -macro sim-stack sim-stack-count 8 + end -macro ops-count sim-stack SIM_STACK_CAP 8 * + end -macro ops ops-count 8 + end - -macro sim-stack-push // u64 -- - if sim-stack-count @64 SIM_STACK_CAP >= do +const MEM_CAPACITY 640000 end +const SIM_STACK_CAP 1024 end + +const OP_PUSH_INT 1 offset end +const OP_IF 1 offset end +const OP_END 1 offset end +const OP_INTRINSIC 1 offset end +const COUNT_OPS reset end + +const INTRINSIC_PLUS 1 offset end +const INTRINSIC_MINUS 1 offset end +const INTRINSIC_MUL 1 offset end +const INTRINSIC_DIVMOD 1 offset end +const INTRINSIC_EQ 1 offset end +const INTRINSIC_GT 1 offset end +const
INTRINSIC_LT 1 offset end +const INTRINSIC_GE 1 offset end +const INTRINSIC_LE 1 offset end +const INTRINSIC_NE 1 offset end +const INTRINSIC_SHR 1 offset end +const INTRINSIC_SHL 1 offset end +const INTRINSIC_OR 1 offset end +const INTRINSIC_AND 1 offset end +const INTRINSIC_NOT 1 offset end +const INTRINSIC_PRINT 1 offset end +const INTRINSIC_DUP 1 offset end +const INTRINSIC_SWAP 1 offset end +const INTRINSIC_DROP 1 offset end +const INTRINSIC_OVER 1 offset end +const INTRINSIC_ROT 1 offset end +const INTRINSIC_LOAD8 1 offset end +const INTRINSIC_STORE8 1 offset end +const INTRINSIC_LOAD16 1 offset end +const INTRINSIC_STORE16 1 offset end +const INTRINSIC_LOAD32 1 offset end +const INTRINSIC_STORE32 1 offset end +const INTRINSIC_LOAD64 1 offset end +const INTRINSIC_STORE64 1 offset end +const INTRINSIC_CAST_PTR 1 offset end +const INTRINSIC_CAST_INT 1 offset end +const INTRINSIC_CAST_BOOL 1 offset end +const INTRINSIC_ARGC 1 offset end +const INTRINSIC_ARGV 1 offset end +const INTRINSIC_HERE 1 offset end +const INTRINSIC_SYSCALL0 1 offset end +const INTRINSIC_SYSCALL1 1 offset end +const INTRINSIC_SYSCALL2 1 offset end +const INTRINSIC_SYSCALL3 1 offset end +const INTRINSIC_SYSCALL4 1 offset end +const INTRINSIC_SYSCALL5 1 offset end +const INTRINSIC_SYSCALL6 1 offset end +const COUNT_INTRINSICS reset end + +const offsetof(Op.type) sizeof(u64) offset end +const offsetof(Op.operand) sizeof(u64) offset end +const sizeof(Op) reset end +proc Op.type offsetof(Op.type) + end +proc @Op.type Op.type @64 end +proc !Op.type Op.type !64 end +proc Op.operand offsetof(Op.operand) + end +proc @Op.operand Op.operand @64 end +proc !Op.operand Op.operand !64 end + +// TODO: implement reusable stack data structure + +memory sim-stack-count sizeof(u64) end +memory sim-stack sizeof(u64) SIM_STACK_CAP * end +const OPS_CAP 1024 end +memory ops-count sizeof(u64) end +proc @ops-count ops-count @64 end +memory ops sizeof(Op) OPS_CAP * end + +proc cmd-echoed // argv + memory wstatus 
sizeof(u64) end + memory empty_envp sizeof(ptr) end + 0 empty_envp !64 + + "[CMD]" puts + dup while dup @64 0 != do + " " puts + // TODO: properly escape the logged CMD + dup @64 cast(ptr) cstr-to-str puts + 8 + + end drop + "\n" puts + + fork + + dup 0 = if + drop + dup @64 cast(ptr) empty_envp + rot rot + execve + dup 0 < if + "[ERROR] could not exec external program\n" eputs + 1 exit + end + else dup 0 > if* + drop + // TODO: handle the result of wait4 + NULL 0 wstatus -1 wait4 drop + else + drop + "[ERROR] could not fork a child\n" eputs + 1 exit + end + + drop +end + +proc sim-stack-push // u64 -- + cast(int) + sim-stack-count @64 SIM_STACK_CAP >= if here eputs ": ERROR: data stack overflow in simulation mode\n" eputs 1 exit end sim-stack sim-stack-count @64 8 * + !64 sim-stack-count inc64 end -macro sim-stack-pop // -- u64 - if sim-stack-count @64 0 = do +proc sim-stack-pop // -- u64 + sim-stack-count @64 0 = if here eputs ": ERROR: data stack underflow in simulation mode\n" eputs 1 exit end sim-stack-count dec64 sim-stack sim-stack-count @64 8 * + @64 end -macro putd // u64 -- - if dup 0 = do - "0" puts - else - putd-buffer PUTD_BUFFER_CAP + - while over 0 > do - 1 - dup rot - 10 divmod - rot swap '0' + . 
swap - end - - dup - putd-buffer PUTD_BUFFER_CAP + swap - swap puts +proc push-op // type operand -- + @ops-count OPS_CAP >= if + here eputs ": ERROR: ops overflow\n" eputs 1 exit end - drop -end -macro push-op // type operand -- - ops-count @64 sizeof(Op) * ops + - dup Op.operand rot swap !64 - Op.type !64 + @ops-count sizeof(Op) * ops + + dup rot swap !Op.operand + !Op.type ops-count inc64 end -macro dump-ops // -- - 0 while dup ops-count @64 < do +proc print-op-type + COUNT_OPS 4 != if + here eputs ": Assertion Failed: Exhaustive handling of Op types in print-op-type\n" eputs + 1 exit + end + + dup OP_PUSH_INT = if + "OP_PUSH_INT" puts + else dup OP_INTRINSIC = if* + "OP_INTRINSIC" puts + else dup OP_IF = if* + "OP_IF" puts + else dup OP_END = if* + "OP_END" puts + else + here eputs ": Unknown op type\n" eputs 1 exit + end + drop +end + +proc dump-ops // -- + 0 while dup @ops-count < do // ptr ptr dup sizeof(Op) * ops + - "Type: " puts dup Op.type @64 print - "Operand: " puts Op.operand @64 print + "IP: " puts over putu "\n" puts + "Type: " puts dup @Op.type print-op-type "\n" puts + "Operand: " puts @Op.operand putu "\n" puts "----------\n" puts 1 + end drop end -// TODO: porth.porth does not run nasm and ld as external commands to finish off the process of compilation -macro compile-ops // -- - "BITS 64\n" puts - "segment .text\n" puts - "print:\n" puts - " mov r9, -3689348814741910323\n" puts - " sub rsp, 40\n" puts - " mov BYTE [rsp+31], 10\n" puts - " lea rcx, [rsp+30]\n" puts - ".L2:\n" puts - " mov rax, rdi\n" puts - " lea r8, [rsp+32]\n" puts - " mul r9\n" puts - " mov rax, rdi\n" puts - " sub r8, rcx\n" puts - " shr rdx, 3\n" puts - " lea rsi, [rdx+rdx*4]\n" puts - " add rsi, rsi\n" puts - " sub rax, rsi\n" puts - " add eax, 48\n" puts - " mov BYTE [rcx], al\n" puts - " mov rax, rdi\n" puts - " mov rdi, rdx\n" puts - " mov rdx, rcx\n" puts - " sub rcx, 1\n" puts - " cmp rax, 9\n" puts - " ja .L2\n" puts - " lea rax, [rsp+32]\n" puts - " mov edi, 1\n" 
puts - " sub rdx, rax\n" puts - " xor eax, eax\n" puts - " lea rsi, [rsp+32+rdx]\n" puts - " mov rdx, r8\n" puts - " mov rax, 1\n" puts - " syscall\n" puts - " add rsp, 40\n" puts - " ret\n" puts - "global _start\n" puts - "_start:\n" puts - " mov [args_ptr], rsp\n" puts - - 0 while dup ops-count @64 < do +proc compile-ops // -- + "[INFO] Generating output.asm\n" puts + + memory out-fd sizeof(u64) end + + 420 // mode + O_CREAT O_WRONLY or // flags + // TODO: the output file path should be based on the input file path + "output.asm"c // pathname + AT_FDCWD + openat + out-fd !64 + + out-fd @64 0 < if + "[ERROR] could not open `output.asm`\n" eputs + 1 exit + end + + "BITS 64\n" out-fd @64 fputs + "segment .text\n" out-fd @64 fputs + "print:\n" out-fd @64 fputs + " mov r9, -3689348814741910323\n" out-fd @64 fputs + " sub rsp, 40\n" out-fd @64 fputs + " mov BYTE [rsp+31], 10\n" out-fd @64 fputs + " lea rcx, [rsp+30]\n" out-fd @64 fputs + ".L2:\n" out-fd @64 fputs + " mov rax, rdi\n" out-fd @64 fputs + " lea r8, [rsp+32]\n" out-fd @64 fputs + " mul r9\n" out-fd @64 fputs + " mov rax, rdi\n" out-fd @64 fputs + " sub r8, rcx\n" out-fd @64 fputs + " shr rdx, 3\n" out-fd @64 fputs + " lea rsi, [rdx+rdx*4]\n" out-fd @64 fputs + " add rsi, rsi\n" out-fd @64 fputs + " sub rax, rsi\n" out-fd @64 fputs + " add eax, 48\n" out-fd @64 fputs + " mov BYTE [rcx], al\n" out-fd @64 fputs + " mov rax, rdi\n" out-fd @64 fputs + " mov rdi, rdx\n" out-fd @64 fputs + " mov rdx, rcx\n" out-fd @64 fputs + " sub rcx, 1\n" out-fd @64 fputs + " cmp rax, 9\n" out-fd @64 fputs + " ja .L2\n" out-fd @64 fputs + " lea rax, [rsp+32]\n" out-fd @64 fputs + " mov edi, 1\n" out-fd @64 fputs + " sub rdx, rax\n" out-fd @64 fputs + " xor eax, eax\n" out-fd @64 fputs + " lea rsi, [rsp+32+rdx]\n" out-fd @64 fputs + " mov rdx, r8\n" out-fd @64 fputs + " mov rax, 1\n" out-fd @64 fputs + " syscall\n" out-fd @64 fputs + " add rsp, 40\n" out-fd @64 fputs + " ret\n" out-fd @64 fputs + "global _start\n" out-fd @64 
fputs + "_start:\n" out-fd @64 fputs + " mov [args_ptr], rsp\n" out-fd @64 fputs + + 0 while dup @ops-count < do dup sizeof(Op) * ops + - if dup Op.type @64 OP_PUSH_INT = do - " ;; -- push int " puts dup Op.operand @64 putd " --\n" puts - " mov rax, " puts dup Op.operand @64 putd "\n" puts - " push rax\n" puts - elif dup Op.type @64 OP_PLUS = do - " ;; -- plus --\n" puts - " pop rax\n" puts - " pop rbx\n" puts - " add rax, rbx\n" puts - " push rax\n" puts - elif dup Op.type @64 OP_PRINT = do - " ;; -- print --\n" puts - " pop rdi\n" puts - " call print\n" puts + // TODO: compile time assertion + COUNT_OPS 4 != if + here eputs ": Assertion Failed: Exhaustive handling of Op types in compile-ops\n" eputs + 1 exit + end + + "addr_" out-fd @64 fputs + over out-fd @64 fputu + ":\n" out-fd @64 fputs + + dup @Op.type OP_PUSH_INT = if + " ;; -- push int " out-fd @64 fputs dup @Op.operand out-fd @64 fputu " --\n" out-fd @64 fputs + " mov rax, " out-fd @64 fputs dup @Op.operand out-fd @64 fputu "\n" out-fd @64 fputs + " push rax\n" out-fd @64 fputs + else dup @Op.type OP_IF = if* + " ;; -- if --\n" out-fd @64 fputs + " pop rax\n" out-fd @64 fputs + " test rax, rax\n" out-fd @64 fputs + " jz addr_" out-fd @64 fputs dup @Op.operand out-fd @64 fputu "\n" out-fd @64 fputs + else dup @Op.type OP_END = if* + " ;; -- end --\n" out-fd @64 fputs + " jmp addr_" out-fd @64 fputs dup @Op.operand out-fd @64 fputu "\n" out-fd @64 fputs + else dup @Op.type OP_INTRINSIC = if* + COUNT_INTRINSICS 42 != if + here eputs ": Assertion Failed: Exhaustive handling of Intrinsics in compile-ops\n" eputs + 1 exit + end + + dup @Op.operand INTRINSIC_PLUS = if + " ;; -- plus --\n" out-fd @64 fputs + " pop rax\n" out-fd @64 fputs + " pop rbx\n" out-fd @64 fputs + " add rax, rbx\n" out-fd @64 fputs + " push rax\n" out-fd @64 fputs + else dup @Op.operand INTRINSIC_MINUS = if* + " ;; -- minus --\n" out-fd @64 fputs + " pop rax\n" out-fd @64 fputs + " pop rbx\n" out-fd @64 fputs + " sub rbx, rax\n" out-fd @64 
fputs + " push rbx\n" out-fd @64 fputs + else dup @Op.operand INTRINSIC_MUL = if* + " ;; -- mul --\n" out-fd @64 fputs + " pop rax\n" out-fd @64 fputs + " pop rbx\n" out-fd @64 fputs + " mul rbx\n" out-fd @64 fputs + " push rax\n" out-fd @64 fputs + else dup @Op.operand INTRINSIC_DIVMOD = if* + " ;; -- mod --\n" out-fd @64 fputs + " xor rdx, rdx\n" out-fd @64 fputs + " pop rbx\n" out-fd @64 fputs + " pop rax\n" out-fd @64 fputs + " div rbx\n" out-fd @64 fputs + " push rax\n" out-fd @64 fputs + " push rdx\n" out-fd @64 fputs + else dup @Op.operand INTRINSIC_SHR = if* + " ;; -- shr --\n" out-fd @64 fputs + " pop rcx\n" out-fd @64 fputs + " pop rbx\n" out-fd @64 fputs + " shr rbx, cl\n" out-fd @64 fputs + " push rbx\n" out-fd @64 fputs + else dup @Op.operand INTRINSIC_SHL = if* + " ;; -- shl --\n" out-fd @64 fputs + " pop rcx\n" out-fd @64 fputs + " pop rbx\n" out-fd @64 fputs + " shl rbx, cl\n" out-fd @64 fputs + " push rbx\n" out-fd @64 fputs + else dup @Op.operand INTRINSIC_OR = if* + " ;; -- bor --\n" out-fd @64 fputs + " pop rax\n" out-fd @64 fputs + " pop rbx\n" out-fd @64 fputs + " or rbx, rax\n" out-fd @64 fputs + " push rbx\n" out-fd @64 fputs + else dup @Op.operand INTRINSIC_AND = if* + " ;; -- band --\n" out-fd @64 fputs + " pop rax\n" out-fd @64 fputs + " pop rbx\n" out-fd @64 fputs + " and rbx, rax\n" out-fd @64 fputs + " push rbx\n" out-fd @64 fputs + else dup @Op.operand INTRINSIC_NOT = if* + " ;; -- not --\n" out-fd @64 fputs + " pop rax\n" out-fd @64 fputs + " not rax\n" out-fd @64 fputs + " push rax\n" out-fd @64 fputs + else dup @Op.operand INTRINSIC_PRINT = if* + " ;; -- print --\n" out-fd @64 fputs + " pop rdi\n" out-fd @64 fputs + " call print\n" out-fd @64 fputs + else dup @Op.operand INTRINSIC_EQ = if* + " ;; -- equal --\n" out-fd @64 fputs + " mov rcx, 0\n" out-fd @64 fputs + " mov rdx, 1\n" out-fd @64 fputs + " pop rax\n" out-fd @64 fputs + " pop rbx\n" out-fd @64 fputs + " cmp rax, rbx\n" out-fd @64 fputs + " cmove rcx, rdx\n" out-fd @64 
fputs + " push rcx\n" out-fd @64 fputs + else dup @Op.operand INTRINSIC_GT = if* + " ;; -- gt --\n" out-fd @64 fputs + " mov rcx, 0\n" out-fd @64 fputs + " mov rdx, 1\n" out-fd @64 fputs + " pop rbx\n" out-fd @64 fputs + " pop rax\n" out-fd @64 fputs + " cmp rax, rbx\n" out-fd @64 fputs + " cmovg rcx, rdx\n" out-fd @64 fputs + " push rcx\n" out-fd @64 fputs + else dup @Op.operand INTRINSIC_LT = if* + " ;; -- lt --\n" out-fd @64 fputs + " mov rcx, 0\n" out-fd @64 fputs + " mov rdx, 1\n" out-fd @64 fputs + " pop rbx\n" out-fd @64 fputs + " pop rax\n" out-fd @64 fputs + " cmp rax, rbx\n" out-fd @64 fputs + " cmovl rcx, rdx\n" out-fd @64 fputs + " push rcx\n" out-fd @64 fputs + else dup @Op.operand INTRINSIC_GE = if* + " ;; -- ge --\n" out-fd @64 fputs + " mov rcx, 0\n" out-fd @64 fputs + " mov rdx, 1\n" out-fd @64 fputs + " pop rbx\n" out-fd @64 fputs + " pop rax\n" out-fd @64 fputs + " cmp rax, rbx\n" out-fd @64 fputs + " cmovge rcx, rdx\n" out-fd @64 fputs + " push rcx\n" out-fd @64 fputs + else dup @Op.operand INTRINSIC_LE = if* + " ;; -- le --\n" out-fd @64 fputs + " mov rcx, 0\n" out-fd @64 fputs + " mov rdx, 1\n" out-fd @64 fputs + " pop rbx\n" out-fd @64 fputs + " pop rax\n" out-fd @64 fputs + " cmp rax, rbx\n" out-fd @64 fputs + " cmovle rcx, rdx\n" out-fd @64 fputs + " push rcx\n" out-fd @64 fputs + else dup @Op.operand INTRINSIC_NE = if* + " ;; -- ne --\n" out-fd @64 fputs + " mov rcx, 0\n" out-fd @64 fputs + " mov rdx, 1\n" out-fd @64 fputs + " pop rbx\n" out-fd @64 fputs + " pop rax\n" out-fd @64 fputs + " cmp rax, rbx\n" out-fd @64 fputs + " cmovne rcx, rdx\n" out-fd @64 fputs + " push rcx\n" out-fd @64 fputs + else dup @Op.operand INTRINSIC_DUP = if* + " ;; -- dup --\n" out-fd @64 fputs + " pop rax\n" out-fd @64 fputs + " push rax\n" out-fd @64 fputs + " push rax\n" out-fd @64 fputs + else dup @Op.operand INTRINSIC_SWAP = if* + " ;; -- swap --\n" out-fd @64 fputs + " pop rax\n" out-fd @64 fputs + " pop rbx\n" out-fd @64 fputs + " push rax\n" out-fd @64 fputs + " push rbx\n" out-fd @64
fputs + " push rbx\n" out-fd @64 fputs + else dup @Op.operand INTRINSIC_DROP = if* + " ;; -- drop --\n" out-fd @64 fputs + " pop rax\n" out-fd @64 fputs + else dup @Op.operand INTRINSIC_OVER = if* + " ;; -- over --\n" out-fd @64 fputs + " pop rax\n" out-fd @64 fputs + " pop rbx\n" out-fd @64 fputs + " push rbx\n" out-fd @64 fputs + " push rax\n" out-fd @64 fputs + " push rbx\n" out-fd @64 fputs + else dup @Op.operand INTRINSIC_ROT = if* + " ;; -- rot --\n" out-fd @64 fputs + " pop rax\n" out-fd @64 fputs + " pop rbx\n" out-fd @64 fputs + " pop rcx\n" out-fd @64 fputs + " push rbx\n" out-fd @64 fputs + " push rax\n" out-fd @64 fputs + " push rcx\n" out-fd @64 fputs + else dup @Op.operand INTRINSIC_LOAD8 = if* + " ;; -- @8 --\n" out-fd @64 fputs + " pop rax\n" out-fd @64 fputs + " xor rbx, rbx\n" out-fd @64 fputs + " mov bl, [rax]\n" out-fd @64 fputs + " push rbx\n" out-fd @64 fputs + else dup @Op.operand INTRINSIC_STORE8 = if* + " ;; -- !8 --\n" out-fd @64 fputs + " pop rax\n" out-fd @64 fputs + " pop rbx\n" out-fd @64 fputs + " mov [rax], bl\n" out-fd @64 fputs + else dup @Op.operand INTRINSIC_LOAD16 = if* + " ;; -- @16 --\n" out-fd @64 fputs + " pop rax\n" out-fd @64 fputs + " xor rbx, rbx\n" out-fd @64 fputs + " mov bx, [rax]\n" out-fd @64 fputs + " push rbx\n" out-fd @64 fputs + else dup @Op.operand INTRINSIC_STORE16 = if* + " ;; -- !16 --\n" out-fd @64 fputs + " pop rax\n" out-fd @64 fputs + " pop rbx\n" out-fd @64 fputs + " mov [rax], bx\n" out-fd @64 fputs + else dup @Op.operand INTRINSIC_LOAD32 = if* + " ;; -- @32 --\n" out-fd @64 fputs + " pop rax\n" out-fd @64 fputs + " xor rbx, rbx\n" out-fd @64 fputs + " mov ebx, [rax]\n" out-fd @64 fputs + " push rbx\n" out-fd @64 fputs + else dup @Op.operand INTRINSIC_STORE32 = if* + " ;; -- !32 --\n" out-fd @64 fputs + " pop rax\n" out-fd @64 fputs + " pop rbx\n" out-fd @64 fputs + " mov [rax], ebx\n" out-fd @64 fputs + else dup @Op.operand INTRINSIC_LOAD64 = if* + " ;; -- @64 --\n" out-fd @64 fputs + " pop rax\n" 
out-fd @64 fputs + " xor rbx, rbx\n" out-fd @64 fputs + " mov rbx, [rax]\n" out-fd @64 fputs + " push rbx\n" out-fd @64 fputs + else dup @Op.operand INTRINSIC_STORE64 = if* + " ;; -- !64 --\n" out-fd @64 fputs + " pop rax\n" out-fd @64 fputs + " pop rbx\n" out-fd @64 fputs + " mov [rax], rbx\n" out-fd @64 fputs + else dup @Op.operand INTRINSIC_ARGC = if* + " ;; -- argc --\n" out-fd @64 fputs + " mov rax, [args_ptr]\n" out-fd @64 fputs + " mov rax, [rax]\n" out-fd @64 fputs + " push rax\n" out-fd @64 fputs + else dup @Op.operand INTRINSIC_ARGV = if* + " ;; -- argv --\n" out-fd @64 fputs + " mov rax, [args_ptr]\n" out-fd @64 fputs + " add rax, 8\n" out-fd @64 fputs + " push rax\n" out-fd @64 fputs + else dup @Op.operand INTRINSIC_HERE = if* + here eputs ": TODO: intrinsic `here` is not implemented yet\n" eputs + // abort here: carrying on would silently leave this op out of output.asm + 1 exit + else dup @Op.operand INTRINSIC_CAST_PTR = if* + " ;; -- cast(ptr) --\n" out-fd @64 fputs + else dup @Op.operand INTRINSIC_CAST_INT = if* + " ;; -- cast(int) --\n" out-fd @64 fputs + else dup @Op.operand INTRINSIC_CAST_BOOL = if* + " ;; -- cast(bool) --\n" out-fd @64 fputs + else dup @Op.operand INTRINSIC_SYSCALL0 = if* + " ;; -- syscall0 --\n" out-fd @64 fputs + " pop rax\n" out-fd @64 fputs + " syscall\n" out-fd @64 fputs + " push rax\n" out-fd @64 fputs + else dup @Op.operand INTRINSIC_SYSCALL1 = if* + " ;; -- syscall1 --\n" out-fd @64 fputs + " pop rax\n" out-fd @64 fputs + " pop rdi\n" out-fd @64 fputs + " syscall\n" out-fd @64 fputs + " push rax\n" out-fd @64 fputs + else dup @Op.operand INTRINSIC_SYSCALL2 = if* + " ;; -- syscall2 --\n" out-fd @64 fputs + " pop rax\n" out-fd @64 fputs + " pop rdi\n" out-fd @64 fputs + " pop rsi\n" out-fd @64 fputs + " syscall\n" out-fd @64 fputs + " push rax\n" out-fd @64 fputs + else dup @Op.operand INTRINSIC_SYSCALL3 = if* + " ;; -- syscall3 --\n" out-fd @64 fputs + " pop rax\n" out-fd @64 fputs + " pop rdi\n" out-fd @64 fputs + " pop rsi\n" out-fd @64 fputs + " pop rdx\n" out-fd @64 fputs + " syscall\n" out-fd @64 fputs
+ " push rax\n" out-fd @64 fputs + else dup @Op.operand INTRINSIC_SYSCALL4 = if* + " ;; -- syscall4 --\n" out-fd @64 fputs + " pop rax\n" out-fd @64 fputs + " pop rdi\n" out-fd @64 fputs + " pop rsi\n" out-fd @64 fputs + " pop rdx\n" out-fd @64 fputs + " pop r10\n" out-fd @64 fputs + " syscall\n" out-fd @64 fputs + " push rax\n" out-fd @64 fputs + else dup @Op.operand INTRINSIC_SYSCALL5 = if* + " ;; -- syscall5 --\n" out-fd @64 fputs + " pop rax\n" out-fd @64 fputs + " pop rdi\n" out-fd @64 fputs + " pop rsi\n" out-fd @64 fputs + " pop rdx\n" out-fd @64 fputs + " pop r10\n" out-fd @64 fputs + " pop r8\n" out-fd @64 fputs + " syscall\n" out-fd @64 fputs + " push rax\n" out-fd @64 fputs + else dup @Op.operand INTRINSIC_SYSCALL6 = if* + " ;; -- syscall6 --\n" out-fd @64 fputs + " pop rax\n" out-fd @64 fputs + " pop rdi\n" out-fd @64 fputs + " pop rsi\n" out-fd @64 fputs + " pop rdx\n" out-fd @64 fputs + " pop r10\n" out-fd @64 fputs + " pop r8\n" out-fd @64 fputs + " pop r9\n" out-fd @64 fputs + " syscall\n" out-fd @64 fputs + " push rax\n" out-fd @64 fputs + else + here eputs ": unreachable.\n" eputs + 1 exit + end else - here eputs ": unreachable\n" eputs 1 exit + here eputs ": unreachable.\n" eputs + 1 exit end drop @@ -143,84 +536,548 @@ macro compile-ops // -- end drop - " mov rax, 60\n" puts - " mov rdi, 0\n" puts - " syscall\n" puts - "segment .bss\n" puts - "args_ptr: resq 1\n" puts - "mem: resb " puts MEM_CAPACITY putd "\n" puts -end + " mov rax, 60\n" out-fd @64 fputs + " mov rdi, 0\n" out-fd @64 fputs + " syscall\n" out-fd @64 fputs + "segment .bss\n" out-fd @64 fputs + "args_ptr: resq 1\n" out-fd @64 fputs + "mem: resb " out-fd @64 fputs MEM_CAPACITY out-fd @64 fputu "\n" out-fd @64 fputs -macro simulate-ops // -- - 0 while dup ops-count @64 < do - dup sizeof(Op) * ops + + out-fd @64 close drop + + memory nasm-argv sizeof(ptr) 4 * end + // TODO: search for external utilities in $PATH + "/usr/bin/nasm"c nasm-argv 0 8 * + !64 + "-felf64"c nasm-argv 1 8 * + 
!64 + "output.asm"c nasm-argv 2 8 * + !64 + NULL nasm-argv 3 8 * + !64 + nasm-argv cmd-echoed + + memory ld-argv sizeof(ptr) 5 * end + "/usr/bin/ld"c ld-argv 0 8 * + !64 + "-o"c ld-argv 1 8 * + !64 + "output"c ld-argv 2 8 * + !64 + "output.o"c ld-argv 3 8 * + !64 + NULL ld-argv 4 8 * + !64 + ld-argv cmd-echoed + + memory output-argv sizeof(ptr) 2 * end + "./output"c output-argv 0 8 * + !64 + NULL output-argv 1 8 * + !64 + output-argv cmd-echoed +end + + +proc simulate-ops // -- + memory sim-ip sizeof(u64) end + memory sim-op sizeof(Op) end + + 0 sim-ip !64 + while sim-ip @64 @ops-count < do + sizeof(Op) + sim-ip @64 sizeof(Op) * ops + + sim-op + memcpy + + COUNT_OPS 4 != if + here eputs ": Assertion Failed: Exhaustive handling of Op types in simulate-ops\n" eputs + 1 exit + end + + sim-op @Op.type OP_PUSH_INT = if + sim-op @Op.operand sim-stack-push + sim-ip inc64 + else sim-op @Op.type OP_IF = if* + sim-stack-pop cast(bool) if + sim-ip inc64 + else + sim-op @Op.operand sim-ip !64 + end + else sim-op @Op.type OP_END = if* + sim-op @Op.operand sim-ip !64 + else sim-op @Op.type OP_INTRINSIC = if* + COUNT_INTRINSICS 42 != if + here eputs ": Assertion Failed: Exhaustive handling of Intrinsics in simulate-ops\n" eputs + 1 exit + end - if dup Op.type @64 OP_PUSH_INT = do - dup Op.operand @64 sim-stack-push - elif dup Op.type @64 OP_PLUS = do - sim-stack-pop - sim-stack-pop - + - sim-stack-push - elif dup Op.type @64 OP_PRINT = do - sim-stack-pop print + sim-op @Op.operand INTRINSIC_PLUS = if + sim-stack-pop + sim-stack-pop + + + sim-stack-push + else sim-op @Op.operand INTRINSIC_MINUS = if* + sim-stack-pop + sim-stack-pop + swap + - + sim-stack-push + else sim-op @Op.operand INTRINSIC_MUL = if* + sim-stack-pop + sim-stack-pop + * + sim-stack-push + else sim-op @Op.operand INTRINSIC_DIVMOD = if* + sim-stack-pop + sim-stack-pop + swap + divmod + swap + sim-stack-push + sim-stack-push + else sim-op @Op.operand INTRINSIC_SHR = if* + sim-stack-pop + sim-stack-pop + swap + shr
+ sim-stack-push + else sim-op @Op.operand INTRINSIC_SHL = if* + sim-stack-pop + sim-stack-pop + swap + shl + sim-stack-push + else sim-op @Op.operand INTRINSIC_OR = if* + sim-stack-pop + sim-stack-pop + or + sim-stack-push + else sim-op @Op.operand INTRINSIC_AND = if* + sim-stack-pop + sim-stack-pop + and + sim-stack-push + else sim-op @Op.operand INTRINSIC_NOT = if* + sim-stack-pop + not + sim-stack-push + else sim-op @Op.operand INTRINSIC_PRINT = if* + sim-stack-pop + print + else sim-op @Op.operand INTRINSIC_EQ = if* + sim-stack-pop + sim-stack-pop + = + sim-stack-push + else sim-op @Op.operand INTRINSIC_GT = if* + sim-stack-pop + sim-stack-pop + swap + > + sim-stack-push + else sim-op @Op.operand INTRINSIC_LT = if* + sim-stack-pop + sim-stack-pop + swap + < + sim-stack-push + else sim-op @Op.operand INTRINSIC_GE = if* + sim-stack-pop + sim-stack-pop + swap + >= + sim-stack-push + else sim-op @Op.operand INTRINSIC_LE = if* + sim-stack-pop + sim-stack-pop + swap + <= + sim-stack-push + else sim-op @Op.operand INTRINSIC_NE = if* + sim-stack-pop + sim-stack-pop + != + sim-stack-push + else sim-op @Op.operand INTRINSIC_DUP = if* + sim-stack-pop + dup + sim-stack-push + sim-stack-push + else sim-op @Op.operand INTRINSIC_SWAP = if* + sim-stack-pop + sim-stack-pop + swap + sim-stack-push + sim-stack-push + else sim-op @Op.operand INTRINSIC_DROP = if* + sim-stack-pop + drop + else sim-op @Op.operand INTRINSIC_OVER = if* + sim-stack-pop + sim-stack-pop + dup + sim-stack-push + swap + sim-stack-push + sim-stack-push + else sim-op @Op.operand INTRINSIC_ROT = if* + sim-stack-pop + sim-stack-pop + sim-stack-pop + swap + sim-stack-push + swap + sim-stack-push + sim-stack-push + else sim-op @Op.operand INTRINSIC_LOAD8 = if* + here eputs ": TODO: `@8` is not implemented yet" eputs 1 exit + else sim-op @Op.operand INTRINSIC_STORE8 = if* + here eputs ": TODO: `!8` is not implemented yet" eputs 1 exit + else sim-op @Op.operand INTRINSIC_LOAD16 = if* + here eputs ": TODO: `@16` is 
not implemented yet" eputs 1 exit + else sim-op @Op.operand INTRINSIC_STORE16 = if* + here eputs ": TODO: `!16` is not implemented yet" eputs 1 exit + else sim-op @Op.operand INTRINSIC_LOAD32 = if* + here eputs ": TODO: `@32` is not implemented yet" eputs 1 exit + else sim-op @Op.operand INTRINSIC_STORE32 = if* + here eputs ": TODO: `!32` is not implemented yet" eputs 1 exit + else sim-op @Op.operand INTRINSIC_LOAD64 = if* + here eputs ": TODO: `@64` is not implemented yet" eputs 1 exit + else sim-op @Op.operand INTRINSIC_STORE64 = if* + here eputs ": TODO: `!64` is not implemented yet" eputs 1 exit + else sim-op @Op.operand INTRINSIC_ARGC = if* + here eputs ": TODO: `argc` is not implemented yet" eputs 1 exit + else sim-op @Op.operand INTRINSIC_ARGV = if* + here eputs ": TODO: `argv` is not implemented yet" eputs 1 exit + else sim-op @Op.operand INTRINSIC_HERE = if* + here eputs ": TODO: `here` is not implemented yet" eputs + else sim-op @Op.operand INTRINSIC_CAST_PTR = if* + else sim-op @Op.operand INTRINSIC_CAST_INT = if* + else sim-op @Op.operand INTRINSIC_CAST_BOOL = if* + else sim-op @Op.operand INTRINSIC_SYSCALL0 = if* + here eputs ": TODO: `syscall0` is not implemented yet" eputs + else sim-op @Op.operand INTRINSIC_SYSCALL1 = if* + here eputs ": TODO: `syscall1` is not implemented yet" eputs + else sim-op @Op.operand INTRINSIC_SYSCALL2 = if* + here eputs ": TODO: `syscall2` is not implemented yet" eputs + else sim-op @Op.operand INTRINSIC_SYSCALL3 = if* + here eputs ": TODO: `syscall3` is not implemented yet" eputs + else sim-op @Op.operand INTRINSIC_SYSCALL4 = if* + here eputs ": TODO: `syscall4` is not implemented yet" eputs + else sim-op @Op.operand INTRINSIC_SYSCALL5 = if* + here eputs ": TODO: `syscall5` is not implemented yet" eputs + else sim-op @Op.operand INTRINSIC_SYSCALL6 = if* + here eputs ": TODO: `syscall6` is not implemented yet" eputs + else + here eputs ": unreachable.\n" eputs + 1 exit + end + + sim-ip inc64 else here eputs ": 
unreachable\n" eputs 1 exit end + end +end - drop +const PARSE_BLOCK_STACK_CAP 1024 end +memory parse-block-stack-count sizeof(u64) end +proc @parse-block-stack-count parse-block-stack-count @64 end +memory parse-block-stack sizeof(u64) PARSE_BLOCK_STACK_CAP * end - 1 + +proc parse-block-stack-push + @parse-block-stack-count PARSE_BLOCK_STACK_CAP >= if + here eputs ": ERROR: parse block stack overflow\n" eputs 1 exit end - drop -end + parse-block-stack @parse-block-stack-count sizeof(u64) * + !64 + parse-block-stack-count inc64 +end -macro program69 // -- - OP_PUSH_INT 34 push-op - OP_PUSH_INT 35 push-op - OP_PLUS 0 push-op - OP_PRINT 0 push-op +proc parse-block-stack-pop + @parse-block-stack-count 0 = if + here eputs ": ERROR: parse block stack underflow\n" eputs 1 exit + end + parse-block-stack-count dec64 + parse-block-stack @parse-block-stack-count sizeof(u64) * + @64 end -macro program123 // -- - OP_PUSH_INT 1 push-op - OP_PRINT 0 push-op - OP_PUSH_INT 2 push-op - OP_PRINT 0 push-op - OP_PUSH_INT 3 push-op - OP_PRINT 0 push-op +proc str-starts-with // prefix-count prefix-data input-count input-data + memory ssw-prefix sizeof(Str) end + memory ssw-input sizeof(Str) end + ssw-input !Str + ssw-prefix !Str + + ssw-prefix @Str.count + ssw-input @Str.count + <= if + 0 while + dup ssw-prefix @Str.count < if + dup ssw-input @Str.data + @8 + over ssw-prefix @Str.data + @8 + = + else false end + do 1 + end + ssw-prefix @Str.count >= + else false end +end + +proc remove-comment // output input + memory comment sizeof(Str) end "//" comment !Str + + over 0 swap !Str.count + 2dup @Str.data swap !Str.data + while + dup @Str.count 0 > if + dup comment @Str rot @Str str-starts-with lnot + else false end + do + dup str-chop-one-left + over Str.count inc64 + end + 2drop +end + +proc parse_file_path_cstr_into_ops // file-path-cstr + memory file-path-cstr sizeof(ptr) end + file-path-cstr !64 + + 0 // mode + O_RDONLY // flags + file-path-cstr @64 cast(ptr) // pathname + AT_FDCWD // 
dirfd + openat + + dup 0 < if + "ERROR: could not open file " eputs file-path-cstr @64 cast(ptr) cstr-to-str eputs "\n" eputs + 1 exit + end + + memory fd sizeof(u64) end + fd !64 + + memory statbuf sizeof(stat) end + statbuf fd @64 fstat 0 < if + "ERROR: could not determine the size of file " eputs file-path-cstr @64 cast(ptr) cstr-to-str eputs "\n" eputs + 1 exit + end + + memory content sizeof(Str) end + statbuf @stat.st_size content !Str.count + + 0 // offset + fd @64 // fd + MAP_PRIVATE // flags + PROT_READ // prot + content @Str.count // length + NULL // addr + mmap + content !Str.data + + content @Str.data cast(int) 0 < if + "ERROR: could not memory map file " eputs file-path-cstr @64 cast(ptr) cstr-to-str eputs "\n" eputs + 1 exit + end + + memory line_number sizeof(u64) end + memory line-with-comment sizeof(Str) end + memory line sizeof(Str) end + memory word sizeof(Str) end + memory line_start sizeof(ptr) end + + 1 line_number !64 + while content @Str.count 0 > do + '\n' line-with-comment content str-chop-by-delim + line line-with-comment remove-comment + line @Str.data line_start !64 + while line @Str.count 0 > do + line str-trim-left + ' ' word line str-chop-by-delim + + COUNT_OPS 4 != if + here eputs ": Assertion Failed: Exhaustive handling of Op types in parse-file-path\n" eputs + 1 exit + end + + COUNT_INTRINSICS 42 != if + here eputs ": Assertion Failed: Exhaustive handling of Intrinsics in parse-file-path\n" eputs + 1 exit + end + + // Intrinsics + word @Str "+" streq if + OP_INTRINSIC INTRINSIC_PLUS push-op + else word @Str "-" streq if* + OP_INTRINSIC INTRINSIC_MINUS push-op + else word @Str "*" streq if* + OP_INTRINSIC INTRINSIC_MUL push-op + else word @Str "divmod" streq if* + OP_INTRINSIC INTRINSIC_DIVMOD push-op + else word @Str "print" streq if* + OP_INTRINSIC INTRINSIC_PRINT push-op + else word @Str "=" streq if* + OP_INTRINSIC INTRINSIC_EQ push-op + else word @Str ">" streq if* + OP_INTRINSIC INTRINSIC_GT push-op + else word @Str "<" streq 
if* + OP_INTRINSIC INTRINSIC_LT push-op + else word @Str ">=" streq if* + OP_INTRINSIC INTRINSIC_GE push-op + else word @Str "<=" streq if* + OP_INTRINSIC INTRINSIC_LE push-op + else word @Str "!=" streq if* + OP_INTRINSIC INTRINSIC_NE push-op + else word @Str "shr" streq if* + OP_INTRINSIC INTRINSIC_SHR push-op + else word @Str "shl" streq if* + OP_INTRINSIC INTRINSIC_SHL push-op + else word @Str "or" streq if* + OP_INTRINSIC INTRINSIC_OR push-op + else word @Str "and" streq if* + OP_INTRINSIC INTRINSIC_AND push-op + else word @Str "not" streq if* + OP_INTRINSIC INTRINSIC_NOT push-op + else word @Str "dup" streq if* + OP_INTRINSIC INTRINSIC_DUP push-op + else word @Str "swap" streq if* + OP_INTRINSIC INTRINSIC_SWAP push-op + else word @Str "drop" streq if* + OP_INTRINSIC INTRINSIC_DROP push-op + else word @Str "over" streq if* + OP_INTRINSIC INTRINSIC_OVER push-op + else word @Str "rot" streq if* + OP_INTRINSIC INTRINSIC_ROT push-op + else word @Str "!8" streq if* + OP_INTRINSIC INTRINSIC_STORE8 push-op + else word @Str "@8" streq if* + OP_INTRINSIC INTRINSIC_LOAD8 push-op + else word @Str "!16" streq if* + OP_INTRINSIC INTRINSIC_STORE16 push-op + else word @Str "@16" streq if* + OP_INTRINSIC INTRINSIC_LOAD16 push-op + else word @Str "!32" streq if* + OP_INTRINSIC INTRINSIC_STORE32 push-op + else word @Str "@32" streq if* + OP_INTRINSIC INTRINSIC_LOAD32 push-op + else word @Str "!64" streq if* + OP_INTRINSIC INTRINSIC_STORE64 push-op + else word @Str "@64" streq if* + OP_INTRINSIC INTRINSIC_LOAD64 push-op + else word @Str "cast(ptr)" streq if* + OP_INTRINSIC INTRINSIC_CAST_PTR push-op + else word @Str "cast(int)" streq if* + OP_INTRINSIC INTRINSIC_CAST_INT push-op + else word @Str "cast(bool)" streq if* + OP_INTRINSIC INTRINSIC_CAST_BOOL push-op + else word @Str "argc" streq if* + OP_INTRINSIC INTRINSIC_ARGC push-op + else word @Str "argv" streq if* + OP_INTRINSIC INTRINSIC_ARGV push-op + else word @Str "here" streq if* + OP_INTRINSIC INTRINSIC_HERE push-op + else 
word @Str "syscall0" streq if* + OP_INTRINSIC INTRINSIC_SYSCALL0 push-op + else word @Str "syscall1" streq if* + OP_INTRINSIC INTRINSIC_SYSCALL1 push-op + else word @Str "syscall2" streq if* + OP_INTRINSIC INTRINSIC_SYSCALL2 push-op + else word @Str "syscall3" streq if* + OP_INTRINSIC INTRINSIC_SYSCALL3 push-op + else word @Str "syscall4" streq if* + OP_INTRINSIC INTRINSIC_SYSCALL4 push-op + else word @Str "syscall5" streq if* + OP_INTRINSIC INTRINSIC_SYSCALL5 push-op + else word @Str "syscall6" streq if* + OP_INTRINSIC INTRINSIC_SYSCALL6 push-op + + // Keywords + else word @Str "if" streq if* + @ops-count parse-block-stack-push + OP_IF 0 push-op + else word @Str "end" streq if* + @parse-block-stack-count 0 <= if + file-path-cstr @64 cast(ptr) cstr-to-str eputs + ":" puts line_number @64 putu + ":" puts word @Str.data cast(int) line_start @64 - 1 + putu + ": ERROR: `end` can only close `if` for now\n" eputs + 1 exit + end + + parse-block-stack-pop + + ops over sizeof(Op) * + + + dup @Op.type OP_IF != if + file-path-cstr @64 cast(ptr) cstr-to-str eputs + ":" puts line_number @64 putu + ":" puts word @Str.data cast(int) line_start @64 - 1 + putu + ": ERROR: `end` can only close `if` for now\n" eputs + 1 exit + end + + @ops-count swap !Op.operand + + drop // ip + + OP_END @ops-count 1 + push-op + else + OP_PUSH_INT + word @Str try-parse-int lnot if + file-path-cstr @64 cast(ptr) cstr-to-str eputs + ":" puts line_number @64 putu + ":" puts word @Str.data cast(int) line_start @64 - 1 + putu + ": ERROR: `" eputs word @Str eputs "` is unknown word\n" eputs + 1 exit + end + push-op + end + + end + line_number inc64 + end + // TODO: parse_file_path does not clean up resources after itself end -macro usage // -- +proc usage // -- dup "Usage: porth \n" rot fputs dup " SUBCOMMANDS:\n" rot fputs - dup " sim Simulate the program.\n" rot fputs - dup " com Compile the program\n" rot fputs - dup " dump Dump the ops of the program\n" rot fputs + dup " sim Simulate the program.\n" 
rot fputs + // TODO: -r flag for com subcommand + dup " com Compile the program\n" rot fputs + dup " dump Dump the ops of the program\n" rot fputs dup " help Print this help to stdout and exit with 0 code\n" rot fputs drop end -macro main // -- - if argc 2 < do +proc main // -- + argc 2 < if stderr usage "ERROR: subcommand is not provided\n" eputs 1 exit end - program123 + 1 nth_argv + dup "sim"c cstreq if + argc 3 < if + stderr usage + "ERROR: no input file is provided for the `sim` subcommand\n" eputs + 1 exit + end - // TODO: parsing file is not implemented + 2 nth_argv parse_file_path_cstr_into_ops - 1 nth_argv - if dup "sim"c cstreq do simulate-ops - elif dup "com"c cstreq do + else dup "com"c cstreq if* + argc 3 < if + stderr usage + "ERROR: no input file is provided for the `com` subcommand\n" eputs + 1 exit + end + + 2 nth_argv parse_file_path_cstr_into_ops + compile-ops - elif dup "help"c cstreq do + else dup "help"c cstreq if* stdout usage 0 exit - elif dup "dump"c cstreq do + else dup "dump"c cstreq if* + argc 3 < if + stderr usage + "ERROR: no input file is provided for the `dump` subcommand\n" eputs + 1 exit + end + + 2 nth_argv parse_file_path_cstr_into_ops + dump-ops else stderr usage diff --git a/porth.py b/porth.py index 5195cbbd..d169936c 100755 --- a/porth.py +++ b/porth.py @@ -15,10 +15,11 @@ PORTH_EXT = '.porth' DEFAULT_EXPANSION_LIMIT=1000 EXPANSION_DIAGNOSTIC_LIMIT=10 -MEM_CAPACITY = 640_000 # should be enough for everyone +X86_64_RET_STACK_CAP=8192 SIM_NULL_POINTER_PADDING = 1 # just a little bit of a padding at the beginning of the memory to make 0 an invalid address SIM_STR_CAPACITY = 640_000 SIM_ARGV_CAPACITY = 640_000 +SIM_LOCAL_MEMORY_CAPACITY = 640_000 debug=False @@ -26,18 +27,31 @@ class Keyword(Enum): IF=auto() - ELIF=auto() + IFSTAR=auto() ELSE=auto() END=auto() WHILE=auto() DO=auto() MACRO=auto() INCLUDE=auto() + MEMORY=auto() + PROC=auto() + CONST=auto() + OFFSET=auto() + RESET=auto() +class DataType(IntEnum): + INT=auto() + 
BOOL=auto() + PTR=auto() + +assert len(DataType) == 3, 'Exhaustive casts for all data types' class Intrinsic(Enum): PLUS=auto() MINUS=auto() MUL=auto() + # TODO: split divmod intrinsic into div and mod back + # It was never useful DIVMOD=auto() EQ=auto() GT=auto() @@ -56,16 +70,17 @@ class Intrinsic(Enum): DROP=auto() OVER=auto() ROT=auto() - MEM=auto() - LOAD=auto() - STORE=auto() - FORTH_LOAD=auto() - FORTH_STORE=auto() + LOAD8=auto() + STORE8=auto() + LOAD16=auto() + STORE16=auto() + LOAD32=auto() + STORE32=auto() LOAD64=auto() STORE64=auto() - FORTH_LOAD64=auto() - FORTH_STORE64=auto() CAST_PTR=auto() + CAST_INT=auto() + CAST_BOOL=auto() ARGC=auto() ARGV=auto() HERE=auto() @@ -81,13 +96,19 @@ class OpType(Enum): PUSH_INT=auto() PUSH_STR=auto() PUSH_CSTR=auto() + PUSH_MEM=auto() + PUSH_LOCAL_MEM=auto() INTRINSIC=auto() IF=auto() - ELIF=auto() + IFSTAR=auto() ELSE=auto() END=auto() WHILE=auto() DO=auto() + SKIP_PROC=auto() + PREP_PROC=auto() + RET=auto() + CALL=auto() class TokenType(Enum): WORD=auto() @@ -109,6 +130,7 @@ class Token: expanded_count: int = 0 OpAddr=int +MemAddr=int @dataclass class Op: @@ -116,8 +138,10 @@ class Op: token: Token operand: Optional[Union[int, str, Intrinsic, OpAddr]] = None -Program=List[Op] - +@dataclass +class Program: + ops: List[Op] + memory_capacity: int def get_cstr_from_mem(mem: bytearray, ptr: int) -> bytes: end = ptr @@ -132,7 +156,9 @@ def simulate_little_endian_linux(program: Program, argv: List[str]): CLOCK_MONOTONIC=1 stack: List[int] = [] - mem = bytearray(SIM_NULL_POINTER_PADDING + SIM_STR_CAPACITY + SIM_ARGV_CAPACITY + MEM_CAPACITY) + # TODO: I think ret_stack should be located in the local memory just like on x86_64 + ret_stack: List[OpAddr] = [] + mem = bytearray(SIM_NULL_POINTER_PADDING + SIM_STR_CAPACITY + SIM_ARGV_CAPACITY + SIM_LOCAL_MEMORY_CAPACITY + program.memory_capacity) str_buf_ptr = SIM_NULL_POINTER_PADDING str_ptrs: Dict[int, int] = {} @@ -141,7 +167,10 @@ def simulate_little_endian_linux(program: 
Program, argv: List[str]): argv_buf_ptr = SIM_NULL_POINTER_PADDING + SIM_STR_CAPACITY argc = 0 - mem_buf_ptr = SIM_NULL_POINTER_PADDING + SIM_STR_CAPACITY + SIM_ARGV_CAPACITY + local_memory_ptr = SIM_NULL_POINTER_PADDING + SIM_STR_CAPACITY + SIM_ARGV_CAPACITY + local_memory_rsp = local_memory_ptr + SIM_LOCAL_MEMORY_CAPACITY + + mem_buf_ptr = SIM_NULL_POINTER_PADDING + SIM_STR_CAPACITY + SIM_ARGV_CAPACITY + SIM_LOCAL_MEMORY_CAPACITY fds: List[BinaryIO] = [sys.stdin.buffer, sys.stdout.buffer, sys.stderr.buffer] @@ -161,9 +190,9 @@ def simulate_little_endian_linux(program: Program, argv: List[str]): assert argc*8 <= SIM_ARGV_CAPACITY, "Argv buffer, overflow" ip = 0 - while ip < len(program): - assert len(OpType) == 10, "Exhaustive op handling in simulate_little_endian_linux" - op = program[ip] + while ip < len(program.ops): + assert len(OpType) == 16, "Exhaustive op handling in simulate_little_endian_linux" + op = program.ops[ip] try: if op.typ == OpType.PUSH_INT: assert isinstance(op.operand, int), "This could be a bug in the parsing step" @@ -194,16 +223,26 @@ def simulate_little_endian_linux(program: Program, argv: List[str]): assert str_size <= SIM_STR_CAPACITY, "String buffer overflow" stack.append(str_ptrs[ip]) ip += 1 - elif op.typ == OpType.IF: + elif op.typ == OpType.PUSH_MEM: + assert isinstance(op.operand, MemAddr), "This could be a bug in the parsing step" + stack.append(mem_buf_ptr + op.operand) + ip += 1 + elif op.typ == OpType.PUSH_LOCAL_MEM: + assert isinstance(op.operand, MemAddr) + stack.append(local_memory_rsp + op.operand) ip += 1 + elif op.typ in [OpType.IF, OpType.IFSTAR]: + a = stack.pop() + if a == 0: + assert isinstance(op.operand, OpAddr), "This could be a bug in the parsing step" + ip = op.operand + else: + ip += 1 elif op.typ == OpType.WHILE: ip += 1 elif op.typ == OpType.ELSE: assert isinstance(op.operand, OpAddr), "This could be a bug in the parsing step" ip = op.operand - elif op.typ == OpType.ELIF: - assert isinstance(op.operand, 
OpAddr), "This could be a bug in the parsing step" - ip = op.operand elif op.typ == OpType.END: assert isinstance(op.operand, OpAddr), "This could be a bug in the parsing step" ip = op.operand @@ -214,8 +253,23 @@ def simulate_little_endian_linux(program: Program, argv: List[str]): ip = op.operand else: ip += 1 + elif op.typ == OpType.SKIP_PROC: + assert isinstance(op.operand, OpAddr), "This could be a bug in the parsing step" + ip = op.operand + elif op.typ == OpType.PREP_PROC: + assert isinstance(op.operand, int) + local_memory_rsp -= op.operand + ip += 1 + elif op.typ == OpType.RET: + assert isinstance(op.operand, int) + local_memory_rsp += op.operand + ip = ret_stack.pop() + elif op.typ == OpType.CALL: + assert isinstance(op.operand, OpAddr), "This could be a bug in the parsing step" + ret_stack.append(ip + 1) + ip = op.operand elif op.typ == OpType.INTRINSIC: - assert len(Intrinsic) == 41, "Exhaustive handling of intrinsic in simulate_little_endian_linux()" + assert len(Intrinsic) == 42, "Exhaustive handling of intrinsic in simulate_little_endian_linux()" if op.operand == Intrinsic.PLUS: a = stack.pop() b = stack.pop() @@ -325,58 +379,42 @@ def simulate_little_endian_linux(program: Program, argv: List[str]): stack.append(a) stack.append(c) ip += 1 - elif op.operand == Intrinsic.MEM: - stack.append(mem_buf_ptr) - ip += 1 - elif op.operand == Intrinsic.LOAD: + elif op.operand == Intrinsic.LOAD8: addr = stack.pop() byte = mem[addr] stack.append(byte) ip += 1 - elif op.operand == Intrinsic.STORE: - store_value = stack.pop() + elif op.operand == Intrinsic.STORE8: store_addr = stack.pop() + store_value = stack.pop() mem[store_addr] = store_value & 0xFF ip += 1 - elif op.operand == Intrinsic.FORTH_LOAD: - addr = stack.pop() - byte = mem[addr] - stack.append(byte) + elif op.operand == Intrinsic.LOAD16: + load_addr = stack.pop() + stack.append(int.from_bytes(mem[load_addr:load_addr+2], byteorder="little")) ip += 1 - elif op.operand == Intrinsic.FORTH_STORE: - 
store_addr = stack.pop() + elif op.operand == Intrinsic.STORE16: + store_addr = stack.pop(); store_value = stack.pop() - mem[store_addr] = store_value & 0xFF + mem[store_addr:store_addr+2] = store_value.to_bytes(length=2, byteorder="little", signed=(store_value < 0)); ip += 1 - elif op.operand == Intrinsic.LOAD64: - addr = stack.pop() - _bytes = bytearray(8) - for offset in range(0,8): - _bytes[offset] = mem[addr + offset] - stack.append(int.from_bytes(_bytes, byteorder="little")) + elif op.operand == Intrinsic.LOAD32: + load_addr = stack.pop() + stack.append(int.from_bytes(mem[load_addr:load_addr+4], byteorder="little")) ip += 1 - elif op.operand == Intrinsic.STORE64: + elif op.operand == Intrinsic.STORE32: + store_addr = stack.pop(); store_value = stack.pop() - store_value64 = store_value.to_bytes(length=8, byteorder="little", signed=(store_value < 0)); - store_addr64 = stack.pop(); - for byte in store_value64: - mem[store_addr64] = byte; - store_addr64 += 1; + mem[store_addr:store_addr+4] = store_value.to_bytes(length=4, byteorder="little", signed=(store_value < 0)); ip += 1 - elif op.operand == Intrinsic.FORTH_LOAD64: - addr = stack.pop() - _bytes = bytearray(8) - for offset in range(0,8): - _bytes[offset] = mem[addr + offset] - stack.append(int.from_bytes(_bytes, byteorder="little")) + elif op.operand == Intrinsic.LOAD64: + load_addr = stack.pop() + stack.append(int.from_bytes(mem[load_addr:load_addr+8], byteorder="little")) ip += 1 - elif op.operand == Intrinsic.FORTH_STORE64: - store_addr64 = stack.pop(); + elif op.operand == Intrinsic.STORE64: + store_addr = stack.pop(); store_value = stack.pop() - store_value64 = store_value.to_bytes(length=8, byteorder="little", signed=(store_value < 0)); - for byte in store_value64: - mem[store_addr64] = byte; - store_addr64 += 1; + mem[store_addr:store_addr+8] = store_value.to_bytes(length=8, byteorder="little", signed=(store_value < 0)); ip += 1 elif op.operand == Intrinsic.ARGC: stack.append(argc) @@ -399,6 +437,12 @@ 
def simulate_little_endian_linux(program: Program, argv: List[str]): elif op.operand == Intrinsic.CAST_PTR: # Ignore the type casting. It's only useful for type_check_program() phase ip += 1 + elif op.operand == Intrinsic.CAST_BOOL: + # Ignore the type casting. It's only useful for type_check_program() phase + ip += 1 + elif op.operand == Intrinsic.CAST_INT: + # Ignore the type casting. It's only useful for type_check_program() phase + ip += 1 elif op.operand == Intrinsic.SYSCALL0: syscall_number = stack.pop(); if syscall_number == 39: # SYS_getpid @@ -431,7 +475,6 @@ def simulate_little_endian_linux(program: Program, argv: List[str]): # NOTE: trying to behave like a POSIX tty in canonical mode by making the data available # on each newline # https://en.wikipedia.org/wiki/POSIX_terminal_interface#Canonical_mode_processing - # TODO: maybe this behavior should be customizable data = fds[fd].readline(count) mem[buf:buf+len(data)] = data stack.append(len(data)) @@ -442,21 +485,6 @@ def simulate_little_endian_linux(program: Program, argv: List[str]): fds[fd].write(mem[buf:buf+count]) fds[fd].flush() stack.append(count) - elif syscall_number == 257: # SYS_openat - dirfd = arg1 - pathname_ptr = arg2 - flags = arg3 - if dirfd != AT_FDCWD: - assert False, "openat: unsupported dirfd" - if flags != O_RDONLY: - assert False, "openat: unsupported flags" - pathname = get_cstr_from_mem(mem, pathname_ptr).decode('utf-8') - fd = len(fds) - try: - fds.append(open(pathname, 'rb')) - stack.append(fd) - except FileNotFoundError: - stack.append(-ENOENT) else: assert False, "unknown syscall number %d" % syscall_number ip += 1 @@ -480,6 +508,24 @@ def simulate_little_endian_linux(program: Program, argv: List[str]): nano_seconds = int.from_bytes(mem[request_ptr+8:request_ptr+8+8], byteorder='little') sleep(float(seconds)+float(nano_seconds)*1e-09) stack.append(0) + elif syscall_number == 257: # SYS_openat + dirfd = arg1 + pathname_ptr = arg2 + flags = arg3 + mode = arg4 + if dirfd != 
AT_FDCWD: + assert False, f"openat: unsupported dirfd: {dirfd}" + if flags != O_RDONLY: + assert False, f"openat: unsupported flags: {flags}" + if mode != 0: + assert False, f"openat: unsupported mode: {mode}" + pathname = get_cstr_from_mem(mem, pathname_ptr).decode('utf-8') + fd = len(fds) + try: + fds.append(open(pathname, 'rb')) + stack.append(fd) + except FileNotFoundError: + stack.append(-ENOENT) else: assert False, "unknown syscall number %d" % syscall_number ip += 1 @@ -495,14 +541,6 @@ def simulate_little_endian_linux(program: Program, argv: List[str]): compiler_error_with_expansion_stack(op.token, "Python Exception during simulation") traceback.print_exception(type(e), e, e.__traceback__) exit(1) - if debug: - print("[INFO] Memory dump") - print(mem[:20]) - -class DataType(IntEnum): - INT=auto() - BOOL=auto() - PTR=auto() def compiler_diagnostic(loc: Loc, tag: str, message: str): print("%s:%d:%d: %s: %s" % (loc + (tag, message)), file=sys.stderr) @@ -530,394 +568,430 @@ def compiler_note(loc: Loc, message: str): compiler_diagnostic(loc, 'NOTE', message) def not_enough_arguments(op: Op): + assert len(OpType) == 16, f"Exhaustive handling of Op types in not_enough_arguments() (expected {len(OpType)}). Keep in mind that not all of the ops should be handled in here. Only those that consume elements from the stack." if op.typ == OpType.INTRINSIC: assert isinstance(op.operand, Intrinsic) compiler_error_with_expansion_stack(op.token, "not enough arguments for the `%s` intrinsic" % INTRINSIC_NAMES[op.operand]) - # TODO: why don't we add while-do here too? 
- elif op.typ == OpType.IF: - compiler_error_with_expansion_stack(op.token, "not enough arguments for the if-block") + elif op.typ == OpType.DO: + compiler_error_with_expansion_stack(op.token, "not enough arguments for the do-block") else: assert False, "unsupported type of operation" DataStack=List[Tuple[DataType, Token]] -# TODO: `if 1 10 < do 69 32 elif 2 10 < do 420 end` does not properly type check +@dataclass +class Context: + stack: DataStack + ret_stack: List[OpAddr] + ip: OpAddr + +CallPath=Tuple[OpAddr, ...] + +# TODO: better error reporting on type checking errors of intrinsics +# Reported expected and actual types with the location that introduced the actual type +# TODO: better error reporting on type checking errors of procs +# Show the call path and stuff (like for macros) def type_check_program(program: Program): - stack: DataStack = [] - block_stack: List[Tuple[DataStack, OpType]] = [] - for ip in range(len(program)): - op = program[ip] - assert len(OpType) == 10, "Exhaustive ops handling in type_check_program()" + visited_dos: Dict[CallPath, DataStack] = {} + contexts: List[Context] = [Context(stack=[], ip=0, ret_stack=[])] + while len(contexts) > 0: + ctx = contexts[-1]; + if ctx.ip >= len(program.ops): + if len(ctx.stack) != 0: + compiler_error_with_expansion_stack(ctx.stack[-1][1], "unhandled data on the data stack: %s" % list(map(lambda x: x[0], ctx.stack))) + exit(1) + contexts.pop() + continue + op = program.ops[ctx.ip] + assert len(OpType) == 16, "Exhaustive ops handling in type_check_program()" if op.typ == OpType.PUSH_INT: - stack.append((DataType.INT, op.token)) + ctx.stack.append((DataType.INT, op.token)) + ctx.ip += 1 elif op.typ == OpType.PUSH_STR: - stack.append((DataType.INT, op.token)) - stack.append((DataType.PTR, op.token)) + ctx.stack.append((DataType.INT, op.token)) + ctx.stack.append((DataType.PTR, op.token)) + ctx.ip += 1 elif op.typ == OpType.PUSH_CSTR: - stack.append((DataType.PTR, op.token)) + 
ctx.stack.append((DataType.PTR, op.token)) + ctx.ip += 1 + elif op.typ == OpType.PUSH_MEM: + ctx.stack.append((DataType.PTR, op.token)) + ctx.ip += 1 + elif op.typ == OpType.PUSH_LOCAL_MEM: + ctx.stack.append((DataType.PTR, op.token)) + ctx.ip += 1 + elif op.typ == OpType.SKIP_PROC: + assert isinstance(op.operand, OpAddr) + ctx.ip = op.operand + elif op.typ == OpType.PREP_PROC: + ctx.ip += 1 + elif op.typ == OpType.CALL: + ctx.ret_stack.append(ctx.ip + 1) + assert isinstance(op.operand, OpAddr) + ctx.ip = op.operand + elif op.typ == OpType.RET: + ctx.ip = ctx.ret_stack.pop() elif op.typ == OpType.INTRINSIC: - assert len(Intrinsic) == 41, "Exhaustive intrinsic handling in type_check_program()" + assert len(Intrinsic) == 42, "Exhaustive intrinsic handling in type_check_program()" assert isinstance(op.operand, Intrinsic), "This could be a bug in compilation step" if op.operand == Intrinsic.PLUS: assert len(DataType) == 3, "Exhaustive type handling in PLUS intrinsic" - if len(stack) < 2: + if len(ctx.stack) < 2: not_enough_arguments(op) exit(1) - a_type, a_loc = stack.pop() - b_type, b_loc = stack.pop() + a_type, a_loc = ctx.stack.pop() + b_type, b_loc = ctx.stack.pop() if a_type == DataType.INT and b_type == DataType.INT: - stack.append((DataType.INT, op.token)) + ctx.stack.append((DataType.INT, op.token)) elif a_type == DataType.INT and b_type == DataType.PTR: - stack.append((DataType.PTR, op.token)) + ctx.stack.append((DataType.PTR, op.token)) elif a_type == DataType.PTR and b_type == DataType.INT: - stack.append((DataType.PTR, op.token)) + ctx.stack.append((DataType.PTR, op.token)) else: compiler_error_with_expansion_stack(op.token, "invalid argument types for PLUS intrinsic. 
Expected INT or PTR") exit(1) elif op.operand == Intrinsic.MINUS: assert len(DataType) == 3, "Exhaustive type handling in MINUS intrinsic" - if len(stack) < 2: + if len(ctx.stack) < 2: not_enough_arguments(op) exit(1) - a_type, a_loc = stack.pop() - b_type, b_loc = stack.pop() + a_type, a_loc = ctx.stack.pop() + b_type, b_loc = ctx.stack.pop() if a_type == b_type and (a_type == DataType.INT or a_type == DataType.PTR): - stack.append((DataType.INT, op.token)) + ctx.stack.append((DataType.INT, op.token)) elif b_type == DataType.PTR and a_type == DataType.INT: - stack.append((DataType.PTR, op.token)) + ctx.stack.append((DataType.PTR, op.token)) else: compiler_error_with_expansion_stack(op.token, "invalid argument types fo MINUS intrinsic: %s" % [b_type, a_type]) exit(1) elif op.operand == Intrinsic.MUL: assert len(DataType) == 3, "Exhaustive type handling in MUL intrinsic" - if len(stack) < 2: + if len(ctx.stack) < 2: not_enough_arguments(op) exit(1) - a_type, a_loc = stack.pop() - b_type, b_loc = stack.pop() + a_type, a_loc = ctx.stack.pop() + b_type, b_loc = ctx.stack.pop() if a_type == b_type and a_type == DataType.INT: - stack.append((DataType.INT, op.token)) + ctx.stack.append((DataType.INT, op.token)) else: compiler_error_with_expansion_stack(op.token, "invalid argument types fo MUL intrinsic. Expected INT.") exit(1) elif op.operand == Intrinsic.DIVMOD: assert len(DataType) == 3, "Exhaustive type handling in DIVMOD intrinsic" - if len(stack) < 2: + if len(ctx.stack) < 2: not_enough_arguments(op) exit(1) - a_type, a_loc = stack.pop() - b_type, b_loc = stack.pop() + a_type, a_loc = ctx.stack.pop() + b_type, b_loc = ctx.stack.pop() if a_type == b_type and a_type == DataType.INT: - stack.append((DataType.INT, op.token)) - stack.append((DataType.INT, op.token)) + ctx.stack.append((DataType.INT, op.token)) + ctx.stack.append((DataType.INT, op.token)) else: compiler_error_with_expansion_stack(op.token, "invalid argument types fo DIVMOD intrinsic. 
Expected INT.") exit(1) elif op.operand == Intrinsic.EQ: assert len(DataType) == 3, "Exhaustive type handling in EQ intrinsic" - if len(stack) < 2: + if len(ctx.stack) < 2: not_enough_arguments(op) exit(1) - a_type, a_loc = stack.pop() - b_type, b_loc = stack.pop() + a_type, a_loc = ctx.stack.pop() + b_type, b_loc = ctx.stack.pop() - if a_type == b_type and a_type == DataType.INT: - stack.append((DataType.BOOL, op.token)) + if a_type == b_type: + ctx.stack.append((DataType.BOOL, op.token)) else: - compiler_error_with_expansion_stack(op.token, "invalid argument types fo EQ intrinsic. Expected INT.") + compiler_error_with_expansion_stack(op.token, "invalid argument types fo EQ intrinsic.") exit(1) elif op.operand == Intrinsic.GT: assert len(DataType) == 3, "Exhaustive type handling in GT intrinsic" - if len(stack) < 2: + if len(ctx.stack) < 2: not_enough_arguments(op) exit(1) - a_type, a_loc = stack.pop() - b_type, b_loc = stack.pop() + a_type, a_loc = ctx.stack.pop() + b_type, b_loc = ctx.stack.pop() if a_type == b_type and a_type == DataType.INT: - stack.append((DataType.BOOL, op.token)) + ctx.stack.append((DataType.BOOL, op.token)) else: compiler_error_with_expansion_stack(op.token, "invalid argument type for GT intrinsic") exit(1) elif op.operand == Intrinsic.LT: assert len(DataType) == 3, "Exhaustive type handling in LT intrinsic" - if len(stack) < 2: + if len(ctx.stack) < 2: not_enough_arguments(op) exit(1) - a_type, a_loc = stack.pop() - b_type, b_loc = stack.pop() + a_type, a_loc = ctx.stack.pop() + b_type, b_loc = ctx.stack.pop() if a_type == b_type and a_type == DataType.INT: - stack.append((DataType.BOOL, op.token)) + ctx.stack.append((DataType.BOOL, op.token)) else: compiler_error_with_expansion_stack(op.token, "invalid argument type for LT intrinsic") exit(1) elif op.operand == Intrinsic.GE: assert len(DataType) == 3, "Exhaustive type handling in GE intrinsic" - if len(stack) < 2: + if len(ctx.stack) < 2: not_enough_arguments(op) exit(1) - a_type, a_loc 
= stack.pop() - b_type, b_loc = stack.pop() + a_type, a_loc = ctx.stack.pop() + b_type, b_loc = ctx.stack.pop() if a_type == b_type and a_type == DataType.INT: - stack.append((DataType.BOOL, op.token)) + ctx.stack.append((DataType.BOOL, op.token)) else: compiler_error_with_expansion_stack(op.token, "invalid argument type for GE intrinsic") exit(1) elif op.operand == Intrinsic.LE: assert len(DataType) == 3, "Exhaustive type handling in LE intrinsic" - if len(stack) < 2: + if len(ctx.stack) < 2: not_enough_arguments(op) exit(1) - a_type, a_loc = stack.pop() - b_type, b_loc = stack.pop() + a_type, a_loc = ctx.stack.pop() + b_type, b_loc = ctx.stack.pop() if a_type == b_type and a_type == DataType.INT: - stack.append((DataType.BOOL, op.token)) + ctx.stack.append((DataType.BOOL, op.token)) else: compiler_error_with_expansion_stack(op.token, "invalid argument type for LE intrinsic") exit(1) elif op.operand == Intrinsic.NE: assert len(DataType) == 3, "Exhaustive type handling in NE intrinsic" - if len(stack) < 2: + if len(ctx.stack) < 2: not_enough_arguments(op) exit(1) - a_type, a_loc = stack.pop() - b_type, b_loc = stack.pop() + a_type, a_loc = ctx.stack.pop() + b_type, b_loc = ctx.stack.pop() if a_type == b_type and a_type == DataType.INT: - stack.append((DataType.BOOL, op.token)) + ctx.stack.append((DataType.BOOL, op.token)) else: compiler_error_with_expansion_stack(op.token, "invalid argument type for NE intrinsic") exit(1) elif op.operand == Intrinsic.SHR: assert len(DataType) == 3, "Exhaustive type handling in SHR intrinsic" - if len(stack) < 2: + if len(ctx.stack) < 2: not_enough_arguments(op) exit(1) - a_type, a_loc = stack.pop() - b_type, b_loc = stack.pop() + a_type, a_loc = ctx.stack.pop() + b_type, b_loc = ctx.stack.pop() if a_type == b_type and a_type == DataType.INT: - stack.append((DataType.INT, op.token)) + ctx.stack.append((DataType.INT, op.token)) else: compiler_error_with_expansion_stack(op.token, "invalid argument type for SHR intrinsic") exit(1) elif 
op.operand == Intrinsic.SHL: assert len(DataType) == 3, "Exhaustive type handling in SHL intrinsic" - if len(stack) < 2: + if len(ctx.stack) < 2: not_enough_arguments(op) exit(1) - a_type, a_loc = stack.pop() - b_type, b_loc = stack.pop() + a_type, a_loc = ctx.stack.pop() + b_type, b_loc = ctx.stack.pop() if a_type == b_type and a_type == DataType.INT: - stack.append((DataType.INT, op.token)) + ctx.stack.append((DataType.INT, op.token)) else: compiler_error_with_expansion_stack(op.token, "invalid argument type for SHL intrinsic") exit(1) elif op.operand == Intrinsic.OR: assert len(DataType) == 3, "Exhaustive type handling in OR intrinsic" - if len(stack) < 2: + if len(ctx.stack) < 2: not_enough_arguments(op) exit(1) - a_type, a_loc = stack.pop() - b_type, b_loc = stack.pop() + a_type, a_loc = ctx.stack.pop() + b_type, b_loc = ctx.stack.pop() if a_type == b_type and a_type == DataType.INT: - stack.append((DataType.INT, op.token)) + ctx.stack.append((DataType.INT, op.token)) elif a_type == b_type and a_type == DataType.BOOL: - stack.append((DataType.BOOL, op.token)) + ctx.stack.append((DataType.BOOL, op.token)) else: compiler_error_with_expansion_stack(op.token, "invalid argument type for OR intrinsic") exit(1) elif op.operand == Intrinsic.AND: assert len(DataType) == 3, "Exhaustive type handling in AND intrinsic" - if len(stack) < 2: + if len(ctx.stack) < 2: not_enough_arguments(op) exit(1) - a_type, a_loc = stack.pop() - b_type, b_loc = stack.pop() + a_type, a_loc = ctx.stack.pop() + b_type, b_loc = ctx.stack.pop() if a_type == b_type and a_type == DataType.INT: - stack.append((DataType.INT, op.token)) + ctx.stack.append((DataType.INT, op.token)) elif a_type == b_type and a_type == DataType.BOOL: - stack.append((DataType.BOOL, op.token)) + ctx.stack.append((DataType.BOOL, op.token)) else: compiler_error_with_expansion_stack(op.token, "invalid argument type for AND intrinsic") exit(1) elif op.operand == Intrinsic.NOT: assert len(DataType) == 3, "Exhaustive type 
handling in NOT intrinsic" - if len(stack) < 1: + if len(ctx.stack) < 1: not_enough_arguments(op) exit(1) - a_type, a_loc = stack.pop() + a_type, a_loc = ctx.stack.pop() if a_type == DataType.INT: - stack.append((DataType.INT, op.token)) + ctx.stack.append((DataType.INT, op.token)) elif a_type == DataType.BOOL: - stack.append((DataType.BOOL, op.token)) + ctx.stack.append((DataType.BOOL, op.token)) else: compiler_error_with_expansion_stack(op.token, "invalid argument type for NOT intrinsic") exit(1) elif op.operand == Intrinsic.PRINT: - if len(stack) < 1: + if len(ctx.stack) < 1: not_enough_arguments(op) exit(1) - stack.pop() + ctx.stack.pop() elif op.operand == Intrinsic.DUP: - if len(stack) < 1: + if len(ctx.stack) < 1: not_enough_arguments(op) exit(1) - a = stack.pop() - stack.append(a) - stack.append(a) + a = ctx.stack.pop() + ctx.stack.append(a) + ctx.stack.append(a) elif op.operand == Intrinsic.SWAP: - if len(stack) < 2: + if len(ctx.stack) < 2: not_enough_arguments(op) exit(1) - a = stack.pop() - b = stack.pop() - stack.append(a) - stack.append(b) + a = ctx.stack.pop() + b = ctx.stack.pop() + ctx.stack.append(a) + ctx.stack.append(b) elif op.operand == Intrinsic.DROP: - if len(stack) < 1: + if len(ctx.stack) < 1: not_enough_arguments(op) exit(1) - stack.pop() + ctx.stack.pop() elif op.operand == Intrinsic.OVER: - if len(stack) < 2: + if len(ctx.stack) < 2: not_enough_arguments(op) exit(1) - a = stack.pop() - b = stack.pop() - stack.append(b) - stack.append(a) - stack.append(b) + a = ctx.stack.pop() + b = ctx.stack.pop() + ctx.stack.append(b) + ctx.stack.append(a) + ctx.stack.append(b) elif op.operand == Intrinsic.ROT: - if len(stack) < 3: + if len(ctx.stack) < 3: not_enough_arguments(op) exit(1) - a = stack.pop() - b = stack.pop() - c = stack.pop() - stack.append(b) - stack.append(a) - stack.append(c) - elif op.operand == Intrinsic.MEM: - stack.append((DataType.PTR, op.token)) - elif op.operand == Intrinsic.LOAD: - assert len(DataType) == 3, "Exhaustive type 
handling in LOAD intrinsic" - if len(stack) < 1: + a = ctx.stack.pop() + b = ctx.stack.pop() + c = ctx.stack.pop() + ctx.stack.append(b) + ctx.stack.append(a) + ctx.stack.append(c) + elif op.operand == Intrinsic.LOAD8: + assert len(DataType) == 3, "Exhaustive type handling in LOAD8 intrinsic" + if len(ctx.stack) < 1: not_enough_arguments(op) exit(1) - a_type, a_loc = stack.pop() + a_type, a_loc = ctx.stack.pop() if a_type == DataType.PTR: - stack.append((DataType.INT, op.token)) + ctx.stack.append((DataType.INT, op.token)) else: - compiler_error_with_expansion_stack(op.token, "invalid argument type for LOAD intrinsic: %s" % a_type) + compiler_error_with_expansion_stack(op.token, "invalid argument type for LOAD8 intrinsic: %s" % a_type) exit(1) - elif op.operand == Intrinsic.STORE: - assert len(DataType) == 3, "Exhaustive type handling in STORE intrinsic" - if len(stack) < 2: + elif op.operand == Intrinsic.STORE8: + assert len(DataType) == 3, "Exhaustive type handling in STORE8 intrinsic" + if len(ctx.stack) < 2: not_enough_arguments(op) exit(1) - a_type, a_loc = stack.pop() - b_type, b_loc = stack.pop() + a_type, a_loc = ctx.stack.pop() + b_type, b_loc = ctx.stack.pop() - if a_type == DataType.INT and b_type == DataType.PTR: + if a_type == DataType.PTR and b_type == DataType.INT: pass else: - compiler_error_with_expansion_stack(op.token, "invalid argument type for STORE intrinsic") + compiler_error_with_expansion_stack(op.token, "invalid argument type for STORE8 intrinsic") exit(1) - elif op.operand == Intrinsic.FORTH_LOAD: - assert len(DataType) == 3, "Exhaustive type handling in LOAD intrinsic" - if len(stack) < 1: + elif op.operand == Intrinsic.LOAD16: + assert len(DataType) == 3, "Exhaustive type handling in LOAD16 intrinsic" + if len(ctx.stack) < 1: not_enough_arguments(op) exit(1) - a_type, a_loc = stack.pop() + a_type, a_loc = ctx.stack.pop() if a_type == DataType.PTR: - stack.append((DataType.INT, op.token)) + ctx.stack.append((DataType.INT, op.token)) 
else: - compiler_error_with_expansion_stack(op.token, "invalid argument type for LOAD intrinsic: %s" % a_type) + compiler_error_with_expansion_stack(op.token, "invalid argument type for LOAD16 intrinsic: %s" % a_type) exit(1) - elif op.operand == Intrinsic.FORTH_STORE: - assert len(DataType) == 3, "Exhaustive type handling in STORE intrinsic" - if len(stack) < 2: + elif op.operand == Intrinsic.STORE16: + assert len(DataType) == 3, "Exhaustive type handling in STORE16 intrinsic" + if len(ctx.stack) < 2: not_enough_arguments(op) exit(1) - a_type, a_loc = stack.pop() - b_type, b_loc = stack.pop() + a_type, a_loc = ctx.stack.pop() + b_type, b_loc = ctx.stack.pop() if a_type == DataType.PTR and b_type == DataType.INT: pass else: - compiler_error_with_expansion_stack(op.token, "invalid argument type for STORE intrinsic") + compiler_error_with_expansion_stack(op.token, "invalid argument type for STORE16 intrinsic") exit(1) - elif op.operand == Intrinsic.LOAD64: - assert len(DataType) == 3, "Exhaustive type handling in LOAD64 intrinsic" - if len(stack) < 1: + elif op.operand == Intrinsic.LOAD32: + assert len(DataType) == 3, "Exhaustive type handling in LOAD32 intrinsic" + if len(ctx.stack) < 1: not_enough_arguments(op) exit(1) - a_type, a_loc = stack.pop() + a_type, a_loc = ctx.stack.pop() if a_type == DataType.PTR: - stack.append((DataType.INT, op.token)) + ctx.stack.append((DataType.INT, op.token)) else: - compiler_error_with_expansion_stack(op.token, "invalid argument type for LOAD64 intrinsic") + compiler_error_with_expansion_stack(op.token, "invalid argument type for LOAD32 intrinsic: %s" % a_type) exit(1) - elif op.operand == Intrinsic.STORE64: - assert len(DataType) == 3, "Exhaustive type handling in STORE64 intrinsic" - if len(stack) < 2: + elif op.operand == Intrinsic.STORE32: + assert len(DataType) == 3, "Exhaustive type handling in STORE32 intrinsic" + if len(ctx.stack) < 2: not_enough_arguments(op) exit(1) - a_type, a_loc = stack.pop() - b_type, b_loc = 
stack.pop() + a_type, a_loc = ctx.stack.pop() + b_type, b_loc = ctx.stack.pop() - if (a_type == DataType.INT or a_type == DataType.PTR) and b_type == DataType.PTR: + if a_type == DataType.PTR and b_type == DataType.INT: pass else: - compiler_error_with_expansion_stack(op.token, "invalid argument type for STORE64 intrinsic: %s" % [b_type, a_type]) + compiler_error_with_expansion_stack(op.token, "invalid argument type for STORE32 intrinsic") exit(1) - elif op.operand == Intrinsic.FORTH_LOAD64: + elif op.operand == Intrinsic.LOAD64: assert len(DataType) == 3, "Exhaustive type handling in LOAD64 intrinsic" - if len(stack) < 1: + if len(ctx.stack) < 1: not_enough_arguments(op) exit(1) - a_type, a_loc = stack.pop() + a_type, a_loc = ctx.stack.pop() if a_type == DataType.PTR: - stack.append((DataType.INT, op.token)) + ctx.stack.append((DataType.INT, op.token)) else: compiler_error_with_expansion_stack(op.token, "invalid argument type for LOAD64 intrinsic") exit(1) - elif op.operand == Intrinsic.FORTH_STORE64: + elif op.operand == Intrinsic.STORE64: assert len(DataType) == 3, "Exhaustive type handling in STORE64 intrinsic" - if len(stack) < 2: + if len(ctx.stack) < 2: not_enough_arguments(op) exit(1) - a_type, a_loc = stack.pop() - b_type, b_loc = stack.pop() + a_type, a_loc = ctx.stack.pop() + b_type, b_loc = ctx.stack.pop() if (b_type == DataType.INT or b_type == DataType.PTR) and a_type == DataType.PTR: pass @@ -925,174 +999,147 @@ def type_check_program(program: Program): compiler_error_with_expansion_stack(op.token, "invalid argument type for STORE64 intrinsic: %s" % [b_type, a_type]) exit(1) elif op.operand == Intrinsic.CAST_PTR: - if len(stack) < 1: + if len(ctx.stack) < 1: + not_enough_arguments(op) + exit(1) + + a_type, a_token = ctx.stack.pop() + + ctx.stack.append((DataType.PTR, a_token)) + elif op.operand == Intrinsic.CAST_INT: + if len(ctx.stack) < 1: + not_enough_arguments(op) + exit(1) + + a_type, a_token = ctx.stack.pop() + + ctx.stack.append((DataType.INT, 
a_token)) + elif op.operand == Intrinsic.CAST_BOOL: + if len(ctx.stack) < 1: not_enough_arguments(op) exit(1) - a_type, a_token = stack.pop() + a_type, a_token = ctx.stack.pop() - stack.append((DataType.PTR, a_token)) + ctx.stack.append((DataType.BOOL, a_token)) elif op.operand == Intrinsic.ARGC: - stack.append((DataType.INT, op.token)) + ctx.stack.append((DataType.INT, op.token)) elif op.operand == Intrinsic.ARGV: - stack.append((DataType.PTR, op.token)) + ctx.stack.append((DataType.PTR, op.token)) elif op.operand == Intrinsic.HERE: - stack.append((DataType.INT, op.token)) - stack.append((DataType.PTR, op.token)) + ctx.stack.append((DataType.INT, op.token)) + ctx.stack.append((DataType.PTR, op.token)) # TODO: figure out how to type check syscall arguments and return types elif op.operand == Intrinsic.SYSCALL0: - if len(stack) < 1: + if len(ctx.stack) < 1: not_enough_arguments(op) exit(1) for i in range(1): - stack.pop() - stack.append((DataType.INT, op.token)) + ctx.stack.pop() + ctx.stack.append((DataType.INT, op.token)) elif op.operand == Intrinsic.SYSCALL1: - if len(stack) < 2: + if len(ctx.stack) < 2: not_enough_arguments(op) exit(1) for i in range(2): - stack.pop() - stack.append((DataType.INT, op.token)) + ctx.stack.pop() + ctx.stack.append((DataType.INT, op.token)) elif op.operand == Intrinsic.SYSCALL2: - if len(stack) < 3: + if len(ctx.stack) < 3: not_enough_arguments(op) exit(1) for i in range(3): - stack.pop() - stack.append((DataType.INT, op.token)) + ctx.stack.pop() + ctx.stack.append((DataType.INT, op.token)) elif op.operand == Intrinsic.SYSCALL3: - if len(stack) < 4: + if len(ctx.stack) < 4: not_enough_arguments(op) exit(1) for i in range(4): - stack.pop() - stack.append((DataType.INT, op.token)) + ctx.stack.pop() + ctx.stack.append((DataType.INT, op.token)) elif op.operand == Intrinsic.SYSCALL4: - if len(stack) < 5: + if len(ctx.stack) < 5: not_enough_arguments(op) exit(1) for i in range(5): - stack.pop() - stack.append((DataType.INT, op.token)) + 
ctx.stack.pop() + ctx.stack.append((DataType.INT, op.token)) elif op.operand == Intrinsic.SYSCALL5: - if len(stack) < 6: + if len(ctx.stack) < 6: not_enough_arguments(op) exit(1) for i in range(6): - stack.pop() - stack.append((DataType.INT, op.token)) + ctx.stack.pop() + ctx.stack.append((DataType.INT, op.token)) elif op.operand == Intrinsic.SYSCALL6: - if len(stack) < 7: + if len(ctx.stack) < 7: not_enough_arguments(op) exit(1) for i in range(7): - stack.pop() - stack.append((DataType.INT, op.token)) + ctx.stack.pop() + ctx.stack.append((DataType.INT, op.token)) else: assert False, "unreachable" + ctx.ip += 1 elif op.typ == OpType.IF: - block_stack.append((copy(stack), op.typ)) + if len(ctx.stack) < 1: + not_enough_arguments(op) + exit(1) + a_type, a_token = ctx.stack.pop() + if a_type != DataType.BOOL: + compiler_error_with_expansion_stack(op.token, "Invalid argument for the if condition. Expected BOOL.") + exit(1) + ctx.ip += 1 + assert isinstance(op.operand, OpAddr) + contexts.append(Context(stack=copy(ctx.stack), ip=op.operand, ret_stack=copy(ctx.ret_stack))) + ctx = contexts[-1] + elif op.typ == OpType.IFSTAR: + if len(ctx.stack) < 1: + not_enough_arguments(op) + exit(1) + a_type, a_token = ctx.stack.pop() + if a_type != DataType.BOOL: + compiler_error_with_expansion_stack(op.token, "Invalid argument for the `if*` condition. 
Expected BOOL.") + exit(1) + ctx.ip += 1 + assert isinstance(op.operand, OpAddr) + contexts.append(Context(stack=copy(ctx.stack), ip=op.operand, ret_stack=copy(ctx.ret_stack))) + ctx = contexts[-1] elif op.typ == OpType.WHILE: - block_stack.append((copy(stack), op.typ)) + ctx.ip += 1 elif op.typ == OpType.END: - block_snapshot, block_type = block_stack.pop() - assert len(OpType) == 10, "Exhaustive handling of op types" - if block_type == OpType.ELSE: - expected_types = list(map(lambda x: x[0], block_snapshot)) - actual_types = list(map(lambda x: x[0], stack)) - if expected_types != actual_types: - compiler_error_with_expansion_stack(op.token, 'all branches of the if-block must produce the same types of the arguments on the data stack') - compiler_note(op.token.loc, 'Expected types: %s' % expected_types) - compiler_note(op.token.loc, 'Actual types: %s' % actual_types) - exit(1) - elif block_type == OpType.ELIF: - expected_types = list(map(lambda x: x[0], block_snapshot)) - actual_types = list(map(lambda x: x[0], stack)) - if expected_types != actual_types: - compiler_error_with_expansion_stack(op.token, 'all branches of the if-block must produce the same types of the arguments on the data stack') - compiler_note(op.token.loc, 'Expected types: %s' % expected_types) - compiler_note(op.token.loc, 'Actual types: %s' % actual_types) - exit(1) - elif block_type == OpType.DO: - begin_snapshot, begin_type = block_stack.pop() - - if begin_type == OpType.WHILE: - expected_types = list(map(lambda x: x[0], begin_snapshot)) - actual_types = list(map(lambda x: x[0], stack)) - - if expected_types != actual_types: - compiler_error_with_expansion_stack(op.token, 'while-do body is not allowed to alter the types of the arguments on the data stack') - compiler_note(op.token.loc, 'Expected types: %s' % expected_types) - compiler_note(op.token.loc, 'Actual types: %s' % actual_types) - exit(1) - - stack = block_snapshot - elif begin_type == OpType.IF: - expected_types = list(map(lambda x: 
x[0], begin_snapshot)) - actual_types = list(map(lambda x: x[0], stack)) - - if expected_types != actual_types: - compiler_error_with_expansion_stack(op.token, 'else-less if block is not allowed to alter the types of the arguments on the data stack') - compiler_note(op.token.loc, 'Expected types: %s' % expected_types) - compiler_note(op.token.loc, 'Actual types: %s' % actual_types) - exit(1) - - stack = block_snapshot - else: - assert "unreachable" - else: - assert "unreachable" + assert isinstance(op.operand, OpAddr) + ctx.ip = op.operand elif op.typ == OpType.ELSE: - do_snapshot, do_type = block_stack.pop() - assert do_type == OpType.DO - - pre_do_snapshot, pre_do_type = block_stack.pop() - assert pre_do_type == OpType.IF or pre_do_type == OpType.ELIF, pre_do_type - - if pre_do_type == OpType.ELIF: - expected_types = list(map(lambda x: x[0], pre_do_snapshot)) - actual_types = list(map(lambda x: x[0], stack)) - if expected_types != actual_types: - compiler_error_with_expansion_stack(op.token, 'all branches of the if-block must produce the same types of the arguments on the data stack') - compiler_note(op.token.loc, 'Expected types: %s' % expected_types) - compiler_note(op.token.loc, 'Actual types: %s' % actual_types) - exit(1) - - block_stack.append((copy(stack), op.typ)) - stack = do_snapshot - elif op.typ == OpType.ELIF: - do_snapshot, do_type = block_stack.pop() - assert do_type == OpType.DO - - pre_do_snapshot, pre_do_type = block_stack.pop() - assert pre_do_type == OpType.IF or pre_do_type == OpType.ELIF, pre_do_type - - if pre_do_type == OpType.ELIF: - expected_types = list(map(lambda x: x[0], pre_do_snapshot)) - actual_types = list(map(lambda x: x[0], stack)) - if expected_types != actual_types: - compiler_error_with_expansion_stack(op.token, 'all branches of the if-block must produce the same types of the arguments on the data stack') - compiler_note(op.token.loc, 'Expected types: %s' % expected_types) - compiler_note(op.token.loc, 'Actual types: %s' % 
actual_types) - exit(1) - - block_stack.append((copy(stack), op.typ)) - stack = do_snapshot + assert isinstance(op.operand, OpAddr) + ctx.ip = op.operand elif op.typ == OpType.DO: - if len(stack) < 1: + assert isinstance(op.operand, OpAddr) + if len(ctx.stack) < 1: not_enough_arguments(op) exit(1) - a_type, a_token = stack.pop() + a_type, a_token = ctx.stack.pop() if a_type != DataType.BOOL: compiler_error_with_expansion_stack(op.token, "Invalid argument for the while-do condition. Expected BOOL.") exit(1) - block_stack.append((copy(stack), op.typ)) + call_path = tuple(ctx.ret_stack + [ctx.ip]) + if call_path in visited_dos: + expected_types = list(map(lambda x: x[0], visited_dos[call_path])) + actual_types = list(map(lambda x: x[0], ctx.stack)) + if expected_types != actual_types: + compiler_error_with_expansion_stack(op.token, 'Loops are not allowed to alter types and amount of elements on the stack.') + compiler_note(op.token.loc, 'Expected elements: %s' % expected_types) + compiler_note(op.token.loc, 'Actual elements: %s' % actual_types) + exit(1) + contexts.pop() + else: + visited_dos[call_path] = copy(ctx.stack) + ctx.ip += 1 + contexts.append(Context(stack=copy(ctx.stack), ip=op.operand, ret_stack=copy(ctx.ret_stack))) + ctx = contexts[-1] else: assert False, "unreachable" - if len(stack) != 0: - compiler_error_with_expansion_stack(stack[-1][1], "unhandled data on the stack: %s" % list(map(lambda x: x[0], stack))) - exit(1) def generate_nasm_linux_x86_64(program: Program, out_file_path: str): strs: List[bytes] = [] @@ -1135,20 +1182,21 @@ def generate_nasm_linux_x86_64(program: Program, out_file_path: str): out.write("global _start\n") out.write("_start:\n") out.write(" mov [args_ptr], rsp\n") - for ip in range(len(program)): - op = program[ip] - assert len(OpType) == 10, "Exhaustive ops handling in generate_nasm_linux_x86_64" + out.write(" mov rax, ret_stack_end\n") + out.write(" mov [ret_stack_rsp], rax\n") + for ip in range(len(program.ops)): + op = 
program.ops[ip] + assert len(OpType) == 16, "Exhaustive ops handling in generate_nasm_linux_x86_64" out.write("addr_%d:\n" % ip) + out.write(" ;; -- %s:%d:%d: %s (%s) --\n" % (op.token.loc + (repr(op.token.text), op.typ))) if op.typ == OpType.PUSH_INT: - assert isinstance(op.operand, int), "This could be a bug in the parsing step" - out.write(" ;; -- push int %d --\n" % op.operand) + assert isinstance(op.operand, int), f"This could be a bug in the parsing step {op.operand}" out.write(" mov rax, %d\n" % op.operand) out.write(" push rax\n") elif op.typ == OpType.PUSH_STR: assert isinstance(op.operand, str), "This could be a bug in the parsing step" value = op.operand.encode('utf-8') n = len(value) - out.write(" ;; -- push str --\n") out.write(" mov rax, %d\n" % n) out.write(" push rax\n") out.write(" push str_%d\n" % len(strs)) @@ -1156,54 +1204,76 @@ def generate_nasm_linux_x86_64(program: Program, out_file_path: str): elif op.typ == OpType.PUSH_CSTR: assert isinstance(op.operand, str), "This could be a bug in the parsing step" value = op.operand.encode('utf-8') + b'\0' - out.write(" ;; -- push str --\n") out.write(" push str_%d\n" % len(strs)) strs.append(value) - elif op.typ == OpType.IF: - out.write(" ;; -- if --\n") + elif op.typ == OpType.PUSH_MEM: + assert isinstance(op.operand, MemAddr), "This could be a bug in the parsing step" + out.write(" mov rax, mem\n") + out.write(" add rax, %d\n" % op.operand) + out.write(" push rax\n") + elif op.typ == OpType.PUSH_LOCAL_MEM: + assert isinstance(op.operand, MemAddr) + out.write(" mov rax, [ret_stack_rsp]\n"); + out.write(" add rax, %d\n" % op.operand) + out.write(" push rax\n") + elif op.typ in [OpType.IF, OpType.IFSTAR]: + out.write(" pop rax\n") + out.write(" test rax, rax\n") + assert isinstance(op.operand, OpAddr), f"This could be a bug in the parsing step {op.operand}" + out.write(" jz addr_%d\n" % op.operand) elif op.typ == OpType.WHILE: - out.write(" ;; -- while --\n") + pass elif op.typ == OpType.ELSE: - 
out.write(" ;; -- else --\n") assert isinstance(op.operand, OpAddr), "This could be a bug in the parsing step" out.write(" jmp addr_%d\n" % op.operand) - elif op.typ == OpType.ELIF: - out.write(" ;; -- elif --\n") - assert isinstance(op.operand, OpAddr), f"This could be a bug in the parsing step: {op.operand}" - out.write(" jmp addr_%d\n" % op.operand) elif op.typ == OpType.END: assert isinstance(op.operand, int), "This could be a bug in the parsing step" - out.write(" ;; -- end --\n") if ip + 1 != op.operand: out.write(" jmp addr_%d\n" % op.operand) elif op.typ == OpType.DO: - out.write(" ;; -- do --\n") out.write(" pop rax\n") out.write(" test rax, rax\n") assert isinstance(op.operand, int), "This could be a bug in the parsing step" out.write(" jz addr_%d\n" % op.operand) + elif op.typ == OpType.SKIP_PROC: + assert isinstance(op.operand, OpAddr), f"This could be a bug in the parsing step: {op.operand}" + out.write(" jmp addr_%d\n" % op.operand) + elif op.typ == OpType.PREP_PROC: + assert isinstance(op.operand, int) + out.write(" sub rsp, %d\n" % op.operand) + out.write(" mov [ret_stack_rsp], rsp\n") + out.write(" mov rsp, rax\n") + elif op.typ == OpType.CALL: + assert isinstance(op.operand, OpAddr), f"This could be a bug in the parsing step: {op.operand}" + out.write(" mov rax, rsp\n") + out.write(" mov rsp, [ret_stack_rsp]\n") + out.write(" call addr_%d\n" % op.operand) + out.write(" mov [ret_stack_rsp], rsp\n") + out.write(" mov rsp, rax\n") + elif op.typ == OpType.RET: + assert isinstance(op.operand, int) + out.write(" mov rax, rsp\n") + out.write(" mov rsp, [ret_stack_rsp]\n") + out.write(" add rsp, %d\n" % op.operand) + out.write(" ret\n") elif op.typ == OpType.INTRINSIC: - assert len(Intrinsic) == 41, "Exhaustive intrinsic handling in generate_nasm_linux_x86_64()" + assert len(Intrinsic) == 42, "Exhaustive intrinsic handling in generate_nasm_linux_x86_64()" if op.operand == Intrinsic.PLUS: - out.write(" ;; -- plus --\n") out.write(" pop rax\n") out.write(" 
pop rbx\n") out.write(" add rax, rbx\n") out.write(" push rax\n") elif op.operand == Intrinsic.MINUS: - out.write(" ;; -- minus --\n") out.write(" pop rax\n") out.write(" pop rbx\n") out.write(" sub rbx, rax\n") out.write(" push rbx\n") elif op.operand == Intrinsic.MUL: - out.write(" ;; -- mul --\n") out.write(" pop rax\n") out.write(" pop rbx\n") out.write(" mul rbx\n") out.write(" push rax\n") elif op.operand == Intrinsic.DIVMOD: - out.write(" ;; -- mod --\n") out.write(" xor rdx, rdx\n") out.write(" pop rbx\n") out.write(" pop rax\n") @@ -1211,40 +1281,33 @@ def generate_nasm_linux_x86_64(program: Program, out_file_path: str): out.write(" push rax\n"); out.write(" push rdx\n"); elif op.operand == Intrinsic.SHR: - out.write(" ;; -- shr --\n") out.write(" pop rcx\n") out.write(" pop rbx\n") out.write(" shr rbx, cl\n") out.write(" push rbx\n") elif op.operand == Intrinsic.SHL: - out.write(" ;; -- shl --\n") out.write(" pop rcx\n") out.write(" pop rbx\n") out.write(" shl rbx, cl\n") out.write(" push rbx\n") elif op.operand == Intrinsic.OR: - out.write(" ;; -- bor --\n") out.write(" pop rax\n") out.write(" pop rbx\n") out.write(" or rbx, rax\n") out.write(" push rbx\n") elif op.operand == Intrinsic.AND: - out.write(" ;; -- band --\n") out.write(" pop rax\n") out.write(" pop rbx\n") out.write(" and rbx, rax\n") out.write(" push rbx\n") elif op.operand == Intrinsic.NOT: - out.write(" ;; -- not --\n") out.write(" pop rax\n") out.write(" not rax\n") out.write(" push rax\n") elif op.operand == Intrinsic.PRINT: - out.write(" ;; -- print --\n") out.write(" pop rdi\n") out.write(" call print\n") elif op.operand == Intrinsic.EQ: - out.write(" ;; -- equal --\n") out.write(" mov rcx, 0\n"); out.write(" mov rdx, 1\n"); out.write(" pop rax\n"); @@ -1253,7 +1316,6 @@ def generate_nasm_linux_x86_64(program: Program, out_file_path: str): out.write(" cmove rcx, rdx\n"); out.write(" push rcx\n") elif op.operand == Intrinsic.GT: - out.write(" ;; -- gt --\n") out.write(" mov rcx, 0\n"); 
out.write(" mov rdx, 1\n"); out.write(" pop rbx\n"); @@ -1262,7 +1324,6 @@ def generate_nasm_linux_x86_64(program: Program, out_file_path: str): out.write(" cmovg rcx, rdx\n"); out.write(" push rcx\n") elif op.operand == Intrinsic.LT: - out.write(" ;; -- gt --\n") out.write(" mov rcx, 0\n"); out.write(" mov rdx, 1\n"); out.write(" pop rbx\n"); @@ -1271,7 +1332,6 @@ def generate_nasm_linux_x86_64(program: Program, out_file_path: str): out.write(" cmovl rcx, rdx\n"); out.write(" push rcx\n") elif op.operand == Intrinsic.GE: - out.write(" ;; -- gt --\n") out.write(" mov rcx, 0\n"); out.write(" mov rdx, 1\n"); out.write(" pop rbx\n"); @@ -1280,7 +1340,6 @@ def generate_nasm_linux_x86_64(program: Program, out_file_path: str): out.write(" cmovge rcx, rdx\n"); out.write(" push rcx\n") elif op.operand == Intrinsic.LE: - out.write(" ;; -- gt --\n") out.write(" mov rcx, 0\n"); out.write(" mov rdx, 1\n"); out.write(" pop rbx\n"); @@ -1289,7 +1348,6 @@ def generate_nasm_linux_x86_64(program: Program, out_file_path: str): out.write(" cmovle rcx, rdx\n"); out.write(" push rcx\n") elif op.operand == Intrinsic.NE: - out.write(" ;; -- ne --\n") out.write(" mov rcx, 0\n") out.write(" mov rdx, 1\n") out.write(" pop rbx\n") @@ -1298,121 +1356,98 @@ def generate_nasm_linux_x86_64(program: Program, out_file_path: str): out.write(" cmovne rcx, rdx\n") out.write(" push rcx\n") elif op.operand == Intrinsic.DUP: - out.write(" ;; -- dup --\n") out.write(" pop rax\n") out.write(" push rax\n") out.write(" push rax\n") elif op.operand == Intrinsic.SWAP: - out.write(" ;; -- swap --\n") out.write(" pop rax\n") out.write(" pop rbx\n") out.write(" push rax\n") out.write(" push rbx\n") elif op.operand == Intrinsic.DROP: - out.write(" ;; -- drop --\n") out.write(" pop rax\n") elif op.operand == Intrinsic.OVER: - out.write(" ;; -- over --\n") out.write(" pop rax\n") out.write(" pop rbx\n") out.write(" push rbx\n") out.write(" push rax\n") out.write(" push rbx\n") elif op.operand == Intrinsic.ROT: - 
out.write(" ;; -- rot --\n") out.write(" pop rax\n") out.write(" pop rbx\n") out.write(" pop rcx\n") out.write(" push rbx\n") out.write(" push rax\n") out.write(" push rcx\n") - elif op.operand == Intrinsic.MEM: - out.write(" ;; -- mem --\n") - out.write(" push mem\n") - elif op.operand == Intrinsic.LOAD: - out.write(" ;; -- load --\n") + elif op.operand == Intrinsic.LOAD8: out.write(" pop rax\n") out.write(" xor rbx, rbx\n") out.write(" mov bl, [rax]\n") out.write(" push rbx\n") - elif op.operand == Intrinsic.STORE: - out.write(" ;; -- store --\n") - out.write(" pop rbx\n"); + elif op.operand == Intrinsic.STORE8: out.write(" pop rax\n"); + out.write(" pop rbx\n"); out.write(" mov [rax], bl\n"); - elif op.operand == Intrinsic.FORTH_LOAD: - out.write(" ;; -- forth load --\n") + elif op.operand == Intrinsic.LOAD16: out.write(" pop rax\n") out.write(" xor rbx, rbx\n") - out.write(" mov bl, [rax]\n") + out.write(" mov bx, [rax]\n") out.write(" push rbx\n") - elif op.operand == Intrinsic.FORTH_STORE: - out.write(" ;; -- store --\n") + elif op.operand == Intrinsic.STORE16: out.write(" pop rax\n"); out.write(" pop rbx\n"); - out.write(" mov [rax], bl\n"); + out.write(" mov [rax], bx\n"); + elif op.operand == Intrinsic.LOAD32: + out.write(" pop rax\n") + out.write(" xor rbx, rbx\n") + out.write(" mov ebx, [rax]\n") + out.write(" push rbx\n") + elif op.operand == Intrinsic.STORE32: + out.write(" pop rax\n"); + out.write(" pop rbx\n"); + out.write(" mov [rax], ebx\n"); + elif op.operand == Intrinsic.LOAD64: + out.write(" pop rax\n") + out.write(" xor rbx, rbx\n") + out.write(" mov rbx, [rax]\n") + out.write(" push rbx\n") + elif op.operand == Intrinsic.STORE64: + out.write(" pop rax\n"); + out.write(" pop rbx\n"); + out.write(" mov [rax], rbx\n"); elif op.operand == Intrinsic.ARGC: - out.write(" ;; -- argc --\n") out.write(" mov rax, [args_ptr]\n") out.write(" mov rax, [rax]\n") out.write(" push rax\n") elif op.operand == Intrinsic.ARGV: - out.write(" ;; -- argv --\n") 
out.write(" mov rax, [args_ptr]\n") out.write(" add rax, 8\n") out.write(" push rax\n") elif op.operand == Intrinsic.HERE: value = ("%s:%d:%d" % op.token.loc).encode('utf-8') n = len(value) - out.write(" ;; -- here --\n") out.write(" mov rax, %d\n" % n) out.write(" push rax\n") out.write(" push str_%d\n" % len(strs)) strs.append(value) - elif op.operand == Intrinsic.LOAD64: - out.write(" ;; -- load --\n") - out.write(" pop rax\n") - out.write(" xor rbx, rbx\n") - out.write(" mov rbx, [rax]\n") - out.write(" push rbx\n") - elif op.operand == Intrinsic.STORE64: - out.write(" ;; -- store --\n") - out.write(" pop rbx\n"); - out.write(" pop rax\n"); - out.write(" mov [rax], rbx\n"); - elif op.operand == Intrinsic.FORTH_LOAD64: - out.write(" ;; -- forth load64 --\n") - out.write(" pop rax\n") - out.write(" xor rbx, rbx\n") - out.write(" mov rbx, [rax]\n") - out.write(" push rbx\n") - elif op.operand == Intrinsic.FORTH_STORE64: - out.write(" ;; -- forth store64 --\n") - out.write(" pop rax\n"); - out.write(" pop rbx\n"); - out.write(" mov [rax], rbx\n"); - elif op.operand == Intrinsic.CAST_PTR: - out.write(" ;; -- cast(ptr) --\n") + elif op.operand in [Intrinsic.CAST_PTR, Intrinsic.CAST_INT, Intrinsic.CAST_BOOL]: + pass elif op.operand == Intrinsic.SYSCALL0: - out.write(" ;; -- syscall0 --\n") out.write(" pop rax\n") out.write(" syscall\n") out.write(" push rax\n") elif op.operand == Intrinsic.SYSCALL1: - out.write(" ;; -- syscall1 --\n") out.write(" pop rax\n") out.write(" pop rdi\n") out.write(" syscall\n") out.write(" push rax\n") elif op.operand == Intrinsic.SYSCALL2: - out.write(" ;; -- syscall2 --\n") out.write(" pop rax\n"); out.write(" pop rdi\n"); out.write(" pop rsi\n"); out.write(" syscall\n"); out.write(" push rax\n") elif op.operand == Intrinsic.SYSCALL3: - out.write(" ;; -- syscall3 --\n") out.write(" pop rax\n") out.write(" pop rdi\n") out.write(" pop rsi\n") @@ -1420,7 +1455,6 @@ def generate_nasm_linux_x86_64(program: Program, out_file_path: str): 
out.write(" syscall\n") out.write(" push rax\n") elif op.operand == Intrinsic.SYSCALL4: - out.write(" ;; -- syscall4 --\n") out.write(" pop rax\n") out.write(" pop rdi\n") out.write(" pop rsi\n") @@ -1429,7 +1463,6 @@ def generate_nasm_linux_x86_64(program: Program, out_file_path: str): out.write(" syscall\n") out.write(" push rax\n") elif op.operand == Intrinsic.SYSCALL5: - out.write(" ;; -- syscall5 --\n") out.write(" pop rax\n") out.write(" pop rdi\n") out.write(" pop rsi\n") @@ -1439,7 +1472,6 @@ def generate_nasm_linux_x86_64(program: Program, out_file_path: str): out.write(" syscall\n") out.write(" push rax\n") elif op.operand == Intrinsic.SYSCALL6: - out.write(" ;; -- syscall6 --\n") out.write(" pop rax\n") out.write(" pop rdi\n") out.write(" pop rsi\n") @@ -1454,7 +1486,7 @@ def generate_nasm_linux_x86_64(program: Program, out_file_path: str): else: assert False, "unreachable" - out.write("addr_%d:\n" % len(program)) + out.write("addr_%d:\n" % len(program.ops)) out.write(" mov rax, 60\n") out.write(" mov rdi, 0\n") out.write(" syscall\n") @@ -1463,22 +1495,31 @@ def generate_nasm_linux_x86_64(program: Program, out_file_path: str): out.write("str_%d: db %s\n" % (index, ','.join(map(hex, list(s))))) out.write("segment .bss\n") out.write("args_ptr: resq 1\n") - out.write("mem: resb %d\n" % MEM_CAPACITY) + out.write("ret_stack_rsp: resq 1\n") + out.write("ret_stack: resb %d\n" % X86_64_RET_STACK_CAP) + out.write("ret_stack_end:\n") + out.write("mem: resb %d\n" % program.memory_capacity) -assert len(Keyword) == 8, "Exhaustive KEYWORD_NAMES definition." -KEYWORD_NAMES = { +assert len(Keyword) == 13, "Exhaustive KEYWORD_NAMES definition." 
+KEYWORD_BY_NAMES: Dict[str, Keyword] = { 'if': Keyword.IF, - 'elif': Keyword.ELIF, + 'if*': Keyword.IFSTAR, 'else': Keyword.ELSE, - 'end': Keyword.END, 'while': Keyword.WHILE, 'do': Keyword.DO, 'macro': Keyword.MACRO, 'include': Keyword.INCLUDE, + 'memory': Keyword.MEMORY, + 'proc': Keyword.PROC, + 'end': Keyword.END, + 'const': Keyword.CONST, + 'offset': Keyword.OFFSET, + 'reset': Keyword.RESET, } +KEYWORD_NAMES: Dict[Keyword, str] = {v: k for k, v in KEYWORD_BY_NAMES.items()} -assert len(Intrinsic) == 41, "Exhaustive INTRINSIC_BY_NAMES definition" -INTRINSIC_BY_NAMES = { +assert len(Intrinsic) == 42, "Exhaustive INTRINSIC_BY_NAMES definition" +INTRINSIC_BY_NAMES: Dict[str, Intrinsic] = { '+': Intrinsic.PLUS, '-': Intrinsic.MINUS, '*': Intrinsic.MUL, @@ -1500,16 +1541,17 @@ def generate_nasm_linux_x86_64(program: Program, out_file_path: str): 'drop': Intrinsic.DROP, 'over': Intrinsic.OVER, 'rot': Intrinsic.ROT, - 'mem': Intrinsic.MEM, - '.': Intrinsic.STORE, - ',': Intrinsic.LOAD, - '!': Intrinsic.FORTH_STORE, - '@': Intrinsic.FORTH_LOAD, - '.64': Intrinsic.STORE64, - ',64': Intrinsic.LOAD64, - '!64': Intrinsic.FORTH_STORE64, - '@64': Intrinsic.FORTH_LOAD64, + '!8': Intrinsic.STORE8, + '@8': Intrinsic.LOAD8, + '!16': Intrinsic.STORE16, + '@16': Intrinsic.LOAD16, + '!32': Intrinsic.STORE32, + '@32': Intrinsic.LOAD32, + '!64': Intrinsic.STORE64, + '@64': Intrinsic.LOAD64, 'cast(ptr)': Intrinsic.CAST_PTR, + 'cast(int)': Intrinsic.CAST_INT, + 'cast(bool)': Intrinsic.CAST_BOOL, 'argc': Intrinsic.ARGC, 'argv': Intrinsic.ARGV, 'here': Intrinsic.HERE, @@ -1521,28 +1563,52 @@ def generate_nasm_linux_x86_64(program: Program, out_file_path: str): 'syscall5': Intrinsic.SYSCALL5, 'syscall6': Intrinsic.SYSCALL6, } -INTRINSIC_NAMES = {v: k for k, v in INTRINSIC_BY_NAMES.items()} +INTRINSIC_NAMES: Dict[Intrinsic, str] = {v: k for k, v in INTRINSIC_BY_NAMES.items()} @dataclass class Macro: loc: Loc tokens: List[Token] -def human(obj: Union[TokenType, Op, Intrinsic]) -> str: 
+class HumanNumber(Enum): + Singular=auto() + Plural=auto() + +def human(obj: TokenType, number: HumanNumber = HumanNumber.Singular) -> str: '''Human readable representation of an object that can be used in error messages''' - assert len(TokenType) == 6, "Exhaustive handling of token types in human()" - if obj == TokenType.WORD: - return "a word" - elif obj == TokenType.INT: - return "an integer" - elif obj == TokenType.STR: - return "a string" - elif obj == TokenType.CSTR: - return "a C-style string" - elif obj == TokenType.CHAR: - return "a character" - elif obj == TokenType.KEYWORD: - return "a keyword" + assert len(HumanNumber) == 2, "Exhaustive handling of number category in human()" + if number == HumanNumber.Singular: + assert len(TokenType) == 6, "Exhaustive handling of token types in human()" + if obj == TokenType.WORD: + return "a word" + elif obj == TokenType.INT: + return "an integer" + elif obj == TokenType.STR: + return "a string" + elif obj == TokenType.CSTR: + return "a C-style string" + elif obj == TokenType.CHAR: + return "a character" + elif obj == TokenType.KEYWORD: + return "a keyword" + else: + assert False, "unreachable" + elif number == HumanNumber.Plural: + assert len(TokenType) == 6, "Exhaustive handling of token types in human()" + if obj == TokenType.WORD: + return "words" + elif obj == TokenType.INT: + return "integers" + elif obj == TokenType.STR: + return "strings" + elif obj == TokenType.CSTR: + return "C-style strings" + elif obj == TokenType.CHAR: + return "characters" + elif obj == TokenType.KEYWORD: + return "keywords" + else: + assert False, "unreachable" else: assert False, "unreachable" @@ -1553,11 +1619,129 @@ def expand_macro(macro: Macro, expanded_from: Token) -> List[Token]: token.expanded_count = expanded_from.expanded_count + 1 return result +@dataclass +class Memory: + offset: MemAddr + loc: Loc + +@dataclass +class Proc: + addr: OpAddr + loc: Loc + local_memories: Dict[str, Memory] + local_memory_capacity: int + 
+@dataclass +class Const: + value: int + loc: Loc + +def check_word_redefinition(token: Token, memories: Dict[str, Memory], macros: Dict[str, Macro], procs: Dict[str, Proc], consts: Dict[str, Const]): + assert token.typ == TokenType.WORD + assert isinstance(token.value, str) + name: str = token.value + if name in memories: + compiler_error_with_expansion_stack(token, "redefinition of a memory region `%s`" % name) + compiler_note(memories[name].loc, "the original definition is located here") + exit(1) + if name in INTRINSIC_BY_NAMES: + compiler_error_with_expansion_stack(token, "redefinition of an intrinsic word `%s`" % (name, )) + exit(1) + if name in macros: + compiler_error_with_expansion_stack(token, "redefinition of a macro `%s`" % (name, )) + compiler_note(macros[name].loc, "the original definition is located here") + exit(1) + if name in procs: + compiler_error_with_expansion_stack(token, "redefinition of a proc `%s`" % (name, )) + compiler_note(procs[name].loc, "the original definition is located here") + exit(1) + if name in consts: + compiler_error_with_expansion_stack(token, "redefinition of a constant `%s`" % (name, )) + compiler_note(consts[name].loc, "the original definition is located here") + exit(1) + +def eval_const_value(rtokens: List[Token], macros: Dict[str, Macro], consts: Dict[str, Const], iota: List[int]) -> int: + stack: List[int] = [] + while len(rtokens) > 0: + token = rtokens.pop() + if token.typ == TokenType.KEYWORD: + assert isinstance(token.value, Keyword) + if token.value == Keyword.END: + break + elif token.value == Keyword.OFFSET: + if len(stack) < 1: + compiler_error_with_expansion_stack(token, f"not enough arguments for `{KEYWORD_NAMES[token.value]}` keyword") + exit(1) + offset = stack.pop() + stack.append(iota[0]) + iota[0] += offset + elif token.value == Keyword.RESET: + stack.append(iota[0]) + iota[0] = 0 + else: + compiler_error_with_expansion_stack(token, f"unsupported keyword `{KEYWORD_NAMES[token.value]}` in compile time 
evaluation") + exit(1) + elif token.typ == TokenType.INT: + assert isinstance(token.value, int) + stack.append(token.value) + elif token.typ == TokenType.WORD: + assert isinstance(token.value, str) + if token.value == INTRINSIC_NAMES[Intrinsic.PLUS]: + if len(stack) < 2: + compiler_error_with_expansion_stack(token, f"not enough arguments for `{token.value}` intrinsic") + exit(1) + a = stack.pop() + b = stack.pop() + stack.append(a + b) + elif token.value == INTRINSIC_NAMES[Intrinsic.MUL]: + if len(stack) < 2: + compiler_error_with_expansion_stack(token, f"not enough arguments for `{token.value}` intrinsic") + exit(1) + a = stack.pop() + b = stack.pop() + stack.append(a * b) + elif token.value == INTRINSIC_NAMES[Intrinsic.DIVMOD]: + if len(stack) < 2: + compiler_error_with_expansion_stack(token, f"not enough arguments for `{token.value}` intrinsic") + exit(1) + a = stack.pop() + b = stack.pop() + stack.append(b//a) + stack.append(b%a) + elif token.value == INTRINSIC_NAMES[Intrinsic.DROP]: + if len(stack) < 1: + compiler_error_with_expansion_stack(token, f"not enough arguments for `{token.value}` intrinsic") + exit(1) + stack.pop() + elif token.value in macros: + if token.expanded_count >= expansion_limit: + compiler_error_with_expansion_stack(token, "the macro exceeded the expansion limit (it expanded %d times)" % token.expanded_count) + exit(1) + rtokens += reversed(expand_macro(macros[token.value], token)) + elif token.value in consts: + stack.append(consts[token.value].value) + else: + compiler_error_with_expansion_stack(token, f"unsupported word `{token.value}` in compile time evaluation") + exit(1) + else: + compiler_error_with_expansion_stack(token, f"{human(token.typ, HumanNumber.Plural)} are not supported in compile time evaluation") + exit(1) + if len(stack) != 1: + compiler_error_with_expansion_stack(token, "The result of expression in compile time evaluation must be a single number") + exit(1) + return stack.pop() + def parse_program_from_tokens(tokens: 
List[Token], include_paths: List[str], expansion_limit: int) -> Program: stack: List[OpAddr] = [] - program: List[Op] = [] + program: Program = Program(ops=[], memory_capacity=0) rtokens: List[Token] = list(reversed(tokens)) macros: Dict[str, Macro] = {} + memories: Dict[str, Memory] = {} + procs: Dict[str, Proc] = {} + consts: Dict[str, Const] = {} + current_proc: Optional[Proc] = None + iota: List[int] = [0] + # TODO: consider getting rid of the ip variable in parse_program_from_tokens() ip: OpAddr = 0; while len(rtokens) > 0: token = rtokens.pop() @@ -1565,115 +1749,144 @@ def parse_program_from_tokens(tokens: List[Token], include_paths: List[str], exp if token.typ == TokenType.WORD: assert isinstance(token.value, str), "This could be a bug in the lexer" if token.value in INTRINSIC_BY_NAMES: - program.append(Op(typ=OpType.INTRINSIC, token=token, operand=INTRINSIC_BY_NAMES[token.value])) + program.ops.append(Op(typ=OpType.INTRINSIC, token=token, operand=INTRINSIC_BY_NAMES[token.value])) ip += 1 elif token.value in macros: if token.expanded_count >= expansion_limit: compiler_error_with_expansion_stack(token, "the macro exceeded the expansion limit (it expanded %d times)" % token.expanded_count) exit(1) rtokens += reversed(expand_macro(macros[token.value], token)) + elif current_proc is not None and token.value in current_proc.local_memories: + program.ops.append(Op(typ=OpType.PUSH_LOCAL_MEM, token=token, operand=current_proc.local_memories[token.value].offset)) + ip += 1 + elif token.value in memories: + program.ops.append(Op(typ=OpType.PUSH_MEM, token=token, operand=memories[token.value].offset)) + ip += 1 + elif token.value in procs: + program.ops.append(Op(typ=OpType.CALL, token=token, operand=procs[token.value].addr)) + ip += 1 + elif token.value in consts: + program.ops.append(Op(typ=OpType.PUSH_INT, token=token, operand=consts[token.value].value)) + ip += 1 else: compiler_error_with_expansion_stack(token, "unknown word `%s`" % token.value) exit(1) elif 
token.typ == TokenType.INT: assert isinstance(token.value, int), "This could be a bug in the lexer" - program.append(Op(typ=OpType.PUSH_INT, operand=token.value, token=token)) + program.ops.append(Op(typ=OpType.PUSH_INT, operand=token.value, token=token)) ip += 1 elif token.typ == TokenType.STR: assert isinstance(token.value, str), "This could be a bug in the lexer" - program.append(Op(typ=OpType.PUSH_STR, operand=token.value, token=token)); + program.ops.append(Op(typ=OpType.PUSH_STR, operand=token.value, token=token)); ip += 1 elif token.typ == TokenType.CSTR: assert isinstance(token.value, str), "This could be a bug in the lexer" - program.append(Op(typ=OpType.PUSH_CSTR, operand=token.value, token=token)); + program.ops.append(Op(typ=OpType.PUSH_CSTR, operand=token.value, token=token)); ip += 1 elif token.typ == TokenType.CHAR: assert isinstance(token.value, int) - program.append(Op(typ=OpType.PUSH_INT, operand=token.value, token=token)); + program.ops.append(Op(typ=OpType.PUSH_INT, operand=token.value, token=token)); ip += 1 elif token.typ == TokenType.KEYWORD: - assert len(Keyword) == 8, "Exhaustive keywords handling in parse_program_from_tokens()" + assert len(Keyword) == 13, "Exhaustive keywords handling in parse_program_from_tokens()" if token.value == Keyword.IF: - program.append(Op(typ=OpType.IF, token=token)) + program.ops.append(Op(typ=OpType.IF, token=token)) stack.append(ip) ip += 1 - elif token.value == Keyword.ELIF: - program.append(Op(typ=OpType.ELIF, token=token)) - do_ip = stack.pop() - if program[do_ip].typ != OpType.DO: - compiler_error_with_expansion_stack(program[do_ip].token, '`elif` can only close `do`-blocks') - exit(1) - pre_do_ip = program[do_ip].operand - assert isinstance(pre_do_ip, OpAddr) - if program[pre_do_ip].typ == OpType.IF: - program[do_ip].operand = ip + 1 - stack.append(ip) - ip += 1 - elif program[pre_do_ip].typ == OpType.ELIF: - program[pre_do_ip].operand = ip - program[do_ip].operand = ip + 1 - stack.append(ip) - ip += 1 - 
else: - compiler_error_with_expansion_stack(program[pre_do_ip].token, '`elif` can only close `do`-blocks that are preceded by `if` or another `elif`') + elif token.value == Keyword.IFSTAR: + if len(stack) == 0: + compiler_error_with_expansion_stack(token, '`if*` can only come after `else`') + exit(1) + + else_ip = stack[-1] + if program.ops[else_ip].typ != OpType.ELSE: + compiler_error_with_expansion_stack(program.ops[else_ip].token, '`if*` can only come after `else`') exit(1) + program.ops.append(Op(typ=OpType.IFSTAR, token=token)) + stack.append(ip) + ip += 1 elif token.value == Keyword.ELSE: - program.append(Op(typ=OpType.ELSE, token=token)) - do_ip = stack.pop() - if program[do_ip].typ != OpType.DO: - compiler_error_with_expansion_stack(program[do_ip].token, '`else` can only be used in `do` blocks') - exit(1) - pre_do_ip = program[do_ip].operand - assert isinstance(pre_do_ip, OpAddr) - if program[pre_do_ip].typ == OpType.IF: - program[do_ip].operand = ip + 1 + if len(stack) == 0: + compiler_error_with_expansion_stack(token, '`else` can only come after `if` or `if*`') + exit(1) + + if_ip = stack.pop() + if program.ops[if_ip].typ == OpType.IF: + program.ops[if_ip].operand = ip + 1 stack.append(ip) + program.ops.append(Op(typ=OpType.ELSE, token=token)) ip += 1 - elif program[pre_do_ip].typ == OpType.ELIF: - program[pre_do_ip].operand = ip - program[do_ip].operand = ip + 1 + elif program.ops[if_ip].typ == OpType.IFSTAR: + else_before_ifstar_ip = None if len(stack) == 0 else stack.pop() + assert else_before_ifstar_ip is not None and program.ops[else_before_ifstar_ip].typ == OpType.ELSE, "At this point we should've already checked that `if*` comes after `else`. Otherwise this is a compiler bug." 
+ + program.ops[if_ip].operand = ip + 1 + program.ops[else_before_ifstar_ip].operand = ip + stack.append(ip) + program.ops.append(Op(typ=OpType.ELSE, token=token)) ip += 1 else: - compiler_error_with_expansion_stack(program[pre_do_ip].token, '`else` can only close `do`-blocks that are preceded by `if` or `elif`') + compiler_error_with_expansion_stack(program.ops[if_ip].token, f'`else` can only come after `if` or `if*`') exit(1) elif token.value == Keyword.END: - program.append(Op(typ=OpType.END, token=token)) block_ip = stack.pop() - if program[block_ip].typ == OpType.ELSE: - program[block_ip].operand = ip - program[ip].operand = ip + 1 - elif program[block_ip].typ == OpType.DO: - assert program[block_ip].operand is not None - pre_do_ip = program[block_ip].operand - - assert isinstance(pre_do_ip, OpAddr) - if program[pre_do_ip].typ == OpType.WHILE: - program[ip].operand = pre_do_ip - program[block_ip].operand = ip + 1 - elif program[pre_do_ip].typ == OpType.IF: - program[ip].operand = ip + 1 - program[block_ip].operand = ip + 1 - elif program[pre_do_ip].typ == OpType.ELIF: - program[pre_do_ip].operand = ip - program[ip].operand = ip + 1 - program[block_ip].operand = ip + 1 - else: - compiler_error_with_expansion_stack(program[pre_do_ip].token, '`end` can only close `do` blocks that are preceded by `if`, `while` or `elif`') + + if program.ops[block_ip].typ == OpType.ELSE: + program.ops.append(Op(typ=OpType.END, token=token)) + program.ops[block_ip].operand = ip + program.ops[ip].operand = ip + 1 + elif program.ops[block_ip].typ == OpType.DO: + program.ops.append(Op(typ=OpType.END, token=token)) + assert program.ops[block_ip].operand is not None + while_ip = program.ops[block_ip].operand + assert isinstance(while_ip, OpAddr) + + if program.ops[while_ip].typ != OpType.WHILE: + compiler_error_with_expansion_stack(program.ops[while_ip].token, '`end` can only close `do` blocks that are preceded by `while`') exit(1) + + program.ops[ip].operand = while_ip + 
program.ops[block_ip].operand = ip + 1 + elif program.ops[block_ip].typ == OpType.PREP_PROC: + assert current_proc is not None + program.ops[block_ip].operand = current_proc.local_memory_capacity + block_ip = stack.pop() + assert program.ops[block_ip].typ == OpType.SKIP_PROC + program.ops.append(Op(typ=OpType.RET, token=token, operand=current_proc.local_memory_capacity)) + program.ops[block_ip].operand = ip + 1 + current_proc = None + elif program.ops[block_ip].typ == OpType.IFSTAR: + else_before_ifstar_ip = None if len(stack) == 0 else stack.pop() + assert else_before_ifstar_ip is not None and program.ops[else_before_ifstar_ip].typ == OpType.ELSE, "At this point we should've already checked that `if*` comes after `else`. Otherwise this is a compiler bug." + + program.ops.append(Op(typ=OpType.END, token=token)) + program.ops[block_ip].operand = ip + program.ops[else_before_ifstar_ip].operand = ip + program.ops[ip].operand = ip + 1 + elif program.ops[block_ip].typ == OpType.IF: + program.ops.append(Op(typ=OpType.END, token=token)) + program.ops[block_ip].operand = ip + program.ops[ip].operand = ip + 1 else: - compiler_error_with_expansion_stack(program[block_ip].token, '`end` can only close `else`, `do` or `macro` blocks for now') + # NOTE: the closing of `macro` blocks is handled in its own separate place, not here + compiler_error_with_expansion_stack(program.ops[block_ip].token, '`end` can only close `if`, `else`, `do`, `macro` or `proc` blocks for now') exit(1) ip += 1 elif token.value == Keyword.WHILE: - program.append(Op(typ=OpType.WHILE, token=token)) + program.ops.append(Op(typ=OpType.WHILE, token=token)) stack.append(ip) ip += 1 elif token.value == Keyword.DO: - program.append(Op(typ=OpType.DO, token=token)) - pre_do_ip = stack.pop() - assert program[pre_do_ip].typ == OpType.WHILE or program[pre_do_ip].typ == OpType.IF or program[pre_do_ip].typ == OpType.ELIF - program[ip].operand = pre_do_ip + program.ops.append(Op(typ=OpType.DO, token=token)) + if 
len(stack) == 0: + compiler_error_with_expansion_stack(token, "`do` is not preceded by `while`") + exit(1) + while_ip = stack.pop() + if program.ops[while_ip].typ != OpType.WHILE: + compiler_error_with_expansion_stack(token, "`do` is not preceded by `while`") + exit(1) + program.ops[ip].operand = while_ip stack.append(ip) ip += 1 elif token.value == Keyword.INCLUDE: @@ -1699,6 +1912,43 @@ def parse_program_from_tokens(tokens: List[Token], include_paths: List[str], exp if not file_included: compiler_error_with_expansion_stack(token, "file `%s` not found" % token.value) exit(1) + elif token.value == Keyword.CONST: + if len(rtokens) == 0: + compiler_error_with_expansion_stack(token, "expected const name but found nothing") + exit(1) + token = rtokens.pop() + if token.typ != TokenType.WORD: + compiler_error_with_expansion_stack(token, "expected const name to be %s but found %s" % (human(TokenType.WORD), human(token.typ))) + exit(1) + assert isinstance(token.value, str), "This is probably a bug in the lexer" + const_name = token.value + const_loc = token.loc + check_word_redefinition(token, memories, macros, procs, consts) + const_value = eval_const_value(rtokens, macros, consts, iota) + consts[const_name] = Const(value=const_value, loc=const_loc) + elif token.value == Keyword.MEMORY: + + if len(rtokens) == 0: + compiler_error_with_expansion_stack(token, "expected memory name but found nothing") + exit(1) + token = rtokens.pop() + if token.typ != TokenType.WORD: + compiler_error_with_expansion_stack(token, "expected memory name to be %s but found %s" % (human(TokenType.WORD), human(token.typ))) + exit(1) + assert isinstance(token.value, str), "This is probably a bug in the lexer" + memory_name = token.value + memory_loc = token.loc + memory_size = eval_const_value(rtokens, macros, consts, iota) + if current_proc is None: + check_word_redefinition(token, memories, macros, procs, consts) + memories[memory_name] = Memory(offset=program.memory_capacity, loc=memory_loc) + 
program.memory_capacity += memory_size + else: + # TODO: local memory regions can shadow the global ones + # Is that something we actually want? + check_word_redefinition(token, current_proc.local_memories, macros, procs, consts) + current_proc.local_memories[memory_name] = Memory(offset=current_proc.local_memory_capacity, loc=memory_loc) + current_proc.local_memory_capacity += memory_size elif token.value == Keyword.MACRO: if len(rtokens) == 0: compiler_error_with_expansion_stack(token, "expected macro name but found nothing") @@ -1708,13 +1958,7 @@ def parse_program_from_tokens(tokens: List[Token], include_paths: List[str], exp compiler_error_with_expansion_stack(token, "expected macro name to be %s but found %s" % (human(TokenType.WORD), human(token.typ))) exit(1) assert isinstance(token.value, str), "This is probably a bug in the lexer" - if token.value in macros: - compiler_error_with_expansion_stack(token, "redefinition of already existing macro `%s`" % token.value) - compiler_note(macros[token.value].loc, "the first definition is located here") - exit(1) - if token.value in INTRINSIC_BY_NAMES: - compiler_error_with_expansion_stack(token, "redefinition of an intrinsic word `%s`. Please choose a different name for your macro." 
% (token.value, )) - exit(1) + check_word_redefinition(token, memories, macros, procs, consts) macro = Macro(token.loc, []) macros[token.value] = macro nesting_depth = 0 @@ -1725,21 +1969,54 @@ def parse_program_from_tokens(tokens: List[Token], include_paths: List[str], exp else: macro.tokens.append(token) if token.typ == TokenType.KEYWORD: - if token.value in [Keyword.IF, Keyword.WHILE, Keyword.MACRO]: + assert len(Keyword) == 13, "Exhaustive handling of keywords in parsing macro body" + if token.value in [Keyword.IF, Keyword.WHILE, Keyword.MACRO, Keyword.MEMORY, Keyword.PROC, Keyword.CONST]: nesting_depth += 1 elif token.value == Keyword.END: nesting_depth -= 1 if token.typ != TokenType.KEYWORD or token.value != Keyword.END: compiler_error_with_expansion_stack(token, "expected `end` at the end of the macro definition but got `%s`" % (token.value, )) exit(1) + elif token.value == Keyword.PROC: + if current_proc is None: + program.ops.append(Op(typ=OpType.SKIP_PROC, token=token)) + proc_addr = ip + + stack.append(ip) + ip += 1 + + program.ops.append(Op(typ=OpType.PREP_PROC, token=token)) + stack.append(ip) + ip += 1 + + if len(rtokens) == 0: + compiler_error_with_expansion_stack(token, "expected procedure name but found nothing") + exit(1) + token = rtokens.pop() + if token.typ != TokenType.WORD: + compiler_error_with_expansion_stack(token, "expected procedure name to be %s but found %s" % (human(TokenType.WORD), human(token.typ))) + exit(1) + assert isinstance(token.value, str), "This is probably a bug in the lexer" + proc_loc = token.loc + proc_name = token.value + check_word_redefinition(token, memories, macros, procs, consts) + procs[proc_name] = Proc(addr=proc_addr + 1, loc=token.loc, local_memories={}, local_memory_capacity=0) + current_proc = procs[proc_name] + else: + # TODO: forbid constant definition inside of proc + # TODO: forbid macro definition inside of proc + compiler_error_with_expansion_stack(token, "defining procedures inside of procedures is not 
allowed") + compiler_note(current_proc.loc, "the current procedure starts here") + elif token.value in [Keyword.OFFSET, Keyword.RESET]: + compiler_error_with_expansion_stack(token, f"keyword `{token.text}` is supported only in compile time evaluation context") + exit(1) else: assert False, 'unreachable'; else: assert False, 'unreachable' - if len(stack) > 0: - compiler_error_with_expansion_stack(program[stack.pop()].token, 'unclosed block') + compiler_error_with_expansion_stack(program.ops[stack.pop()].token, 'unclosed block') exit(1) return program @@ -1821,9 +2098,11 @@ def lex_lines(file_path: str, lines: List[str]) -> Generator[Token, None, None]: try: yield Token(TokenType.INT, text_of_token, loc, int(text_of_token)) except ValueError: - if text_of_token in KEYWORD_NAMES: - yield Token(TokenType.KEYWORD, text_of_token, loc, KEYWORD_NAMES[text_of_token]) + if text_of_token in KEYWORD_BY_NAMES: + yield Token(TokenType.KEYWORD, text_of_token, loc, KEYWORD_BY_NAMES[text_of_token]) else: + # TODO: `69//` is recognized as a single word + # And not a number plus a comment if text_of_token.startswith("//"): break yield Token(TokenType.WORD, text_of_token, loc, text_of_token) @@ -1847,12 +2126,16 @@ def cmd_call_echoed(cmd: List[str], silent: bool) -> int: print("[CMD] %s" % " ".join(map(shlex.quote, cmd))) return subprocess.call(cmd) +# TODO: with a lot of procs the control flow graphs becomes useless even on small programs +# Maybe we should eliminate unreachable code or something +# TODO: test.py never touches generate_control_flow_graph_as_dot_file +# Which leads to constantly forgetting to update the implementation def generate_control_flow_graph_as_dot_file(program: Program, dot_path: str): with open(dot_path, "w") as f: f.write("digraph Program {\n") - assert len(OpType) == 10, "Exhaustive handling of OpType in generate_control_flow_graph_as_dot_file()" - for ip in range(len(program)): - op = program[ip] + assert len(OpType) == 16, f"Exhaustive handling of 
OpType in generate_control_flow_graph_as_dot_file(), {len(OpType)}" + for ip in range(len(program.ops)): + op = program.ops[ip] if op.typ == OpType.INTRINSIC: assert isinstance(op.operand, Intrinsic) f.write(f" Node_{ip} [label={repr(repr(INTRINSIC_NAMES[op.operand]))}];\n") @@ -1869,9 +2152,19 @@ def generate_control_flow_graph_as_dot_file(program: Program, dot_path: str): assert isinstance(op.operand, int) f.write(f" Node_{ip} [label={op.operand}]\n") f.write(f" Node_{ip} -> Node_{ip + 1};\n") - elif op.typ == OpType.IF: - f.write(f" Node_{ip} [shape=record label=if];\n") + elif op.typ == OpType.PUSH_MEM: + assert isinstance(op.operand, int) + f.write(f" Node_{ip} [label=\"mem({op.operand})\"]\n") + f.write(f" Node_{ip} -> Node_{ip + 1};\n") + elif op.typ == OpType.PUSH_LOCAL_MEM: + assert isinstance(op.operand, int) + f.write(f" Node_{ip} [label=\"local_mem({op.operand})\"]\n") f.write(f" Node_{ip} -> Node_{ip + 1};\n") + elif op.typ in [OpType.IF, OpType.IFSTAR]: + assert isinstance(op.operand, OpAddr), f"{op.operand}" + f.write(f" Node_{ip} [shape=record label=if];\n") + f.write(f" Node_{ip} -> Node_{ip + 1} [label=true];\n") + f.write(f" Node_{ip} -> Node_{op.operand} [label=false style=dashed];\n") elif op.typ == OpType.WHILE: f.write(f" Node_{ip} [shape=record label=while];\n") f.write(f" Node_{ip} -> Node_{ip + 1};\n") @@ -1884,17 +2177,27 @@ def generate_control_flow_graph_as_dot_file(program: Program, dot_path: str): assert isinstance(op.operand, OpAddr) f.write(f" Node_{ip} [shape=record label=else];\n") f.write(f" Node_{ip} -> Node_{op.operand};\n") - elif op.typ == OpType.ELIF: - assert isinstance(op.operand, OpAddr) - f.write(f" Node_{ip} [shape=record label=elif];\n") - f.write(f" Node_{ip} -> Node_{op.operand};\n") elif op.typ == OpType.END: assert isinstance(op.operand, OpAddr) f.write(f" Node_{ip} [shape=record label=end];\n") f.write(f" Node_{ip} -> Node_{op.operand};\n") + elif op.typ == OpType.SKIP_PROC: + assert isinstance(op.operand, OpAddr) 
+ f.write(f" Node_{ip} [shape=record label=skip_proc];\n") + f.write(f" Node_{ip} -> Node_{op.operand};\n") + elif op.typ == OpType.PREP_PROC: + f.write(f" Node_{ip} [shape=record label=prep_proc];\n") + f.write(f" Node_{ip} -> Node_{ip + 1};\n") + elif op.typ == OpType.RET: + f.write(f" Node_{ip} [shape=record label=ret];\n") + elif op.typ == OpType.CALL: + assert isinstance(op.operand, OpAddr) + f.write(f" Node_{ip} [shape=record label=call];\n") + f.write(f" Node_{ip} -> Node_{op.operand};\n") + f.write(f" Node_{ip} -> Node_{ip + 1};\n") else: assert False, f"unimplemented operation {op.typ}" - f.write(f" Node_{len(program)} [label=halt];\n") + f.write(f" Node_{len(program.ops)} [label=halt];\n") f.write("}\n") def usage(compiler_name: str): @@ -1949,9 +2252,6 @@ def usage(compiler_name: str): else: break - if debug: - print("[INFO] Debug mode is enabled") - if len(argv) < 1: usage(compiler_name) print("[ERROR] no subcommand is provided", file=sys.stderr) diff --git a/std/std.porth b/std/std.porth index 5d7a195d..c3ab2dd3 100644 --- a/std/std.porth +++ b/std/std.porth @@ -1,402 +1,600 @@ -macro NULL 0 end +// TODO: NULL should have type ptr +const NULL 0 end -macro nop end +// TODO: using procs for true/false is kinda wasteful +proc true 1 cast(bool) end +proc false 0 cast(bool) end -macro true 0 0 = end -macro false 0 0 != end +const sizeof(u64) 8 end +const sizeof(u32) 4 end +const sizeof(ptr) sizeof(u64) end + +proc @ptr @64 cast(ptr) end /// Standard streams -macro stdin 0 end -macro stdout 1 end -macro stderr 2 end +const stdin 0 end +const stdout 1 end +const stderr 2 end /// Syscalls // Stolen from https://filippo.io/linux-syscall-table/ // Not all of the syscalls here are useful/implemented. I literally just copy-pasted them. // We can clean this up later. 
-macro SYS_read 0 end -macro SYS_write 1 end -macro SYS_open 2 end -macro SYS_close 3 end -macro SYS_stat 4 end -macro SYS_fstat 5 end -macro SYS_lstat 6 end -macro SYS_poll 7 end -macro SYS_lseek 8 end -macro SYS_mmap 9 end -macro SYS_mprotect 10 end -macro SYS_munmap 11 end -macro SYS_brk 12 end -macro SYS_rt_sigaction 13 end -macro SYS_rt_sigprocmask 14 end -macro SYS_rt_sigreturn 15 end -macro SYS_ioctl 16 end -macro SYS_pread64 17 end -macro SYS_pwrite64 18 end -macro SYS_readv 19 end -macro SYS_writev 20 end -macro SYS_access 21 end -macro SYS_pipe 22 end -macro SYS_select 23 end -macro SYS_sched_yield 24 end -macro SYS_mremap 25 end -macro SYS_msync 26 end -macro SYS_mincore 27 end -macro SYS_madvise 28 end -macro SYS_shmget 29 end -macro SYS_shmat 30 end -macro SYS_shmctl 31 end -macro SYS_dup 32 end -macro SYS_dup2 33 end -macro SYS_pause 34 end -macro SYS_nanosleep 35 end -macro SYS_getitimer 36 end -macro SYS_alarm 37 end -macro SYS_setitimer 38 end -macro SYS_getpid 39 end -macro SYS_sendfile 40 end -macro SYS_socket 41 end -macro SYS_connect 42 end -macro SYS_accept 43 end -macro SYS_sendto 44 end -macro SYS_recvfrom 45 end -macro SYS_sendmsg 46 end -macro SYS_recvmsg 47 end -macro SYS_shutdown 48 end -macro SYS_bind 49 end -macro SYS_listen 50 end -macro SYS_getsockname 51 end -macro SYS_getpeername 52 end -macro SYS_socketpair 53 end -macro SYS_setsockopt 54 end -macro SYS_getsockopt 55 end -macro SYS_clone 56 end -macro SYS_fork 57 end -macro SYS_vfork 58 end -macro SYS_execve 59 end -macro SYS_exit 60 end -macro SYS_wait4 61 end -macro SYS_kill 62 end -macro SYS_uname 63 end -macro SYS_semget 64 end -macro SYS_semop 65 end -macro SYS_semctl 66 end -macro SYS_shmdt 67 end -macro SYS_msgget 68 end -macro SYS_msgsnd 69 end -macro SYS_msgrcv 70 end -macro SYS_msgctl 71 end -macro SYS_fcntl 72 end -macro SYS_flock 73 end -macro SYS_fsync 74 end -macro SYS_fdatasync 75 end -macro SYS_truncate 76 end -macro SYS_ftruncate 77 end -macro SYS_getdents 78 end 
-macro SYS_getcwd 79 end -macro SYS_chdir 80 end -macro SYS_fchdir 81 end -macro SYS_rename 82 end -macro SYS_mkdir 83 end -macro SYS_rmdir 84 end -macro SYS_creat 85 end -macro SYS_link 86 end -macro SYS_unlink 87 end -macro SYS_symlink 88 end -macro SYS_readlink 89 end -macro SYS_chmod 90 end -macro SYS_fchmod 91 end -macro SYS_chown 92 end -macro SYS_fchown 93 end -macro SYS_lchown 94 end -macro SYS_umask 95 end -macro SYS_gettimeofday 96 end -macro SYS_getrlimit 97 end -macro SYS_getrusage 98 end -macro SYS_sysinfo 99 end -macro SYS_times 100 end -macro SYS_ptrace 101 end -macro SYS_getuid 102 end -macro SYS_syslog 103 end -macro SYS_getgid 104 end -macro SYS_setuid 105 end -macro SYS_setgid 106 end -macro SYS_geteuid 107 end -macro SYS_getegid 108 end -macro SYS_setpgid 109 end -macro SYS_getppid 110 end -macro SYS_getpgrp 111 end -macro SYS_setsid 112 end -macro SYS_setreuid 113 end -macro SYS_setregid 114 end -macro SYS_getgroups 115 end -macro SYS_setgroups 116 end -macro SYS_setresuid 117 end -macro SYS_getresuid 118 end -macro SYS_setresgid 119 end -macro SYS_getresgid 120 end -macro SYS_getpgid 121 end -macro SYS_setfsuid 122 end -macro SYS_setfsgid 123 end -macro SYS_getsid 124 end -macro SYS_capget 125 end -macro SYS_capset 126 end -macro SYS_rt_sigpending 127 end -macro SYS_rt_sigtimedwait 128 end -macro SYS_rt_sigqueueinfo 129 end -macro SYS_rt_sigsuspend 130 end -macro SYS_sigaltstack 131 end -macro SYS_utime 132 end -macro SYS_mknod 133 end -macro SYS_uselib 134 end -macro SYS_personality 135 end -macro SYS_ustat 136 end -macro SYS_statfs 137 end -macro SYS_fstatfs 138 end -macro SYS_sysfs 139 end -macro SYS_getpriority 140 end -macro SYS_setpriority 141 end -macro SYS_sched_setparam 142 end -macro SYS_sched_getparam 143 end -macro SYS_sched_setscheduler 144 end -macro SYS_sched_getscheduler 145 end -macro SYS_sched_get_priority_max 146 end -macro SYS_sched_get_priority_min 147 end -macro SYS_sched_rr_get_interval 148 end -macro SYS_mlock 149 end 
-macro SYS_munlock 150 end -macro SYS_mlockall 151 end -macro SYS_munlockall 152 end -macro SYS_vhangup 153 end -macro SYS_modify_ldt 154 end -macro SYS_pivot_root 155 end -macro SYS__sysctl 156 end -macro SYS_prctl 157 end -macro SYS_arch_prctl 158 end -macro SYS_adjtimex 159 end -macro SYS_setrlimit 160 end -macro SYS_chroot 161 end -macro SYS_sync 162 end -macro SYS_acct 163 end -macro SYS_settimeofday 164 end -macro SYS_mount 165 end -macro SYS_umount2 166 end -macro SYS_swapon 167 end -macro SYS_swapoff 168 end -macro SYS_reboot 169 end -macro SYS_sethostname 170 end -macro SYS_setdomainname 171 end -macro SYS_iopl 172 end -macro SYS_ioperm 173 end -macro SYS_create_module 174 end -macro SYS_init_module 175 end -macro SYS_delete_module 176 end -macro SYS_get_kernel_syms 177 end -macro SYS_query_module 178 end -macro SYS_quotactl 179 end -macro SYS_nfsservctl 180 end -macro SYS_getpmsg 181 end -macro SYS_putpmsg 182 end -macro SYS_afs_syscall 183 end -macro SYS_tuxcall 184 end -macro SYS_security 185 end -macro SYS_gettid 186 end -macro SYS_readahead 187 end -macro SYS_setxattr 188 end -macro SYS_lsetxattr 189 end -macro SYS_fsetxattr 190 end -macro SYS_getxattr 191 end -macro SYS_lgetxattr 192 end -macro SYS_fgetxattr 193 end -macro SYS_listxattr 194 end -macro SYS_llistxattr 195 end -macro SYS_flistxattr 196 end -macro SYS_removexattr 197 end -macro SYS_lremovexattr 198 end -macro SYS_fremovexattr 199 end -macro SYS_tkill 200 end -macro SYS_time 201 end -macro SYS_futex 202 end -macro SYS_sched_setaffinity 203 end -macro SYS_sched_getaffinity 204 end -macro SYS_set_thread_area 205 end -macro SYS_io_setup 206 end -macro SYS_io_destroy 207 end -macro SYS_io_getevents 208 end -macro SYS_io_submit 209 end -macro SYS_io_cancel 210 end -macro SYS_get_thread_area 211 end -macro SYS_lookup_dcookie 212 end -macro SYS_epoll_create 213 end -macro SYS_epoll_ctl_old 214 end -macro SYS_epoll_wait_old 215 end -macro SYS_remap_file_pages 216 end -macro SYS_getdents64 217 end 
-macro SYS_set_tid_address 218 end -macro SYS_restart_syscall 219 end -macro SYS_semtimedop 220 end -macro SYS_fadvise64 221 end -macro SYS_timer_create 222 end -macro SYS_timer_settime 223 end -macro SYS_timer_gettime 224 end -macro SYS_timer_getoverrun 225 end -macro SYS_timer_delete 226 end -macro SYS_clock_settime 227 end -macro SYS_clock_gettime 228 end -macro SYS_clock_getres 229 end -macro SYS_clock_nanosleep 230 end -macro SYS_exit_group 231 end -macro SYS_epoll_wait 232 end -macro SYS_epoll_ctl 233 end -macro SYS_tgkill 234 end -macro SYS_utimes 235 end -macro SYS_vserver 236 end -macro SYS_mbind 237 end -macro SYS_set_mempolicy 238 end -macro SYS_get_mempolicy 239 end -macro SYS_mq_open 240 end -macro SYS_mq_unlink 241 end -macro SYS_mq_timedsend 242 end -macro SYS_mq_timedreceive 243 end -macro SYS_mq_notify 244 end -macro SYS_mq_getsetattr 245 end -macro SYS_kexec_load 246 end -macro SYS_waitid 247 end -macro SYS_add_key 248 end -macro SYS_request_key 249 end -macro SYS_keyctl 250 end -macro SYS_ioprio_set 251 end -macro SYS_ioprio_get 252 end -macro SYS_inotify_init 253 end -macro SYS_inotify_add_watch 254 end -macro SYS_inotify_rm_watch 255 end -macro SYS_migrate_pages 256 end -macro SYS_openat 257 end -macro SYS_mkdirat 258 end -macro SYS_mknodat 259 end -macro SYS_fchownat 260 end -macro SYS_futimesat 261 end -macro SYS_newfstatat 262 end -macro SYS_unlinkat 263 end -macro SYS_renameat 264 end -macro SYS_linkat 265 end -macro SYS_symlinkat 266 end -macro SYS_readlinkat 267 end -macro SYS_fchmodat 268 end -macro SYS_faccessat 269 end -macro SYS_pselect6 270 end -macro SYS_ppoll 271 end -macro SYS_unshare 272 end -macro SYS_set_robust_list 273 end -macro SYS_get_robust_list 274 end -macro SYS_splice 275 end -macro SYS_tee 276 end -macro SYS_sync_file_range 277 end -macro SYS_vmsplice 278 end -macro SYS_move_pages 279 end -macro SYS_utimensat 280 end -macro SYS_epoll_pwait 281 end -macro SYS_signalfd 282 end -macro SYS_timerfd_create 283 end -macro 
SYS_eventfd 284 end -macro SYS_fallocate 285 end -macro SYS_timerfd_settime 286 end -macro SYS_timerfd_gettime 287 end -macro SYS_accept4 288 end -macro SYS_signalfd4 289 end -macro SYS_eventfd2 290 end -macro SYS_epoll_create1 291 end -macro SYS_dup3 292 end -macro SYS_pipe2 293 end -macro SYS_inotify_init1 294 end -macro SYS_preadv 295 end -macro SYS_pwritev 296 end -macro SYS_rt_tgsigqueueinfo 297 end -macro SYS_perf_event_open 298 end -macro SYS_recvmmsg 299 end -macro SYS_fanotify_init 300 end -macro SYS_fanotify_mark 301 end -macro SYS_prlimit64 302 end -macro SYS_name_to_handle_at 303 end -macro SYS_open_by_handle_at 304 end -macro SYS_clock_adjtime 305 end -macro SYS_syncfs 306 end -macro SYS_sendmmsg 307 end -macro SYS_setns 308 end -macro SYS_getcpu 309 end -macro SYS_process_vm_readv 310 end -macro SYS_process_vm_writev 311 end -macro SYS_kcmp 312 end -macro SYS_finit_module 313 end - -macro AT_FDCWD -100 end - -macro O_RDONLY 0 end - -macro CLOCK_MONOTONIC 1 end -macro TIMER_ABSTIME 1 end +const SYS_read 0 end +const SYS_write 1 end +const SYS_open 2 end +const SYS_close 3 end +const SYS_stat 4 end +const SYS_fstat 5 end +const SYS_lstat 6 end +const SYS_poll 7 end +const SYS_lseek 8 end +const SYS_mmap 9 end +const SYS_mprotect 10 end +const SYS_munmap 11 end +const SYS_brk 12 end +const SYS_rt_sigaction 13 end +const SYS_rt_sigprocmask 14 end +const SYS_rt_sigreturn 15 end +const SYS_ioctl 16 end +const SYS_pread64 17 end +const SYS_pwrite64 18 end +const SYS_readv 19 end +const SYS_writev 20 end +const SYS_access 21 end +const SYS_pipe 22 end +const SYS_select 23 end +const SYS_sched_yield 24 end +const SYS_mremap 25 end +const SYS_msync 26 end +const SYS_mincore 27 end +const SYS_madvise 28 end +const SYS_shmget 29 end +const SYS_shmat 30 end +const SYS_shmctl 31 end +const SYS_dup 32 end +const SYS_dup2 33 end +const SYS_pause 34 end +const SYS_nanosleep 35 end +const SYS_getitimer 36 end +const SYS_alarm 37 end +const SYS_setitimer 38 end +const 
SYS_getpid 39 end +const SYS_sendfile 40 end +const SYS_socket 41 end +const SYS_connect 42 end +const SYS_accept 43 end +const SYS_sendto 44 end +const SYS_recvfrom 45 end +const SYS_sendmsg 46 end +const SYS_recvmsg 47 end +const SYS_shutdown 48 end +const SYS_bind 49 end +const SYS_listen 50 end +const SYS_getsockname 51 end +const SYS_getpeername 52 end +const SYS_socketpair 53 end +const SYS_setsockopt 54 end +const SYS_getsockopt 55 end +const SYS_clone 56 end +const SYS_fork 57 end +const SYS_vfork 58 end +const SYS_execve 59 end +const SYS_exit 60 end +const SYS_wait4 61 end +const SYS_kill 62 end +const SYS_uname 63 end +const SYS_semget 64 end +const SYS_semop 65 end +const SYS_semctl 66 end +const SYS_shmdt 67 end +const SYS_msgget 68 end +const SYS_msgsnd 69 end +const SYS_msgrcv 70 end +const SYS_msgctl 71 end +const SYS_fcntl 72 end +const SYS_flock 73 end +const SYS_fsync 74 end +const SYS_fdatasync 75 end +const SYS_truncate 76 end +const SYS_ftruncate 77 end +const SYS_getdents 78 end +const SYS_getcwd 79 end +const SYS_chdir 80 end +const SYS_fchdir 81 end +const SYS_rename 82 end +const SYS_mkdir 83 end +const SYS_rmdir 84 end +const SYS_creat 85 end +const SYS_link 86 end +const SYS_unlink 87 end +const SYS_symlink 88 end +const SYS_readlink 89 end +const SYS_chmod 90 end +const SYS_fchmod 91 end +const SYS_chown 92 end +const SYS_fchown 93 end +const SYS_lchown 94 end +const SYS_umask 95 end +const SYS_gettimeofday 96 end +const SYS_getrlimit 97 end +const SYS_getrusage 98 end +const SYS_sysinfo 99 end +const SYS_times 100 end +const SYS_ptrace 101 end +const SYS_getuid 102 end +const SYS_syslog 103 end +const SYS_getgid 104 end +const SYS_setuid 105 end +const SYS_setgid 106 end +const SYS_geteuid 107 end +const SYS_getegid 108 end +const SYS_setpgid 109 end +const SYS_getppid 110 end +const SYS_getpgrp 111 end +const SYS_setsid 112 end +const SYS_setreuid 113 end +const SYS_setregid 114 end +const SYS_getgroups 115 end +const SYS_setgroups 
116 end +const SYS_setresuid 117 end +const SYS_getresuid 118 end +const SYS_setresgid 119 end +const SYS_getresgid 120 end +const SYS_getpgid 121 end +const SYS_setfsuid 122 end +const SYS_setfsgid 123 end +const SYS_getsid 124 end +const SYS_capget 125 end +const SYS_capset 126 end +const SYS_rt_sigpending 127 end +const SYS_rt_sigtimedwait 128 end +const SYS_rt_sigqueueinfo 129 end +const SYS_rt_sigsuspend 130 end +const SYS_sigaltstack 131 end +const SYS_utime 132 end +const SYS_mknod 133 end +const SYS_uselib 134 end +const SYS_personality 135 end +const SYS_ustat 136 end +const SYS_statfs 137 end +const SYS_fstatfs 138 end +const SYS_sysfs 139 end +const SYS_getpriority 140 end +const SYS_setpriority 141 end +const SYS_sched_setparam 142 end +const SYS_sched_getparam 143 end +const SYS_sched_setscheduler 144 end +const SYS_sched_getscheduler 145 end +const SYS_sched_get_priority_max 146 end +const SYS_sched_get_priority_min 147 end +const SYS_sched_rr_get_interval 148 end +const SYS_mlock 149 end +const SYS_munlock 150 end +const SYS_mlockall 151 end +const SYS_munlockall 152 end +const SYS_vhangup 153 end +const SYS_modify_ldt 154 end +const SYS_pivot_root 155 end +const SYS__sysctl 156 end +const SYS_prctl 157 end +const SYS_arch_prctl 158 end +const SYS_adjtimex 159 end +const SYS_setrlimit 160 end +const SYS_chroot 161 end +const SYS_sync 162 end +const SYS_acct 163 end +const SYS_settimeofday 164 end +const SYS_mount 165 end +const SYS_umount2 166 end +const SYS_swapon 167 end +const SYS_swapoff 168 end +const SYS_reboot 169 end +const SYS_sethostname 170 end +const SYS_setdomainname 171 end +const SYS_iopl 172 end +const SYS_ioperm 173 end +const SYS_create_module 174 end +const SYS_init_module 175 end +const SYS_delete_module 176 end +const SYS_get_kernel_syms 177 end +const SYS_query_module 178 end +const SYS_quotactl 179 end +const SYS_nfsservctl 180 end +const SYS_getpmsg 181 end +const SYS_putpmsg 182 end +const SYS_afs_syscall 183 end +const 
SYS_tuxcall 184 end +const SYS_security 185 end +const SYS_gettid 186 end +const SYS_readahead 187 end +const SYS_setxattr 188 end +const SYS_lsetxattr 189 end +const SYS_fsetxattr 190 end +const SYS_getxattr 191 end +const SYS_lgetxattr 192 end +const SYS_fgetxattr 193 end +const SYS_listxattr 194 end +const SYS_llistxattr 195 end +const SYS_flistxattr 196 end +const SYS_removexattr 197 end +const SYS_lremovexattr 198 end +const SYS_fremovexattr 199 end +const SYS_tkill 200 end +const SYS_time 201 end +const SYS_futex 202 end +const SYS_sched_setaffinity 203 end +const SYS_sched_getaffinity 204 end +const SYS_set_thread_area 205 end +const SYS_io_setup 206 end +const SYS_io_destroy 207 end +const SYS_io_getevents 208 end +const SYS_io_submit 209 end +const SYS_io_cancel 210 end +const SYS_get_thread_area 211 end +const SYS_lookup_dcookie 212 end +const SYS_epoll_create 213 end +const SYS_epoll_ctl_old 214 end +const SYS_epoll_wait_old 215 end +const SYS_remap_file_pages 216 end +const SYS_getdents64 217 end +const SYS_set_tid_address 218 end +const SYS_restart_syscall 219 end +const SYS_semtimedop 220 end +const SYS_fadvise64 221 end +const SYS_timer_create 222 end +const SYS_timer_settime 223 end +const SYS_timer_gettime 224 end +const SYS_timer_getoverrun 225 end +const SYS_timer_delete 226 end +const SYS_clock_settime 227 end +const SYS_clock_gettime 228 end +const SYS_clock_getres 229 end +const SYS_clock_nanosleep 230 end +const SYS_exit_group 231 end +const SYS_epoll_wait 232 end +const SYS_epoll_ctl 233 end +const SYS_tgkill 234 end +const SYS_utimes 235 end +const SYS_vserver 236 end +const SYS_mbind 237 end +const SYS_set_mempolicy 238 end +const SYS_get_mempolicy 239 end +const SYS_mq_open 240 end +const SYS_mq_unlink 241 end +const SYS_mq_timedsend 242 end +const SYS_mq_timedreceive 243 end +const SYS_mq_notify 244 end +const SYS_mq_getsetattr 245 end +const SYS_kexec_load 246 end +const SYS_waitid 247 end +const SYS_add_key 248 end +const 
SYS_request_key 249 end +const SYS_keyctl 250 end +const SYS_ioprio_set 251 end +const SYS_ioprio_get 252 end +const SYS_inotify_init 253 end +const SYS_inotify_add_watch 254 end +const SYS_inotify_rm_watch 255 end +const SYS_migrate_pages 256 end +const SYS_openat 257 end +const SYS_mkdirat 258 end +const SYS_mknodat 259 end +const SYS_fchownat 260 end +const SYS_futimesat 261 end +const SYS_newfstatat 262 end +const SYS_unlinkat 263 end +const SYS_renameat 264 end +const SYS_linkat 265 end +const SYS_symlinkat 266 end +const SYS_readlinkat 267 end +const SYS_fchmodat 268 end +const SYS_faccessat 269 end +const SYS_pselect6 270 end +const SYS_ppoll 271 end +const SYS_unshare 272 end +const SYS_set_robust_list 273 end +const SYS_get_robust_list 274 end +const SYS_splice 275 end +const SYS_tee 276 end +const SYS_sync_file_range 277 end +const SYS_vmsplice 278 end +const SYS_move_pages 279 end +const SYS_utimensat 280 end +const SYS_epoll_pwait 281 end +const SYS_signalfd 282 end +const SYS_timerfd_create 283 end +const SYS_eventfd 284 end +const SYS_fallocate 285 end +const SYS_timerfd_settime 286 end +const SYS_timerfd_gettime 287 end +const SYS_accept4 288 end +const SYS_signalfd4 289 end +const SYS_eventfd2 290 end +const SYS_epoll_create1 291 end +const SYS_dup3 292 end +const SYS_pipe2 293 end +const SYS_inotify_init1 294 end +const SYS_preadv 295 end +const SYS_pwritev 296 end +const SYS_rt_tgsigqueueinfo 297 end +const SYS_perf_event_open 298 end +const SYS_recvmmsg 299 end +const SYS_fanotify_init 300 end +const SYS_fanotify_mark 301 end +const SYS_prlimit64 302 end +const SYS_name_to_handle_at 303 end +const SYS_open_by_handle_at 304 end +const SYS_clock_adjtime 305 end +const SYS_syncfs 306 end +const SYS_sendmmsg 307 end +const SYS_setns 308 end +const SYS_getcpu 309 end +const SYS_process_vm_readv 310 end +const SYS_process_vm_writev 311 end +const SYS_kcmp 312 end +const SYS_finit_module 313 end + +const AT_FDCWD -100 end + +const O_RDONLY 0 end +const 
O_WRONLY 1 end +const O_RDWR 2 end +const O_CREAT 64 end + +const CLOCK_MONOTONIC 1 end +const TIMER_ABSTIME 1 end + +const MAP_PRIVATE 2 end +const PROT_READ 1 end + +const sizeof(timespec) 16 end + +const sizeof(stat) 144 end +proc stat.st_dev 0 + end +proc stat.st_ino 8 + end +proc stat.st_mode 24 + end +proc stat.st_nlink 16 + end +proc stat.st_uid 28 + end +proc stat.st_gid 32 + end +proc stat.st_rdev 40 + end +proc stat.st_size 48 + end +proc @stat.st_size stat.st_size @64 end +proc stat.st_blksize 56 + end +proc stat.st_blocks 64 + end +proc stat.st_atim 72 + end +proc stat.st_mtim 88 + end +proc stat.st_ctim 104 + end +const sizeof(stat.st_dev) sizeof(u64) end +const sizeof(stat.st_ino) sizeof(u64) end +const sizeof(stat.st_mode) sizeof(u32) end +const sizeof(stat.st_nlink) sizeof(u64) end +const sizeof(stat.st_uid) sizeof(u32) end +const sizeof(stat.st_gid) sizeof(u32) end +const sizeof(stat.st_rdev) sizeof(u64) end +const sizeof(stat.st_size) sizeof(u64) end +const sizeof(stat.st_blksize) sizeof(u64) end +const sizeof(stat.st_blocks) sizeof(u64) end +const sizeof(stat.st_atim) sizeof(timespec) end +const sizeof(stat.st_mtim) sizeof(timespec) end +const sizeof(stat.st_ctim) sizeof(timespec) end // Wrappers for common syscalls -macro write SYS_write syscall3 end -macro read SYS_read syscall3 end -macro openat SYS_openat syscall3 end -macro close SYS_close syscall1 end -macro exit SYS_exit syscall1 drop end -macro clock_nanosleep SYS_clock_nanosleep syscall4 end - -macro 2dup over over end -macro 2drop drop drop end - -macro / divmod drop end -macro % divmod swap drop end -macro mod % end -macro div / end - -macro nth_argv - 8 * argv + ,64 cast(ptr) -end +proc write SYS_write syscall3 end +proc read SYS_read syscall3 end +proc openat SYS_openat syscall4 end +proc fstat SYS_fstat syscall2 end +proc close SYS_close syscall1 end +proc exit SYS_exit syscall1 drop end +proc mmap SYS_mmap syscall6 end +proc clock_nanosleep SYS_clock_nanosleep syscall4 end +proc 
fork SYS_fork syscall0 end +proc getpid SYS_getpid syscall0 end +proc execve SYS_execve syscall3 end +proc wait4 SYS_wait4 syscall4 end -macro inc64 - dup ,64 1 + .64 -end +proc 2dup over over end +proc 2drop drop drop end -macro dec64 - dup ,64 1 - .64 +proc / divmod drop end +proc % divmod swap drop end +proc mod % end +proc div / end + +proc nth_argv + 8 * argv + @64 cast(ptr) end -macro cstrlen +proc inc64 dup @64 1 + swap !64 end +proc dec64 dup @64 1 - swap !64 end +proc inc32 dup @32 1 + swap !32 end +proc dec32 dup @32 1 - swap !32 end + +proc cstrlen dup - while dup , 0 != do 1 + end + while dup @8 0 != do 1 + end swap - end -macro cstreq +proc cstreq while - if over , 0 != over , 0 != and do - over , over , = + over @8 0 != over @8 0 != and if + over @8 over @8 = else false end do 1 + swap 1 + end - , 0 = - swap , 0 = + @8 0 = + swap @8 0 = + and +end + +proc cstr-to-str dup cstrlen swap end + +// TODO: fputs should crash the app if write fails +proc fputs write drop end +proc puts stdout fputs end +proc eputs stderr fputs end + +const offsetof(Str.count) sizeof(u64) offset end +const offsetof(Str.data) sizeof(ptr) offset end +const sizeof(Str) reset end + +proc Str.count offsetof(Str.count) + end +proc Str.data offsetof(Str.data) + end +proc @Str.count Str.count @64 end +proc @Str.data Str.data @64 cast(ptr) end +proc !Str.count Str.count !64 end +proc !Str.data Str.data !64 end + +proc @Str + dup @Str.count + swap @Str.data +end + +proc !Str // count data dst - + dup rot swap + !Str.data + !Str.count +end + +proc str-chop-one-left + dup Str.count dec64 + Str.data inc64 +end + +proc str-trim-left // input -- + while + dup @Str.count 0 > if + dup @Str.data @8 ' ' = + else false end + do + dup str-chop-one-left + end + drop +end + +proc str-chop-by-delim // delim line input + memory delim sizeof(u64) end + rot delim !64 + 2dup @Str.data swap !Str.data + over 0 swap !Str.count + while + dup @Str.count 0 > if + dup @Str.data @8 delim @64 != + else false end 
+ do + dup str-chop-one-left + swap dup Str.count inc64 swap + end + dup @Str.count 0 > if + dup str-chop-one-left + end + 2drop +end + +proc streq // n1 s1 n2 s2 + memory a sizeof(Str) end + a !Str + memory b sizeof(Str) end + b !Str + + a @Str.count b @Str.count = if + 0 while + dup a @Str.count < if + dup a @Str.data + @8 + over b @Str.data + @8 + = + else false end + do 1 + end + a @Str.count >= + else false end +end + +proc isdigit + dup '0' >= + swap '9' <= and end -macro cstr-to-pstr - dup cstrlen swap +proc try-parse-int // int ptr -- int err + memory input sizeof(Str) end + input !Str + + 0 while + input @Str.count 0 > if + input @Str.data @8 isdigit + else false end + do + 10 * input @Str.data @8 '0' - + + input str-chop-one-left + end + + input @Str.count 0 <= end -macro fputs - write drop +// Custom logical not, since the intrinsic `not` is the bitwise one and does not allow +// to properly invert a boolean. +proc lnot + cast(int) 1 - cast(bool) end -macro puts - stdout fputs +const PUTU_BUFFER_CAP 32 end +// TODO: fputu should fail if write call fails +// TODO: fputu does not print negative numbers +proc fputu // value fd -- + memory buffer PUTU_BUFFER_CAP end + memory fd sizeof(u64) end + fd !64 + + dup 0 = if + "0" fd @64 fputs + else + buffer PUTU_BUFFER_CAP + + while over 0 > do + 1 - dup rot + 10 divmod + rot swap '0' + swap !8 swap + end + + dup + buffer PUTU_BUFFER_CAP + swap - swap fd @64 fputs + end + drop end -macro eputs - stderr fputs +proc putu stdout fputu end +proc eputu stderr fputu end + +// TODO: there is no fputi function that would print signed integers + +proc memcpy // size src dst -- + memory src sizeof(ptr) end + memory dst sizeof(ptr) end + dst !64 + src !64 + while dup 0 > do + src @64 cast(ptr) @8 + dst @64 cast(ptr) !8 + src inc64 + dst inc64 + 1 - + end drop end + +// Deprecated Words ////////// +proc .64 swap !64 end +proc ,64 @64 end +proc ! !8 end +proc @ @8 end +proc . swap ! 
end +proc , @ end +proc cstr-to-pstr cstr-to-str end +memory mem 640000 end +proc nop end +proc fputd fputu end +proc eputd eputu end +proc putd putu end +////////////////////////////// diff --git a/test.py b/test.py index f8154690..d0608902 100755 --- a/test.py +++ b/test.py @@ -186,6 +186,9 @@ def usage(exe_name: str): print(" file or folder with *.porth files. The default [TARGET] is") print(" './tests/'") print() + print(" full") + print(" Test and type check everything. (Should be run on CI)") + print() print(" help") print(" Print this message to stdout and exit with 0 code.") @@ -236,7 +239,15 @@ def usage(exe_name: str): elif path.isfile(target): run_test_for_file(target) else: + # TODO: `./test.py run non-existing-file` fails with 'unreachable' assert False, 'unreachable' + elif subcommand == 'full': + cmd_run_echoed(['mypy', './porth.py', './test.py']) + run_test_for_folder('./tests/') + run_test_for_folder('./examples/') + run_test_for_folder('./euler/') + # TODO: do run_test_for_file on porth.porth with ./tests/intrinsics.porth + cmd_run_echoed([sys.executable, './porth.py', 'com', './porth.porth']) elif subcommand == 'help': usage(exe_name) else: diff --git a/tests/.gitignore b/tests/.gitignore index 6b321c22..ff38ff72 100644 --- a/tests/.gitignore +++ b/tests/.gitignore @@ -8,8 +8,20 @@ macros dead-recursive-macro argv if-else -else-less-if +if-else-less while here memory-forth-style -cstr \ No newline at end of file +cstr +empty +while-procs +intrinsics +if-orelse +consts +2swap +try-parse-int +streq +memcpy +fputd +offset-reset +str-chop-by-delim diff --git a/tests/2swap.porth b/tests/2swap.porth new file mode 100644 index 00000000..4c4d259f --- /dev/null +++ b/tests/2swap.porth @@ -0,0 +1,16 @@ +include "std.porth" + +proc 2swap + memory a sizeof(u64) end + memory b sizeof(u64) end + memory c sizeof(u64) end + memory d sizeof(u64) end + d !64 c !64 b !64 a !64 + c @64 d @64 a @64 b @64 +end + +1 2 3 4 +print print print print 
+"------------------------------\n" puts +1 2 3 4 2swap +print print print print diff --git a/tests/2swap.txt b/tests/2swap.txt new file mode 100644 index 00000000..7c64e144 --- /dev/null +++ b/tests/2swap.txt @@ -0,0 +1,17 @@ +:i argc 0 +:b stdin 0 + +:i returncode 0 +:b stdout 47 +4 +3 +2 +1 +------------------------------ +2 +1 +4 +3 + +:b stderr 0 + diff --git a/tests/consts.porth b/tests/consts.porth new file mode 100644 index 00000000..be21d6b0 --- /dev/null +++ b/tests/consts.porth @@ -0,0 +1,7 @@ +const N 69 end +const M 420 end +const K N M + end + +N print +M print +K print diff --git a/tests/memory-forth-style.txt b/tests/consts.txt similarity index 54% rename from tests/memory-forth-style.txt rename to tests/consts.txt index a4a9a422..5828c0f1 100644 --- a/tests/memory-forth-style.txt +++ b/tests/consts.txt @@ -2,12 +2,11 @@ :b stdin 0 :i returncode 0 -:b stdout 33 -abc +:b stdout 10 +0 +1 +2 +3 4 -bcd -4 -18446744073709551615 - :b stderr 0 diff --git a/tests/else-less-if-fail.porth b/tests/else-less-if-fail.porth deleted file mode 100644 index 807121d1..00000000 --- a/tests/else-less-if-fail.porth +++ /dev/null @@ -1 +0,0 @@ -if 34 35 + 69 = do 69 end print diff --git a/tests/else-less-if-fail.txt b/tests/else-less-if-fail.txt deleted file mode 100644 index 6f2fde4e..00000000 --- a/tests/else-less-if-fail.txt +++ /dev/null @@ -1,11 +0,0 @@ -:i argc 0 -:b stdin 0 - -:i returncode 1 -:b stdout 0 - -:b stderr 274 -./tests/else-less-if-fail.porth:1:23: ERROR: else-less if block is not allowed to alter the types of the arguments on the data stack -./tests/else-less-if-fail.porth:1:23: NOTE: Expected types: [] -./tests/else-less-if-fail.porth:1:23: NOTE: Actual types: [] - diff --git a/tests/else-less-if.porth b/tests/else-less-if.porth deleted file mode 100644 index b2747ede..00000000 --- a/tests/else-less-if.porth +++ /dev/null @@ -1,2 +0,0 @@ -if 34 35 + 70 = do 69 print end -if 34 35 + 69 = do 69 print end diff --git a/tests/else-not-inside-if-error.txt 
b/tests/else-not-inside-if-error.txt index 38043676..0dacc352 100644 --- a/tests/else-not-inside-if-error.txt +++ b/tests/else-not-inside-if-error.txt @@ -4,6 +4,6 @@ :i returncode 1 :b stdout 0 -:b stderr 90 -./tests/else-not-inside-if-error.porth:1:1: ERROR: `else` can only be used in `do` blocks +:b stderr 92 +./tests/else-not-inside-if-error.porth:2:1: ERROR: `else` can only come after `if` or `if*` diff --git a/tests/empty.porth b/tests/empty.porth new file mode 100644 index 00000000..e69de29b diff --git a/tests/empty.txt b/tests/empty.txt new file mode 100644 index 00000000..5cb432cf --- /dev/null +++ b/tests/empty.txt @@ -0,0 +1,8 @@ +:i argc 0 +:b stdin 0 + +:i returncode 0 +:b stdout 0 + +:b stderr 0 + diff --git a/tests/end-cant-close-error.txt b/tests/end-cant-close-error.txt index f5255f43..8ac71550 100644 --- a/tests/end-cant-close-error.txt +++ b/tests/end-cant-close-error.txt @@ -4,6 +4,6 @@ :i returncode 1 :b stdout 0 -:b stderr 107 -./tests/end-cant-close-error.porth:1:1: ERROR: `end` can only close `else`, `do` or `macro` blocks for now +:b stderr 121 +./tests/end-cant-close-error.porth:1:1: ERROR: `end` can only close `if`, `else`, `do`, `macro` or `proc` blocks for now diff --git a/tests/fputd.porth b/tests/fputd.porth new file mode 100644 index 00000000..1dde3563 --- /dev/null +++ b/tests/fputd.porth @@ -0,0 +1,4 @@ +include "std.porth" + +"stderr: " stderr fputs 69 stderr fputu "\n" stderr fputs +"stdout: " stdout fputs 420 stdout fputu "\n" stdout fputs diff --git a/tests/fputd.txt b/tests/fputd.txt new file mode 100644 index 00000000..bcf2d763 --- /dev/null +++ b/tests/fputd.txt @@ -0,0 +1,10 @@ +:i argc 0 +:b stdin 0 + +:i returncode 0 +:b stdout 12 +stdout: 420 + +:b stderr 11 +stderr: 69 + diff --git a/tests/if-else-fail.porth b/tests/if-else-fail.porth index 8f012ca8..c2dda95c 100644 --- a/tests/if-else-fail.porth +++ b/tests/if-else-fail.porth @@ -1 +1 @@ -if 34 35 + 69 = do 69 print else 420 end +34 35 + 69 = if 69 print else 420 end 
diff --git a/tests/if-else-fail.txt b/tests/if-else-fail.txt index 4647f4ee..515ec535 100644 --- a/tests/if-else-fail.txt +++ b/tests/if-else-fail.txt @@ -4,8 +4,6 @@ :i returncode 1 :b stdout 0 -:b stderr 263 -./tests/if-else-fail.porth:1:38: ERROR: all branches of the if-block must produce the same types of the arguments on the data stack -./tests/if-else-fail.porth:1:38: NOTE: Expected types: [] -./tests/if-else-fail.porth:1:38: NOTE: Actual types: [] +:b stderr 94 +./tests/if-else-fail.porth:1:31: ERROR: unhandled data on the data stack: [] diff --git a/tests/if-else-less-fail.porth b/tests/if-else-less-fail.porth new file mode 100644 index 00000000..68b5e8c9 --- /dev/null +++ b/tests/if-else-less-fail.porth @@ -0,0 +1 @@ +34 35 + 69 = if 69 end print diff --git a/tests/if-else-less-fail.txt b/tests/if-else-less-fail.txt new file mode 100644 index 00000000..c4266ee4 --- /dev/null +++ b/tests/if-else-less-fail.txt @@ -0,0 +1,9 @@ +:i argc 0 +:b stdin 0 + +:i returncode 1 +:b stdout 0 + +:b stderr 92 +./tests/if-else-less-fail.porth:1:24: ERROR: not enough arguments for the `print` intrinsic + diff --git a/tests/if-else-less.porth b/tests/if-else-less.porth new file mode 100644 index 00000000..12ecd4b7 --- /dev/null +++ b/tests/if-else-less.porth @@ -0,0 +1,2 @@ +34 35 + 70 = if 69 print end +34 35 + 69 = if 69 print end diff --git a/tests/else-less-if.txt b/tests/if-else-less.txt similarity index 100% rename from tests/else-less-if.txt rename to tests/if-else-less.txt diff --git a/tests/if-else.porth b/tests/if-else.porth index b0ca95b6..49fa639c 100644 --- a/tests/if-else.porth +++ b/tests/if-else.porth @@ -1,2 +1,2 @@ -if 34 35 + 70 = do 69 else 420 end print -if 34 35 + 69 = do 69 else 420 end print +34 35 + 70 = if 69 else 420 end print +34 35 + 69 = if 69 else 420 end print diff --git a/tests/if-orelse.porth b/tests/if-orelse.porth new file mode 100644 index 00000000..44ea2237 --- /dev/null +++ b/tests/if-orelse.porth @@ -0,0 +1,7 @@ +include "std.porth" + 
+true if + 69 print +else false if* + 420 print +end diff --git a/tests/if-orelse.txt b/tests/if-orelse.txt new file mode 100644 index 00000000..e0327acb --- /dev/null +++ b/tests/if-orelse.txt @@ -0,0 +1,9 @@ +:i argc 0 +:b stdin 0 + +:i returncode 0 +:b stdout 3 +69 + +:b stderr 0 + diff --git a/tests/intrinsic-redefinition-error.txt b/tests/intrinsic-redefinition-error.txt index a7392cca..88323c8d 100644 --- a/tests/intrinsic-redefinition-error.txt +++ b/tests/intrinsic-redefinition-error.txt @@ -4,6 +4,6 @@ :i returncode 1 :b stdout 0 -:b stderr 141 -./tests/intrinsic-redefinition-error.porth:1:7: ERROR: redefinition of an intrinsic word `+`. Please choose a different name for your macro. +:b stderr 93 +./tests/intrinsic-redefinition-error.porth:1:7: ERROR: redefinition of an intrinsic word `+` diff --git a/tests/intrinsics.porth b/tests/intrinsics.porth new file mode 100644 index 00000000..43ec5752 --- /dev/null +++ b/tests/intrinsics.porth @@ -0,0 +1,28 @@ +34 35 + print +35 34 - print +105 4 * print +269 100 divmod print print +1024 1 shr print +1 5 shl print +1 2 or 4 or print +5 2 and print +// TODO: `not` intrinsic is not properly tested +5 5 = print +5 6 = print +420 69 > print +69 420 > print +69 420 < print +420 69 < print +420 420 >= print +420 69 >= print +69 420 >= print +420 420 <= print +69 420 <= print +420 69 <= print +420 69 != print +69 69 != print +420 dup print print +69 420 swap print print +69 drop +10 20 over print print print +10 20 30 rot print print print diff --git a/tests/intrinsics.txt b/tests/intrinsics.txt new file mode 100644 index 00000000..c57fc33a --- /dev/null +++ b/tests/intrinsics.txt @@ -0,0 +1,41 @@ +:i argc 0 +:b stdin 0 + +:i returncode 0 +:b stdout 86 +69 +1 +420 +69 +2 +512 +32 +7 +0 +1 +0 +1 +0 +1 +0 +1 +1 +0 +1 +1 +0 +1 +0 +420 +420 +69 +420 +10 +20 +10 +10 +30 +20 + +:b stderr 0 + diff --git a/tests/macro-redefinition-error.txt b/tests/macro-redefinition-error.txt index 21451dcd..6c6958f1 100644 --- 
a/tests/macro-redefinition-error.txt +++ b/tests/macro-redefinition-error.txt @@ -4,7 +4,7 @@ :i returncode 1 :b stdout 0 -:b stderr 184 -./tests/macro-redefinition-error.porth:2:7: ERROR: redefinition of already existing macro `test` -./tests/macro-redefinition-error.porth:1:7: NOTE: the first definition is located here +:b stderr 172 +./tests/macro-redefinition-error.porth:2:7: ERROR: redefinition of a macro `test` +./tests/macro-redefinition-error.porth:1:7: NOTE: the original definition is located here diff --git a/tests/macros.porth b/tests/macros.porth index 1c4016b2..9316240c 100644 --- a/tests/macros.porth +++ b/tests/macros.porth @@ -1,7 +1,7 @@ include "std.porth" macro check_less - if < do "YES\n" else "NO\n" end puts + < if "YES\n" else "NO\n" end puts end 1 2 check_less @@ -9,7 +9,7 @@ end macro even_fibs 0 1 while over 1000000 < do - if over 2 mod 0 = do + over 2 mod 0 = if over print end swap over + diff --git a/tests/memcpy.porth b/tests/memcpy.porth new file mode 100644 index 00000000..19f99e92 --- /dev/null +++ b/tests/memcpy.porth @@ -0,0 +1,26 @@ +include "std.porth" + +const N 32 end +const K 8 end +const M N K divmod drop end + +memory a N end +memory b M end + +0 while dup M < do + dup 'a' + + over b + + !8 + 1 + +end drop + +0 while dup K < do + dup M * a + + M b rot memcpy + 1 + +end drop + +0 while dup K < do + N a puts "\n" puts + 1 + +end drop diff --git a/tests/memcpy.txt b/tests/memcpy.txt new file mode 100644 index 00000000..54b3eee3 --- /dev/null +++ b/tests/memcpy.txt @@ -0,0 +1,16 @@ +:i argc 0 +:b stdin 0 + +:i returncode 0 +:b stdout 264 +abcdabcdabcdabcdabcdabcdabcdabcd +abcdabcdabcdabcdabcdabcdabcdabcd +abcdabcdabcdabcdabcdabcdabcdabcd +abcdabcdabcdabcdabcdabcdabcdabcd +abcdabcdabcdabcdabcdabcdabcdabcd +abcdabcdabcdabcdabcdabcdabcdabcd +abcdabcdabcdabcdabcdabcdabcdabcd +abcdabcdabcdabcdabcdabcdabcdabcd + +:b stderr 0 + diff --git a/tests/memory-definition-stack-underflow.porth b/tests/memory-definition-stack-underflow.porth new 
file mode 100644 index 00000000..1669638d --- /dev/null +++ b/tests/memory-definition-stack-underflow.porth @@ -0,0 +1 @@ +memory xs + end diff --git a/tests/memory-definition-stack-underflow.txt b/tests/memory-definition-stack-underflow.txt new file mode 100644 index 00000000..e0a6615f --- /dev/null +++ b/tests/memory-definition-stack-underflow.txt @@ -0,0 +1,9 @@ +:i argc 0 +:b stdin 0 + +:i returncode 1 +:b stdout 0 + +:b stderr 100 +./tests/memory-definition-stack-underflow.porth:1:11: ERROR: not enough arguments for `+` intrinsic + diff --git a/tests/memory-definition-unsupported-keyword.porth b/tests/memory-definition-unsupported-keyword.porth new file mode 100644 index 00000000..25b20c54 --- /dev/null +++ b/tests/memory-definition-unsupported-keyword.porth @@ -0,0 +1,7 @@ +memory hello + if true do + 69 + else + 420 + end +end diff --git a/tests/memory-definition-unsupported-keyword.txt b/tests/memory-definition-unsupported-keyword.txt new file mode 100644 index 00000000..c1c5de1c --- /dev/null +++ b/tests/memory-definition-unsupported-keyword.txt @@ -0,0 +1,9 @@ +:i argc 0 +:b stdin 0 + +:i returncode 1 +:b stdout 0 + +:b stderr 116 +./tests/memory-definition-unsupported-keyword.porth:2:3: ERROR: unsupported keyword `if` in compile time evaluation + diff --git a/tests/memory-forth-style.porth b/tests/memory-forth-style.porth deleted file mode 100644 index d21bfd28..00000000 --- a/tests/memory-forth-style.porth +++ /dev/null @@ -1,23 +0,0 @@ -// Forth-style memory access -include "std.porth" - -// write "abc" into the memory -97 mem 0 + ! -98 mem 1 + ! -99 mem 2 + ! -10 mem 3 + ! - -// print "abc" to stdout -4 mem stdout write print - -// increament each character by 1 making it "bcd" -mem 0 + dup @ 1 + swap ! -mem 1 + dup @ 1 + swap ! -mem 2 + dup @ 1 + swap ! 
- -// print "bcd" to stdout -4 mem stdout write print - -// print UINT64_MAX (Largest 64 bit word) -18446744073709551615 mem !64 -mem @64 print diff --git a/tests/memory-redefinition-of-intrinsic.porth b/tests/memory-redefinition-of-intrinsic.porth new file mode 100644 index 00000000..0b558870 --- /dev/null +++ b/tests/memory-redefinition-of-intrinsic.porth @@ -0,0 +1 @@ +memory dup 69 end diff --git a/tests/memory-redefinition-of-intrinsic.txt b/tests/memory-redefinition-of-intrinsic.txt new file mode 100644 index 00000000..6dc1fb4b --- /dev/null +++ b/tests/memory-redefinition-of-intrinsic.txt @@ -0,0 +1,9 @@ +:i argc 0 +:b stdin 0 + +:i returncode 1 +:b stdout 0 + +:b stderr 99 +./tests/memory-redefinition-of-intrinsic.porth:1:8: ERROR: redefinition of an intrinsic word `dup` + diff --git a/tests/memory-redefinition-of-macro.porth b/tests/memory-redefinition-of-macro.porth new file mode 100644 index 00000000..534814a7 --- /dev/null +++ b/tests/memory-redefinition-of-macro.porth @@ -0,0 +1,2 @@ +macro hello 69 end +memory hello 69 end diff --git a/tests/memory-redefinition-of-macro.txt b/tests/memory-redefinition-of-macro.txt new file mode 100644 index 00000000..e00d3350 --- /dev/null +++ b/tests/memory-redefinition-of-macro.txt @@ -0,0 +1,10 @@ +:i argc 0 +:b stdin 0 + +:i returncode 1 +:b stdout 0 + +:b stderr 181 +./tests/memory-redefinition-of-macro.porth:2:8: ERROR: redefinition of a macro `hello` +./tests/memory-redefinition-of-macro.porth:1:7: NOTE: the original definition is located here + diff --git a/tests/memory-redefinition.porth b/tests/memory-redefinition.porth new file mode 100644 index 00000000..44ab5a2f --- /dev/null +++ b/tests/memory-redefinition.porth @@ -0,0 +1,2 @@ +memory xs 69 end +memory xs 420 end diff --git a/tests/memory-redefinition.txt b/tests/memory-redefinition.txt new file mode 100644 index 00000000..24454cf6 --- /dev/null +++ b/tests/memory-redefinition.txt @@ -0,0 +1,10 @@ +:i argc 0 +:b stdin 0 + +:i returncode 1 +:b stdout 0 + 
+:b stderr 168 +./tests/memory-redefinition.porth:2:8: ERROR: redefinition of a memory region `xs` +./tests/memory-redefinition.porth:1:8: NOTE: the original definition is located here + diff --git a/tests/memory-single-number.porth b/tests/memory-single-number.porth new file mode 100644 index 00000000..ffabd320 --- /dev/null +++ b/tests/memory-single-number.porth @@ -0,0 +1 @@ +memory hello end diff --git a/tests/memory-single-number.txt b/tests/memory-single-number.txt new file mode 100644 index 00000000..e4898796 --- /dev/null +++ b/tests/memory-single-number.txt @@ -0,0 +1,9 @@ +:i argc 0 +:b stdin 0 + +:i returncode 1 +:b stdout 0 + +:b stderr 124 +./tests/memory-single-number.porth:1:14: ERROR: The result of expression in compile time evaluation must be a single number + diff --git a/tests/memory-unsupported-token-type.porth b/tests/memory-unsupported-token-type.porth new file mode 100644 index 00000000..abdbbb8c --- /dev/null +++ b/tests/memory-unsupported-token-type.porth @@ -0,0 +1 @@ +memory hello "world" end diff --git a/tests/memory-unsupported-token-type.txt b/tests/memory-unsupported-token-type.txt new file mode 100644 index 00000000..f1d1ee70 --- /dev/null +++ b/tests/memory-unsupported-token-type.txt @@ -0,0 +1,9 @@ +:i argc 0 +:b stdin 0 + +:i returncode 1 +:b stdout 0 + +:b stderr 110 +./tests/memory-unsupported-token-type.porth:1:14: ERROR: strings are not supported in compile time evaluation + diff --git a/tests/memory-unsupported-word.porth b/tests/memory-unsupported-word.porth new file mode 100644 index 00000000..72cf6e3d --- /dev/null +++ b/tests/memory-unsupported-word.porth @@ -0,0 +1 @@ +memory hello word end diff --git a/tests/memory-unsupported-word.txt b/tests/memory-unsupported-word.txt new file mode 100644 index 00000000..7421412e --- /dev/null +++ b/tests/memory-unsupported-word.txt @@ -0,0 +1,9 @@ +:i argc 0 +:b stdin 0 + +:i returncode 1 +:b stdout 0 + +:b stderr 102 +./tests/memory-unsupported-word.porth:1:14: ERROR: unsupported 
word `word` in compile time evaluation + diff --git a/tests/memory.porth b/tests/memory.porth index 5a39365f..74583558 100644 --- a/tests/memory.porth +++ b/tests/memory.porth @@ -1,22 +1,28 @@ include "std.porth" +memory abc 8 end + // write "abc" into the memory -mem 0 + 97 . -mem 1 + 98 . -mem 2 + 99 . -mem 3 + 10 . +97 abc 0 + !8 +98 abc 1 + !8 +99 abc 2 + !8 +10 abc 3 + !8 // print "abc" to stdout -4 mem stdout write print +4 abc stdout write print // increament each character by 1 making it "bcd" -mem 0 + dup , 1 + . -mem 1 + dup , 1 + . -mem 2 + dup , 1 + . +abc 0 + dup @8 1 + swap !8 +abc 1 + dup @8 1 + swap !8 +abc 2 + dup @8 1 + swap !8 // print "bcd" to stdout -4 mem stdout write print +4 abc stdout write print // print UINT64_MAX (Largest 64 bit word) -mem 18446744073709551615 .64 -mem ,64 print \ No newline at end of file +18446744073709551615 abc !64 +abc @64 print + +255 abc !8 +255 abc 1 + !8 +abc @16 print diff --git a/tests/memory.txt b/tests/memory.txt index a4a9a422..1f12ef65 100644 --- a/tests/memory.txt +++ b/tests/memory.txt @@ -2,12 +2,13 @@ :b stdin 0 :i returncode 0 -:b stdout 33 +:b stdout 39 abc 4 bcd 4 18446744073709551615 +65535 :b stderr 0 diff --git a/tests/not-enough-args-for-do.porth b/tests/not-enough-args-for-do.porth new file mode 100644 index 00000000..67ce4d0f --- /dev/null +++ b/tests/not-enough-args-for-do.porth @@ -0,0 +1,5 @@ +include "std.porth" + +if do + "test\n" puts +end diff --git a/tests/not-enough-args-for-do.txt b/tests/not-enough-args-for-do.txt new file mode 100644 index 00000000..69d4e48b --- /dev/null +++ b/tests/not-enough-args-for-do.txt @@ -0,0 +1,9 @@ +:i argc 0 +:b stdin 0 + +:i returncode 1 +:b stdout 0 + +:b stderr 81 +./tests/not-enough-args-for-do.porth:3:4: ERROR: `do` is not preceded by `while` + diff --git a/tests/offset-reset.porth b/tests/offset-reset.porth new file mode 100644 index 00000000..63476e19 --- /dev/null +++ b/tests/offset-reset.porth @@ -0,0 +1,9 @@ +const sizeof(u64) 8 end + +const 
Op.type sizeof(u64) offset end +const Op.operand sizeof(u64) offset end +const sizeof(Op) reset end + +Op.type print +Op.operand print +sizeof(Op) print diff --git a/tests/offset-reset.txt b/tests/offset-reset.txt new file mode 100644 index 00000000..f785acd8 --- /dev/null +++ b/tests/offset-reset.txt @@ -0,0 +1,11 @@ +:i argc 0 +:b stdin 0 + +:i returncode 0 +:b stdout 7 +0 +8 +16 + +:b stderr 0 + diff --git a/tests/original-proc-def-error.porth b/tests/original-proc-def-error.porth new file mode 100644 index 00000000..00e24a11 --- /dev/null +++ b/tests/original-proc-def-error.porth @@ -0,0 +1,11 @@ +include "std.porth" + +proc hello + "Hello, World" puts +end + +proc hello + "Foo, Bar" +end + +hello diff --git a/tests/original-proc-def-error.txt b/tests/original-proc-def-error.txt new file mode 100644 index 00000000..a211d3ff --- /dev/null +++ b/tests/original-proc-def-error.txt @@ -0,0 +1,10 @@ +:i argc 0 +:b stdin 0 + +:i returncode 1 +:b stdout 0 + +:b stderr 170 +./tests/original-proc-def-error.porth:7:6: ERROR: redefinition of a proc `hello` +./tests/original-proc-def-error.porth:3:6: NOTE: the original definition is located here + diff --git a/tests/recursive-const.porth b/tests/recursive-const.porth new file mode 100644 index 00000000..094218e6 --- /dev/null +++ b/tests/recursive-const.porth @@ -0,0 +1 @@ +const N N 1 + end diff --git a/tests/recursive-const.txt b/tests/recursive-const.txt new file mode 100644 index 00000000..d2d58863 --- /dev/null +++ b/tests/recursive-const.txt @@ -0,0 +1,9 @@ +:i argc 0 +:b stdin 0 + +:i returncode 1 +:b stdout 0 + +:b stderr 90 +./tests/recursive-const.porth:1:9: ERROR: unsupported word `N` in compile time evaluation + diff --git a/tests/str-chop-by-delim.porth b/tests/str-chop-by-delim.porth new file mode 100644 index 00000000..2d306cf9 --- /dev/null +++ b/tests/str-chop-by-delim.porth @@ -0,0 +1,16 @@ +include "std.porth" + +memory content sizeof(Str) end +memory line sizeof(Str) end +memory word sizeof(Str) end + 
+"hello world\nfoo bar\n\n\ntest\n" content !Str + +while content @Str.count 0 > do + '\n' line content str-chop-by-delim + while line @Str.count 0 > do + line str-trim-left + ' ' word line str-chop-by-delim + "|" puts word @Str puts "|\n" puts + end +end diff --git a/tests/str-chop-by-delim.txt b/tests/str-chop-by-delim.txt new file mode 100644 index 00000000..df2bf7f6 --- /dev/null +++ b/tests/str-chop-by-delim.txt @@ -0,0 +1,13 @@ +:i argc 0 +:b stdin 0 + +:i returncode 0 +:b stdout 35 +|hello| +|world| +|foo| +|bar| +|test| + +:b stderr 0 + diff --git a/tests/streq.porth b/tests/streq.porth new file mode 100644 index 00000000..664295e6 --- /dev/null +++ b/tests/streq.porth @@ -0,0 +1,4 @@ +include "std.porth" + +"foo" "foo" streq print +"foo" "bar" streq print diff --git a/tests/streq.txt b/tests/streq.txt new file mode 100644 index 00000000..54219f1f --- /dev/null +++ b/tests/streq.txt @@ -0,0 +1,10 @@ +:i argc 0 +:b stdin 0 + +:i returncode 0 +:b stdout 4 +1 +0 + +:b stderr 0 + diff --git a/tests/try-parse-int.porth b/tests/try-parse-int.porth new file mode 100644 index 00000000..6bfecc49 --- /dev/null +++ b/tests/try-parse-int.porth @@ -0,0 +1,20 @@ +include "std.porth" + +proc test-try-parse-int + memory a sizeof(Str) end + a !Str + a @Str try-parse-int if print else + drop + a @Str eputs " is not a number\n" eputs + end +end + +// // TODO: "./tests/try-parse-int.asm:2291: warning: no operand for data declaration" +// // TODO: try-parse-int does not fail on empty list +// "" test-try-parse-int +"1234" test-try-parse-int +"abcd" test-try-parse-int + +// TODO: try-parse-int does not parse negative numbers +// "-1234" test-try-parse-int + diff --git a/tests/try-parse-int.txt b/tests/try-parse-int.txt new file mode 100644 index 00000000..917e4cb4 --- /dev/null +++ b/tests/try-parse-int.txt @@ -0,0 +1,10 @@ +:i argc 0 +:b stdin 0 + +:i returncode 0 +:b stdout 5 +1234 + +:b stderr 21 +abcd is not a number + diff --git a/tests/while-alter.porth 
b/tests/while-alter.porth new file mode 100644 index 00000000..e6948e8a --- /dev/null +++ b/tests/while-alter.porth @@ -0,0 +1,3 @@ +while 1 cast(bool) do + 1 +end diff --git a/tests/while-alter.txt b/tests/while-alter.txt new file mode 100644 index 00000000..e182bedd --- /dev/null +++ b/tests/while-alter.txt @@ -0,0 +1,11 @@ +:i argc 0 +:b stdin 0 + +:i returncode 1 +:b stdout 0 + +:b stderr 248 +./tests/while-alter.porth:1:20: ERROR: Loops are not allowed to alter types and amount of elements on the stack. +./tests/while-alter.porth:1:20: NOTE: Expected elements: [] +./tests/while-alter.porth:1:20: NOTE: Actual elements: [] + diff --git a/tests/while-fail.txt b/tests/while-fail.txt index 2266b5ea..848dd355 100644 --- a/tests/while-fail.txt +++ b/tests/while-fail.txt @@ -4,8 +4,6 @@ :i returncode 1 :b stdout 0 -:b stderr 281 -./tests/while-fail.porth:4:1: ERROR: while-do body is not allowed to alter the types of the arguments on the data stack -./tests/while-fail.porth:4:1: NOTE: Expected types: [] -./tests/while-fail.porth:4:1: NOTE: Actual types: [, ] +:b stderr 91 +./tests/while-fail.porth:1:1: ERROR: unhandled data on the data stack: [] diff --git a/tests/while-procs.porth b/tests/while-procs.porth new file mode 100644 index 00000000..11c28fca --- /dev/null +++ b/tests/while-procs.porth @@ -0,0 +1,14 @@ +include "std.porth" + +proc fibs + 0 1 while over 100 < do + over print + 2dup + rot drop + end 2drop +end + +fibs +1 +"------------------------------\n" puts +fibs +drop diff --git a/tests/while-procs.txt b/tests/while-procs.txt new file mode 100644 index 00000000..3156e5a9 --- /dev/null +++ b/tests/while-procs.txt @@ -0,0 +1,33 @@ +:i argc 0 +:b stdin 0 + +:i returncode 0 +:b stdout 89 +0 +1 +1 +2 +3 +5 +8 +13 +21 +34 +55 +89 +------------------------------ +0 +1 +1 +2 +3 +5 +8 +13 +21 +34 +55 +89 + +:b stderr 0 +