-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathjonesforth.S
2411 lines (1921 loc) · 83.7 KB
/
jonesforth.S
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
/* RISC-V implementation of jones forth.
This repository is intended to migrate the jonesforth compiler and tutorial to RISC-V ISA.
The assembler is rewritten into RISC-V 64 by JJy <[email protected]> https://justjjy.com,
and all the instructions are replaced with RISC-V,
ISA unrelated parts of the tutorial are kept untouched.
So you can use this tutorial just like the original one but for RISC-V ISA.
All the additional work is released under the same PUBLIC DOMAIN
The original file header:
*/
/* A sometimes minimal FORTH compiler and tutorial for Linux / i386 systems. -*- asm -*-
By Richard W.M. Jones <[email protected]> http://annexia.org/forth
This is PUBLIC DOMAIN (see public domain release statement below).
$Id: jonesforth.S,v 1.47 2009-09-11 08:33:13 rich Exp $
riscv64-unknown-elf-gcc -I /usr/include/ -nostdlib -static -Wl,-Ttext,0 -o jonesforth jonesforth.S
*/
/* Version number of this FORTH (presumably tracking revision 1.47 from the $Id$ line in the original header). */
.set JONES_VERSION,47
/*
INTRODUCTION ----------------------------------------------------------------------
FORTH is one of those alien languages which most working programmers regard in the same
way as Haskell, LISP, and so on. Something so strange that they'd rather any thoughts
of it just go away so they can get on with writing this paying code. But that's wrong
and if you care at all about programming then you should at least understand all these
languages, even if you will never use them.
LISP is the ultimate high-level language, and features from LISP are being added every
decade to the more common languages. But FORTH is in some ways the ultimate in low level
programming. Out of the box it lacks features like dynamic memory management and even
strings. In fact, at its primitive level it lacks even basic concepts like IF-statements
and loops.
Why then would you want to learn FORTH? There are several very good reasons. First
and foremost, FORTH is minimal. You really can write a complete FORTH in, say, 2000
lines of code. I don't just mean a FORTH program, I mean a complete FORTH operating
system, environment and language. You could boot such a FORTH on a bare PC and it would
come up with a prompt where you could start doing useful work. The FORTH you have here
isn't minimal and uses a Linux process as its 'base PC' (both for the purposes of making
it a good tutorial). It's possible to completely understand the system. Who can say they
completely understand how Linux works, or gcc?
Secondly FORTH has a peculiar bootstrapping property. By that I mean that after writing
a little bit of assembly to talk to the hardware and implement a few primitives, all the
rest of the language and compiler is written in FORTH itself. Remember I said before
that FORTH lacked IF-statements and loops? Well of course it doesn't really because
such a language would be useless, but my point was rather that IF-statements and loops are
written in FORTH itself.
Now of course this is common in other languages as well, and in those languages we call
them 'libraries'. For example in C, 'printf' is a library function written in C. But
in FORTH this goes way beyond mere libraries. Can you imagine writing C's 'if' in C?
And that brings me to my third reason: If you can write 'if' in FORTH, then why restrict
yourself to the usual if/while/for/switch constructs? You want a construct that iterates
over every other element in a list of numbers? You can add it to the language. What
about an operator which pulls in variables directly from a configuration file and makes
them available as FORTH variables? Or how about adding Makefile-like dependencies to
the language? No problem in FORTH. How about modifying the FORTH compiler to allow
complex inlining strategies -- simple. This concept isn't common in programming languages,
but it has a name (in fact two names): "macros" (by which I mean LISP-style macros, not
the lame C preprocessor) and "domain specific languages" (DSLs).
This tutorial isn't about learning FORTH as the language. I'll point you to some references
you should read if you're not familiar with using FORTH. This tutorial is about how to
write FORTH. In fact, until you understand how FORTH is written, you'll have only a very
superficial understanding of how to use it.
So if you're not familiar with FORTH or want to refresh your memory here are some online
references to read:
http://en.wikipedia.org/wiki/Forth_%28programming_language%29
http://galileo.phys.virginia.edu/classes/551.jvn.fall01/primer.htm
http://wiki.laptop.org/go/Forth_Lessons
http://www.albany.net/~hello/simple.htm
Here is another "Why FORTH?" essay: http://www.jwdt.com/~paysan/why-forth.html
Discussion and criticism of this FORTH here: http://lambda-the-ultimate.org/node/2452
ACKNOWLEDGEMENTS ----------------------------------------------------------------------
This code draws heavily on the design of LINA FORTH (http://home.hccnet.nl/a.w.m.van.der.horst/lina.html)
by Albert van der Horst. Any similarities in the code are probably not accidental.
Some parts of this FORTH are also based on this IOCCC entry from 1992:
http://ftp.funet.fi/pub/doc/IOCCC/1992/buzzard.2.design.
I was very proud when Sean Barrett, the original author of the IOCCC entry, commented in the LtU thread
http://lambda-the-ultimate.org/node/2452#comment-36818 about this FORTH.
And finally I'd like to acknowledge the (possibly forgotten?) authors of ARTIC FORTH because their
original program which I still have on original cassette tape kept nagging away at me all these years.
http://en.wikipedia.org/wiki/Artic_Software
PUBLIC DOMAIN ----------------------------------------------------------------------
I, the copyright holder of this work, hereby release it into the public domain. This applies worldwide.
In case this is not legally possible, I grant any entity the right to use this work for any purpose,
without any conditions, unless such conditions are required by law.
SETTING UP ----------------------------------------------------------------------
Let's get a few housekeeping things out of the way. Firstly because I need to draw lots of
ASCII-art diagrams to explain concepts, the best way to look at this is using a window which
uses a fixed width font and is at least this wide:
<------------------------------------------------------------------------------------------------------------------------>
Secondly make sure TABS are set to 8 characters. The following should be a vertical
line. If not, sort out your tabs.
|
|
|
Thirdly I assume that your screen is at least 50 characters high.
ASSEMBLING ----------------------------------------------------------------------
If you want to actually run this FORTH, rather than just read it, you will need Linux on an
RISC-V. Linux because instead of programming directly to the hardware on a bare PC which I
could have done, I went for a simpler tutorial by assuming that the 'hardware' is a Linux
process with a few basic system calls (read, write and exit and that's about all). RISC-V
is needed because I had to write the assembly for a processor.
(Of course when I say 'RISC-V', any 64-bit RISC-V processor or VM will do.
I'm compiling this on a qemu VM).
Again, to assemble this you will need gcc and gas (the GNU assembler). The commands to
assemble and run the code (save this file as 'jonesforth.S') are:
gcc -nostdlib -static -o jonesforth jonesforth.S
cat jonesforth.f - | ./jonesforth
If you want to run your own FORTH programs you can do:
cat jonesforth.f myprog.f | ./jonesforth
If you want to load your own FORTH code and then continue reading user commands, you can do:
cat jonesforth.f myfunctions.f - | ./jonesforth
ASSEMBLER ----------------------------------------------------------------------
(You can just skip to the next section -- you don't need to be able to read assembler to
follow this tutorial).
However if you do want to read the assembly code here are a few notes about gas (the GNU assembler):
(1) Register names are prefixed with 'a', `t` or 's', so a0 is the 64 bit RISC-V register. The registers
available on RISC-V are: `a0 - a7`, `t0 - t6`, `s0 - s11`.
(2) add, mv, etc. take arguments in the form RD, RS1[, RS2]. So mv a0, t0 moves t0 -> a0
(3) li instruction is used for setting a constant value to a register:
li t0, 1 set value 1 to t0
(4) gas has a funky syntax for local labels, where '1f' (etc.) means label '1:' "forwards"
and '1b' (etc.) means label '1:' "backwards". Notice that these labels might be mistaken
for hex numbers (eg. you might confuse 1b with 0x1b).
(5) 'beqz' is "jump if rs is zero", 'bnez' for "jump if rs is not zero", 'j' "jump without condition" etc.
(6) gas has a reasonably nice .macro syntax, and I use them a lot to make the code shorter and
less repetitive.
For more help reading the assembler, do "info gas" at the Linux prompt.
Now the tutorial starts in earnest.
THE DICTIONARY ----------------------------------------------------------------------
In FORTH as you will know, functions are called "words", and just as in other languages they
have a name and a definition. Here are two FORTH words:
: DOUBLE DUP + ; \ name is "DOUBLE", definition is "DUP +"
: QUADRUPLE DOUBLE DOUBLE ; \ name is "QUADRUPLE", definition is "DOUBLE DOUBLE"
Words, both built-in ones and ones which the programmer defines later, are stored in a dictionary
which is just a linked list of dictionary entries.
<--- DICTIONARY ENTRY (HEADER) ----------------------->
+------------------------+--------+---------- - - - - +----------- - - - -
| LINK POINTER | LENGTH/| NAME | DEFINITION
| | FLAGS | |
+--- (8 bytes) ----------+- byte -+- n bytes - - - - +----------- - - - -
I'll come to the definition of the word later. For now just look at the header. The first
8 bytes are the link pointer. This points back to the previous word in the dictionary, or, for
the first word in the dictionary it is just a NULL pointer. Then comes a length/flags byte.
The length of the word can be up to 31 characters (5 bits used) and the top three bits are used
for various flags which I'll come to later. This is followed by the name itself, and in this
implementation the name is rounded up to a multiple of 8 bytes by padding it with zero bytes.
That's just to ensure that the definition starts on a 64 bit boundary.
A FORTH variable called LATEST contains a pointer to the most recently defined word, in
other words, the head of this linked list.
DOUBLE and QUADRUPLE might look like this:
pointer to previous word
^
|
+--|------+---+---+---+---+---+---+---+---+------------- - - - -
| LINK | 6 | D | O | U | B | L | E | 0 | (definition ...)
+---------+---+---+---+---+---+---+---+---+------------- - - - -
^ len padding
|
+--|------+---+---+---+---+---+---+---+---+---+---+---+---+------------- - - - -
| LINK | 9 | Q | U | A | D | R | U | P | L | E | 0 | 0 | (definition ...)
+---------+---+---+---+---+---+---+---+---+---+---+---+---+------------- - - - -
^ len padding
|
|
LATEST
You should be able to see from this how you might implement functions to find a word in
the dictionary (just walk along the dictionary entries starting at LATEST and matching
the names until you either find a match or hit the NULL pointer at the end of the dictionary);
and add a word to the dictionary (create a new definition, set its LINK to LATEST, and set
LATEST to point to the new word). We'll see precisely these functions implemented in
assembly code later on.
One interesting consequence of using a linked list is that you can redefine words, and
a newer definition of a word overrides an older one. This is an important concept in
FORTH because it means that any word (even "built-in" or "standard" words) can be
overridden with a new definition, either to enhance it, to make it faster or even to
disable it. However because of the way that FORTH words get compiled, which you'll
understand below, words defined using the old definition of a word continue to use
the old definition. Only words defined after the new definition use the new definition.
DIRECT THREADED CODE ----------------------------------------------------------------------
Now we'll get to the really crucial bit in understanding FORTH, so go and get a cup of tea
or coffee and settle down. It's fair to say that if you don't understand this section, then you
won't "get" how FORTH works, and that would be a failure on my part for not explaining it well.
So if after reading this section a few times you don't understand it, please email me
Let's talk first about what "threaded code" means. Imagine a peculiar version of C where
you are only allowed to call functions without arguments. (Don't worry for now that such a
language would be completely useless!) So in our peculiar C, code would look like this:
f ()
{
a ();
b ();
c ();
}
and so on. How would a function, say 'f' above, be compiled by a standard C compiler?
Probably into assembly code like this. On the right hand side I've written the actual
x86 machine code (this example is kept from the original i386 tutorial).
f:
CALL a E8 08 00 00 00
CALL b E8 1C 00 00 00
CALL c E8 2C 00 00 00
; ignore the return from the function for now
"E8" is the x86 machine code to "CALL" a function. In the first 20 years of computing
memory was hideously expensive and we might have worried about the wasted space being used
by the repeated "E8" bytes. We can save 20% in code size (and therefore, in expensive memory)
by compressing this into just:
08 00 00 00 Just the function addresses, without
1C 00 00 00 the CALL prefix.
2C 00 00 00
On a 16-bit machine like the ones which originally ran FORTH the savings are even greater - 33%.
[Historical note: If the execution model that FORTH uses looks strange from the following
paragraphs, then it was motivated entirely by the need to save memory on early computers.
This code compression isn't so important now when our machines have more memory in their L1
caches than those early computers had in total, but the execution model still has some
useful properties].
Of course this code won't run directly on the CPU any more. Instead we need to write an
interpreter which takes each set of bytes and calls it.
On a RISC-V machine it turns out that we can write this interpreter rather easily, in just
four assembly instructions (at most 16 bytes of machine code). Let's store the
pointer to the next word to execute in the s1 register:
08 00 00 00 <- We're executing this one now. s1 is the _next_ one to execute.
s1 -> 1C 00 00 00
2C 00 00 00
The all-important instructions are called `ld` and `addi`. Firstly `ld` reads the memory
at s1 into the register (a0). Secondly `addi` increments s1 by 8 bytes
(we are on a 64 bits machine, the pointer size is 64 bits).
So after the two instructions, the situation now looks like this:
08 00 00 00 <- We're still executing this one
1C 00 00 00 <- a0 now contains this address (0x0000001C)
s1 -> 2C 00 00 00
Now we just need to jump to the address stored at a0. This again needs two instructions:
firstly `ld t0, 0(a0)` to load the jump address to t0, then `jalr t0` to jump to the address.
And after doing the jump, the situation looks like:
08 00 00 00
1C 00 00 00 <- Now we're executing this subroutine.
s1 -> 2C 00 00 00
To make this work, each subroutine is followed by the four instructions:
'ld a0, 0(s1); addi s1, s1, 8; ld t0, 0(a0); jalr t0'
which make the jump to the next subroutine.
And that brings us to our first piece of actual code! Well, it's a macro.
*/
/* NEXT macro.
   Fetch the next cell of the definition being executed and jump through it.
   In:      s1 = address of the next cell to execute (the interpreter "pc").
   Out:     s1 advanced by 8; a0 = the fetched cell, i.e. the address of the
            codeword of the word being entered.
   Clobber: a0, t0 and ra (jalr with a single operand links into ra).
   NOTE(review): nothing visible in this part of the file reads ra after NEXT,
   so a plain `jr t0` looks sufficient -- confirm against the rest of the file
   before changing it.
*/
.macro NEXT
ld a0, 0(s1) // a0 = address of the codeword of the next word
addi s1, s1, 8 // step s1 on to the following cell (cells are 8 bytes)
ld t0, 0(a0) // t0 = contents of that codeword, i.e. the code to run
jalr t0 // jump to it; a0 still points at the codeword
.endm
/* The macro is called NEXT. That's a FORTH-ism. It expands to those four instructions.
Every FORTH primitive that we write has to be ended by NEXT. Think of it kind of like
a return.
The above describes what is known as direct threaded code.
To sum up: We compress our function calls down to a list of addresses and use a somewhat
magical macro to act as a "jump to next function in the list". We also use one register (s1)
to act as a kind of instruction pointer, pointing to the next function in the list.
I'll just give you a hint of what is to come by saying that a FORTH definition such as:
: QUADRUPLE DOUBLE DOUBLE ;
actually compiles (almost, not precisely but we'll see why in a moment) to a list of
function addresses for DOUBLE, DOUBLE and a special function called EXIT to finish off.
At this point, REALLY EAGLE-EYED ASSEMBLY EXPERTS are saying "JONES, YOU'VE MADE A MISTAKE!".
INDIRECT THREADED CODE ----------------------------------------------------------------------
It turns out that direct threaded code is interesting but only if you want to just execute
a list of functions written in assembly language. So QUADRUPLE would work only if DOUBLE
was an assembly language function. In the direct threaded code, QUADRUPLE would look like:
+------------------+
| addr of DOUBLE --------------------> (assembly code to do the double)
+------------------+ NEXT
s1 -> | addr of DOUBLE |
+------------------+
We can add an extra indirection to allow us to run both words written in assembly language
(primitives written for speed) and words written in FORTH themselves as lists of addresses.
The extra indirection is the reason for the `ld t0, 0(a0)`.
Let's have a look at how QUADRUPLE and DOUBLE really look in FORTH:
: QUADRUPLE DOUBLE DOUBLE ;
+------------------+
| codeword | : DOUBLE DUP + ;
+------------------+
| addr of DOUBLE ---------------> +------------------+
+------------------+ | codeword |
| addr of DOUBLE | +------------------+
+------------------+ | addr of DUP --------------> +------------------+
| addr of EXIT | +------------------+ | codeword -------+
+------------------+ s1 -> | addr of + --------+ +------------------+ |
+------------------+ | | assembly to <-----+
| addr of EXIT | | | implement DUP |
+------------------+ | | .. |
| | .. |
| | NEXT |
| +------------------+
|
+-----> +------------------+
| codeword -------+
+------------------+ |
| assembly to <------+
| implement + |
| .. |
| .. |
| NEXT |
+------------------+
This is the part where you may need an extra cup of tea/coffee/favourite caffeinated
beverage. What has changed is that I've added an extra pointer to the beginning of
the definitions. In FORTH this is sometimes called the "codeword". The codeword is
a pointer to the interpreter to run the function. For primitives written in
assembly language, the "interpreter" just points to the actual assembly code itself.
They don't need interpreting, they just run.
In words written in FORTH (like QUADRUPLE and DOUBLE), the codeword points to an interpreter
function.
I'll show you the interpreter function shortly, but let's recall our indirect
jump instructions. Take the case where we're executing DOUBLE
as shown, and DUP has been called. Note that s1 is pointing to the address of +
The assembly code for DUP eventually does a NEXT. That:
(1) reads the address of + into a0 a0 points to the codeword of +
(2) increments s1 by 8
(3) jumps to the indirect t0 jumps to the address in the codeword of +,
ie. the assembly code to implement +
+------------------+
| codeword |
+------------------+
| addr of DOUBLE ---------------> +------------------+
+------------------+ | codeword |
| addr of DOUBLE | +------------------+
+------------------+ | addr of DUP --------------> +------------------+
| addr of EXIT | +------------------+ | codeword -------+
+------------------+ | addr of + --------+ +------------------+ |
+------------------+ | | assembly to <-----+
s1 -> | addr of EXIT | | | implement DUP |
+------------------+ | | .. |
| | .. |
| | NEXT |
| +------------------+
|
+-----> +------------------+
| codeword -------+
+------------------+ |
now we're | assembly to <-----+
executing | implement + |
this | .. |
function | .. |
| NEXT |
+------------------+
So I hope that I've convinced you that NEXT does roughly what you'd expect. This is
indirect threaded code.
I've glossed over four things. I wonder if you can guess without reading on what they are?
.
.
.
My list of four things are: (1) What does "EXIT" do? (2) which is related to (1) is how do
you call into a function, ie. how does s1 start off pointing at part of QUADRUPLE, but
then point at part of DOUBLE. (3) What goes in the codeword for the words which are written
in FORTH? (4) How do you compile a function which does anything except call other functions
ie. a function which contains a number like : DOUBLE 2 * ; ?
THE INTERPRETER AND RETURN STACK ------------------------------------------------------------
Going at these in no particular order, let's talk about issues (3) and (2), the interpreter
and the return stack.
Words which are defined in FORTH need a codeword which points to a little bit of code to
give them a "helping hand" in life. They don't need much, but they do need what is known
as an "interpreter", although it doesn't really "interpret" in the same way that, say,
Java bytecode used to be interpreted (ie. slowly). This interpreter just sets up a few
machine registers so that the word can then execute at full speed using the indirect
threaded model above.
One of the things that needs to happen when QUADRUPLE calls DOUBLE is that we save the old
s1 ("instruction pointer") and create a new one pointing to the first word in DOUBLE.
Because we will need to restore the old s1 at the end of DOUBLE (this is, after all, like
a function call), we will need a stack to store these "return addresses" (old values of s1).
As you will have seen in the background documentation, FORTH has two stacks, an ordinary
stack for parameters, and a return stack which is a bit more mysterious. But our return
stack is just the stack I talked about in the previous paragraph, used to save s1 when
calling from a FORTH word into another FORTH word.
In this FORTH, we are using the normal stack pointer (sp) for the parameter stack.
We will use the RISC-V's "other" stack pointer (fp, usually called the "frame pointer")
for our return stack.
I've got two macros which just wrap up the details of using fp for the return stack.
You use them as for example "PUSHRSP a0" (push a0 on the return stack) or "POPRSP a1"
(pop top of return stack into a1).
*/
/* Macros to deal with the parameter stack (sp) and the return stack (fp). */
/* PUSH r1 [r2 ...]: push the listed registers on to the parameter stack.
   sp is lowered once, by 8 bytes per register, and then each register is stored.
   The last register listed ends up at 0(sp), i.e. on top of the stack. */
.macro PUSH regs:vararg
// make room: one 8-byte slot per register
PUSH_ADJ 0, \regs
// store the registers into the new slots
PUSH_REGS \regs
.endm
/* PUSH_ADJ depth reg [regs ...]: helper for PUSH.
   Recurses once per register, appending "-8" to the depth expression each time,
   and on the last register emits a single addi that lowers sp by the total.
   The register names are only counted here; nothing is stored. */
.macro PUSH_ADJ depth reg regs:vararg
.ifb \regs
// last register: apply the accumulated adjustment to sp
addi sp,sp,\depth-8
.else
// more registers follow: account for this one and recurse on the rest
PUSH_ADJ \depth-8, \regs
.endif
.endm
/* PUSH_REG dst off [reg [regs ...]]: helper for PUSH_REGS.
   Stores dst at off(sp), where off grows by 8 for every register named after dst,
   so a register followed by k others in the list is stored at 8*k(sp).
   Does not change sp. */
.macro PUSH_REG dst off reg="" regs:vararg
.ifb \reg
// no registers left to count: the offset is final, do the store
sd \dst,(\off)(sp)
.else
// one more register follows dst: bump the offset by 8 and recurse on the rest
PUSH_REG \dst, \off+8, \regs
.endif
.endm
/* PUSH_REGS reg [regs ...]: helper for PUSH.
   Recurses to the end of the list first, so the store for the last register is
   emitted first. Each register is handed to PUSH_REG together with the registers
   that follow it, which is what selects its slot. Does not change sp. */
.macro PUSH_REGS reg regs:vararg
.ifb \regs
.else
// handle the registers after this one before storing this one
PUSH_REGS \regs
.endif
// store this register above the slots of the registers that follow it
PUSH_REG \reg 0 \regs
.endm
/* POP r1 [r2 ...]: pop the parameter stack into the listed registers.
   The first register receives the top of the stack (0(sp)), the next one 8(sp),
   and so on; sp is then raised once, by 8 bytes per register. */
.macro POP regs:vararg
// start loading at offset 0 from sp
POP_R 0, \regs
.endm
/* POP_R depth reg [regs ...]: helper for POP.
   Loads reg from depth(sp), then either recurses on the remaining registers with
   depth+8 or, on the last register, raises sp past everything that was loaded. */
.macro POP_R depth reg regs:vararg
ld \reg,\depth(sp)
.ifb \regs
// last register: release all the loaded slots in one go
addi sp,sp,\depth+8
.else
// more registers follow: the next one comes from the next slot up
POP_R \depth+8, \regs
.endif
.endm
/* PUSHRSP reg: push reg on to the return stack, which is addressed through fp
   and grows towards lower addresses. */
.macro PUSHRSP reg
addi fp, fp, -8 // make room for one 8-byte cell
sd \reg, 0(fp) // store reg in the new top-of-stack cell
.endm
/* POPRSP reg: pop the top cell of the return stack (addressed through fp) into reg. */
.macro POPRSP reg
ld \reg, 0(fp) // read the top cell of the return stack
addi fp, fp, 8 // and release it
.endm
/* Macro to help with subroutine calls.
   RCALL symbol: call symbol while preserving ra. ra is the only register saved:
   it is stored in a fresh cell on the parameter stack before the call and
   restored afterwards, so the callee runs with sp lowered by 8 bytes. */
.macro RCALL symbol
	addi	sp, sp, -8		// open one cell on the parameter stack ...
	sd	ra, 0(sp)		// ... and park the current ra in it
	call	\symbol			// call overwrites ra with its own return point
	ld	ra, 0(sp)		// recover the saved ra
	addi	sp, sp, 8		// and drop the cell again
.endm
/*
And with that we can now talk about the interpreter.
In FORTH the interpreter function is often called DOCOL (I think it means "DO COLON" because
all FORTH definitions start with a colon, as in : DOUBLE DUP + ;
The "interpreter" (it's not really "interpreting") just needs to push the old s1 on the
stack and set s1 to the first word in the definition. Remember that we jumped to the
function using `ld t0, 0(a0); jalr t0`? Well a consequence of that is that conveniently a0 contains
the address of this codeword, so just by adding 8 to it we get the address of the first
data word. Finally after setting up s1, it just does NEXT which causes that first word
to run.
*/
/* DOCOL - the interpreter!
   Entered through NEXT with a0 = address of the codeword of the word being called.
   Saves the caller's s1 on the return stack, points s1 at the cell just after
   the codeword (the first data word) and carries on threading with NEXT. */
	.text
	.balign 8
DOCOL:
	PUSHRSP	s1			// remember where the caller had got to
	addi	s1, a0, 8		// s1 = first data word, just past the codeword
	NEXT				// a0 is reloaded from s1 here, then we jump
/*
Just to make this absolutely clear, let's see how DOCOL works when jumping from QUADRUPLE
into DOUBLE:
QUADRUPLE:
+------------------+
| codeword |
+------------------+ DOUBLE:
| addr of DOUBLE ---------------> +------------------+
+------------------+ a0 -> | addr of DOCOL |
s1 -> | addr of DOUBLE | +------------------+
+------------------+ | addr of DUP |
| addr of EXIT | +------------------+
+------------------+ | etc. |
First, the call to DOUBLE calls DOCOL (the codeword of DOUBLE). DOCOL does this: It
pushes the old s1 on the return stack. a0 points to the codeword of DOUBLE, so we
just add 8 on to it to get our new s1:
QUADRUPLE:
+------------------+
| codeword |
+------------------+ DOUBLE:
| addr of DOUBLE ---------------> +------------------+
top of return +------------------+ a0 -> | addr of DOCOL |
stack points -> | addr of DOUBLE | + 8 = +------------------+
+------------------+ s1 -> | addr of DUP |
| addr of EXIT | +------------------+
+------------------+ | etc. |
Then we do NEXT, and because of the magic of threaded code that increments s1 again
and calls DUP.
Well, it seems to work.
One minor point here. Because DOCOL is the first bit of assembly actually to be defined
in this file (the others were just macros), and because I usually compile this code with the
text segment starting at address 0, DOCOL has address 0. So if you are disassembling the
code and see a word with a codeword of 0, you will immediately know that the word is
written in FORTH (it's not an assembler primitive) and so uses DOCOL as the interpreter.
STARTING UP ----------------------------------------------------------------------
Now let's get down to nuts and bolts. When we start the program we need to set up
a few things like the return stack. But as soon as we can, we want to jump into FORTH
code (albeit much of the "early" FORTH code will still need to be written as
assembly language primitives).
This is what the set up code does. Does a tiny bit of house-keeping, sets up the
separate return stack (NB: Linux gives us the ordinary parameter stack already), then
immediately jumps to a FORTH word called QUIT. Despite its name, QUIT doesn't quit
anything. It resets some internal state and starts reading and interpreting commands.
(The reason it is called QUIT is because you can call QUIT from your own FORTH code
to "quit" your program and go back to interpreting).
*/
/* Assembler entry point.
   Sets up gp, records the initial parameter stack pointer in the FORTH variable
   S0, points fp at the top of the return stack, calls set_up_data_segment and
   then starts the threaded interpreter at cold_start. Never returns. */
	.text
	.globl _start
_start:
	/* Nothing else initialises gp in a -nostdlib program, but the linker may
	   relax the address loads below (and later ones) into gp-relative
	   addressing. Load gp first; norelax stops this load itself from being
	   relaxed into a no-op. */
	.option push
	.option norelax
	lla	gp, __global_pointer$
	.option pop

	la	t0, var_S0
	sd	sp, 0(t0)		// Save the initial data stack pointer in FORTH variable S0.
	la	fp, return_stack_top	// Initialise the return stack.
	call	set_up_data_segment
	la	s1, cold_start		// Initialise interpreter.
	NEXT				// Run interpreter!
/* cold_start is a one-cell fragment of threaded code: _start points s1 at it
   and runs NEXT, so execution begins with the word whose codeword address is
   stored here. */
.section .rodata
.balign 8
cold_start: // High-level code without a codeword.
.dword QUIT
/*
BUILT-IN WORDS ----------------------------------------------------------------------
Remember our dictionary entries (headers)? Let's bring those together with the codeword
and data words to see how : DOUBLE DUP + ; really looks in memory.
pointer to previous word
^
|
+--|------+---+---+---+---+---+---+---+---+------------+------------+------------+------------+
| LINK | 6 | D | O | U | B | L | E | 0 | DOCOL | DUP | + | EXIT |
+---------+---+---+---+---+---+---+---+---+------------+--|---------+------------+------------+
^ len pad codeword |
| V
LINK in next word points to codeword of DUP
Initially we can't just write ": DOUBLE DUP + ;" (ie. that literal string) here because we
don't yet have anything to read the string, break it up at spaces, parse each word, etc. etc.
So instead we will have to define built-in words using the GNU assembler data constructors
(like .dword, .byte, .string, .ascii and so on -- look them up in the gas info page if you are
unsure of them).
The long way would be:
.dword <link to previous word>
.byte 6 // len
.ascii "DOUBLE" // string
.byte 0 // padding
DOUBLE: .dword DOCOL // codeword
.dword DUP // pointer to codeword of DUP
.dword PLUS // pointer to codeword of +
.dword EXIT // pointer to codeword of EXIT
That's going to get quite tedious rather quickly, so here I define an assembler macro
so that I can just write (the final argument is the label of the previous word, used for the link):
defword "DOUBLE",6,,DOUBLE,<label of previous word>
.dword DUP,PLUS,EXIT
and I'll get exactly the same effect.
Don't worry too much about the exact implementation details of this macro - it's complicated!
*/
/* Flags - these are discussed later.
   They share the length/flags byte of a dictionary header with the 5-bit name length. */
.set F_IMMED,1<<7	// 0x80: bit 7 of the length/flags byte
.set F_HIDDEN,1<<5	// 0x20: bit 5 of the length/flags byte
.set F_LENMASK,(1<<5)-1	// 0x1f: length mask, the low five bits
// defword name, namelen, flags=0, label, link
//
// Emit the dictionary header and codeword for a word written in FORTH.  The
// caller follows the macro invocation with the list of codeword pointers
// (.dword ...) that make up the body of the word.
//
//   name     name of the word as a string, emitted with .ascii
//   namelen  length of the name in bytes, supplied by the caller
//   flags    flag bits added to namelen in the length byte (default 0)
//   label    assembler label placed on the codeword; the header itself is
//            labelled name_<label>
//   link     label of the previous word; the link field holds name_<link>
//
// Everything is emitted into .rodata, and the section is still .rodata when
// the macro ends, so the pointer list that follows lands there as well.
.macro defword name, namelen, flags=0, label, link
.section .rodata
.balign 8
.globl name_\label
name_\label :
.dword name_\link // link
.byte \flags+\namelen // flags + length byte
.ascii "\name" // the name
.balign 8 // padding to next 8 byte boundary
.globl \label
\label :
.dword DOCOL // codeword - the interpreter
// list of word pointers follow
.endm
/*
Similarly I want a way to write words written in assembly language. There will be quite a few
of these to start with because, well, everything has to start in assembly before there's
enough "infrastructure" to be able to start writing FORTH words, but also I want to define
some common FORTH words in assembly language for speed, even though I could write them in FORTH.
This is what DUP looks like in memory:
  pointer to previous word
   ^
   |
+--|------+---+---+---+---+------------+
| LINK    | 3 | D | U | P | code_DUP ---------------------> points to the assembly
+---------+---+---+---+---+------------+                    code used to write DUP,
   ^       len              codeword                        which ends with NEXT.
   |
  LINK in next word
Again, for brevity in writing the header I'm going to write an assembler macro called defcode.
As with defword above, don't worry about the complicated details of the macro.
*/
// defcode name, namelen, flags=0, label, link
//
// Emit the dictionary header and codeword for a word written in assembly.
// The header and codeword go into .rodata; the macro then switches to .text
// and defines code_<label>, so the instructions written after the macro
// invocation become the body of the word.
//
//   name     name of the word as a string, emitted with .ascii
//   namelen  length of the name in bytes, supplied by the caller
//   flags    flag bits added to namelen in the length byte (default 0)
//   label    assembler label placed on the codeword; the header is labelled
//            name_<label> and the machine code is labelled code_<label>
//   link     label of the previous word; the link field holds name_<link>
.macro defcode name, namelen, flags=0, label, link
.section .rodata
.balign 8
.globl name_\label
name_\label :
.dword name_\link // link
.byte \flags+\namelen // flags + length byte
.ascii "\name" // the name
.balign 8 // padding to next 8 byte boundary
.globl \label
\label :
.dword code_\label // codeword
.text
.balign 8
.globl code_\label
code_\label : // assembler code follows
.endm
/*
Now some easy FORTH primitives. These are written in assembly for speed. If you understand
RISC-V assembly language then it is worth reading these. However if you don't understand assembly
you can skip the details.
*/
// DROP, the first word defined below, passes NULL as its link argument, so
// its link field (.dword name_NULL) assembles to 0.
.set name_NULL, 0
defcode "DROP",4,,DROP, NULL
        // DROP: remove the top cell of the data stack.  The popped value
        // lands in a0 and is simply discarded.
        POP     a0
        NEXT
defcode "SWAP",4,,SWAP, DROP
        // SWAP: exchange the two topmost cells.  The registers are pushed
        // back in the same order they were popped, which reverses the pair.
        POP     a1 a0
        PUSH    a1 a0
        NEXT
defcode "DUP",3,,DUP, SWAP
        // DUP: read the top cell in place (no pop) and push a copy of it.
        ld      a0, 0(sp)
        PUSH    a0
        NEXT
defcode "OVER",4,,OVER, DUP
        // OVER: copy the second cell (8 bytes above the top) onto the top
        // of the stack.
        ld      a0, 8(sp)
        PUSH    a0
        NEXT
defcode "ROT",3,,ROT, OVER
        // ROT: rotate the top three cells.
        // After the POP: a2 = top, a1 = second, a0 = third.  They are pushed
        // back as second, top, third, so the old third cell ends up on top.
        // NOTE(review): relies on POP/PUSH handling their operands left to
        // right; those macros are defined outside this section.
        POP     a2 a1 a0
        PUSH    a1 a2 a0
        NEXT
defcode "-ROT",4,,NROT, ROT
        // -ROT: rotate the top three cells the other way.
        // After the POP: a2 = top, a1 = second, a0 = third.  They are pushed
        // back as top, third, second, so the old top cell ends up third.
        // NOTE(review): relies on POP/PUSH handling their operands left to
        // right; those macros are defined outside this section.
        POP     a2 a1 a0
        PUSH    a2 a0 a1
        NEXT
defcode "2DROP",5,,TWODROP, NROT
        // 2DROP: remove the top two cells of the stack.  Both popped values
        // are discarded.
        POP     a0 a1
        NEXT
defcode "2DUP",4,,TWODUP, TWODROP
        // 2DUP: duplicate the top two cells as a pair.  Both are read in
        // place, then pushed second-first so the copies keep their order.
        ld      a1, 8(sp)               // a1 = second cell
        ld      a0, 0(sp)               // a0 = top cell
        PUSH    a1 a0
        NEXT
defcode "2SWAP",5,,TWOSWAP, TWODUP
        // 2SWAP: swap the top two pairs of cells.
        // After the POP: a3 = top, a2 = second, a1 = third, a0 = fourth.
        // The upper pair (a2, a3) is pushed first, then the lower pair
        // (a0, a1), with each pair keeping its internal order.
        // NOTE(review): relies on POP/PUSH handling their operands left to
        // right; those macros are defined outside this section.
        POP     a3 a2 a1 a0
        PUSH    a2 a3 a0 a1
        NEXT
defcode "?DUP",4,,QDUP, TWOSWAP
        // ?DUP: push a copy of the top cell only when it is non-zero; a zero
        // top is left untouched.
        ld      a0, 0(sp)
        beq     a0, zero, 1f            // zero: skip the push
        PUSH    a0
1:
        NEXT
defcode "1+",2,,INCR, QDUP
        // 1+: replace the top cell with its value plus one.
        POP     a0
        addi    a0, a0, 1
        PUSH    a0
        NEXT
defcode "1-",2,,DECR, INCR
        // 1-: replace the top cell with its value minus one.
        POP     a0
        addi    a0, a0, -1
        PUSH    a0
        NEXT
defcode "4+",2,,INCR4, DECR
        // 4+: replace the top cell with its value plus four.
        POP     a0
        addi    a0, a0, 4
        PUSH    a0
        NEXT
defcode "4-",2,,DECR4, INCR4
        // 4-: replace the top cell with its value minus four.
        POP     a0
        addi    a0, a0, -4
        PUSH    a0
        NEXT
defcode "8+",2,,INCR8, DECR4
        // 8+: replace the top cell with its value plus eight.
        //
        // The link argument must name the word defined immediately before
        // this one (DECR4, "4-").  Linking to INCR4 instead would leave "4-"
        // off the dictionary chain, because no later word would point at
        // name_DECR4.
        POP     a0
        addi    a0, a0, 8
        PUSH    a0
        NEXT
defcode "8-",2,,DECR8, INCR8
        // 8-: replace the top cell with its value minus eight.
        POP     a0
        addi    a0, a0, -8
        PUSH    a0
        NEXT
defcode "+",1,,ADD, DECR8
        // +: pop the top two cells and push their sum.
        POP     a0 a1                   // a0 = top, a1 = second
        add     a0, a1, a0              // a0 = second + top
        PUSH    a0
        NEXT
defcode "-",1,,SUB, ADD
        // -: pop the top two cells and push (second - top).
        POP     a0 a1                   // a0 = top, a1 = second
        sub     a0, a1, a0              // a0 = second - top
        PUSH    a0
        NEXT
defcode "*",1,,MUL, SUB
        // *: pop the top two cells and push their product.  Only the low
        // 64 bits of the result are kept; overflow is ignored.
        POP     a0 a1
        mul     a0, a1, a0
        PUSH    a0
        NEXT
/*
In this FORTH, only /MOD is primitive. Later we will define the / and MOD words in
terms of the primitive /MOD.
*/
/*
From the RISC-V spec [7.2]:
If both the quotient and remainder are required from the same division, the
recommended code sequence is: DIV[U] rdq, rs1, rs2; REM[U] rdr, rs1, rs2 (rdq cannot be the
same as rs1 or rs2). Microarchitectures can then fuse these into a single divide operation instead
of performing two separate divides.
*/
defcode "/MOD",4,,DIVMOD, MUL
        // /MOD: signed division of the second cell by the top cell, leaving
        // both the remainder and the quotient on the stack.
        // DIV is issued immediately before REM with the same source
        // registers and a destination distinct from both, matching the
        // sequence recommended by the RISC-V spec quoted above.
        POP     a0 a1                   // a0 = divisor (top), a1 = dividend (second)
        div     a2, a1, a0              // a2 = quotient
        rem     a3, a1, a0              // a3 = remainder
        PUSH    a3 a2                   // remainder first, then quotient
        NEXT
/*
Lots of comparison operations like =, <, >, etc..
ANS FORTH says that the comparison words should return all (binary) 1's for
TRUE and all 0's for FALSE. However this is a bit of a strange convention
so this FORTH breaks it and returns the more normal (for C programmers ...)
1 meaning TRUE and 0 meaning FALSE.
*/
defcode "=",1,,EQU, DIVMOD
        // =: pop the top two cells and push 1 when they are equal, else 0.
        POP     a0 a1
        xor     a0, a0, a1              // zero exactly when the cells match
        seqz    a0, a0                  // a0 = (a0 == 0)
        PUSH    a0
        NEXT
defcode "<>",2,,NEQU, EQU
        // <>: pop the top two cells and push 1 when they differ, else 0.
        POP     a0 a1
        sub     a0, a0, a1              // non-zero exactly when the cells differ
        snez    a0, a0                  // a0 = (a0 != 0); same as sltu a0, zero, a0
        PUSH    a0
        NEXT
defcode "<",1,,LT, NEQU
        // <: pop the top two cells and push 1 when second < top (signed),
        // else 0.
        POP     a1 a0                   // a1 = top, a0 = second
        slt     a0, a0, a1              // a0 = (second < top)
        PUSH    a0
        NEXT
defcode ">",1,,GT, LT
        // >: pop the top two cells and push 1 when second > top (signed),
        // else 0.
        POP     a0 a1                   // a0 = top, a1 = second
        sgt     a0, a1, a0              // a0 = (second > top); same as slt a0, a0, a1
        PUSH    a0
        NEXT
defcode "<=",2,,LE, GT
        // <=: pop the top two cells and push 1 when second <= top (signed),
        // else 0.  Computed as the negation of (top < second).
        POP     a0 a1                   // a0 = top, a1 = second
        slt     t0, a0, a1              // t0 = (top < second), always 0 or 1
        xori    t0, t0, 1               // flip the 0/1 flag; no extra constant register needed
        PUSH    t0
        NEXT
defcode ">=",2,,GE, LE
        // >=: pop the top two cells and push 1 when second >= top (signed),
        // else 0.  Computed as the negation of (second < top).
        POP     a0 a1                   // a0 = top, a1 = second
        slt     t0, a1, a0              // t0 = (second < top), always 0 or 1
        xori    t0, t0, 1               // flip the 0/1 flag; no extra constant register needed
        PUSH    t0
        NEXT
defcode "0=",2,,ZEQU, GE
        // 0=: replace the top cell with 1 when it is zero, else 0.
        POP     a0
        seqz    a0, a0
        PUSH    a0
        NEXT
defcode "0<>",3,,ZNEQU, ZEQU
        // 0<>: replace the top cell with 1 when it is non-zero, else 0.
        POP     a0
        snez    a0, a0                  // same as sltu a0, zero, a0
        PUSH    a0
        NEXT
defcode "0<",2,,ZLT, ZNEQU
        // 0<: replace the top cell with 1 when it is negative (signed),
        // else 0.
        POP     a0
        sltz    a0, a0                  // same as slt a0, a0, zero
        PUSH    a0
        NEXT
defcode "0>",2,,ZGT, ZLT
        // 0>: replace the top cell with 1 when it is greater than zero
        // (signed), else 0.
        POP     a0
        sgtz    a0, a0                  // same as slt a0, zero, a0
        PUSH    a0
        NEXT
defcode "0<=",3,,ZLE, ZGT
        // 0<=: replace the top cell with 1 when it is less than or equal to
        // zero (signed), else 0.  Computed as the negation of (0 < top).
        POP     a0
        slt     t0, zero, a0            // t0 = (0 < top), always 0 or 1
        xori    t0, t0, 1               // flip the 0/1 flag; no extra constant register needed
        PUSH    t0
        NEXT
defcode "0>=",3,,ZGE, ZLE
        // 0>=: replace the top cell with 1 when it is greater than or equal
        // to zero (signed), else 0.  Computed as the negation of (top < 0).
        POP     a0
        slt     t0, a0, zero            // t0 = (top < 0), always 0 or 1
        xori    t0, t0, 1               // flip the 0/1 flag; no extra constant register needed
        PUSH    t0
        NEXT
defcode "AND",3,,AND, ZGE
        // AND: pop the top two cells and push their bitwise AND.
        POP     a0 a1
        and     a0, a1, a0
        PUSH    a0
        NEXT
defcode "OR",2,,OR, AND
        // OR: pop the top two cells and push their bitwise OR.
        POP     a0 a1
        or      a0, a1, a0
        PUSH    a0
        NEXT
defcode "XOR",3,,XOR, OR
        // XOR: pop the top two cells and push their bitwise exclusive OR.
        POP     a0 a1
        xor     a0, a1, a0
        PUSH    a0
        NEXT
defcode "INVERT",6,,INVERT, XOR
        // INVERT: the FORTH bitwise "NOT" function (cf. NEGATE and NOT).
        // Replaces the top cell with its ones-complement.
        POP     a0
        xori    a0, a0, -1              // expanded form of the "not" pseudo-instruction
        PUSH    a0
        NEXT