Itsy Forth is a tiny subset of the Forth programming language.
So far we've looked at the
Forth
outer interpreter,
inner
interpreter and dictionary. This time we'll define the words required to complete the
interpreter.
Peek and Poke
The Forth words to read and write memory are
@
and
!
:
@
- ( addr -- x ) read x from addr
!
- ( x addr -- ) store x at addr
c@
- ( addr -- char ) read char from addr
( before -- after ) shows the contents of the stack before and after the word executes. Here's
how
@
,
c@
and
!
are implemented. Remember we're keeping the
top element of the data stack in the
bx
register.
primitive '@',fetch
mov bx,word[bx]
jmp next
primitive '!',store
pop word[bx]
pop bx
jmp next
primitive 'c@',c_fetch
mov bl,byte[bx]
mov bh,0
jmp next
Manipulating the Stack
drop
- ( x -- ) remove x from the stack
dup
- ( x -- x x ) add a copy of x to the stack
swap
- ( x y -- y x ) exchange x and y
rot
- ( x y z -- y z x ) rotate x, y and z
primitive 'drop',drop
pop bx
jmp next
primitive 'dup',dupe
push bx
jmp next
primitive 'swap',swap
pop ax
push bx
xchg ax,bx
jmp next
primitive 'rot',rote
pop dx
pop ax
push dx
push bx
xchg ax,bx
jmp next
Flow Control
if
,
else
,
then
,
begin
and
again
all compile to
branch
or
0branch
.
0branch
- ( x -- ) jump if x is zero
branch
- ( -- ) unconditional jump
execute
- ( xt -- ) call the word at xt
exit
- ( -- ) return from the current word
The destination address for the jump is compiled in the cell straight after the
branch
or
0branch
instruction.
execute
stores the return address on the return stack and
exit
removes it.
primitive '0branch',zero_branch
lodsw
test bx,bx
jne zerob_z
xchg ax,si
zerob_z pop bx
jmp next
primitive 'branch',branch
mov si,word[si]
jmp next
primitive 'execute',execute
mov di,bx
pop bx
jmp word[di]
primitive 'exit',exit
mov si,word[bp]
inc bp
inc bp
jmp next
Variables and Constants
tib
- ( -- addr ) address of the input buffer
#tib
- ( -- addr ) number of characters in the input buffer
>in
- ( -- addr ) next character in input buffer
state
- ( -- addr ) true = compiling, false = interpreting
dp
- ( -- addr ) first free cell in the dictionary
base
- ( -- addr ) number base
last
- ( -- addr ) the last word to be defined
constant 'tib',t_i_b,32768
variable '#tib',number_t_i_b,0
variable '>in',to_in,0
variable 'state',state,0
variable 'dp',dp,freemem
variable 'base',base,10
variable 'last',last,final
; execution token for constants
doconst push bx
mov bx,word[di+2]
jmp next
; execution token for variables
dovar push bx
lea bx,[di+2]
jmp next
Compilation
,
- ( x -- ) compile x to the current definition
c,
- ( char -- ) compile char to the current definition
lit
- ( -- ) push the value in the cell straight after lit
primitive ',',comma
mov ax,word[val_dp]
xchg ax,bx
add word[val_dp],2
mov word[bx],ax
pop bx
jmp next
primitive 'c,',c_comma
mov ax,word[val_dp]
xchg ax,bx
inc word[val_dp]
mov byte[bx],al
pop bx
jmp next
primitive 'lit',lit
push bx
lodsw
xchg ax,bx
jmp next
Maths / Logic
+
- ( x y -- z) calculate z=x+y then return z
=
- ( x y -- flag ) return true if x=y
primitive '+',plus
pop ax
add bx,ax
jmp next
primitive '=',equals
pop ax
sub bx,ax
sub bx,1
sbb bx,bx
jmp next
Handling Strings
count
- ( addr -- addr2 len ) addr contains a counted string. Return the address of the first character and the string's length
>number
- ( double addr len -- double2 addr2 len2 ) convert string to number
addr contains a string of
len characters which
>number
attempts to
convert to a number using the current number
base
.
>number
returns
the portion of the string which can't be converted, if any. If you're a Forth purist this is
where Itsy starts to get ugly :-)
primitive 'count',count
inc bx
push bx
mov bl,byte[bx-1]
mov bh,0
jmp next
primitive '>number',to_number
pop di
pop cx
pop ax
to_numl test bx,bx
je to_numz
push ax
mov al,byte[di]
cmp al,'a'
jc to_nums
sub al,32
to_nums cmp al,'9'+1
jc to_numg
cmp al,'A'
jc to_numh
sub al,7
to_numg sub al,48
mov ah,0
cmp al,byte[val_base]
jnc to_numh
xchg ax,dx
pop ax
push dx
xchg ax,cx
mul word[val_base]
xchg ax,cx
mul word[val_base]
add cx,dx
pop dx
add ax,dx
dec bx
inc di
jmp to_numl
to_numz push ax
to_numh push cx
push di
jmp next
Terminal Input / Output
accept
- ( addr len -- len2 ) read a string from the terminal
emit
- ( char -- ) display char on the terminal
word
- ( char -- addr ) parse the next word in the input buffer
accept
reads a string of characters from the terminal. The string is
stored at
addr and can be up to
len characters long.
accept
returns the actual length of the string.
word
reads the next word from the terminal input buffer, delimited
by
char. The address of a counted string is returned. The string length will
be 0 if the input buffer is empty.
primitive 'accept',accept
pop di
xor cx,cx
acceptl call getchar
cmp al,8
jne acceptn
jcxz acceptb
call outchar
mov al,' '
call outchar
mov al,8
call outchar
dec cx
dec di
jmp acceptl
acceptn cmp al,13
je acceptz
cmp cx,bx
jne accepts
acceptb mov al,7
call outchar
jmp acceptl
accepts stosb
inc cx
call outchar
jmp acceptl
acceptz jcxz acceptb
mov al,13
call outchar
mov al,10
call outchar
mov bx,cx
jmp next
getchar mov ah,7
int 021h
mov ah,0
ret
outchar xchg ax,dx
mov ah,2
int 021h
ret
primitive 'word',word
mov di,word[val_dp]
push di
mov dx,bx
mov bx,word[val_t_i_b]
mov cx,bx
add bx,word[val_to_in]
add cx,word[val_number_t_i_b]
wordf cmp cx,bx
je wordz
mov al,byte[bx]
inc bx
cmp al,dl
je wordf
wordc inc di
mov byte[di],al
cmp cx,bx
je wordz
mov al,byte[bx]
inc bx
cmp al,dl
jne wordc
wordz mov byte[di+1],32
mov ax,word[val_dp]
xchg ax,di
sub ax,di
mov byte[di],al
sub bx,word[val_t_i_b]
mov word[val_to_in],bx
pop bx
jmp next
primitive 'emit',emit
xchg ax,bx
call outchar
pop bx
jmp next
Searching the Dictionary
find
- ( addr -- addr2 flag ) look up word in the dictionary
find
looks in the Forth dictionary for the word in the counted
string at
addr. One of the following will be returned:
- flag = 0, addr2 = counted string - if word not found
- flag = 1, addr2 = call address if word is immediate
- flag = -1, addr2 = call address if word is not immediate
primitive 'find',find
mov di,val_last
findl push di
push bx
mov cl,byte[bx]
mov ch,0
inc cx
findc mov al,byte[di+2]
and al,07Fh
cmp al,byte[bx]
je findm
pop bx
pop di
mov di,word[di]
test di,di
jne findl
findnf push bx
xor bx,bx
jmp next
findm inc di
inc bx
loop findc
pop bx
pop di
mov bx,1
inc di
inc di
mov al,byte[di]
test al,080h
jne findi
neg bx
findi and ax,31
add di,ax
inc di
push di
jmp next
Initialisation
abort
- ( -- ) initialise Itsy then jump to interpret
abort
initialises the stacks and a few variables before running the outer interpreter.
When Itsy first runs it jumps to
abort
to set up the system.
primitive 'abort',abort
xor ax,ax
mov word[val_state],ax
mov word[val_to_in],ax
mov word[val_number_t_i_b],ax
xchg ax,bp
mov sp,-256
mov si,xt_interpret+2
jmp next
Up and Running?
Itsy is now around 900 bytes and it's time to give the interpreter a quick test run:
Everything seems to be working fine. Next we'll define the compiler so we can continue building Itsy from the Itsy prompt :-)
Great to see a Forth resource like this one that I saw on Reddit.
ReplyDeleteThanks Mentifix, and thanks to everyone who made Itsy Forth popular on Reddit :-)
DeleteWell done John. I bet it was a great feeling when the interpreter output "Itsy". Keep up the good work.
ReplyDeleteThanks Lawrence. This was actually the second attempt. The first test output "ystI" :-)
DeleteCould you please provide an example in Visual Basic .NET. It is not a very good practice to use assembly language in high level software.
ReplyDeleteYou win
DeleteWould GW-Basic be okay?
DeleteTsk, tsk. This is *retro* programming. Nothing later than IBM BASICA, surely?
DeleteFeel free to post the complete source code for Itsy Forth :)
ReplyDeleteHopefully it won't be long before the complete source is ready. I just need to tie up some loose ends. :-)
DeleteThanks for these posts - fascinating!
ReplyDeleteHow did you get this up and running when (unless I've missed it) you don't yet have definitions of the control strucutres (IF, AGAIN) used by the outer interpreter? Did you hand-compile the 0BRANCH calls in the outer interpreter?
Thank you, I'm pleased you enjoyed them :-)
DeleteI compiled the outer interpreter by hand. Also : and ; will be hand compiled. After that I'm hoping everything will be defined from the Itsy prompt.
Thanks for these posts - I had forgotten how much fun Forth and asm are :)
ReplyDeleteCan't count be implemented with extant primitives?
: count ( addr -- addr2 len ) dup 1 + swap c@ ;
Hi Eventi, it's all down to size. The assembly code is 4 bytes shorter than the compiled Forth :-)
DeleteIt's been a very long time since I've done any programming. This does look like a good language to learn.
ReplyDeletehi................
ReplyDeletefantastic post .I really impressed to see your way of explaining codes .can you tell me what to do to learn the assembly language.
Hi John,
ReplyDeleteI believe there's a minor bug in Itsy Forth's >number routine. If it completes successfully, it returns four words on the top of the stack, just as you describe in your article: the converted double-precision number, the leftover string pointer, and a 0 that's left after counting through the string. However, if it fails due to a non-numeric string, it does not push both words of the double back onto the stack; it only pushes the high-order word, followed by the string pointer and the non-zero
counter value. Three words rather than four. This isn't a problem for interpret, which almost immediately calls abort and clears the stack out anyway, but for somebody else who might try to use >number independently, this could be the source of some serious hair-ripping frustration. It looks to me like it would be easy to fix this simply by changing the "jc to_numh" line to read "jc to_numz".
Mike
Okay, it's time for an OOPS! moment. Seems I made my previous comment a bit too hastily. I noticed after the fact that there was another push to the stack BEFORE the jump in question, and it does indeed put the low word of the double number back on the stack as it should if >number aborts. The modification I suggest to the code is not necessary; in fact, making that patch would be a bug in and of itself. The code returns the correct number of items on the stack as it is, regardless of whether it encounters an illegal character or not. I cobbled together some simple numeric output routines and confirmed this.
DeleteHowever, just to save you from repeating another mistaken assumption I made, let me warn you that the number at the top of the stack after >number is executed (the length counter) is not a reliable indicator of whether >number has successfully converted a numeric input string. Under some circumstances (such as if the very last character in the string is non-numeric), it can have a value of zero even if >number detects an error and aborts. Looking at the code now, I don't think John intended that number to be used for error-checking as I'd originally thought. It's just a counter, not a fault flag. It would be nice to have one available, though. I may see if I can modify the code so that it can be used as such.
My apologies for any confusion my earlier post may have caused.
Hi Mike,
DeleteThanks for the bug report, I'm not sure how it slipped through. It can be fixed simply by moving dec bx further down. I've fixed the code above.
John
Mike, as specified, >NUMBER also supports the legacy Forth practice of placing ANY character as a separator to tell the interpreter/compiler to parse a double. Its also used to handle leading + and - flags.
DeleteSo, start with an arbitrary string. Store 0 in the FIXED? 2VARIABLE. Start converting. It stops at the first value. You do an over C@, store it in a variable, and keep going. It stops again. You store the character and the index in the FIXED? 2VARIABLE and keep going. It completes. You look at the leading character, decide what to do (most often its a - and you call DNEGATE. If the routine that called the number parsing routine is treating values with different decimal point locations differently, the required information has been stored.
So while this interpreter does not have Forth standard number conversion, a use of the information contained in the >NUMBER routine when stops on a non-numeric would support extending it to include it.
A Question. If we changed the getchar and outchar to bios calls instead of int 21h
ReplyDeletewould itsy forth then run as a standalone system (ie no operating system)?
Note that if you have NAND R> and >R you can get the ANS Forth CORE logic words and most of the rest of the CORE stack manipulation words.
ReplyDeletenand is ( x y -- z=NOT{x&y} )
: invert ( x -- y=NOT{x} ) dup nand ;
: and ( x y -- z=x&y ) nand invert ;
: over ( x y -- x y x ) >r dup r> swap ;
: 2dup ( x y -- x y x y ) over over ;
: or ( x y -- z=x|y ) invert swap invert nand ;
: xor ( x y -- z=x^y ) 2dup nand dup >r nand swap r> nand nand ;
XOR is involved enough that I'd go with at least >R R> NAND XOR
We also have a subtraction definition available from the above:
: - ( x y -- z=x-y ) invert 1 + + ;
You can get >R and R> NAND and XOR for the cost of three additional primitives rather than four, since they allow you to make ROT a Forth definition:
: rot ( x y z -- y z x ) >r swap r> swap ;
Add NAND XOR >R and R> and you can synthesize the rest of the CORE logic and much of the stack manipulation. You can even synthesize ROT so that's three extra primitives rather than four:
ReplyDelete: rot ( x y z -- y z x ) >r swap r> swap ;
: over ( x y -- x y x ) >r dup r> swap ;
: 2dup ( x y -- x y x y ) over over ;
: invert ( x -- z=NOT{x} ) dup nand ;
; and ( x y -- z=A&B ) nand invert ;
: or ( x y -- z=A|B ) invert swap invert nand ;
: negate ( x -- -x ) invert 1 + ;
: - ( x y z=x-y) negate + ;
May as well add some more thoughts on extending this through to a Standard ANS Forth (at least as far as ANS Forth CORE).
ReplyDeleteCORE and CORE EXT R operations:
Core: R> >R R@
Core EXT: 2R> 2>R 2R@
Not Standard but common factor: RDROP
If you have R> and >R than you can define the rest ... doing the rest in primitives is therefore a speed vs space tradeoff, not a functional requirement:
: R@ ( -- x | R: x -- x ) R> DUP R> ;
: 2R> ( -- x y | R: x y -- ) R> R> SWAP ;
: 2>R ( x y -- | R: -- x y ) SWAP >R >R ;
: 2R@ ( -- x | R: x -- x ) R> DUP R> DUP ROT >R >R ;
If you have 0< and 0= and use NAND to get your CORE bitwise logic, you can get the boolean operators 0> < = >
: 0> ( x -- fl ) DUP 0= SWAP 0< NAND ;
: < ( x y -- fl ) - 0< ;
: = ( x y -- fl ) - 0= ;
: > ( x y -- fl ) - 0> ;
: ABS ( x -- |x| ) dup 0< if negate then ;
+! can just be:
: +! ( x a -- ) dup >r @ + R> ! ;
1+ CHAR+ 2+ and CELL+ are just
: 1+ ( x -- y ) 1 + ;
: CHAR+ ( ca1 -- ca2 ) 1+ ;
: 2+ ( x -- y ) 2 + ;
: CELL+ ( a1 -- a2 ) 2+ ;
Then 2! and 2@ are (high word first is ANS standard):
: 2! ( x y a -- ) dup >r ! r> 2+ ! ;
: 2@ ( a -- x y ) dup >r cell+ @ r> @ ;
You need 2* and 2/ which then lets you define CELLS with 2* for 16-bit and 2* 2* for 32-bit. CHARS is just a no-op if using ASCII or an 8bit code page:
: CHARS ( x -- y ) ;
* / */ MOD /MOD M* can all be defined from UM/MOD and UM* and D>S S>D from 0< NEGATE 0 and -1 (defined -1 as a constant if negatives are not yet in the INTERPRET routine) ~ definitions omitted here, but they are mostly sign manipulation with 0< and XOR
Whether hardware division is floored or symmetric ~ that is, whether UM/MOD is FM/MOD or SM/REM ~ the other can be derived from the result, and using the hardware integer divide then correcting is both more compact and quicker than writing a division routine in Forth.
?DUP is simply
: ?dup ( x -- 0 | x x ) dup if dup then ;
EVALUATE looks like it would be tricky, but it can be done with your interpret by adding a SOURCE-ID variable and testing it in your INTERPRET. Pushing the current TIB #TIB >IN and SOURCE-ID onto the return stack, store the evaluated string address and count, set SOURCE-ID to -1 >IN to 0 and call INTERPRET, then restore them.
: evaluate ( ca u -- )
#tib @ >r tib @ >in @ >r source-id @ >r
#tib ! tib ! 0 >in ! -1 source-id ! interpreter
r> source-id ! r> >in ! r> tib ! r> #tib ! ;
Extend INTERPRETER so that the test for whether to refill the buffer is:
...
#tib @ >in @ =
if
source-id @ if exit then
tib 50 accept #tib ! 0 >in !
then
...
And the cold start so that 0 is stored in source-id. Now the outer interpreter will loop endlessly for source-id of 0 but will return when called for a single input line by EVALUATE.
If you have a DO LOOP, LSHIFT and RSHIFT can be defined from 2* and 2/
MAX and MIN are:
: max ( x y -- x | y ) 2dup < if swap then drop ;
: min ( x y -- x | y ) 2dup < INVERT if swap then drop ;
UMAX and UMIN similarly, but they require U<.
x U< y ... same as < if both are the same sign
x U< y ... same as ... y 0< ... if they are different signs
: U< ( ux uy -- fl ) \ defined by using signed < and 0<
2dup 0< r> 0< XOR if swap drop 0< else < then ;
.... though the hardware primitive of subtracting them and setting the flag based on whether there was an overflow carry can easily be shorter, and if so, implement it as a primitive.