/*
 * ANS Forth core engine. This program implements a Forth interpreter,
 * along with native implementations of some of the ANS Forth standard
 * words. Along with a Forth implementation of other words, this
 * program provides a functional Forth implementation.
 *
 * This Forth system has the following characteristics:
 * -- it is written in standard C and should run on many machine types
 * (there are some conditions which are met by virtually all modern
 * desktop computers);
 * -- it implements a virtual machine, whose memory size and natural
 * data length are configurable;
 * -- it checks all memory accesses and never crashes, whatever the
 * Forth code does (instead of crashing, the program quits gracefully);
 * -- it is optimized not for speed but for conciseness and clarity.
 *
 * This file is in Public Domain.
 */

/*
 * This program works on machines with the following characteristics:
 * -- two's complement arithmetics which truncates data on overflow and
 * underflow;
 * -- no trap representation nor invalid encodings for arithmetic types;
 * -- 8-bit bytes (CHAR_BIT == 8);
 * -- at least one suitable integer type: its size must be a power of 2,
 * greater than or equal to 2 bytes.
 *
 * The source files are interpreted as ASCII (or anything which is
 * ASCII-compatible), regardless of the host charset; however, fgets()
 * is used to process input source files, hence the host charset must
 * not conflict too badly with ASCII, especially for end-of-line
 * indicators.
 */

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <limits.h>
#include <stdarg.h>

/*
 * Generic failure function. Whenever something goes wrong, we print
 * an error message on stderr, (syntax is similar to printf()) and then
 * we exit.
 */
static void
panic(char *fmt, ...)
{
	va_list ap;

	va_start(ap, fmt);
	fprintf(stderr, "panic: ");
	vfprintf(stderr, fmt, ap);
	fprintf(stderr, "\n");
	va_end(ap);
	exit(EXIT_FAILURE);
}

/*
 * Cell type definition. This is the basic Forth data type; a cell must
 * be wide enough to encode all Forth addresses. The cell type must have
 * a length which is a power of 2, greater than or equal to 2.
 *
 * The default type is "long", which will yield a 32-bit or a 64-bit Forth
 * on most machines.
 *
 * The Forth "address units" are C "unsigned char" (bytes) and a character
 * always uses exactly one address unit.
 */
typedef long cell;
typedef unsigned long ucell;
#define DCELL      "%ld"
#define UCELL      "%lu"
#define XCELL      "0x%08lX"
#define CELLMIN    LONG_MIN
#define CELLMAX    LONG_MAX
#define UCELLMAX   ULONG_MAX

/*
 * Helper macros.
 */
#define CELLSIZE    (sizeof(cell))
#define CELLS(x)    ((x) * CELLSIZE)

/*
 * The "dictionary" is the whole virtual machine memory area. Its
 * length is expressed in bytes. Within the Forth system, an address
 * is an offset in that area.
 *
 * The word headers, names and definitions go to the beginning of that
 * area. The data and return stack are located at the end. The input
 * buffer is dynamically allocated at the end, just below the two stacks.
 */

#define DICTIONARY_LEN      1048576  /* bytes */
#define DATA_STACK_LEN         1152  /* cells */
#define RETURN_STACK_LEN       1024  /* cells */
#define PAD_LEN                 256  /* bytes */
#define MIN_LINE_LEN            128  /* bytes */

/*
 * The dictionary is a static array. Modern Unix systems will allocate
 * pages when they are accessed, hence there is no problem in having
 * a large dictionary area. We use a "union" definition in order to
 * ensure cell alignment.
 */
static union {
	unsigned char b[DICTIONARY_LEN];
	cell c[DICTIONARY_LEN / CELLSIZE];
} dictionary;

/*
 * Stacks grow towards lower addresses. The stack pointers point to the
 * TOS (Top Of Stack).
 */
#define DATA_STACK_BEGIN     (DICTIONARY_LEN - DATA_STACK_LEN)
#define DATA_STACK_END       DICTIONARY_LEN
#define RETURN_STACK_BEGIN   (DATA_STACK_BEGIN - RETURN_STACK_LEN)
#define RETURN_STACK_END     DATA_STACK_BEGIN

#define RESERVED_BEGIN       RETURN_STACK_BEGIN

/*
 * The dictionary begins with some reserved data; those cells contain
 * the data which is needed for proper communication between the C world
 * and the Forth world.
 *
 * Memory layout:
 *   constant -1            1 cell   used for CREATEd words (-1 == OP_EXIT)
 *   BASE                   1 cell   contains the current parsing base
 *   HERE pointer           1 cell   contains the value of HERE
 *   current xt ptr         1 cell   points to the currently defined code  
 *   last word ptr          1 cell   points to the last defined word
 *   source ID              1 cell   contains the source ID
 *   source buffer ptr      1 cell   points to the source buffer
 *   source buffer length   1 cell   contains the source buffer length
 *   >IN                    1 cell   >IN value
 *   STATE                  1 cell   -1 when compiling, 0 otherwise.
 *   current wordlist       1 cell   pointer to current definition wordlist
 *   current search         1 cell   pointer to current search top wordlist
 */

/*
 * These macros contain the reserved data field addresses.
 */
#define FA_BASE            CELLSIZE
#define FA_HERE            (FA_BASE + CELLSIZE)
#define FA_CURRENT_XT      (FA_HERE + CELLSIZE)
#define FA_LAST_WORD       (FA_CURRENT_XT + CELLSIZE)
#define FA_SOURCE_ID       (FA_LAST_WORD + CELLSIZE)
#define FA_SOURCE_PTR      (FA_SOURCE_ID + CELLSIZE)
#define FA_SOURCE_LEN      (FA_SOURCE_PTR + CELLSIZE)
#define FA_TOIN            (FA_SOURCE_LEN + CELLSIZE)
#define FA_STATE           (FA_TOIN + CELLSIZE)
#define FA_CURRENT_LIST    (FA_STATE + CELLSIZE)
#define FA_CURRENT_SEARCH  (FA_CURRENT_LIST + CELLSIZE)

#define FA_EORA            (FA_CURRENT_SEARCH + CELLSIZE)

/*
 * Those macros implement data fetch and store, both for cells and
 * bytes. They are used when the address is known to be valid and
 * correctly aligned (e.g., for the reserved field addresses).
 */
#define FETCH(addr)           (*(cell *)(dictionary.b + (addr)))
#define STORE(addr, value)    (*(cell *)(dictionary.b + (addr)) = (value))
#define FETCH8(addr)          (*(dictionary.b + (addr)))
#define STORE8(addr, value)   (*(dictionary.b + (addr)) = (value))

/*
 * Fetch the cell value at the provided address. Panic on unaligned
 * or out-of-range address.
 */
static cell
fetch(ucell addr)
{
	if ((addr & (CELLSIZE - 1)) != 0)
		panic("unaligned read access: " XCELL, addr);
	if (addr > DICTIONARY_LEN - CELLSIZE)
		panic("read access (cell) to invalid address: " XCELL, addr);
	return FETCH(addr);
}

/*
 * Store the cell "value" at the provided address. Panic on unaligned
 * or out-of-range address.
 */
static void
store(ucell addr, cell value)
{
	if ((addr & (CELLSIZE - 1)) != 0)
		panic("unaligned write access: " XCELL, addr);
	if (addr > DICTIONARY_LEN - CELLSIZE)
		panic("write access (cell) to invalid address: " XCELL, addr);
	STORE(addr, value);
}

/*
 * Fetch the byte at the provided address. Panic on out-of-range address.
 */
static cell
fetch8(ucell addr)
{
	if (addr >= DICTIONARY_LEN)
		panic("read access (byte) to invalid address: " XCELL, addr);
	return FETCH8(addr);
}

/*
 * Store the byte "value" at the provided address. Panic on out-of-range
 * address. The value is suitably truncated.
 */
static void
store8(ucell addr, cell value)
{
	if (addr >= DICTIONARY_LEN)
		panic("write access (byte) to invalid address: " XCELL, addr);
	STORE8(addr, value);
}

/*
 * Data and return stack pointers are kept as native C pointers. They
 * are always cell-aligned.
 */
static cell *data_stack_ptr, *return_stack_ptr;

/*
 * Push a value on the data stack. Panic on stack overflow (stack pointer
 * out of range).
 */
static void
push(cell value)
{
	if ((unsigned char *)data_stack_ptr <= &dictionary.b[DATA_STACK_BEGIN])
		panic("data stack overflow (push)");

	*-- data_stack_ptr = value;
}

/*
 * Pop a value from the data stack. Panic on stack overflow (stack pointer
 * out of range) or unaligned stack pointer.
 */
static cell
pop(void)
{
	if ((unsigned char *)data_stack_ptr >= &dictionary.b[DATA_STACK_END])
		panic("data stack underflow (pop)");

	return *data_stack_ptr ++;
}

/*
 * Get a copy of a value in the data stack (element 0 is the TOS, element 1
 * is the one just below,...). Panic on stack underflow.
 */
static cell
look(ucell elt)
{
	if ((ucell)(&dictionary.c[DATA_STACK_END / CELLSIZE]
		- data_stack_ptr) <= elt)
		panic("data stack underflow (look)");

	return *(data_stack_ptr + elt);
}

/*
 * Push a value on the return stack. Panic on stack overflow.
 */
static void
push_ret(cell value)
{
	if ((unsigned char *)return_stack_ptr
		<= &dictionary.b[RETURN_STACK_BEGIN])
		panic("return stack overflow (push)");

	*-- return_stack_ptr = value;
}

/*
 * Pop a value from the return stack. Panic on stack overflow (stack pointer
 * out of range) or unaligned stack pointer.
 */
static cell
pop_ret(void)
{
	if ((unsigned char *)return_stack_ptr
		>= &dictionary.b[RETURN_STACK_END])
		panic("return stack underflow (pop)");

	return *return_stack_ptr ++;
}

/*
 * Get a copy of a value in the return stack (element 0 is the TOS, element 1
 * is the one just below,...). Panic on stack underflow.
 */
static cell
look_ret(ucell elt)
{
	if ((ucell)(&dictionary.c[RETURN_STACK_END / CELLSIZE]
		- return_stack_ptr) <= elt)
		panic("return stack underflow (look)");

	return *(return_stack_ptr + elt);
}

/*
 * Check that the provided range (address and length) fits completely
 * within the dictionary area. Panic if this is not true.
 */
static void
check_range(ucell addr, ucell len)
{
	if (len > DICTIONARY_LEN || addr > DICTIONARY_LEN
		|| (addr + len) > DICTIONARY_LEN)
		panic("invalid range: " XCELL "[" UCELL "]", addr, len);
}

/*
 * Get a native (C) pointer to the byte whose address is provided as
 * argument. This is used along with check_range(), in order to use
 * native C functions such as memcpy() with Forth addresses.
 */
static void *
native_address(ucell addr)
{
	return &dictionary.b[addr];
}

#ifdef DEBUG
/*
 * Write a Forth string (provided as address and length, with no
 * zero-byte terminator) to stderr.
 */
static void
logword(ucell addr, ucell len)
{
	fwrite(native_address(addr), 1, len, stderr);
}
#endif

/*
 * Data stack depth.
 */
static ucell
data_stack_depth()
{
	return &dictionary.c[DATA_STACK_END / CELLSIZE] - data_stack_ptr;
}

#if 0
/* currently unused */
/*
 * Return stack depth.
 */
static ucell
return_stack_depth()
{
	return &dictionary.c[RETURN_STACK_END / CELLSIZE] - return_stack_ptr;
}
#endif

/*
 * Native SWAP
 */
static void
swap(void)
{
	cell tmp;

	if (data_stack_depth() < 2)
		panic("data stack underflow (swap)");
	tmp = *data_stack_ptr;
	*data_stack_ptr = *(data_stack_ptr + 1);
	*(data_stack_ptr + 1) = tmp;
}

/*
 * Native ROT
 */
static void
rot(void)
{
	cell tmp;

	if (data_stack_depth() < 3)
		panic("data stack underflow (rot)");
	tmp = *(data_stack_ptr + 2);
	*(data_stack_ptr + 2) = *(data_stack_ptr + 1);
	*(data_stack_ptr + 1) = *data_stack_ptr;
	*data_stack_ptr = tmp;
}

/*
 * Native -ROT (not in ANS Forth; may be defined as such: : -ROT ROT ROT ; )
 */
static void
notrot(void)
{
	cell tmp;

	if (data_stack_depth() < 3)
		panic("data stack underflow (-rot)");
	tmp = *data_stack_ptr;
	*data_stack_ptr = *(data_stack_ptr + 1);
	*(data_stack_ptr + 1) = *(data_stack_ptr + 2);
	*(data_stack_ptr + 2) = tmp;
}

/*
 * Native DUP
 */
static void
dup(void)
{
	push(look(0));
}

/*
 * Native OVER
 */
static void
over(void)
{
	push(look(1));
}

/*
 * Native ROLL
 */
static void
roll(void)
{
	ucell cc = pop();
	cell tmp;

	if (cc == 0)
		return;
	tmp = look(cc);
	memmove(data_stack_ptr + 1, data_stack_ptr, CELLS(cc));
	*data_stack_ptr = tmp;
}

/*
 * Native PICK
 */
static void
pick(void)
{
	ucell cc = pop();

	push(look(cc));
}

/*
 * Native ALLOT
 * We check that the memory may indeed be allocated: when the dictionary
 * area grows, it must leave enough room for the PAD, which must not
 * overlap with the source buffer.
 */
static void
allot(cell num)
{
	ucell here;

	if (num == 0)
		return;
	here = FETCH(FA_HERE);
	if (here > DICTIONARY_LEN)
		panic("HERE corrupted");
	if (num > 0
		&& ((ucell)num + here + PAD_LEN) > (ucell)FETCH(FA_SOURCE_PTR))
		panic("dictionary is full");
	STORE(FA_HERE, here + num);
}

/*
 * Native ALIGN
 */
static void
align(void)
{
	ucell here = FETCH(FA_HERE);
	ucell ua = here % CELLSIZE;

	if (ua == 0)
		return;
	allot(CELLSIZE - ua);
}

/*
 * Compute an aligned address: this is the first cell-aligned address
 * greater than or equal to the provided address.
 */
static ucell
align_address(ucell addr)
{
	ucell ua = addr % CELLSIZE;

	if (ua == 0)
		return addr;
	return addr + CELLSIZE - ua;
}

/*
 * Words.
 *
 * A word is defined by a header in the dictionary area. The header
 * contains the following data:
 *
 *   name length    1 cell    name length in characters
 *   CFA-I          1 cell    interpretation semantics
 *   CFA-C          1 cell    compilation semantics
 *   LFA            1 cell    link field (for wordlists)
 *   name           x cells   name + padding (for alignment)
 *
 * CFA-I points to the interpretation code.
 * CFA-C points to the compilation code. If that field contains 0, then
 * the word has default compilation semantics. An immediate word is
 * a word where CFA-C contains the same value than CFA-I.
 * LFA is a pointer use to keep words linked in the wordlist.
 *
 * Following the header is the name itself.
 *
 * Older Forth (pre-ANS) had a completely specified word header format,
 * and application code relied on it. ANS Forth has thrown all this into
 * the "ambiguous condition" dark area, mostly so that native code generator
 * could be implemented. The word header we specify here is NOT the one used
 * in older Forth implementations. Traditionnal Forth systems use a unique
 * CFA and an "immediate" flag, but this leads to problems when a word such
 * as TO must be defined: it has both interpretation and non-default
 * compilation semantics. With only one CFA and one immediate flag, TO
 * can only be implemented as a "state-smart" immediate word (a word which
 * tests the value of STATE); such words may fail when ticked.
 */

/*
 * Those macros contain the word header field offsets within the header.
 * No code (neither C nor Forth) relies on the precise field order.
 */
#define WH_NAMELEN  0
#define WH_CFA_I    CELLS(1)
#define WH_CFA_C    CELLS(2)
#define WH_LFA      CELLS(3)
#define WH_NAME     CELLS(4)

/*
 * An execution token (xt) is a cell value which can reference:
 * -- a native word implementation
 * -- a non-native word implementation
 * -- an opcode
 *
 * Opcodes are special xts which cannot appear as such at application
 * level; they have meaning only to the interpreter.
 *
 * The xt for a non-native word implementation is simply the code address:
 * thet is the address of the first code cell. A non-native code is a
 * list of consecutive cells, each containing an xt. Therefore, all such
 * xts are even values.
 *
 * xt for native words and opcodes have odd values. For native words,
 * (xt >> 1) is the native word index in a table (defined below). Opcodes
 * use special values (when converted to "cell", opcodes assume negative
 * values).
 */

/*
 * Opcodes.
 *
 * OP_EXIT takes no argument. When encountered, the current word is exited.
 *
 * OP_JMP takes one argument, which is a displacement value. The new ip
 * value is the sum of the argument address and the argument value
 * (hence, if the argument is equal to CELLSIZE, this opcode is a nop).
 *
 * OP_IF takes one argument, with the same semantics than OP_JMP. However,
 * OP_IF pops one value from the data stack, and performs the jump if
 * and only if the popped value is 0. Otherwise, execution continues just
 * after the OP_IF argument cell.
 *
 * OP_LIT takes one argument, which is pushed on the data stack.
 *
 * OP_ALIT takes one argument and jumps like OP_JMP; moreover, it pushes
 * on the data stack the address of the cell immediately following its
 * argument cell. OP_ALIT is used for embedding static strings (such as
 * those obtained with S" ).
 *
 * OP_BODY takes one argument, which is an xt. It first pushes on the
 * data stack the address of the cell immediately following its argument
 * cell, and then _jumps_ to the xt-designated code address. Note that
 * this is not a word call (with ip pushed on the returned stack) but a
 * full branch. Newly CREATEd words begin with an OP_BODY and a argument
 * of 0, which points to the cell 0 where an opcode OP_EXIT is stored.
 * DOES> updates the OP_BODY argument with a new xt. OP_BODY is never
 * encountered except as the very first opcode of CREATEd words.
 *
 * OP_EXECUTE takes no argument. It pops one xt from the data stack,
 * and executes the corresponding word.
 */

#define OP_EXIT     -1   /* exit from the current word */
#define OP_JMP      -3   /* unconditionnal branch */
#define OP_IF       -5   /* conditionnal branch (consumes a flag from stack) */
#define OP_LIT      -7   /* embedded literal value */
#define OP_ALIT     -9   /* combined embedded value and branch */
#define OP_BODY     -11  /* initiator for CREATEd words */
#define OP_EXECUTE  -13  /* used for EXECUTE */

/*
 * Defined further (native word invocation).
 */
static void native_exec(ucell cfa);

/*
 * Execute the provided xt, which may be either native or not (but it
 * must not be an opcode).
 */
static void
interp(ucell ip)
{
	cell *saved_rp;

	if (ip & 1) {
		native_exec(ip >> 1);
		return;
	}
	saved_rp = return_stack_ptr;
	for (;;) {
		ucell xt = fetch(ip);

		ip += CELLSIZE;
		if (xt & 1) {
			cell op = (cell)xt;

			if (op >= 0) {
				native_exec(xt >> 1);
			} else switch (op) {
				ucell arg;

			case OP_EXIT:
				if (return_stack_ptr >= saved_rp)
					return;
				ip = pop_ret();
				break;
			case OP_JMP:
				arg = fetch(ip);
				ip += arg;
				break;
			case OP_IF:
				arg = fetch(ip);
				if (pop())
					ip += CELLSIZE;
				else
					ip += arg;
				break;
			case OP_LIT:
				arg = fetch(ip);
				push(arg);
				ip += CELLSIZE;
				break;
			case OP_ALIT:
				push(ip + CELLSIZE);
				arg = fetch(ip);
				ip += arg;
				break;
			case OP_BODY:
				push(ip + CELLSIZE);
				arg = fetch(ip);
				ip = arg;
				break;
			case OP_EXECUTE:
				arg = pop();
				if (arg & 1) {
					native_exec(arg >> 1);
				} else {
					push_ret(ip);
					ip = arg;
				}
				break;
			default:
				panic("invalid opcode: " XCELL, xt);
			}
		} else {
			push_ret(ip);
			ip = xt;
		}
	}
}

/*
 * Create a word header, with the provided name. Return value is the
 * newly created header address. The word is not yet linked in the
 * current wordlist.
 *
 * CFA-I is initialized to 0. Since the cell at address 0 contains the
 * constant OP_EXIT value, the default word interpretation semantics
 * are to do nothing at all.
 *
 * CFA-C is initialized to 0. This means default compilation semantics.
 */
static ucell
create_header(unsigned char *name, size_t name_len)
{
	ucell addr, len;
	unsigned char tmp[WH_NAME], *buf;
	size_t tmp_len = sizeof tmp;

	/*
	 * We arbitrarily limit the name length (to an utterly absurd
	 * value) in order to avoid some overflow problems.
	 */
	if (name_len > (CELLMAX / 4))
		panic("word name too long");

	/*
	 * We store the name beginning in a temporary buffer in order
	 * to handle word creation when the header itself overwrites
	 * the name buffer. I'm not sure such a situation is unambiguously
	 * allowed by ANS Forth anyway.
	 */
	if (tmp_len > name_len)
		tmp_len = name_len;
	memcpy(tmp, name, tmp_len);

	align();
	addr = FETCH(FA_HERE);
	len = WH_NAME + align_address(name_len);
	allot(len);
	STORE(addr + WH_NAMELEN, name_len);
	STORE(addr + WH_CFA_I, 0);
	STORE(addr + WH_CFA_C, 0);
	STORE(addr + WH_LFA, 0);

	buf = native_address(addr + WH_NAME);
	if (name_len > sizeof tmp)
		memmove(buf + sizeof tmp, name + sizeof tmp,
			name_len - sizeof tmp);
	memcpy(buf, tmp, tmp_len);

#ifdef DEBUG
	fprintf(stderr, "created header for {");
	fwrite(name, 1, name_len, stderr);
	fprintf(stderr, "} at " XCELL "\n", addr);
#endif
	return addr;
}

/*
 * Wordlists.
 *
 * A wordlist is just a structure indexing a collection of words. It
 * acts as a hash table; the WL_BUCKET_NUM macro contains the number
 * of "buckets" in the table. All words in the same bucket are organized
 * as a linked list through their respective LFA fields (the address 0
 * is the list terminator).
 *
 * Wordlists contain also a pointer to the next list in the current
 * search order.
 *
 * Wordlist memory layout:
 *    pointer to next wordlist    1 cell
 *    pointer to words            WL_BUCKET_NUM cells
 */

#define WL_BUCKET_NUM     32   /* should be a power of 2 */

/*
 * Get the canonical value for a character; this converts lowercase ASCII
 * characters to uppercase.
 */
static unsigned
ccanon(unsigned c)
{
	if (c >= 97 && c <= 122)
		return c - 32;
	else
		return c;
}

/*
 * Compare two strings (ASCII case-insensitive). Return value is 1 if
 * both strings are equal, 0 otherwise.
 */
static int
eqname(unsigned char *b1, unsigned char *b2, size_t len)
{
	while ((len --) > 0) {
		unsigned c1 = ccanon(*b1 ++), c2 = ccanon(*b2 ++);

		if (c1 != c2)
			return 0;
	}
	return 1;
}

/*
 * Hash a string into an 11-bit value. This function is ASCII
 * case-insensitive. The hash function is remotely inspired from
 * the standard ELF hash function.
 */
static unsigned
hash_string(unsigned char *b, size_t len)
{
	unsigned h = 0;

	while ((len --) > 0) {
		unsigned g;
		unsigned c = ccanon(*b ++);

		h = (h << 5) + (0xFF & ~c);
		g = h & 0xF800U;
		h ^= g >> 11;
		h &= ~g;
	}
	return h;
}

/*
 * Find a word in a wordlist. Returned value is either the word header
 * address, or 0 if the word was not found. The "bucket" value should
 * be computed from the hashed name.
 *
 * Note: Malevolent tinkering with the dictionary area could fool this
 * function into an infinite loop.
 */
static ucell
lookup_word(ucell wordlist, unsigned char *name, size_t name_len,
	unsigned bucket)
{
	ucell ptr;

	ptr = fetch(wordlist + CELLS(1 + bucket));
	while (ptr != 0) {
		ucell len = fetch(ptr + WH_NAMELEN);

		if (len == name_len) {
			ucell nfa = ptr + WH_NAME;

			check_range(nfa, len);
			if (eqname(name, native_address(nfa), len))
				return ptr;
		}
		ptr = fetch(ptr + WH_LFA);
	}
	return 0;
}

/*
 * Find a word by name, trying all wordlists in the current search
 * order. Return value is the word header address, or 0 if the word
 * is not found.
 */
static ucell
find_word(ucell name, ucell name_len)
{
	ucell wl = FETCH(FA_CURRENT_SEARCH);
	unsigned char *rname;
	unsigned bucket;

	check_range(name, name_len);
	rname = native_address(name);
	bucket = hash_string(rname, name_len) % WL_BUCKET_NUM;

	while (wl != 0) {
		ucell wa = lookup_word(wl, rname, name_len, bucket);

		if (wa != 0)
			return wa;
		wl = fetch(wl);
	}
	return 0;
}

/*
 * Link the provided word into a wordlist.
 */
static void
link_word(ucell wordlist, ucell wa)
{
	ucell nfa, len, tmp;
	unsigned u;

	len = fetch(wa + WH_NAMELEN);
	nfa = wa + WH_NAME;
	check_range(nfa, len);
	u = hash_string(native_address(nfa), len) % WL_BUCKET_NUM;
	tmp = wordlist + CELLS(1 + u);
	store(wa + WH_LFA, fetch(tmp));
	store(tmp, wa);

#ifdef DEBUG
	fprintf(stderr, "linking " XCELL ": {", wa);
	logword(nfa, len);
	fprintf(stderr, "}\n");
#endif
}

/*
 * String to integer conversion. The characters pointed to by "str",
 * and up to "*len" characters, are processed according to the provided
 * base: for each character which is valid (relatively to the base),
 * "orig" is multiplied by "base" and the character value is added to
 * "orig".
 *
 * Processing stops at the end of the string or at the first invalid
 * character. The function returns the value of "orig" finally reached.
 * When the function returns, "*len" contains the number of unprocessed
 * characters.
 *
 * "base" must have a value between 2 and 36 inclusive (this is not
 * verified by this function). ASCII letters A to Z (or a to z -- this
 * function is not case-sensitive) correspond to values 10 to 35.
 */
static ucell
to_number(ucell orig, unsigned char *str, size_t *len, int base)
{
	while (*len > 0) {
		int c = *str ++;

		if (c >= 48 && c <= 57)
			c -= 48;
		else if (c >= 65 && c <= 90)
			c -= 55;
		else if (c >= 97 && c <= 122)
			c -= 87;
		else
			break;
		if (c >= base)
			break;
		(*len) --;
		orig = orig * (ucell)base + (ucell)c;
	}
	return orig;
}

/*
 * "Compile" a cell: a new cell is allocated and the provided value is
 * stored in that cell. The cell address is returned.
 */
static ucell
compile(cell value)
{
	ucell addr = FETCH(FA_HERE);

	allot(CELLSIZE);
	store(addr, value);
	return addr;
}

/* ======================================================================= */
/*
 * I/O code.
 *
 * For each open file accessible from the Forth code, we keep a C "FILE *"
 * pointer. A file identifier is just an index in that table. We allow
 * for a maximum of 256 simultaneously open files (but the underlying OS
 * may restrict us further).
 */

#define OPENFILE_MAX   256

/*
 * This array represents the open files; free array entries contain NULL.
 */
static FILE *file[OPENFILE_MAX];

/*
 * Initialize the file[] array.
 */
static void
file_init(void)
{
	int i;

	for (i = 0; i < OPENFILE_MAX; i ++)
		file[i] = NULL;
}

/*
 * Open a file. Return value is -1 if the file cannot be opened.
 * Note: in standard C, we cannot open a file for just writing to
 * it (_not_ appending) without truncating it. For such a combination,
 * we open the file for both reading and writing.
 */

#define FF_RO     0x0001U          /* read-only access */
#define FF_WO     0x0002U          /* write-only access */
#define FF_RW     (FF_RO | FF_WO)  /* read-write access */
#define FF_BIN    0x0004U  /* binary access (useless in most Unix systems) */
#define FF_CTR    0x0008U  /* create if absent, truncate if present */

static int
file_open(unsigned char *name, size_t name_len, unsigned flags)
{
	char tmp[FILENAME_MAX];
	int i;
	char am[10] = "";
	int mw, mr;

	/*
	 * We must copy the file name to a temporary buffer because
	 * the C library requires a terminating 0.
	 */
	if (name_len > (sizeof tmp) - 1)
		return -1;
	memcpy(tmp, name, name_len);
	tmp[name_len] = 0;

	/*
	 * We reject attempts at opening file names with embedded 0.
	 */
	if (strlen(tmp) != name_len)
		return -1;

	/*
	 * We find the first free file[] array entry.
	 */
	for (i = 0; i < OPENFILE_MAX; i ++)
		if (file[i] == NULL)
			break;
	if (i == OPENFILE_MAX)
		return -1;

	mr = (flags & FF_RO) != 0;
	mw = (flags & FF_WO) != 0;
	if (flags & FF_CTR) {
		if (mr && mw)
			strcpy(am, "w+");
		else if (mw)
			strcpy(am, "w");
		else if (mr)
			strcpy(am, "r");
		else
			return -1;
	} else {
		strcpy(am, mw ? "r+" : "r");
	}
	if (flags & FF_BIN)
		strcat(am, "b");
	file[i] = fopen(tmp, am);
	if (file[i] == NULL)
		return -1;
	else
		return i;
}

/*
 * Close a file. Return value is 0 on success, -1 on error. This
 * function handles correctly (i.e., by returning an error, not by
 * crashing) spurious closes on unopened files.
 */
static int
file_close(int i)
{
	int r;

	if (i < 0 || i >= OPENFILE_MAX)
		return -1;
	if (file[i] == NULL)
		return -1;
	r = fclose(file[i]);
	file[i] = NULL;
	return r == 0 ? 0 : -1;
}

/*
 * Read a line. The buffer provided should have length at least "len + 1".
 *
 * Return value:
 *   -3       invalid file ID
 *   -2       I/O error
 *   -1       EOF reached, nothing read
 *    n >= 0  line length, not counting the terminator
 *
 * If the returned value is equal to "len", then the terminator was not
 * reached.
 */
static long
file_readline(int f, unsigned char *buf, size_t len)
{
	size_t n;

	if (f < 0 || f >= OPENFILE_MAX || file[f] == NULL)
		return -3;
	if (fgets(buf, len + 1, file[f]) == NULL)
		return ferror(file[f]) ? -2 : -1;
	n = strlen(buf);
	if (n > 0 && buf[n - 1] == '\n')
		buf[-- n] = 0;
	return (long)n;
}

/* ======================================================================= */
/*
 * Native words. Each word is implemented as a C function which takes
 * no argument and returns nothing.
 *
 * Some of those words could easily have been implemented in pure Forth
 * just by using the other native words, but were coded as native words
 * for optimization or aesthetic reasons.
 */

/* @ */
static void
native_fetch(void)
{
	push(fetch(pop()));
}

/* C@ */
static void
native_fetch8(void)
{
	push(fetch8(pop()));
}

/* ! */
static void
native_store(void)
{
	ucell addr = pop();

	store(addr, pop());
}

/* C! */
static void
native_store8(void)
{
	ucell addr = pop();

	store8(addr, pop());
}

/* >R */
static void
native_toR(void)
{
	push_ret(pop());
}

/* R> */
static void
native_Rfrom(void)
{
	push(pop_ret());
}

/* R@ */
static void
native_Rfetch(void)
{
	push(look_ret(0));
}

/* + */
static void
native_plus(void)
{
	cell b = pop();
	cell a = pop();

	push(a + b);
}

/* - */
static void
native_minus(void)
{
	cell b = pop();
	cell a = pop();

	push(a - b);
}

/* * */
static void
native_star(void)
{
	ucell b = pop();
	ucell a = pop();

	push(a * b);
}

/* /MOD */
static void
native_slashmod(void)
{
	cell d = pop();
	cell m = pop();

	if (d == 0)
		panic("division by 0");
#if CELLMAX == -(1 + CELLMIN)
	/*
	 * When two's complement is used with no trap representation, then
	 * a signed division of CELLMIN by -1 yields an integer overflow
	 * (-CELLMIN is strictly greater than CELLMAX). Some processors
	 * (e.g., i386+) react to such an occurrence with an exception,
	 * just like they would do for a division by 0. Hence this test,
	 * which avoids an ugly crash.
	 */
	if (m == CELLMIN && d == -1)
		panic("overflow in division by -1");
#endif
	push(m % d);
	push(m / d);
}

/* U/MOD */
/*
 * This word is not part of ANS Forth. Stack diagram:
 *   u1 u2 -- u3 u4
 * where u4 is the quotient of u1 by u2 and u3 is the remainder.
 */
static void
native_uslashmod(void)
{
	ucell d = pop();
	ucell m = pop();

	if (d == 0)
		panic("division by 0");
	push(m % d);
	push(m / d);
}

/* 0< */
static void
native_zeroless(void)
{
	cell v = pop();

	push(v < 0 ? (ucell)-1 : (ucell)0);
}

/* 0= */
static void
native_zeroequals(void)
{
	cell v = pop();

	push(v == 0 ? (ucell)-1 : (ucell)0);
}

/* 2/ */
static void
native_twoslash(void)
{
	cell v = pop();

	/*
	 * On most architectures, right-shifting by 1 bit would work,
	 * because duplicating the sign bit is what they do internally
	 * and also what is most useful. But the C standard does not
	 * mandate it, hence this test and manual bit duplication.
	 */
	if (v < 0) {
		ucell u = v;

		push((u >> 1) | ((ucell)-1 - ((ucell)-1 >> 1)));
	} else {
		push((ucell)v >> 1);
	}
}

/* < */
static void
native_lessthan(void)
{
	cell v2 = pop();
	cell v1 = pop();

	push(v1 < v2 ? (ucell)-1 : 0);
}

/* U< */
static void
native_ulessthan(void)
{
	ucell u2 = pop();
	ucell u1 = pop();

	push(u1 < u2 ? (ucell)-1 : 0);
}

/* = */
static void
native_equals(void)
{
	cell v2 = pop();
	cell v1 = pop();

	push(v1 == v2 ? (ucell)-1 : 0);
}

/* ALLOT */
static void
native_allot(void)
{
	allot(pop());
}

/* AND */
static void
native_and(void)
{
	ucell u2 = pop();
	ucell u1 = pop();

	push(u1 & u2);
}

/* OR */
static void
native_or(void)
{
	ucell u2 = pop();
	ucell u1 = pop();

	push(u1 | u2);
}

/* XOR */
static void
native_xor(void)
{
	ucell u2 = pop();
	ucell u1 = pop();

	push(u1 ^ u2);
}

/* INVERT */
static void
native_invert(void)
{
	push(~(ucell)pop());
}

/* CELL+ */
static void
native_cellplus(void)
{
	push(pop() + CELLSIZE);
}

/* CELLS */
static void
native_cells(void)
{
	push(pop() * CELLSIZE);
}

/* FILL */
static void
native_fill(void)
{
	cell v = (ucell)pop() & 0xFF;
	ucell cc = pop();
	ucell addr = pop();

	if (cc == 0)
		return;
	check_range(addr, cc);
	memset(native_address(addr), v, cc);
}

/* MOVE */
static void
native_move(void)
{
	ucell cc = pop();
	ucell dst = pop();
	ucell src = pop();

	if (cc == 0)
		return;
	check_range(src, cc);
	check_range(dst, cc);
	memmove(native_address(dst), native_address(src), cc);
}

/* LSHIFT */
static void
native_lshift(void)
{
	ucell cc = pop();

	push((ucell)pop() << cc);
}

/* RSHIFT */
static void
native_rshift(void)
{
	ucell cc = pop();

	push((ucell)pop() >> cc);
}

/* DROP */
static void
native_drop(void)
{
	(void)pop();
}

/* PARSE */
static void
native_parse(void)
{
	unsigned c = pop() & 0xFF;
	ucell source = FETCH(FA_SOURCE_PTR);
	ucell len = FETCH(FA_SOURCE_LEN);
	ucell ptr = FETCH(FA_TOIN);
	ucell addr;

	check_range(source, len);
	addr = source + ptr;
	while (ptr < len) {
		unsigned d = *(unsigned char *)native_address(source + ptr);

		/*
		 * If the delimiter is 0x20 (SPC), we consider all "control"
		 * characters to be delimiters. This shall match tabulations,
		 * carriage returns and so on. ANS Forth allows this, and
		 * this is the most useful behaviour.
		 */
		if (c == 0x20 && d <= 0x20)
			break;
		if (c == d)
			break;
		ptr ++;
	}
	push(addr);
	push(source + ptr - addr);
	if (ptr < len)
		ptr ++;
	STORE(FA_TOIN, ptr);
}

/* PARSE-WORD */
/*
 * PARSE-WORD is not part of the ANS Forth stricto sensu, but it is cited
 * there, and also common and useful. PARSE-WORD skips leading blanks,
 * then parses a blank-delimited word and returns it as "c-addr u".
 * If the word is not found, the returned length is 0 and the returned
 * address has no meaning (this implementation returns 0 as address).
 */
static void
native_parseword(void)
{
	ucell source = FETCH(FA_SOURCE_PTR);
	ucell len = FETCH(FA_SOURCE_LEN);
	ucell ptr = FETCH(FA_TOIN);

	check_range(source, len);
	while (ptr < len) {
		unsigned d = *(unsigned char *)native_address(source + ptr);

		if (d > 32)
			break;
		ptr ++;
	}
	if (ptr > len)
		ptr = len;
	STORE(FA_TOIN, ptr);
	if (ptr >= len) {
		push(0);
		push(0);
	} else {
		push(0x20);
		native_parse();
	}
}

/* FIND-WORD */
/*
 * FIND-WORD is equivalent to FIND except that it uses a "c-addr u"
 * string format. Stack diagram: ( c-addr u -- c-addr u 0 | xt 1 | xt -1 )
 * This word is not part of ANS Forth, but it is defined in Gforth.
 */
static void
native_findword(void)
{
	ucell len = pop();
	ucell addr = pop();
	ucell wa;

	wa = find_word(addr, len);
	if (wa != 0) {
		ucell xti = fetch(wa + WH_CFA_I);
		ucell xtc = fetch(wa + WH_CFA_C);

		push(xti);
		push(xtc == 0 ? -1 : 1);
	} else {
		push(addr);
		push(len);
		push(0);
	}
}

/* FIND-WORD-HEADER */
/*
 * This word is not part of ANS Forth. Its stack diagram is the following:
 *   c-addr u -- 0 | addr -1
 * It returns 0 if the word is not found; otherwise, it returns the word
 * address header, followed by -1.
 */
static void
native_findheader(void)
{
	ucell len = pop();
	ucell addr = pop();
	ucell wa;

	wa = find_word(addr, len);
	if (wa != 0) {
		push(wa);
		push(-1);
	} else {
		push(0);
	}
}

/* REFILL */
static void
native_refill(void)
{
	/*
	 * The source buffer is located at the end of the dictionary
	 * space, just below the two stacks. When we read a line, we
	 * use as much buffer as we need, but we may have to give up
	 * if the line is too long. The buffer size is repreatedly
	 * doubled, until end-of-line is reached, or an error happens.
	 */
	int f = FETCH(FA_SOURCE_ID);
	int cr = 0;
	ucell lim = FETCH(FA_HERE) + PAD_LEN;
	size_t len = MIN_LINE_LEN + 1, old_len = 0;

	/*
	 * If the current source is a string (negative source ID), we
	 * just bail out immediately, as mandated by ANS Forth.
	 */
	if (f < 0) {
		push(0);
		return;
	}

	/*
	 * "len" contains the current buffer (total) length.
	 * "old_len" contains the previous buffer total length.
	 */
	for (;;) {
		long rl;
		unsigned char *buf;
		size_t tlen, plen;

		if (len > RESERVED_BEGIN || lim >= (RESERVED_BEGIN - len))
			panic("not enough room for REFILL");
		buf = native_address(RESERVED_BEGIN - len);
		if (old_len > 0) {
			unsigned char *old_buf;

			old_buf = native_address(RESERVED_BEGIN - old_len);
			memmove(buf, old_buf, old_len);
			plen = old_len - 1;
			buf += plen;
		} else {
			plen = 0;
		}
		tlen = len - plen;
		rl = file_readline(f, buf, tlen - 1);
		if (rl < -1)
			panic("I/O error (%ld) while reading input stream", rl);
		if (rl == -1)
			rl = 0;
		else
			cr = 1;
		if ((size_t)rl + 1 == tlen) {
			old_len = len;
			len = 2 * len;
			continue;
		}
		if (cr) {
			STORE(FA_SOURCE_PTR, RESERVED_BEGIN - len);
			STORE(FA_SOURCE_LEN, (size_t)rl + plen);
			STORE(FA_TOIN, 0);
			push(-1);
		} else {
			push(0);
		}
		return;
	}
}

/* CREATE */
static void
native_create(void)
{
	ucell addr, len, wa, cfa;

	native_parseword();
	len = pop();
	if (len == 0)
		panic("CREATE used with a zero-length name");
	addr = pop();
	check_range(addr, len);
	wa = create_header(native_address(addr), len);
	cfa = compile(OP_BODY);
	store(wa + WH_CFA_I, cfa);
	compile(0);
	link_word(FETCH(FA_CURRENT_LIST), wa);
	STORE(FA_LAST_WORD, wa);
}

/* , */
static void
native_comma(void)
{
	compile(pop());
}

/* : */
static void
native_colon(void)
{
	ucell addr, len, wa, xt;

	native_parseword();
	len = pop();
	if (len == 0)
		panic(": used with a zero-length name");
	addr = pop();
	check_range(addr, len);

#ifdef DEBUG
	fprintf(stderr, "defining new word: {");
	logword(addr, len);
	fprintf(stderr, "}\n");
#endif

	wa = create_header(native_address(addr), len);
	xt = FETCH(FA_HERE);
	STORE(wa + WH_CFA_I, xt);

	/*
	 * Reference this word xt as the current word (for RECURSE).
	 * Note that this is the xt, not the word header address.
	 */
	STORE(FA_CURRENT_XT, xt);

	/*
	 * Go to compilation state.
	 */
	STORE(FA_STATE, -1);

	/*
	 * Push the word address on the data stack. This is used by ; when
	 * ending the word definition.
	 */
	push(wa);

#ifdef DEBUG
	fprintf(stderr, "leaving colon-sys, depth = " UCELL "\n",
		data_stack_depth());
#endif
}

/* ;NO-TERMINATOR */
/*
 * This word is not part of ANS Forth. It acts like a plain ; except that
 * it does not compile an OP_EXIT opcode into the current definition.
 * This is used when you define a word with externally supplied
 * interpretation or compilation semantics.
 */
static void
native_semicolonNT(void)
{
	ucell cs = pop();
	if (cs == 0) {
		push(FETCH(FA_CURRENT_XT));
	} else {
		link_word(FETCH(FA_CURRENT_LIST), cs);
		STORE(FA_LAST_WORD, cs);
	}
	STORE(FA_STATE, 0);
}

/* ; */
static void
native_semicolon(void)
{
#ifdef DEBUG
	fprintf(stderr, "end of word definition, depth = " UCELL "\n",
		data_stack_depth());
#endif
	compile(OP_EXIT);
	native_semicolonNT();
}

/* ( */
static void
native_lpar(void)
{
	for (;;) {
		ucell addr, len, tin;

		addr = FETCH(FA_SOURCE_PTR);
		len = FETCH(FA_SOURCE_LEN);
		tin = FETCH(FA_TOIN);
		check_range(addr, len);
		if (tin < len) {
			size_t u;
			unsigned char *buf = native_address(addr);

			for (u = tin; u < len; u ++) {
				if (buf[u] == ')') {
					STORE(FA_TOIN, u + 1);
					return;
				}
			}
		}
		native_refill();
		if (pop() == 0)
			return;
	}
}

/* ABORT */
/*
 * FIXME: ABORT must not actually exit the system, but simply flush the
 * stacks and get back to interpreter mode.
 */
static void
native_abort(void)
{
	panic("ABORT");
}

/* EMIT */
static void
native_emit(void)
{
	ucell c = pop();

	putchar(c & 0xFF);
}

/* TYPE */
static void
native_type(void)
{
	ucell len = pop();
	ucell addr = pop();

	check_range(addr, len);
	fwrite(native_address(addr), 1, len, stdout);
}

/* >NUMBER */
static void
native_tonumber(void)
{
	ucell len = pop();
	ucell addr = pop();
	ucell orig = pop();
	cell base = FETCH(FA_BASE);
	size_t nlen;

	if (base < 2 || base > 36)
		panic("invalid base: " DCELL, base);
	check_range(addr, len);
	nlen = len;
	orig = to_number(orig, native_address(addr), &nlen, base);
	push(orig);
	push(addr + (len - nlen));
	push(nlen);
}

#if 0
/* obsolete */
/* . */
static void
native_dot(void)
{
	printf(" " DCELL, pop());
}

/* U. */
static void
native_udot(void)
{
	printf(" " UCELL, (ucell)pop());
}
#endif

/* DEPTH */
static void
native_depth(void)
{
	push(data_stack_depth());
}

/* J */
static void
native_J(void)
{
	push(look_ret(2));
}

/* #! */
/*
 * This word is not part of ANS Forth. It acts like \ as a comment
 * introducer; it is used for writing Forth source code as a Unix script.
 */
static void
native_sharpbang(void)
{
	STORE(FA_TOIN, FETCH(FA_SOURCE_LEN));
}

/*
 * This table holds pointers to the native word definitions. For each
 * word, we have the word name (as a C string) and the word interpretation
 * semantics (a function pointer). If the name is NULL, then the provided
 * function is actually the (special) compilation semantics for the
 * previous word.
 *
 * The name is only used at startup, for building the native word table.
 */

static struct {
	char *name;
	void (*fun)(void);
} nwords[] = {
	{ "@",                    native_fetch           },
	{ "C@",                   native_fetch8          },
	{ "!",                    native_store           },
	{ "C!",                   native_store8          },
	{ ">R",                   native_toR             },
	{ "R>",                   native_Rfrom           },
	{ "R@",                   native_Rfetch          },
	{ "SWAP",                 swap                   },
	{ "DROP",                 native_drop            },
	{ "DUP",                  dup                    },
	{ "ROT",                  rot                    },
	{ "-ROT",                 notrot                 },
	{ "OVER",                 over                   },
	{ "ROLL",                 roll                   },
	{ "PICK",                 pick                   },
	{ "+",                    native_plus            },
	{ "-",                    native_minus           },
	{ "*",                    native_star            },
	{ "/MOD",                 native_slashmod        },
	{ "U/MOD",                native_uslashmod       },
	{ "0<",                   native_zeroless        },
	{ "0=",                   native_zeroequals      },
	{ "2/",                   native_twoslash        },
	{ "<",                    native_lessthan        },
	{ "=",                    native_equals          },
	{ "U<",                   native_ulessthan       },
	{ "ALIGN",                align                  },
	{ "ALLOT",                native_allot           },
	{ "AND",                  native_and             },
	{ "OR",                   native_or              },
	{ "XOR",                  native_xor             },
	{ "INVERT",               native_invert          },
	{ "CELL+",                native_cellplus        },
	{ "CELLS",                native_cells           },
	{ "FILL",                 native_fill            },
	{ "MOVE",                 native_move            },
	{ "LSHIFT",               native_lshift          },
	{ "RSHIFT",               native_rshift          },
	{ "PARSE",                native_parse           },
	{ "PARSE-WORD",           native_parseword       },
	{ "FIND-WORD",            native_findword        },
	{ "FIND-WORD-HEADER",     native_findheader      },
	{ "REFILL",               native_refill          },
	{ "CREATE",               native_create          },
	{ ":",                    native_colon           },
	{ ";",                    native_semicolon       },
	{ 0,                      native_semicolon       },
	{ ";NO-TERMINATOR",       native_semicolonNT     },
	{ 0,                      native_semicolonNT     },
	{ "(",                    native_lpar            },
	{ 0,                      native_lpar            },
	{ ",",                    native_comma           },
	{ "ABORT",                native_abort           },
	{ "EMIT",                 native_emit            },
	{ "TYPE",                 native_type            },
	{ ">NUMBER",              native_tonumber        },
#if 0
/* obsolete */
	{ ".",                    native_dot             },
	{ "U.",                   native_udot            },
#endif
	{ "DEPTH",                native_depth           },
	{ "J",                    native_J               },
	{ "#!",                   native_sharpbang       },
};

static void
native_exec(ucell cfa)
{
	if (cfa > (sizeof nwords) / (sizeof nwords[0]))
		panic("illegal native word call: " UCELL, cfa);
	nwords[cfa].fun();
}

/*
 * Pre-defined constants. Those constants are accessible as Forth words
 * and are used to communicate some important data to the Forth-implemented
 * system words (e.g., reserved memory layout).
 */
static struct {
	char *name;
	ucell value;
} nconsts[] = {
	{ "BASE",               FA_BASE               },
	{ "HERE-PTR",           FA_HERE               },
	{ "CURRENT-XT",         FA_CURRENT_XT         },
	{ "LAST-WORD",          FA_LAST_WORD          },
	{ "SOURCE-PTR",         FA_SOURCE_PTR         },
	{ "SOURCE-LEN-PTR",     FA_SOURCE_LEN         },
	{ "SOURCE-ID-PTR",      FA_SOURCE_ID          },
	{ ">IN",                FA_TOIN               },
	{ "STATE",              FA_STATE              },
	{ "CURRENT-LIST-PTR",   FA_CURRENT_LIST       },
	{ "CURRENT-SEARCH-PTR", FA_CURRENT_SEARCH     },

	{ "DATA-STACK-BASE",    DATA_STACK_END        },
	{ "RETURN-STACK-BASE",  RETURN_STACK_END      },
	{ "CELLSIZE",           CELLSIZE              },
	{ "WORDLIST-BUCKET#",   WL_BUCKET_NUM         },

	{ "WH-NAMELEN",         WH_NAMELEN            },
	{ "WH-CFA-I",           WH_CFA_I              },
	{ "WH-CFA-C",           WH_CFA_C              },
	{ "WH-LFA",             WH_LFA                },
	{ "WH-NAME",            WH_NAME               },

	{ "OPCODE-EXIT",        OP_EXIT               },
	{ "OPCODE-JMP",         OP_JMP                },
	{ "OPCODE-IF",          OP_IF                 },
	{ "OPCODE-LIT",         OP_LIT                },
	{ "OPCODE-ALIT",        OP_ALIT               },
	{ "OPCODE-BODY",        OP_BODY               },
	{ "OPCODE-EXECUTE",     OP_EXECUTE            },
};

/*
 * Create a constant. We define constants as words whose code consists in
 * only an OP_LIT opcode followed by OP_EXIT.
 */
static void
create_constant(ucell wl, char *name, ucell value)
{
	ucell wa = create_header(name, strlen(name));

	STORE(wa + WH_CFA_I, FETCH(FA_HERE));
	compile(OP_LIT);
	compile(value);
	compile(OP_EXIT);
	link_word(wl, wa);
	/*
	 * Note: we do not register those constants in the
	 * "current word" and "last word" fields.
	 */
}

/*
 * Toplevel interpreter/compiler.
 */
static void
toplevel(void)
{
#ifdef DEBUG
	fprintf(stderr, "entering toplevel, depth = " UCELL "\n",
		data_stack_depth());
#endif
	for (;;) {
		ucell wa;
		ucell addr, len;

		for (;;) {
			native_parseword();
			if (look(0) != 0)
				break;

			/*
			 * No word found: we need to refill the buffer.
			 * If the refill failed, then we are at the end
			 * of that input source, and we exit.
			 */
			pop();
			pop();
			native_refill();
			if (!pop())
				return;
		}

		len = pop();
		addr = pop();

		/*
		 * Now we look for the word.
		 */
		wa = find_word(addr, len);

		if (wa == 0) {
			/*
			 * The word was not found. We try to use the
			 * word as a number.
			 */
			int fc;

			fc = fetch8(addr);
			if (fc == 0x2D) {
				/*
				 * 0x2D is ASCII '-'
				 */
				addr ++;
				len --;
				if (len == 0)
					panic("word '-' is unknown !");
				fc = 1;
			} else {
				fc = 0;
			}
			push(0);
			push(addr);
			push(len);
			native_tonumber();
			if (pop() != 0) {
				/*
				 * Conversion was not complete. This is not
				 * a number.
				 */
				fprintf(stderr, "unkown word: ");
				fwrite(native_address(addr), 1, len, stderr);
				fprintf(stderr, "\n");
				panic("unknown word");
			}
			pop();
			if (fc)
				push(-pop());
			if (FETCH(FA_STATE)) {
				/*
				 * If we are in compilation state, we
				 * compile the number (left on the stack)
				 * into the current definition.
				 */
				compile(OP_LIT);
				native_comma();
			}
			continue;
		}

		if (FETCH(FA_STATE)) {
			ucell xt = fetch(wa + WH_CFA_C);

			if (xt == 0)
				compile(fetch(wa + WH_CFA_I));
			else
				interp(xt);
		} else {
			interp(fetch(wa + WH_CFA_I));
		}
	}
}

/*
 * Main program. Command-line arguments are Forth source file names; those
 * files are processed successively. When all files have been processed,
 * the interactive device (stdin) is used.
 */
int
main(int argc, char *argv[])
{
	int i;
	unsigned u;
	ucell wl, wa = 0;

	/*
	 * Initialize stack pointers.
	 */
	data_stack_ptr = (cell *)&dictionary.b[DATA_STACK_END];
	return_stack_ptr = (cell *)&dictionary.b[RETURN_STACK_END];

	/*
	 * Initialize file access table.
	 */
	file_init();

	/*
	 * Initialize memory.
	 */
	STORE(0, OP_EXIT);
	STORE(FA_BASE, 10);
	STORE(FA_HERE, FA_EORA);
	STORE(FA_CURRENT_XT, 0);
	STORE(FA_LAST_WORD, 0);
	STORE(FA_SOURCE_ID, 0);
	STORE(FA_SOURCE_PTR, RESERVED_BEGIN);
	STORE(FA_SOURCE_LEN, 0);
	STORE(FA_TOIN, 0);
	STORE(FA_STATE, 0);

	/*
	 * Create the initial word list.
	 */
	wl = FETCH(FA_HERE);
	allot(CELLS(1 + WL_BUCKET_NUM));
	STORE(wl, 0);
	for (u = 0; u < WL_BUCKET_NUM; u ++)
		STORE(wl + CELLS(1 + u), 0);
	STORE(FA_CURRENT_LIST, wl);
	STORE(FA_CURRENT_SEARCH, wl);

	/*
	 * Create native words.
	 */
	for (u = 0; u < (sizeof nwords) / (sizeof nwords[0]); u ++) {
		if (nwords[u].name == 0) {
			store(wa + WH_CFA_C, (u << 1) + 1);
		} else {
			wa = create_header(nwords[u].name,
				strlen(nwords[u].name));
			store(wa + WH_CFA_I, (u << 1) + 1);
			link_word(wl, wa);
		}
	}

	/*
	 * Create constants.
	 */
	for (u = 0; u < (sizeof nconsts) / (sizeof nconsts[0]); u ++)
		create_constant(wl, nconsts[u].name, nconsts[u].value);

	/*
	 * Process input files.
	 */
	file[0] = stdin;
	for (i = 1; i < argc; i ++) {
		int f = file_open(argv[i], strlen(argv[i]), FF_RO);

		if (f < 0)
			panic("file not found: %s\n", argv[i]);
		STORE(FA_SOURCE_ID, f);
		toplevel();
		file_close(f);
	}
	STORE(FA_SOURCE_ID, 0);
	toplevel();
	return 0;
}