From: Robert Pengelly Date: Mon, 3 Jun 2024 03:35:05 +0000 (+0100) Subject: New server X-Git-Url: https://git.candlhat.org/?a=commitdiff_plain;h=ad0a729e047b3cb2f417067e48df710f733803a3;p=sasm.git New server --- ad0a729e047b3cb2f417067e48df710f733803a3 diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..fdddb29 --- /dev/null +++ b/LICENSE @@ -0,0 +1,24 @@ +This is free and unencumbered software released into the public domain. + +Anyone is free to copy, modify, publish, use, compile, sell, or +distribute this software, either in source code form or as a compiled +binary, for any purpose, commercial or non-commercial, and by any +means. + +In jurisdictions that recognize copyright laws, the author or authors +of this software dedicate any and all copyright interest in the +software to the public domain. We make this dedication for the benefit +of the public at large and to the detriment of our heirs and +successors. We intend this dedication to be an overt act of +relinquishment in perpetuity of all present and future rights to this +software under copyright law. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR +OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, +ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR +OTHER DEALINGS IN THE SOFTWARE. + +For more information, please refer to diff --git a/Makefile.p32 b/Makefile.p32 new file mode 100644 index 0000000..8e6b41e --- /dev/null +++ b/Makefile.p32 @@ -0,0 +1,25 @@ +#****************************************************************************** +# @file Makefile.p32 +#****************************************************************************** +AS=as386 +CC=gcc386 +LD=ld386 + +COPTS=-S -O2 -fno-common -ansi -I. 
-I./include -I../pdos/pdpclib -I../pdos/src -D__PDOS386__ -D__32BIT__ -D__NOBIVA__ -D__PDOS__ -Wall -Werror -ansi -m32 -pedantic +COBJ=as.o bin.o cstr.o eval.o expr.o fixup.o frag.o hashtab.o intel.o kwd.o lex.o lib.o list.o listing.o ll.o macro.o obj.o process.o report.o section.o symbol.o vector.o + +all: clean sasm.exe + +sasm.exe: $(COBJ) + $(LD) -s -o sasm.exe ../pdos/pdpclib/pdosst32.o $(COBJ) ../pdos/pdpclib/pdos.a + +.c.o: + $(CC) $(COPTS) -o $*.s $< + $(AS) -o $@ $*.s + rm -f $*.s + +clean: + for %f in ($(COBJ)) do ( rm -f %f ) + + rm -f sasm + rm -f sasm.exe diff --git a/Makefile.pdw b/Makefile.pdw new file mode 100644 index 0000000..73915de --- /dev/null +++ b/Makefile.pdw @@ -0,0 +1,25 @@ +#****************************************************************************** +# @file Makefile.pdw +#****************************************************************************** +AS=aswin +CC=gccwin +LD=ldwin + +COPTS=-S -O2 -fno-common -ansi -I. -I./include -I../pdos/pdpclib -I../pdos/src -D__WIN32__ -D__NOBIVA__ -D__PDOS__ -Wall -Werror -ansi -m32 -pedantic +COBJ=as.o bin.o cstr.o eval.o expr.o fixup.o frag.o hashtab.o intel.o kwd.o lex.o lib.o list.o listing.o ll.o macro.o obj.o process.o report.o section.o symbol.o vector.o + +all: clean sasm.exe + +sasm.exe: $(COBJ) + $(LD) -s -o sasm.exe ../pdos/pdpclib/w32start.o $(COBJ) ../pdos/pdpclib/msvcrt.a ../pdos/src/kernel32.a + +.c.o: + $(CC) $(COPTS) -o $*.s $< + $(AS) -o $@ $*.s + rm -f $*.s + +clean: + for %f in ($(COBJ)) do ( rm -f %f ) + + rm -f sasm + rm -f sasm.exe diff --git a/Makefile.unix b/Makefile.unix new file mode 100644 index 0000000..44941e1 --- /dev/null +++ b/Makefile.unix @@ -0,0 +1,28 @@ +#****************************************************************************** +# @file Makefile.unix +#****************************************************************************** +OBJDIR ?= $(CURDIR) +SRCDIR ?= $(CURDIR) + +VPATH := $(SRCDIR) + +CC := gcc +CFLAGS := -D_FILE_OFFSET_BITS=64 -I$(OBJDIR) 
-I$(SRCDIR)/include -O2 -Wall -Werror -Wextra -ansi -pedantic -std=c90 + +CSRC := as.c bin.c cstr.c eval.c expr.c fixup.c frag.c hashtab.c intel.c kwd.c lex.c lib.c list.c listing.c ll.c macro.c obj.c process.c report.c section.c symbol.c vector.c + +ifeq ($(OS), Windows_NT) +all: sasm.exe + +sasm.exe: $(CSRC) + $(CC) $(CFLAGS) -o $@ $^ +else +all: sasm + +sasm: $(CSRC) + $(CC) $(CFLAGS) -o $@ $^ +endif + +clean: + if [ -f sasm ]; then rm -rf sasm; fi + if [ -f sasm.exe ]; then rm -rf sasm.exe; fi diff --git a/Makefile.w32 b/Makefile.w32 new file mode 100644 index 0000000..c70d0cf --- /dev/null +++ b/Makefile.w32 @@ -0,0 +1,21 @@ +#****************************************************************************** +# @file Makefile.w32 +#****************************************************************************** +OBJDIR ?= $(CURDIR) +SRCDIR ?= $(CURDIR) + +VPATH := $(SRCDIR) + +CC := gcc +CFLAGS := -D_FILE_OFFSET_BITS=64 -I$(OBJDIR) -I$(SRCDIR)/include -O2 -Wall -Werror -Wextra -ansi -pedantic -std=c90 + +CSRC := as.c bin.c cstr.c eval.c expr.c fixup.c frag.c hashtab.c intel.c kwd.c lex.c lib.c list.c listing.c ll.c macro.c obj.c process.c report.c section.c symbol.c vector.c + +all: sasm.exe + +clean: + if exist sasm ( del /q sasm ) + if exist sasm.exe ( del /q sasm.exe ) + +sasm.exe: $(CSRC) + $(CC) $(CFLAGS) -o $@ $^ diff --git a/README.md b/README.md new file mode 100644 index 0000000..fd9d8f8 --- /dev/null +++ b/README.md @@ -0,0 +1,71 @@ +## What is sasm? + + Small Assembler (SASM) is a very small assembler for the i80x + line of processors. + +## License + + All source code is Public Domain. + +## Obtain the source code + + git clone https://git.candlhat.org/sasm.git + +## Building + + BSD: + + Make sure you have gcc and gmake installed then run gmake -f Makefile.unix. + + Linux: + + Make sure you have gcc and make installed then run make -f Makefile.unix. + + macOS: + + Make sure you have xcode command line tools installed then run + make -f Makefile.unix. 
+ + Windows: + + Make sure you have mingw installed and the location within your PATH variable + then run mingw32-make.exe -f Makefile.w32. + +## Usage + + Example (comments and preprocessor directives): + + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; This is a comment. + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + %define SOMETHING 5 + + %if SOMETHING == 5 + ... + %endif + + Code Directives: + + .8086 - Only allow 8086 instructions. + .186 - Allow 186 and below instructions. + .286/.286p - Allow 286 and below instructions. + .386/.386p - Allow 386 and below instructions. + .486/.486p - Allow 486 and below instructions. + .586 - Allow 586 and below instructions. + .686 - Allow 686 and below instructions. + + .code/text - Output bytes to the code/text section/segment. + .data - Output bytes to the data section/segment. + .bss - Output bytes to the bss section/segment. + + align - Align the code to a specific boundary. + extern/extrn - Define a symbol that's in a different file. + global/public - Make the specified symbol available to other files. + + db - Output a byte to the object file. + dw - Output a word to the object file. + dd - Output a dword to the object file. 
+ + If you want to pad the file, you need the following: + + db [padding amount] dup [byte to pad with] diff --git a/as.c b/as.c new file mode 100644 index 0000000..3c15e73 --- /dev/null +++ b/as.c @@ -0,0 +1,186 @@ +/****************************************************************************** + * @file as.c + *****************************************************************************/ +#include +#include + +#include "as.h" +#include "lex.h" +#include "lib.h" +#include "listing.h" +#include "process.h" +#include "report.h" +#include "section.h" +#include "symbol.h" + +struct as_state *state = 0; +const char *program_name = 0; + +extern void output_binary (FILE *fp); +extern void output_obj (FILE *fp); + +extern void keywords_init (void); +extern void sections_init (void); + +static void cleanup (void) { + + if (state->ofp) { fclose (state->ofp); } + + if (get_error_count () > 0) { + + if (state->ofile) { + remove (state->ofile); + } + + if (state->lfile) { + remove (state->lfile); + } + + } + +} + +extern void fixup_code (void); +extern void machine_dependent_init (void); + +int main (int argc, char **argv) { + + struct symbol *symbol; + char *p, *root; + + if (argc && *argv) { + + program_name = *argv; + + if ((p = strrchr (program_name, '/')) || (p = strrchr (program_name, '\\'))) { + program_name = (p + 1); + } + + } + + atexit (cleanup); + lex_init (); + + state = xmalloc (sizeof (*state)); + parse_args (argc, argv, 1); + + if (!state->ifile) { + + report_at (program_name, 0, REPORT_ERROR, "no input file specified"); + return EXIT_FAILURE; + + } + + if (state->ifile && strcmp (state->ifile, "-")) { + + if ((p = strrchr (state->ifile, '/')) || (p = strrchr (state->ifile, '\\'))) { + + unsigned int len = p - state->ifile; + + root = xmalloc (len + 2); + sprintf (root, "%.*s/", (int) len, state->ifile); + + add_include_path (root); + free (root); + + } + + } + + machine_dependent_init (); + + keywords_init (); + sections_init (); + + if (preprocess_init ()) { 
+ return EXIT_FAILURE; + } + + process_file (state->ifile); + + if (get_error_count () > 0) { + return EXIT_FAILURE; + } + + fixup_code (); + + if (state->lfile) { + generate_listing (); + } + + for (symbol = symbols; symbol; symbol = symbol->next) { + + if (symbol_is_external (symbol) && symbol_get_section (symbol) == undefined_section) { + + if (symbol->scope == SYMBOL_SCOPE_GLOBAL) { + report_at (program_name, 0, REPORT_ERROR, "undefined global symbol '%s'", symbol->name); + } + + } + + } + + if (get_error_count () > 0) { + return EXIT_FAILURE; + } + + if (state->format == AS_OUTPUT_BIN) { + + int report_output = 1; + + for (symbol = symbols; symbol; symbol = symbol->next) { + + if ((symbol_is_external (symbol) && symbol_get_section (symbol) == undefined_section) || symbol_is_undefined (symbol)) { + + if (symbol->scope == SYMBOL_SCOPE_GLOBAL) { + + report_at (program_name, 0, REPORT_ERROR, "undefined global symbol '%s'", symbol->name); + continue; + + } + + if (report_output) { + + report_at (program_name, 0, REPORT_ERROR, "%s output does not support external references", state->format == AS_OUTPUT_BIN ? 
"binary" : "com"); + report_output = 0; + + } + + report_at (program_name, 0, REPORT_ERROR, "undefined external symbol '%s'", symbol->name); + + } + + } + + if (get_error_count () > 0) { + return EXIT_FAILURE; + } + + if (!(state->ofp = fopen (state->ofile, "wb"))) { + + report_at (program_name, 0, REPORT_ERROR, "failed to open '%s' for writing", state->ofile); + return EXIT_FAILURE; + + } + + output_binary (state->ofp); + return EXIT_SUCCESS; + + } + + if (!(state->ofp = fopen (state->ofile, "wb"))) { + + report_at (program_name, 0, REPORT_ERROR, "failed to open '%s' for writing", state->ofile); + return EXIT_FAILURE; + + } + + output_obj (state->ofp); + + if (get_error_count () > 0) { + return EXIT_FAILURE; + } + + return EXIT_SUCCESS; + +} diff --git a/as.h b/as.h new file mode 100644 index 0000000..d33151c --- /dev/null +++ b/as.h @@ -0,0 +1,45 @@ +/****************************************************************************** + * @file as.h + *****************************************************************************/ +#ifndef _AS_H +#define _AS_H + +#include + +#include "list.h" +#include "symbol.h" +#include "vector.h" + +struct proc { + + struct vector regs, args; + char *name; + + char *filename; + unsigned long line_number; + +}; + +#define AS_OUTPUT_OBJ 0x00 +#define AS_OUTPUT_BIN 0x01 + +struct as_state { + + const char *ifile, *ofile, *lfile; + FILE *ofp; + + struct list *pplist; + int model, data_size; + + struct vector procs; + char *ext; + + struct symbol *end_symbol; + int format; + +}; + +extern struct as_state *state; +extern const char *program_name; + +#endif /* _AS_H */ diff --git a/bin.c b/bin.c new file mode 100644 index 0000000..a22eaa3 --- /dev/null +++ b/bin.c @@ -0,0 +1,99 @@ +/****************************************************************************** + * @file bin.c + *****************************************************************************/ +#include "limits.h" +#include "stdio.h" +#include "stdlib.h" +#include "string.h" + 
+#include "as.h" +#include "fixup.h" +#include "frag.h" +#include "lib.h" +#include "report.h" +#include "section.h" +#include "symbol.h" + +static unsigned long text_size = 0, data_size = 0, bss_size = 0; +static void *output = 0; + +void output_binary (FILE *fp) { + + unsigned long i = 0; + struct frag *frag; + + section_set (text_section); + + for (frag = current_frag_chain->first_frag; frag; frag = frag->next) { + + if (frag->fixed_size == 0) { + continue; + } + + text_size += frag->fixed_size; + + } + + section_set (data_section); + + for (frag = current_frag_chain->first_frag; frag; frag = frag->next) { + + if (frag->fixed_size == 0) { + continue; + } + + data_size += frag->fixed_size; + + } + + section_set (bss_section); + + for (frag = current_frag_chain->first_frag; frag; frag = frag->next) { + + if (frag->fixed_size == 0) { + continue; + } + + bss_size += frag->fixed_size; + + } + + output = xmalloc (text_size + data_size); + section_set (text_section); + + for (frag = current_frag_chain->first_frag; frag; frag = frag->next) { + + if (frag->fixed_size == 0) { + continue; + } + + memcpy ((unsigned char *) output + i, frag->buf, frag->fixed_size); + i += frag->fixed_size; + + } + + section_set (data_section); + + for (frag = current_frag_chain->first_frag; frag; frag = frag->next) { + + if (frag->fixed_size == 0) { + continue; + } + + memcpy ((unsigned char *) output + i, frag->buf, frag->fixed_size); + i += frag->fixed_size; + + } + + if (get_error_count () > 0) { + exit (EXIT_FAILURE); + } + + if (fwrite (output, text_size + data_size, 1, fp) != 1) { + + report_at (program_name, 0, REPORT_ERROR, "failed to write data to '%s'", state->ofile); + exit (EXIT_FAILURE); + + } + +} diff --git a/cstr.c b/cstr.c new file mode 100644 index 0000000..d518f2e --- /dev/null +++ b/cstr.c @@ -0,0 +1,69 @@ +/****************************************************************************** + * @file cstr.c + 
*****************************************************************************/ +#include +#include + +#include "cstr.h" + +extern void *xrealloc (void *__ptr, unsigned int __size); + +static void cstr_realloc (struct cstring *cstr, int new_size) { + + int size = cstr->size_allocated; + + if (size < 8) { + size = 8; + } + + while (size < new_size) { + size *= 2; + } + + cstr->data = xrealloc (cstr->data, size); + cstr->size_allocated = size; + +} + +void cstr_ccat (struct cstring *cstr, int ch) { + + int size = cstr->size + 1; + + if (size > cstr->size_allocated) { + cstr_realloc (cstr, size); + } + + ((unsigned char *) cstr->data)[size - 1] = ch; + cstr->size = size; + +} + +void cstr_cat (struct cstring *cstr, const char *str, int len) { + + int size; + + if (len <= 0) { + len = strlen (str) + 1 + len; + } + + size = cstr->size + len; + + if (size > cstr->size_allocated) { + cstr_realloc (cstr, size); + } + + memmove (((unsigned char *) cstr->data) + cstr->size, str, len); + cstr->size = size; + +} + +void cstr_new (struct cstring *cstr) { + memset (cstr, 0, sizeof (struct cstring)); +} + +void cstr_free (struct cstring *cstr) { + + free (cstr->data); + cstr_new (cstr); + +} diff --git a/cstr.h b/cstr.h new file mode 100644 index 0000000..2736720 --- /dev/null +++ b/cstr.h @@ -0,0 +1,20 @@ +/****************************************************************************** + * @file cstr.h + *****************************************************************************/ +#ifndef _CSTR_H +#define _CSTR_H + +struct cstring { + + int size, size_allocated; + void *data; + +}; + +void cstr_ccat (struct cstring *cstr, int ch); +void cstr_cat (struct cstring *cstr, const char *str, int len); + +void cstr_new (struct cstring *cstr); +void cstr_free (struct cstring *cstr); + +#endif /* _CSTR_H */ diff --git a/eval.c b/eval.c new file mode 100644 index 0000000..f0fd20e --- /dev/null +++ b/eval.c @@ -0,0 +1,620 @@ 
+/****************************************************************************** + * @file eval.c + *****************************************************************************/ +#include +#include +#include +#include +#include + +#include "as.h" +#include "eval.h" +#include "lex.h" +#include "lib.h" +#include "macro.h" +#include "report.h" + +static unsigned int eval_expr (unsigned int lhs, char *start, char **pp, int outer_prec); + +static unsigned int eval_unary (unsigned int lhs, char *start, char **pp) { + + *pp = skip_whitespace (*pp); + + if (isdigit ((int) **pp)) { + + unsigned int temp, temp2; + int ch; + + if ((*pp)[0] == '0' && tolower ((int) (*pp)[1]) == 'x') { + + unsigned int base = 16; + *pp += 2; + + while (isxdigit ((int) **pp)) { + + temp = lhs * base; + ch = *((*pp)++); + + if (ch >= '0' && ch <= '9') { + temp2 = ch - '0'; + } else { + temp2 = (ch & 0xdf) - ('A' - 10); + } + + lhs = temp + temp2; + + } + + } else if ((*pp)[0] == '0') { + + unsigned int base = 8; + + while (isdigit ((int) **pp)) { + + temp = lhs * base; + lhs = (temp + (*((*pp)++) - '0')); + + } + + } else { + + unsigned int base = 10; + + while (isdigit ((int) **pp)) { + + temp = lhs * base; + lhs = (temp + (*((*pp)++) - '0')); + + } + + } + + return lhs; + + } + + if (is_name_beginner ((int) **pp)) { + + char *sname, *caret; + + struct hashtab_name *key; + struct macro *m; + + caret = *pp; + + while (is_name_part ((int) *caret)) { + caret++; + } + + if (memcmp (*pp, "defined", caret - *pp) == 0) { + + caret = skip_whitespace (caret); + *pp = caret; + + if (*caret == '(') { + + caret = skip_whitespace (caret + 1); + *pp = caret; + + while (!is_end_of_line[(int) *caret]) { + + if (isspace ((int) *caret) || *caret == ')') { break; } + caret++; + + } + + sname = xstrndup (*pp, caret - *pp); + caret = skip_whitespace (caret); + + if (*caret != ')') { + report_line_at (get_filename (), get_line_number (), REPORT_ERROR, start, caret, "missing ')' after \"defined\""); + } + + } else { 
+ sname = xstrndup (*pp, caret - *pp); + } + + if (*caret == ')') { caret++; } + *pp = skip_whitespace (caret); + + (void) sname; + (void) key; + + lhs = (find_macro (sname) != NULL); + return lhs; + + } + + sname = xstrndup (*pp, caret - *pp); + *pp = skip_whitespace (caret); + + if ((key = find_macro (sname))) { + + if ((m = get_macro (key))) { + + char *temp = process_macro (start, pp, m); + lhs = eval_unary (lhs, temp, &temp); + + } + + } + + return lhs; + + } + + if (**pp == '@') { + + char *arg, *temp = (*pp + 1); + + if ((arg = symname (&temp))) { + + if (xstrcasecmp (arg, "DataSize") == 0) { + + *pp = temp; + free (arg); + + return state->data_size; + + } + + if (xstrcasecmp (arg, "Model") == 0) { + + *pp = temp; + free (arg); + + return (state->model > 0 ? state->model : 1); + + } + + free (arg); + + } + + } + + if (**pp == '!') { + + unsigned int temp = 0; + + *pp = skip_whitespace (*pp + 1); + temp = eval_unary (temp, start, pp); + + lhs = (temp == 0); + return lhs; + + } + + if (**pp == '~') { + + int flip_bits = 0; + + while (**pp == '~') { + + flip_bits = !flip_bits; + *pp = skip_whitespace (*pp + 1); + + } + + lhs = eval_unary (lhs, start, pp); + + if (flip_bits) { + lhs = ~lhs; + } + + return lhs; + + } + + if (**pp == '-') { + + int sign = 1; + + while (**pp == '-' || **pp == '+') { + + if (**pp == '-') { sign = !sign; } + *pp = skip_whitespace (*pp + 1); + + } + + lhs = eval_unary (lhs, start, pp); + + if (!sign) { + lhs = -lhs; + } + + return lhs; + + } + + if (**pp == '(') { + + char *caret = (*pp)++; + int depth = 0; + + while (!is_end_of_line[(int) **pp]) { + + if (**pp == '(') { + + (*pp)++; + + depth++; + continue; + + } + + if (**pp == ')') { + + if (depth > 0) { + + (*pp)++; + + depth--; + continue; + + } + + break; + + } + + (*pp)++; + + } + + if (**pp != ')') { + report_line_at (get_filename (), get_line_number (), REPORT_WARNING, start, caret, "missing ')' in expression"); + } else { + (*pp)++; + } + + sprintf (caret, "%.*s", (int) (*pp 
- caret - 1), caret + 1); + + lhs = eval_unary (lhs, start, &caret); + lhs = eval_expr (lhs, start, &caret, 15); + + return lhs; + + } + + if (!is_end_of_line[(int) **pp]) { + + report_line_at (get_filename (), get_line_number (), REPORT_INTERNAL_ERROR, start, *pp, "unexpected %c character", **pp); + + while (!is_end_of_line[(int) **pp]) { + (*pp)++; + } + + } + + return lhs; + +} + +#define OP_MUL 0 +#define OP_DIV 1 +#define OP_MOD 2 +#define OP_PLUS 3 +#define OP_MINUS 4 +#define OP_LT 5 +#define OP_GT 6 +#define OP_LTEQ 7 +#define OP_GTEQ 8 +#define OP_EQEQ 9 +#define OP_NOTEQ 11 +#define OP_AND 12 +#define OP_XOR 13 +#define OP_OR 14 +#define OP_ANDAND 15 +#define OP_OROR 16 +#define OP_LSHIFT 17 +#define OP_RSHIFT 18 +#define OP_QUEST 19 +#define OP_MAX 20 + +struct op { + + char *word; + int kind; + +}; + +static struct op *get_op (char **pp) { + + static struct op kws[] = { + + { "<=", OP_LTEQ }, + { ">=", OP_GTEQ }, + { "==", OP_EQEQ }, + { "!=", OP_NOTEQ }, + { "&&", OP_ANDAND }, + { "||", OP_OROR }, + { "<<", OP_LSHIFT }, + { ">>", OP_RSHIFT }, + + { "*", OP_MUL }, + { "/", OP_DIV }, + { "%", OP_MOD }, + { "+", OP_PLUS }, + { "-", OP_MINUS }, + { "<", OP_LT }, + { ">", OP_GT }, + { "&", OP_AND }, + { "^", OP_XOR }, + { "|", OP_OR }, + { "?", OP_QUEST } + + }; + + struct op *kw; + unsigned int i; + + for (i = 0; i < (sizeof (kws) / sizeof (*kws)); i++) { + + kw = &kws[i]; + + if (strncmp (*pp, kw->word, strlen (kw->word)) == 0) { + + *pp += strlen (kw->word); + return kw; + + } + + } + + return 0; + +} + +static int get_prec (int kind) { + + switch (kind) { + + case OP_MUL: case OP_DIV: case OP_MOD: + + return 3; + + case OP_PLUS: case OP_MINUS: + + return 4; + + case OP_LSHIFT: case OP_RSHIFT: + + return 5; + + case OP_LT: case OP_GT: case OP_LTEQ: case OP_GTEQ: + + return 6; + + case OP_EQEQ: case OP_NOTEQ: + + return 7; + + case OP_AND: + + return 8; + + case OP_XOR: + + return 9; + + case OP_OR: + + return 10; + + case OP_ANDAND: + + return 11; + + 
case OP_OROR: + + return 12; + + case OP_QUEST: + + return 13; + + default: + + break; + + } + + return 100; + +} + +static unsigned int eval_expr (unsigned int lhs, char *start, char **pp, int outer_prec) { + + struct op *op1, *op2; + unsigned int rhs; + + int prec, look_ahead; + + for (;;) { + + *pp = skip_whitespace (*pp); + + if (is_end_of_line[(int) **pp]) { + break; + } + + op1 = get_op (pp); + + if (!op1 || (prec = get_prec (op1->kind)) > outer_prec) { + + if (op1) { *pp -= strlen (op1->word); } + break; + + } + + *pp = skip_whitespace (*pp); + + if (op1->kind == OP_QUEST) { + + unsigned int left = 0, right = 0; + + left = eval_unary (left, start, pp); + left = eval_expr (left, start, pp, 14); + + *pp = skip_whitespace (*pp); + assert (**pp == ':'); + *pp = skip_whitespace (*pp + 1); + + right = eval_unary (right, start, pp); + right = eval_expr (right, start, pp, 14); + + if (lhs != 0) { + lhs = left; + } else { + lhs = right; + } + + continue; + + } + + if (is_end_of_line[(int) **pp]) { + break; + } + + rhs = 0; + rhs = eval_unary (rhs, start, pp); + + for (;;) { + + *pp = skip_whitespace (*pp); + + if (is_end_of_line[(int) **pp]) { + break; + } + + op2 = get_op (pp); + + if (!op2 || (look_ahead = get_prec (op2->kind)) > prec) { + + if (op2) { *pp -= strlen (op2->word); } + break; + + } + + *pp = skip_whitespace (*pp); + rhs = eval_expr (rhs, start, pp, look_ahead); + + } + + switch (op1->kind) { + + case OP_MUL: + + lhs *= rhs; + break; + + case OP_DIV: + + lhs /= rhs; + break; + + case OP_MOD: + + lhs %= rhs; + break; + + case OP_PLUS: + + lhs += rhs; + break; + + case OP_MINUS: + + lhs -= rhs; + break; + + case OP_LT: + + lhs = (lhs < rhs); + break; + + case OP_GT: + + lhs = (lhs > rhs); + break; + + case OP_LTEQ: + + lhs = (lhs <= rhs); + break; + + case OP_GTEQ: + + lhs = (lhs >= rhs); + break; + + case OP_EQEQ: + + lhs = (lhs == rhs); + break; + + case OP_NOTEQ: + + lhs = (lhs != rhs); + break; + + case OP_AND: + + lhs &= rhs; + break; + + case 
OP_XOR: + + lhs ^= rhs; + break; + + case OP_OR: + + lhs |= rhs; + break; + + case OP_ANDAND: + + lhs = ((lhs != 0) && (rhs != 0)); + break; + + case OP_OROR: + + lhs = ((lhs != 0) || (rhs != 0)); + break; + + case OP_LSHIFT: + + lhs <<= rhs; + break; + + case OP_RSHIFT: + + lhs >>= rhs; + break; + + default: + + report_at (__FILE__, __LINE__, REPORT_INTERNAL_ERROR, "unimplemented"); + break; + + } + + } + + return lhs; + +} + +int eval (char *start, char **pp) { + + unsigned int lhs = 0; + + lhs = eval_unary (lhs, start, pp); + lhs = eval_expr (lhs, start, pp, 15); + + return (lhs != 0); + +} diff --git a/eval.h b/eval.h new file mode 100644 index 0000000..9361938 --- /dev/null +++ b/eval.h @@ -0,0 +1,9 @@ +/****************************************************************************** + * @file eval.h + *****************************************************************************/ +#ifndef _EVAL_H +#define _EVAL_H + +int eval (char *start, char **pp); + +#endif /* _EVAL_H */ diff --git a/expr.c b/expr.c new file mode 100644 index 0000000..6e944d9 --- /dev/null +++ b/expr.c @@ -0,0 +1,1395 @@ +/****************************************************************************** + * @file expr.c + *****************************************************************************/ +#include +#include +#include + +#include "expr.h" +#include "frag.h" +#include "lex.h" +#include "lib.h" +#include "report.h" +#include "section.h" +#include "symbol.h" + +/** + * Expression symbols are mapped to file positions to provide + * better error messages. 
+ */ +struct expr_symbol_line { + + struct symbol *symbol; + + const char *filename; + unsigned long line_number; + + struct expr_symbol_line *next; + +}; + +static struct expr_symbol_line *expr_symbol_lines = 0; + +static char *read_character (const char *start, char *p, unsigned long *ch) { + + if (*p == '\\') { + + p++; + + if (*p == '\'') { + + *ch = '\''; + p++; + + } else if (*p == '\"') { + + *ch = '"'; + p++; + + } else if (*p == '\\') { + + *ch = '\\'; + p++; + + } else if (*p == 'a') { + + *ch = 0x07; + p++; + + } else if (*p == 'b') { + + *ch = 0x08; + p++; + + } else if (*p == 't') { + + *ch = 0x09; + p++; + + } else if (*p == 'n') { + + *ch = 0x0a; + p++; + + } else if (*p == 'v') { + + *ch = 0x0b; + p++; + + } else if (*p == 'f') { + + *ch = 0x0c; + p++; + + } else if (*p == 'r') { + + *ch = 0x0d; + p++; + + } else if (*p == 'e') { + + *ch = 0x1b; + p++; + + } else if (*p >= '0' && *p <= '7') { + + unsigned long i = 0; + *ch = 0; + + while (*p >= '0' && *p <= '7') { + + if (++i > 3) { + break; + } + + *ch = *ch * 8 + (*p - '0'); + p++; + + } + + } else { + + report_line_at (get_filename (), get_line_number (), REPORT_ERROR, start, p - 1, "unknown escape sequence: '\\%c'", *p); + *ch = *p++; + + } + + } else { + *ch = *p++; + } + + return p; + +} + +static int chrpos (char *s, int ch) { + + char *p = strchr (s, ch); + return p ? 
p - s : -1; + +} + +static void integer_constant (char *start, char **pp, struct expr *expr, int radix) { + + long value = 0; + int k; + + while (!is_end_of_line[(int) **pp] && (k = chrpos ("0123456789abcdef", tolower ((int) **pp))) >= 0) { + + if (k >= radix) { + report_line_at (get_filename (), get_line_number (), REPORT_ERROR, start, *pp, "invalid digit in integer literal"); + } + + value = value * radix + k; + (*pp)++; + + } + + expr->type = EXPR_TYPE_CONSTANT; + expr->add_number = value; + +} + +enum expr_type machine_dependent_parse_operator (char **pp, char *name, char *original_saved_c, unsigned int operands); + +int machine_dependent_need_index_operator (void); +int machine_dependent_parse_name (char **pp, struct expr *expr, char *name, char *original_saved_c); + +void machine_dependent_parse_operand (char *start, char **pp, struct expr *expr); + +static enum expr_type operator (char *p, unsigned int *operator_size) { + + char *start; + enum expr_type ret; + + if (is_name_beginner ((int) *p)) { + + char *name; + char c; + + name = p; + + c = get_symbol_name_end (&p); + ret = machine_dependent_parse_operator (&p, name, &c, 2); + + switch (ret) { + + case EXPR_TYPE_ABSENT: + + *p = c; + + p = name; + break; + + default: + + *p = c; + *operator_size = p - name; + + return ret; + + } + + } + + switch (*p) { + + case '+': + + *operator_size = 1; + return EXPR_TYPE_ADD; + + case '-': + + *operator_size = 1; + return EXPR_TYPE_SUBTRACT; + + case '<': + + switch (p[1]) { + + case '<': + + *operator_size = 2; + return EXPR_TYPE_LEFT_SHIFT; + + case '>': + + *operator_size = 2; + return EXPR_TYPE_NOT_EQUAL; + + case '=': + + *operator_size = 2; + return EXPR_TYPE_LESSER_EQUAL; + + } + + *operator_size = 1; + return EXPR_TYPE_LESSER; + + case '>': + + switch (p[1]) { + + case '>': + + *operator_size = 2; + return EXPR_TYPE_RIGHT_SHIFT; + + case '=': + + *operator_size = 2; + return EXPR_TYPE_GREATER_EQUAL; + + } + + *operator_size = 1; + return EXPR_TYPE_GREATER; + + 
case '=': + + if (p[1] != '=') { + + *operator_size = 0; + return EXPR_TYPE_INVALID; + + } + + *operator_size = 2; + return EXPR_TYPE_EQUAL; + + case '!': + + if (p[1] != '=') { + + *operator_size = 0; + return EXPR_TYPE_INVALID; + + } + + *operator_size = 2; + return EXPR_TYPE_NOT_EQUAL; + + case '|': + + if (p[1] != '|') { + + *operator_size = 1; + return EXPR_TYPE_BIT_INCLUSIVE_OR; + + } + + *operator_size = 2; + return EXPR_TYPE_LOGICAL_OR; + + case '&': + + if (p[1] != '&') { + + *operator_size = 1; + return EXPR_TYPE_BIT_AND; + + } + + *operator_size = 2; + return EXPR_TYPE_LOGICAL_AND; + + case '/': + + *operator_size = 1; + return EXPR_TYPE_DIVIDE; + + case '%': + + *operator_size = 1; + return EXPR_TYPE_MODULUS; + + case '*': + + *operator_size = 1; + return EXPR_TYPE_MULTIPLY; + + case '^': + + *operator_size = 1; + return EXPR_TYPE_BIT_EXCLUSIVE_OR; + + default: + + start = p; + + ret = machine_dependent_parse_operator (&p, 0, 0, 2); + *operator_size = p - start; + + return ret; + + } + +} + +static struct section *operand (char *start, char **pp, struct expr *expr, int expr_mode) { + + struct section *ret_section; + char ch; + + expr->type = EXPR_TYPE_INVALID; + expr->add_number = 0; + + expr->add_symbol = 0; + expr->op_symbol = 0; + + *pp = skip_whitespace (*pp); + + if (is_end_of_line[(int) **pp]) { + goto end_of_line; + } + + switch (**pp) { + + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': + + integer_constant (start, pp, expr, 10); + break; + + case '0': + + (*pp)++; + + switch (**pp) { + + case 'X': + case 'x': + + (*pp)++; + + integer_constant (start, pp, expr, 16); + break; + + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + + integer_constant (start, pp, expr, 8); + break; + + case 'B': + case 'b': + + if ((*pp)[1] == '0' || (*pp)[1] == '1') { + + (*pp)++; + + integer_constant (start, pp, expr, 2); + break; + + } + + /* fall through */ + 
+ default: + + expr->type = EXPR_TYPE_CONSTANT; + expr->add_number = 0; + + break; + + } + + break; + + case '[': + + if (machine_dependent_need_index_operator ()) { + goto default_; + } + + /* fall through */ + + case '(': + + ch = (*pp)++[0]; + ret_section = read_into (start, pp, expr, 0, expr_mode); + + if ((ch == '(' && (*pp)[0] != ')') || (ch == '[' && (*pp)[0] != ']')) { + + if ((*pp)[0]) { + report_line_at (get_filename (), get_line_number (), REPORT_ERROR, start, *pp, "found '%c' but expected '%c'", (*pp)[0], (ch == '(' ? ')' : ']')); + } else { + report_line_at (get_filename (), get_line_number (), REPORT_ERROR, start, *pp, "missing '%c'", (ch == '(' ? ')' : ']')); + } + + } else { + (*pp)++; + } + + *pp = skip_whitespace (*pp); + return ret_section; + + case '+': + case '-': + case '~': + case '!': + + ch = *((*pp)++); + + unary: + + operand (start, pp, expr, expr_mode); + + if (expr->type == EXPR_TYPE_CONSTANT) { + + switch (ch) { + + case '-': + + expr->add_number = -expr->add_number; + break; + + case '~': + + expr->add_number = ~expr->add_number; + break; + + case '!': + + expr->add_number = !expr->add_number; + break; + + } + + } else if (expr->type != EXPR_TYPE_INVALID && expr->type != EXPR_TYPE_ABSENT) { + + if (ch != '+') { + + expr->add_symbol = make_expr_symbol (expr); + expr->op_symbol = 0; + + expr->add_number = 0; + + switch (ch) { + + case '-': + + expr->type = EXPR_TYPE_UNARY_MINUS; + break; + + case '~': + + expr->type = EXPR_TYPE_BIT_NOT; + break; + + case '!': + + expr->type = EXPR_TYPE_LOGICAL_NOT; + break; + + } + + } + + } else { + report_at (get_filename (), get_line_number (), REPORT_WARNING, "unary operator %c ignored because bad operand follows", ch); + } + + break; + + case '\'': + + *pp = read_character (start, ++(*pp), &expr->add_number); + + if (**pp == '\'') { + (*pp)++; + } + + expr->type = EXPR_TYPE_CONSTANT; + break; + + case '.': + + if (!is_name_part ((int) (*pp)[1])) { + + current_location (expr); + + (*pp)++; + break; 
+ + } else { + goto is_name; + } + + default: + default_: + + if (is_name_beginner ((int) **pp)) { + + struct symbol *symbol; + char *name; + + is_name: + + name = *pp; + ch = get_symbol_name_end (pp); + + /* Checks in a machine dependent way whether the name is a unary operator. */ + { + + enum expr_type ret = machine_dependent_parse_operator (pp, name, &ch, 1); + + switch (ret) { + + case EXPR_TYPE_UNARY_MINUS: + + **pp = ch; + + ch = '-'; + goto unary; + + case EXPR_TYPE_BIT_NOT: + + **pp = ch; + + ch = '~'; + goto unary; + + case EXPR_TYPE_LOGICAL_NOT: + + **pp = ch; + + ch = '!'; + goto unary; + + case EXPR_TYPE_INVALID: + + report_at (get_filename (), get_line_number (), REPORT_ERROR, "invalid use of operator \"%s\"", name); + break; + + default: + + break; + + } + + if (ret != EXPR_TYPE_INVALID && ret != EXPR_TYPE_ABSENT) { + + **pp = ch; + read_into (start, pp, expr, 9, expr_mode); + + expr->add_symbol = make_expr_symbol (expr); + + expr->add_number = 0; + expr->op_symbol = 0; + + expr->type = ret; + break; + + } + + } + + if (machine_dependent_parse_name (pp, expr, name, &ch)) { + + **pp = ch; + break; + + } + + symbol = symbol_find_or_make (name, SYMBOL_SCOPE_LOCAL); + **pp = ch; + + if (symbol_get_section (symbol) == absolute_section && !symbol_force_reloc (symbol)) { + + expr->type = EXPR_TYPE_CONSTANT; + expr->add_number = symbol_get_value (symbol); + + } else { + + expr->type = EXPR_TYPE_SYMBOL; + expr->add_symbol = symbol; + + expr->add_number = 0; + + } + + } else { + + expr->type = EXPR_TYPE_ABSENT; + machine_dependent_parse_operand (start, pp, expr); + + if (expr->type == EXPR_TYPE_ABSENT) { + + report_line_at (get_filename (), get_line_number (), REPORT_ERROR, start, *pp, "bad expression"); + (*pp)++; + + expr->type = EXPR_TYPE_CONSTANT; + expr->add_number = 0; + + } + + } + + break; + + case ',': + end_of_line: + + expr->type = EXPR_TYPE_ABSENT; + break; + + } + + *pp = skip_whitespace (*pp); + + switch (expr->type) { + + case EXPR_TYPE_SYMBOL: +
+ return symbol_get_section (expr->add_symbol); + + case EXPR_TYPE_REGISTER: + + return reg_section; + + default: + + return absolute_section; + + } + +} + +static unsigned int op_rank_table[EXPR_TYPE_MAX] = { + + 0, /* EXPR_TYPE_INVALID */ + 0, /* EXPR_TYPE_ABSENT */ + 0, /* EXPR_TYPE_CONSTANT */ + 0, /* EXPR_TYPE_SYMBOL */ + 0, /* EXPR_TYPE_REGISTER */ + 1, /* EXPR_TYPE_INDEX */ + 2, /* EXPR_TYPE_LOGICAL_OR */ + 3, /* EXPR_TYPE_LOGICAL_AND */ + 4, /* EXPR_TYPE_EQUAL */ + 4, /* EXPR_TYPE_NOT_EQUAL */ + 4, /* EXPR_TYPE_LESSER */ + 4, /* EXPR_TYPE_LESSER_EQUAL */ + 4, /* EXPR_TYPE_GREATER */ + 4, /* EXPR_TYPE_GREATER_EQUAL */ + 5, /* EXPR_TYPE_ADD */ + 5, /* EXPR_TYPE_SUBTRACT */ + 7, /* EXPR_TYPE_BIT_INCLUSIVE_OR */ + 7, /* EXPR_TYPE_BIT_EXCLUSIVE_OR */ + 7, /* EXPR_TYPE_BIT_AND */ + 8, /* EXPR_TYPE_MULTIPLY */ + 8, /* EXPR_TYPE_DIVIDE */ + 8, /* EXPR_TYPE_MODULUS */ + 8, /* EXPR_TYPE_LEFT_SHIFT */ + 8, /* EXPR_TYPE_RIGHT_SHIFT */ + 9, /* EXPR_TYPE_LOGICAL_NOT */ + 9, /* EXPR_TYPE_BIT_NOT */ + 9 /* EXPR_TYPE_UNARY_MINUS */ + + /* Machine dependent operators default to rank 0 but expr_type_set_rank() can be used to change the rank. 
*/ + +}; + +void expr_type_set_rank (enum expr_type expr_type, unsigned int rank) { + op_rank_table[expr_type] = rank; +} + +struct section *current_location (struct expr *expr) { + + expr->type = EXPR_TYPE_SYMBOL; + expr->add_number = 0; + + expr->add_symbol = symbol_temp_new_now (); + expr->op_symbol = 0; + + return symbol_get_section (expr->add_symbol); + +} + +struct section *read_into (char *start, char **pp, struct expr *expr, unsigned int rank, int expr_mode) { + + enum expr_type left_op; + struct expr right_expr; + + struct section *ret_section; + unsigned int operator_size; + + ret_section = operand (start, pp, expr, expr_mode); + left_op = operator (*pp, &operator_size); + + while (left_op != EXPR_TYPE_INVALID && op_rank_table[left_op] > rank) { + + enum expr_type right_op; + + struct section *right_section; + signed long offset; + + *pp += operator_size; + right_section = read_into (start, pp, &right_expr, op_rank_table[left_op], expr_mode); + + if (right_expr.type == EXPR_TYPE_ABSENT) { + + report_at (get_filename (), get_line_number (), REPORT_WARNING, "missing operand; zero assumed"); + + right_expr.type = EXPR_TYPE_CONSTANT; + right_expr.add_number = 0; + + right_expr.add_symbol = 0; + right_expr.op_symbol = 0; + + } + + if (left_op == EXPR_TYPE_INDEX) { + + if (**pp != ']') { + report_line_at (get_filename (), get_line_number (), REPORT_ERROR, start, *pp, "missing ']'"); + } else { + *pp = skip_whitespace (*pp + 1); + } + + } + + right_op = operator (*pp, &operator_size); + + if (left_op == EXPR_TYPE_ADD && right_expr.type == EXPR_TYPE_CONSTANT && expr->type != EXPR_TYPE_REGISTER) { + expr->add_number += right_expr.add_number; + } else if (left_op == EXPR_TYPE_SUBTRACT && right_expr.type == EXPR_TYPE_SYMBOL && expr->type == EXPR_TYPE_SYMBOL && ret_section == right_section && ((SECTION_IS_NORMAL (ret_section) && !symbol_force_reloc (expr->add_symbol) && !symbol_force_reloc (right_expr.add_symbol)) || expr->add_symbol == right_expr.add_symbol) && 
frags_offset_is_fixed (symbol_get_frag (expr->add_symbol), symbol_get_frag (right_expr.add_symbol), &offset)) { + + expr->add_number += symbol_get_value (expr->add_symbol) - symbol_get_value (right_expr.add_symbol); + expr->add_number -= right_expr.add_number; + expr->add_number -= offset; + + expr->type = EXPR_TYPE_CONSTANT; + expr->add_symbol = 0; + + } else if (left_op == EXPR_TYPE_SUBTRACT && right_expr.type == EXPR_TYPE_CONSTANT && expr->type != EXPR_TYPE_REGISTER) { + expr->add_number -= right_expr.add_number; + } else if (left_op == EXPR_TYPE_ADD && expr->type == EXPR_TYPE_CONSTANT && right_expr.type != EXPR_TYPE_REGISTER) { + + expr->type = right_expr.type; + + expr->add_symbol = right_expr.add_symbol; + expr->op_symbol = right_expr.op_symbol; + + expr->add_number += right_expr.add_number; + ret_section = right_section; + + } else if (expr->type == EXPR_TYPE_CONSTANT && right_expr.type == EXPR_TYPE_CONSTANT) { + + /* Checks for division by zero. */ + if ((left_op == EXPR_TYPE_DIVIDE || left_op == EXPR_TYPE_MODULUS) && right_expr.add_number == 0) { + + report_at (get_filename (), get_line_number (), REPORT_WARNING, "division by zero"); + right_expr.add_number = 1; + + } + + switch (left_op) { + + case EXPR_TYPE_LOGICAL_OR: + + expr->add_number = expr->add_number || right_expr.add_number; + break; + + case EXPR_TYPE_LOGICAL_AND: + + expr->add_number = expr->add_number && right_expr.add_number; + break; + + case EXPR_TYPE_EQUAL: + case EXPR_TYPE_NOT_EQUAL: + + expr->add_number = (expr->add_number == right_expr.add_number) ? ~(signed long) 0 : 0; + + if (left_op == EXPR_TYPE_NOT_EQUAL) { + expr->add_number = ~expr->add_number; + } + + break; + + case EXPR_TYPE_LESSER: + + expr->add_number = (signed long) expr->add_number < (signed long) right_expr.add_number ? ~(signed long) 0 : 0; + break; + + case EXPR_TYPE_LESSER_EQUAL: + + expr->add_number = (signed long) expr->add_number <= (signed long) right_expr.add_number ? 
~(signed long) 0 : 0; + break; + + case EXPR_TYPE_GREATER: + + expr->add_number = (signed long) expr->add_number > (signed long) right_expr.add_number ? ~(signed long) 0 : 0; + break; + + case EXPR_TYPE_GREATER_EQUAL: + + expr->add_number = (signed long) expr->add_number >= (signed long) right_expr.add_number ? ~(signed long) 0 : 0; + break; + + case EXPR_TYPE_ADD: + + expr->add_number += right_expr.add_number; + break; + + case EXPR_TYPE_SUBTRACT: + + expr->add_number -= right_expr.add_number; + break; + + case EXPR_TYPE_BIT_INCLUSIVE_OR: + + expr->add_number |= right_expr.add_number; + break; + + case EXPR_TYPE_BIT_EXCLUSIVE_OR: + + expr->add_number ^= right_expr.add_number; + break; + + case EXPR_TYPE_BIT_AND: + + expr->add_number &= right_expr.add_number; + break; + + case EXPR_TYPE_MULTIPLY: + + expr->add_number *= right_expr.add_number; + break; + + case EXPR_TYPE_DIVIDE: + + expr->add_number /= right_expr.add_number; + break; + + case EXPR_TYPE_MODULUS: + + expr->add_number %= right_expr.add_number; + break; + + case EXPR_TYPE_LEFT_SHIFT: + + expr->add_number = (signed long) (expr->add_number) << (signed long) (right_expr.add_number); + break; + + case EXPR_TYPE_RIGHT_SHIFT: + + expr->add_number = (unsigned long) (expr->add_number) >> (unsigned long) (right_expr.add_number); + break; + + default: + + goto general_case; + + } + + } else if (expr->type == EXPR_TYPE_SYMBOL && right_expr.type == EXPR_TYPE_SYMBOL && (left_op == EXPR_TYPE_ADD || left_op == EXPR_TYPE_SUBTRACT || (expr->add_number == 0 && right_expr.add_number == 0))) { + + expr->type = left_op; + expr->op_symbol = right_expr.add_symbol; + + if (left_op == EXPR_TYPE_ADD) { + expr->add_number += right_expr.add_number; + } else if (left_op == EXPR_TYPE_SUBTRACT) { + + expr->add_number -= right_expr.add_number; + + if (ret_section == right_section && SECTION_IS_NORMAL (ret_section) && !symbol_force_reloc (expr->add_symbol) && !symbol_force_reloc (right_expr.add_symbol)) { + ret_section = right_section 
= absolute_section; + } + + } + + } else { + + general_case: + + expr->add_symbol = make_expr_symbol (expr); + expr->add_number = 0; + + expr->op_symbol = make_expr_symbol (&right_expr); + expr->type = left_op; + + } + + if (ret_section != right_section) { + + if (ret_section == undefined_section) { + /* Nothing is done. */ + } else if (right_section == undefined_section) { + ret_section = right_section; + } else if (ret_section == expr_section) { + /* Nothing is done. */ + } else if (right_section == expr_section) { + ret_section = right_section; + } else if (ret_section == reg_section) { + /* Nothing is done. */ + } else if (right_section == reg_section) { + ret_section = right_section; + } else if (right_section == absolute_section) { + /* Nothing is done. */ + } else if (ret_section == absolute_section) { + ret_section = right_section; + } else if (left_op == EXPR_TYPE_SUBTRACT) { + /* Nothing is done. */ + } else { + report_at (get_filename (), get_line_number (), REPORT_ERROR, "operation combines symbols in different sections"); + } + + } + + left_op = right_op; + + } + + if (rank == 0 && expr_mode == EXPR_MODE_EVALUATE) { + resolve_expression (expr); + } + + *pp = skip_whitespace (*pp); + return (expr->type == EXPR_TYPE_CONSTANT ? absolute_section : ret_section); + +} + +struct symbol *make_expr_symbol (struct expr *expr) { + + struct expr_symbol_line *es_line; + struct symbol *symbol; + + if (expr->type == EXPR_TYPE_SYMBOL && expr->add_number == 0) { + return expr->add_symbol; + } + + symbol = symbol_create (FAKE_LABEL_NAME, (expr->type == EXPR_TYPE_CONSTANT ? absolute_section : (expr->type == EXPR_TYPE_REGISTER ? 
reg_section : expr_section)), 0, &zero_address_frag); + symbol_set_value_expression (symbol, expr); + + es_line = xmalloc (sizeof (*es_line)); + + es_line->symbol = symbol; + get_filename_and_line_number (&(es_line->filename), &(es_line->line_number)); + + es_line->next = expr_symbol_lines; + expr_symbol_lines = es_line; + + return symbol; + +} + +signed long absolute_expression_read_into (char *start, char **pp, struct expr *expr) { + + expression_evaluate_and_read_into (start, pp, expr); + + if (expr->type != EXPR_TYPE_CONSTANT) { + + if (expr->type != EXPR_TYPE_ABSENT) { + report_at (get_filename (), get_line_number (), REPORT_ERROR, "bad or irreducible absolute expression"); + } + + expr->add_number = 0; + + } + + return expr->add_number; + +} + +int expr_symbol_get_filename_and_line_number (struct symbol *symbol, const char **filename_p, unsigned long *line_number_p) { + + struct expr_symbol_line *es_line; + + for (es_line = expr_symbol_lines; es_line; es_line = es_line->next) { + + if (symbol == es_line->symbol) { + + *filename_p = es_line->filename; + *line_number_p = es_line->line_number; + + return 0; + + } + + } + + return 1; + +} + +signed long get_result_of_absolute_expression (char *start, char **pp) { + + struct expr expr; + return absolute_expression_read_into (start, pp, &expr); + +} + +int resolve_expression (struct expr *expr) { + + struct symbol *op_symbol = expr->op_symbol; + struct frag *left_frag, *right_frag; + + struct symbol *add_symbol = expr->add_symbol; + struct symbol *original_add_symbol = add_symbol; + + unsigned long final_value = expr->add_number; + unsigned long left_value, right_value; + + struct section *left_section, *right_section; + signed long frag_offset; + + switch (expr->type) { + + case EXPR_TYPE_CONSTANT: + case EXPR_TYPE_REGISTER: + + left_value = 0; + break; + + case EXPR_TYPE_SYMBOL: + + if (get_symbol_snapshot (&add_symbol, &left_value, &left_section, &left_frag)) { + return 0; + } + + break; + + case 
EXPR_TYPE_LOGICAL_NOT: + case EXPR_TYPE_BIT_NOT: + case EXPR_TYPE_UNARY_MINUS: + + if (get_symbol_snapshot (&add_symbol, &left_value, &left_section, &left_frag)) { + return 0; + } + + if (left_section != absolute_section) { + return 0; + } + + switch (expr->type) { + + case EXPR_TYPE_LOGICAL_NOT: + + left_value = !left_value; + break; + + case EXPR_TYPE_BIT_NOT: + + left_value = ~left_value; + break; + + case EXPR_TYPE_UNARY_MINUS: + + left_value = -left_value; + break; + + default: + + break; + + } + + expr->type = EXPR_TYPE_CONSTANT; + break; + + case EXPR_TYPE_LOGICAL_OR: + case EXPR_TYPE_LOGICAL_AND: + case EXPR_TYPE_EQUAL: + case EXPR_TYPE_NOT_EQUAL: + case EXPR_TYPE_LESSER: + case EXPR_TYPE_LESSER_EQUAL: + case EXPR_TYPE_GREATER: + case EXPR_TYPE_GREATER_EQUAL: + case EXPR_TYPE_ADD: + case EXPR_TYPE_SUBTRACT: + case EXPR_TYPE_BIT_INCLUSIVE_OR: + case EXPR_TYPE_BIT_EXCLUSIVE_OR: + case EXPR_TYPE_BIT_AND: + case EXPR_TYPE_MULTIPLY: + case EXPR_TYPE_DIVIDE: + case EXPR_TYPE_MODULUS: + case EXPR_TYPE_LEFT_SHIFT: + case EXPR_TYPE_RIGHT_SHIFT: + + if (get_symbol_snapshot (&add_symbol, &left_value, &left_section, &left_frag) || get_symbol_snapshot (&op_symbol, &right_value, &right_section, &right_frag)) { + return 0; + } + + if (expr->type == EXPR_TYPE_ADD) { + + if (right_section == absolute_section) { + + final_value += right_value; + + expr->type = EXPR_TYPE_SYMBOL; + break; + + } else if (left_section == absolute_section) { + + final_value += left_value; + left_value = right_value; + + left_section = right_section; + add_symbol = op_symbol; + + original_add_symbol = expr->op_symbol; + expr->type = EXPR_TYPE_SYMBOL; + + break; + + } + + } else if (expr->type == EXPR_TYPE_SUBTRACT) { + + if (right_section == absolute_section) { + + final_value -= right_value; + + expr->type = EXPR_TYPE_SYMBOL; + break; + + } + + } + + frag_offset = 0; + + /** + * Equality and non-equality operations are allowed on everything. 
+ * Subtraction and other comparison operators are allowed if both operands are in the same section. + * Bit OR, bit XOR, bit AND and multiplication with constant zero are permitted on anything. + * Shifts by constant zero are permitted on anything. + * Multiplication and division by constant one are permitted on anything. + * Bit OR and AND are permitted on two identical undefined symbols. + * For everything else, both operands must be absolute. + * Addition and subtraction of constants are handled above. + */ + if (!(left_section == absolute_section && + right_section == absolute_section) && + !(expr->type == EXPR_TYPE_EQUAL || + expr->type == EXPR_TYPE_NOT_EQUAL) && + !((expr->type == EXPR_TYPE_SUBTRACT || + expr->type == EXPR_TYPE_LESSER || + expr->type == EXPR_TYPE_LESSER_EQUAL || + expr->type == EXPR_TYPE_GREATER || + expr->type == EXPR_TYPE_GREATER_EQUAL) && + left_section == right_section && + (finalize_symbols || + frags_offset_is_fixed (left_frag, right_frag, &frag_offset) || + (expr->type == EXPR_TYPE_GREATER && + frags_is_greater_than_offset (left_value, left_frag, right_value, right_frag, &frag_offset))) && + (left_section != undefined_section || + add_symbol == op_symbol))) + { + + if ((left_section == absolute_section && left_value == 0) || (right_section == absolute_section && right_value == 0)) { + + if (expr->type == EXPR_TYPE_BIT_INCLUSIVE_OR || expr->type == EXPR_TYPE_BIT_EXCLUSIVE_OR) { + + if (!(right_section == absolute_section && right_value == 0)) { + + left_value = right_value; + + left_section = right_section; + add_symbol = op_symbol; + + original_add_symbol = expr->op_symbol; + + } + + expr->type = EXPR_TYPE_SYMBOL; + break; + + } else if (expr->type == EXPR_TYPE_LEFT_SHIFT || expr->type == EXPR_TYPE_RIGHT_SHIFT) { + + if (!(left_section == absolute_section && left_value == 0)) { + + expr->type = EXPR_TYPE_SYMBOL; + break; + + } + + } else if (expr->type != EXPR_TYPE_BIT_AND && expr->type != EXPR_TYPE_MULTIPLY) { + return 0; + } + + }
else if (expr->type == EXPR_TYPE_MULTIPLY && left_section == absolute_section && left_value == 1) { + + left_value = right_value; + + left_section = right_section; + add_symbol = op_symbol; + + original_add_symbol = expr->op_symbol; + + /* Multiplying by constant one leaves only the right operand, so the result is a plain symbol, as in the right-operand-is-one case below. */ + expr->type = EXPR_TYPE_SYMBOL; + break; + + } else if ((expr->type == EXPR_TYPE_MULTIPLY || expr->type == EXPR_TYPE_DIVIDE) && right_section == absolute_section && right_value == 1) { + + expr->type = EXPR_TYPE_SYMBOL; + break; + + } else if (!(left_value == right_value && ((left_section == reg_section && right_section == reg_section) || (left_section == undefined_section && right_section == undefined_section && add_symbol == op_symbol)))) { + return 0; + } else if (expr->type == EXPR_TYPE_BIT_INCLUSIVE_OR || expr->type == EXPR_TYPE_BIT_AND) { + + expr->type = EXPR_TYPE_SYMBOL; + break; + + } else if (expr->type != EXPR_TYPE_BIT_EXCLUSIVE_OR) { + return 0; + } + + } + + right_value += frag_offset; + + switch (expr->type) { + + case EXPR_TYPE_LOGICAL_OR: + + left_value = left_value || right_value; + break; + + case EXPR_TYPE_LOGICAL_AND: + + left_value = left_value && right_value; + break; + + case EXPR_TYPE_EQUAL: + case EXPR_TYPE_NOT_EQUAL: + + left_value = ((left_value == right_value && left_section == right_section && (finalize_symbols || left_frag == right_frag) && (left_section != undefined_section || add_symbol == op_symbol)) ? ~(unsigned long) 0 : 0); + + if (expr->type == EXPR_TYPE_NOT_EQUAL) { + left_value = ~left_value; + } + + break; + + case EXPR_TYPE_LESSER: + + left_value = (signed long) left_value < (signed long) right_value ? ~(unsigned long) 0 : 0; + break; + + case EXPR_TYPE_LESSER_EQUAL: + + left_value = (signed long) left_value <= (signed long) right_value ? ~(unsigned long) 0 : 0; + break; + + case EXPR_TYPE_GREATER: + + left_value = (signed long) left_value > (signed long) right_value ? ~(unsigned long) 0 : 0; + break; + + case EXPR_TYPE_GREATER_EQUAL: + + left_value = (signed long) left_value >= (signed long) right_value ?
~(unsigned long) 0 : 0; + break; + + case EXPR_TYPE_ADD: + + left_value += right_value; + break; + + case EXPR_TYPE_SUBTRACT: + + left_value -= right_value; + break; + + case EXPR_TYPE_BIT_INCLUSIVE_OR: + + left_value |= right_value; + break; + + case EXPR_TYPE_BIT_EXCLUSIVE_OR: + + left_value ^= right_value; + break; + + case EXPR_TYPE_BIT_AND: + + left_value &= right_value; + break; + + case EXPR_TYPE_MULTIPLY: + + left_value *= right_value; + break; + + case EXPR_TYPE_DIVIDE: + + if (right_value == 0) { + return 0; + } + + left_value = (signed long) left_value / (signed long) right_value; + break; + + case EXPR_TYPE_MODULUS: + + if (right_value == 0) { + return 0; + } + + left_value = (signed long) left_value % (signed long) right_value; + break; + + case EXPR_TYPE_LEFT_SHIFT: + + left_value = (unsigned long) left_value << (unsigned long) right_value; + break; + + case EXPR_TYPE_RIGHT_SHIFT: + + left_value = (unsigned long) left_value >> (unsigned long) right_value; + break; + + default: + + break; + + } + + expr->type = EXPR_TYPE_CONSTANT; + break; + + default: + + return 0; + + } + + if (expr->type == EXPR_TYPE_SYMBOL) { + + if (left_section == absolute_section) { + expr->type = EXPR_TYPE_CONSTANT; + } else if (left_section == reg_section && final_value == 0) { + expr->type = EXPR_TYPE_REGISTER; + } else if (add_symbol != original_add_symbol) { + final_value += left_value; + } + + expr->add_symbol = add_symbol; + + } + + if (expr->type == EXPR_TYPE_CONSTANT) { + final_value += left_value; + } + + expr->add_number = final_value; + return 1; + +} diff --git a/expr.h b/expr.h new file mode 100644 index 0000000..eb99d62 --- /dev/null +++ b/expr.h @@ -0,0 +1,87 @@ +/****************************************************************************** + * @file expr.h + *****************************************************************************/ +#ifndef _EXPR_H +#define _EXPR_H + +enum expr_type { + + EXPR_TYPE_INVALID, + EXPR_TYPE_ABSENT, + EXPR_TYPE_CONSTANT, + 
EXPR_TYPE_SYMBOL, + EXPR_TYPE_REGISTER, + EXPR_TYPE_INDEX, + EXPR_TYPE_LOGICAL_OR, + EXPR_TYPE_LOGICAL_AND, + EXPR_TYPE_EQUAL, + EXPR_TYPE_NOT_EQUAL, + EXPR_TYPE_LESSER, + EXPR_TYPE_LESSER_EQUAL, + EXPR_TYPE_GREATER, + EXPR_TYPE_GREATER_EQUAL, + EXPR_TYPE_ADD, + EXPR_TYPE_SUBTRACT, + EXPR_TYPE_BIT_INCLUSIVE_OR, + EXPR_TYPE_BIT_EXCLUSIVE_OR, + EXPR_TYPE_BIT_AND, + EXPR_TYPE_MULTIPLY, + EXPR_TYPE_DIVIDE, + EXPR_TYPE_MODULUS, + EXPR_TYPE_LEFT_SHIFT, + EXPR_TYPE_RIGHT_SHIFT, + EXPR_TYPE_LOGICAL_NOT, + EXPR_TYPE_BIT_NOT, + EXPR_TYPE_UNARY_MINUS, + + /* Machine dependent operators. */ + EXPR_TYPE_MACHINE_DEPENDENT_0, + EXPR_TYPE_MACHINE_DEPENDENT_1, + EXPR_TYPE_MACHINE_DEPENDENT_2, + EXPR_TYPE_MACHINE_DEPENDENT_3, + EXPR_TYPE_MACHINE_DEPENDENT_4, + EXPR_TYPE_MACHINE_DEPENDENT_5, + EXPR_TYPE_MACHINE_DEPENDENT_6, + EXPR_TYPE_MACHINE_DEPENDENT_7, + EXPR_TYPE_MACHINE_DEPENDENT_8, + EXPR_TYPE_MACHINE_DEPENDENT_9, + EXPR_TYPE_MACHINE_DEPENDENT_10, + EXPR_TYPE_MACHINE_DEPENDENT_11, + EXPR_TYPE_MACHINE_DEPENDENT_12, + EXPR_TYPE_MACHINE_DEPENDENT_13, + + /* How many expression types exist. 
*/ + EXPR_TYPE_MAX + +}; + +#define EXPR_MODE_NORMAL 0 +#define EXPR_MODE_EVALUATE 1 + +struct expr { + + enum expr_type type; + + struct symbol *add_symbol; + struct symbol *op_symbol; + + unsigned long add_number; + +}; + +#define expression_evaluate_and_read_into(start, pp, expr) (read_into ((start), (pp), (expr), 0, EXPR_MODE_EVALUATE)) +#define expression_read_into(start, pp, expr) (read_into ((start), (pp), (expr), 0, EXPR_MODE_NORMAL)) + +struct section *current_location (struct expr *expr); +struct section *read_into (char *start, char **pp, struct expr *expr, unsigned int rank, int expr_mode); + +struct symbol *make_expr_symbol (struct expr *expr); +void expr_type_set_rank (enum expr_type expr_type, unsigned int rank); + +signed long absolute_expression_read_into (char *start, char **pp, struct expr *expr); +signed long get_result_of_absolute_expression (char *start, char **pp); + +int expr_symbol_get_filename_and_line_number (struct symbol *symbol, const char **filename_p, unsigned long *line_number_p); +int resolve_expression (struct expr *expr); + +#endif /* _EXPR_H */ diff --git a/fixup.c b/fixup.c new file mode 100644 index 0000000..379fec1 --- /dev/null +++ b/fixup.c @@ -0,0 +1,627 @@ +/****************************************************************************** + * @file fixup.c + *****************************************************************************/ +#include + +#include "as.h" +#include "expr.h" +#include "fixup.h" +#include "frag.h" +#include "lib.h" +#include "report.h" +#include "section.h" +#include "symbol.h" + +static struct fixup *fixup_new_internal (struct frag *frag, unsigned long where, int size, struct symbol *add_symbol, struct symbol *sub_symbol, long add_number, int pcrel, int reloc_type) { + + struct fixup *fixup = xmalloc (sizeof (*fixup)); + + fixup->frag = frag; + fixup->size = size; + fixup->add_symbol = add_symbol; + fixup->sub_symbol = sub_symbol; + fixup->pcrel = pcrel; + fixup->reloc_type = reloc_type; + 
fixup->where = where; + fixup->add_number = add_number; + + if (current_frag_chain->last_fixup) { + + current_frag_chain->last_fixup->next = fixup; + current_frag_chain->last_fixup = fixup; + + } else { + current_frag_chain->last_fixup = current_frag_chain->first_fixup = fixup; + } + + return fixup; + +} + +struct fixup *fixup_new (struct frag *frag, unsigned long where, int size, struct symbol *add_symbol, long add_number, int pcrel, int reloc_type) { + return fixup_new_internal (frag, where, size, add_symbol, 0, add_number, pcrel, reloc_type); +} + +struct fixup *fixup_new_expr (struct frag *frag, unsigned long where, int size, struct expr *expr, int pcrel, int reloc_type) { + + struct symbol *add_symbol = 0, *sub_symbol = 0; + signed long add_number = 0; + + switch (expr->type) { + + case EXPR_TYPE_ABSENT: + + break; + + case EXPR_TYPE_CONSTANT: + + add_number = expr->add_number; + break; + + case EXPR_TYPE_SUBTRACT: + + sub_symbol = expr->op_symbol; + /* fall through */ + + case EXPR_TYPE_SYMBOL: + + add_symbol = expr->add_symbol; + add_number = expr->add_number; + + break; + + default: + + add_symbol = make_expr_symbol (expr); + break; + + } + + return fixup_new_internal (frag, where, size, add_symbol, sub_symbol, add_number, pcrel, reloc_type); + +} + + +signed long machine_dependent_estimate_size_before_relax (struct frag *frag, struct section *section); +signed long machine_dependent_relax_frag (struct frag *frag, struct section *section, signed long change); + +void machine_dependent_finish_frag (struct frag *frag); + +static unsigned long relax_align (unsigned long address, unsigned long alignment) { + + unsigned long mask, new_address; + + mask = ~(~((unsigned int) 0) << alignment); + new_address = (address + mask) & ~mask; + + return new_address - address; + +} + +static void relax_section (struct section *section) { + + struct frag *root_frag, *frag; + int changed; + + unsigned long address, frag_count, max_iterations; + unsigned long alignment_needed; 
+ + section_set (section); + + root_frag = current_frag_chain->first_frag; + address = 0; + + for (frag_count = 0, frag = root_frag; frag; frag_count++, frag = frag->next) { + + frag->relax_marker = 0; + frag->address = address; + + address += frag->fixed_size; + + switch (frag->relax_type) { + + case RELAX_TYPE_NONE_NEEDED: + + break; + + case RELAX_TYPE_ALIGN: + case RELAX_TYPE_ALIGN_CODE: + + alignment_needed = relax_align (address, frag->offset); + + if (frag->relax_subtype != 0 && alignment_needed > frag->relax_subtype) { + alignment_needed = 0; + } + + address += alignment_needed; + break; + + case RELAX_TYPE_ORG: + case RELAX_TYPE_SPACE: + + break; + + case RELAX_TYPE_MACHINE_DEPENDENT: + + address += machine_dependent_estimate_size_before_relax (frag, section); + break; + + default: + + report_at (__FILE__, __LINE__, REPORT_INTERNAL_ERROR, "%s: %lu: invalid relax type", frag->filename, frag->line_number); + exit (EXIT_FAILURE); + + } + + } + + /** + * Prevents an infinite loop caused by frag growing because of a symbol that moves when the frag grows. + * + * Example: + * + * .org _abc + 2 + * _abc: + */ + max_iterations = frag_count * frag_count; + + /* Too many frags might cause an overflow. 
*/ + if (max_iterations < frag_count) { + max_iterations = frag_count; + } + + do { + + long change = 0; + changed = 0; + + for (frag = root_frag; frag; frag = frag->next) { + + long growth = 0; + unsigned long old_address; + + unsigned long old_offset; + unsigned long new_offset; + + frag->relax_marker = !frag->relax_marker; + + old_address = frag->address; + frag->address += change; + + switch (frag->relax_type) { + + case RELAX_TYPE_NONE_NEEDED: + + growth = 0; + break; + + case RELAX_TYPE_ALIGN: + case RELAX_TYPE_ALIGN_CODE: + + old_offset = relax_align (old_address + frag->fixed_size, frag->offset); + new_offset = relax_align (frag->address + frag->fixed_size, frag->offset); + + if (frag->relax_subtype != 0) { + + if (old_offset > frag->relax_subtype) { + old_offset = 0; + } + + if (new_offset > frag->relax_subtype) { + new_offset = 0; + } + + } + + growth = new_offset - old_offset; + break; + + case RELAX_TYPE_ORG: { + + struct symbol *symbol = state->end_symbol; + unsigned long target = frag->offset; + + if (frag->symbol) { + target += symbol_get_value (frag->symbol); + } + + if (symbol) { + + if (frag != symbol->frag) { + + frag->relax_type = RELAX_TYPE_NONE_NEEDED; + growth = 0; + + frag->next->address += target; + break; + + } + + } else if (state->format == AS_OUTPUT_BIN) { + + if (frag->offset == root_frag->offset) { + + frag->relax_type = RELAX_TYPE_NONE_NEEDED; + growth = 0; + + frag->next->address += target; + break; + + } + + frag->relax_type = RELAX_TYPE_NONE_NEEDED; + report_at (frag->filename, frag->line_number, REPORT_ERROR, "program origin redefined"); + + } + + growth = target - (frag->next->address + change); + + if (frag->address + frag->fixed_size > target) { + + report_at (frag->filename, frag->line_number, REPORT_ERROR, "attempt to move .org backwards"); + growth = 0; + + /* Changes the frag so no more errors appear because of it. 
*/ + frag->relax_type = RELAX_TYPE_ALIGN; + frag->offset = 0; + frag->fixed_size = frag->next->address + change - frag->address; + + } + + break; + + } + + case RELAX_TYPE_SPACE: + + growth = 0; + + if (frag->symbol) { + + long amount = symbol_get_value (frag->symbol); + + if (symbol_get_section (frag->symbol) != absolute_section || symbol_is_undefined (frag->symbol)) { + + report_at (frag->filename, frag->line_number, REPORT_WARNING, ".space specifies non-absolute value"); + + /* Prevents the error from repeating. */ + frag->symbol = 0; + + } else if (amount < 0) { + + report_at (frag->filename, frag->line_number, REPORT_WARNING, ".space with negative value, ignoring"); + frag->symbol = 0; + + } else { + growth = old_address + frag->fixed_size + amount - frag->next->address; + } + + } + + break; + + case RELAX_TYPE_MACHINE_DEPENDENT: + + growth = machine_dependent_relax_frag (frag, section, change); + break; + + default: + + report_at (__FILE__, __LINE__, REPORT_INTERNAL_ERROR, "%s: %lu: invalid relax type", frag->filename, frag->line_number); + exit (EXIT_FAILURE); + + } + + if (growth) { + + change += growth; + changed = 1; + + } + + } + + } while (changed && --max_iterations); + + if (changed) { + + report_at (0, 0, REPORT_FATAL_ERROR, "Infinite loop encountered whilst attempting to compute the addresses in section %s", section_get_name (section)); + exit (EXIT_FAILURE); + + } + +} + +static void finish_frags_after_relaxation (struct section *section) { + + struct frag *root_frag, *frag; + + section_set (section); + root_frag = current_frag_chain->first_frag; + + for (frag = root_frag; frag; frag = frag->next) { + + switch (frag->relax_type) { + + case RELAX_TYPE_NONE_NEEDED: + + break; + + case RELAX_TYPE_ORG: + case RELAX_TYPE_ALIGN: + case RELAX_TYPE_ALIGN_CODE: + case RELAX_TYPE_SPACE: { + + signed long i; + + unsigned char *p; + unsigned char fill; + + frag->offset = frag->next->address - (frag->address + frag->fixed_size); + + if (((long) (frag->offset)) 
< 0) { + + report_at (frag->filename, frag->line_number, REPORT_ERROR, "attempt to .org/.space backward (%lu)", frag->offset); + frag->offset = 0; + + } + + p = finished_frag_increase_fixed_size_by_frag_offset (frag); + fill = *p; + + for (i = 0; i < frag->offset; i++) { + p[i] = fill; + } + + break; + + } + + case RELAX_TYPE_MACHINE_DEPENDENT: + + machine_dependent_finish_frag (frag); + break; + + default: + + report_at (__FILE__, __LINE__, REPORT_INTERNAL_ERROR, "invalid relax type"); + exit (EXIT_FAILURE); + + } + + } + +} + +static void adjust_reloc_symbols_of_section (struct section *section) { + + struct fixup *fixup; + section_set (section); + + for (fixup = current_frag_chain->first_fixup; fixup; fixup = fixup->next) { + + if (fixup->done) { + continue; + } + + if (fixup->add_symbol) { + + struct symbol *symbol = fixup->add_symbol; + struct section *symbol_section; + + /* Resolves symbols that have not been resolved yet (expression symbols). */ + symbol_resolve_value (symbol); + + if (fixup->sub_symbol) { + symbol_resolve_value (fixup->sub_symbol); + } + + if (symbol_uses_reloc_symbol (symbol)) { + + fixup->add_number += symbol_get_value_expression (symbol)->add_number; + + symbol = symbol_get_value_expression (symbol)->add_symbol; + fixup->add_symbol = symbol; + + } + + if (symbol_force_reloc (symbol)) { + continue; + } + + symbol_section = symbol_get_section (symbol); + + if (symbol_section == absolute_section) { + continue; + } + + fixup->add_number += symbol_get_value (symbol); + fixup->add_symbol = section_symbol (symbol_get_section (symbol)); + + } + + } + +} + +signed long machine_dependent_pcrel_from (struct fixup *fixup); + +int machine_dependent_force_relocation_local (struct fixup *fixup); +void machine_dependent_apply_fixup (struct fixup *fixup, unsigned long value); + +static unsigned long fixup_section (struct section *section) { + + unsigned long section_reloc_count = 0; + unsigned long add_number; + + struct section *add_symbol_section = 
absolute_section; + struct fixup *fixup; + + section_set (section); + + for (fixup = current_frag_chain->first_fixup; fixup; fixup = fixup->next) { + + add_number = fixup->add_number; + + if (fixup->add_symbol) { + add_symbol_section = symbol_get_section (fixup->add_symbol); + } + + if (fixup->sub_symbol) { + + struct section *sub_symbol_section; + + symbol_resolve_value (fixup->sub_symbol); + sub_symbol_section = symbol_get_section (fixup->sub_symbol); + + if (fixup->add_symbol && add_symbol_section == sub_symbol_section && !symbol_force_reloc (fixup->add_symbol) && !symbol_force_reloc (fixup->add_symbol)) { + + add_number += symbol_get_value (fixup->add_symbol); + add_number -= symbol_get_value (fixup->sub_symbol); + + fixup->add_number = add_number; + fixup->add_symbol = 0; + fixup->sub_symbol = 0; + + } else if (sub_symbol_section == section) { + + add_number -= symbol_get_value (fixup->sub_symbol); + + if (!fixup->pcrel) { + add_number += machine_dependent_pcrel_from (fixup); + } + + fixup->sub_symbol = 0; + fixup->pcrel = 1; + + } else { + + report_at (__FILE__, __LINE__, REPORT_INTERNAL_ERROR, "+++fixup_section sub_symbol"); + exit (EXIT_FAILURE); + + } + + } + + if (fixup->add_symbol) { + + if ((add_symbol_section == section) && !machine_dependent_force_relocation_local (fixup)) { + + add_number += symbol_get_value (fixup->add_symbol); + fixup->add_number = add_number; + + if (fixup->pcrel) { + + add_number -= machine_dependent_pcrel_from (fixup); + fixup->pcrel = 0; + + } + + fixup->add_symbol = 0; + + } else if (add_symbol_section == absolute_section || (fixup->reloc_type == RELOC_TYPE_FAR_CALL && !symbol_is_undefined (fixup->add_symbol))) { + + add_number += symbol_get_value (fixup->add_symbol); + + fixup->add_number = add_number; + fixup->add_symbol = 0; + + } + + } + + if (fixup->pcrel) { + + add_number -= machine_dependent_pcrel_from (fixup); + + if (!fixup->add_symbol && !fixup->done) { + fixup->add_symbol = section_symbol (absolute_section); + } + + 
} + + machine_dependent_apply_fixup (fixup, add_number); + + if (!fixup->done) { + section_reloc_count++; + } + + if (fixup->size < sizeof (unsigned long)) { + + unsigned long mask = -1; + mask <<= fixup->size * 8 - !!fixup->fixup_signed; + + if ((add_number & mask) && (fixup->fixup_signed ? ((add_number & mask) != mask) : (-add_number & mask))) { + + report_at (0, 0, REPORT_ERROR, (add_number > 1000) + ? "value of %lu too large for field of %u byte%s at %#lx" : "value of %lu too large for field of %u byte%s at %#lx", + add_number, fixup->size, ((fixup->size == 1) ? "" : "s"), fixup->frag->address + fixup->where); + + } + + } + + } + + return section_reloc_count; + +} + +void fixup_code (void) { + + struct section *section; + struct symbol *symbol; + + for (section = sections; section; section = section_get_next_section (section)) { + relax_section (section); + } + + for (section = sections; section; section = section_get_next_section (section)) { + finish_frags_after_relaxation (section); + } + + { + + unsigned long address, text_section_size; + struct frag *frag; + + section_set (text_section); + + text_section_size = current_frag_chain->last_frag->address + current_frag_chain->last_frag->fixed_size; + address = text_section_size; + + section_set (data_section); + + for (frag = current_frag_chain->first_frag; frag; frag = frag->next) { + + frag->address = address; + address += frag->fixed_size; + + } + + section_set (bss_section); + + for (frag = current_frag_chain->first_frag; frag; frag = frag->next) { + + frag->address = address; + address += frag->fixed_size; + + } + + } + + finalize_symbols = 1; + + for (symbol = symbols; symbol; symbol = symbol->next) { + symbol_resolve_value (symbol); + } + + for (section = sections; section; section = section_get_next_section (section)) { + adjust_reloc_symbols_of_section (section); + } + + for (section = sections; section; section = section_get_next_section (section)) { + fixup_section (section); + } + +} diff --git 
a/fixup.h b/fixup.h new file mode 100644 index 0000000..833117f --- /dev/null +++ b/fixup.h @@ -0,0 +1,31 @@ +/****************************************************************************** + * @file fixup.h + *****************************************************************************/ +#ifndef _FIXUP_H +#define _FIXUP_H + +#include "expr.h" + +#define RELOC_TYPE_DEFAULT 0 +#define RELOC_TYPE_FAR_CALL 1 + +struct fixup { + + struct frag *frag; + int done, fixup_signed; + + unsigned long where; + unsigned int size; + + struct symbol *add_symbol, *sub_symbol; + long add_number; + + int pcrel, reloc_type; + struct fixup *next; + +}; + +struct fixup *fixup_new (struct frag *frag, unsigned long where, int size, struct symbol *add_symbol, long add_number, int pcrel, int reloc_type); +struct fixup *fixup_new_expr (struct frag *frag, unsigned long where, int size, struct expr *expr, int pcrel, int reloc_type); + +#endif /* _FIXUP_H */ diff --git a/frag.c b/frag.c new file mode 100644 index 0000000..a0e7581 --- /dev/null +++ b/frag.c @@ -0,0 +1,197 @@ +/****************************************************************************** + * @file frag.c + *****************************************************************************/ +#include "frag.h" +#include "lib.h" +#include "section.h" + +struct frag zero_address_frag = { 0 }; +struct frag *current_frag = 0; + +struct frag *frag_alloc (void) { + return xmalloc (sizeof (struct frag)); +} + +int frags_is_greater_than_offset (unsigned long offset2, struct frag *frag2, unsigned long offset1, struct frag *frag1, signed long *offset_p) { + + signed long difference; + struct frag *frag; + + /* Checks for something that should be impossible. 
*/ + if (frag2 == frag1 || offset1 > frag1->fixed_size) { + return 0; + } + + difference = offset2 - offset1; + + for (frag = frag1;;) { + + difference += frag->fixed_size; + frag = frag->next; + + if (frag == frag2) { + + if (difference == 0) { + return 0; + } + + break; + + } + + if (!frag) { + return 0; + } + + } + + *offset_p = offset2 - offset1 - difference; + return 1; + +} + +int frags_offset_is_fixed (struct frag *frag1, struct frag *frag2, signed long *offset_p) { + + signed long offset = frag1->address - frag2->address; + struct frag *frag; + + if (frag1 == frag2) { + + *offset_p = offset; + return 1; + + } + + /* Checks if frag2 is after frag1. */ + frag = frag1; + + while (frag->relax_type == RELAX_TYPE_NONE_NEEDED) { + + offset += frag->fixed_size; + + if (!(frag = frag->next)) { + break; + } + + if (frag == frag2) { + + *offset_p = offset; + return 1; + + } + + } + + /* Checks if frag1 is after frag2. */ + offset = frag1->address - frag2->address; + frag = frag2; + + while (frag->relax_type == RELAX_TYPE_NONE_NEEDED) { + + offset -= frag->fixed_size; + + if (!(frag = frag->next)) { + break; + } + + if (frag == frag1) { + + *offset_p = offset; + return 1; + + } + + } + + return 0; + +} + +unsigned char *finished_frag_increase_fixed_size_by_frag_offset (struct frag *frag) { + + frag->fixed_size += frag->offset; + + if (frag->fixed_size > frag->size) { + + frag->buf = xrealloc (frag->buf, frag->fixed_size); + frag->size = frag->fixed_size; + + } + + return (frag->buf + frag->fixed_size - frag->offset); + +} + +unsigned char *frag_alloc_space (unsigned long space) { + + if (current_frag->fixed_size + space >= current_frag->size) { + + current_frag->size += ((space > FRAG_BUF_REALLOC_STEP) ? 
space : FRAG_BUF_REALLOC_STEP); + current_frag->buf = xrealloc (current_frag->buf, current_frag->size); + + } + + return current_frag->buf + current_frag->fixed_size; + +} + +unsigned char *frag_increase_fixed_size (unsigned long increase) { + + frag_alloc_space (increase); + + current_frag->fixed_size += increase; + return (current_frag->buf + current_frag->fixed_size - increase); + +} + +void frag_align (signed long alignment, int fill_char, signed long max_bytes_to_skip) { + + (frag_alloc_space (1 << alignment))[0] = fill_char; + frag_set_as_variant (RELAX_TYPE_ALIGN, max_bytes_to_skip, 0, alignment, 0); + +} + +void frag_align_code (signed long alignment, signed long max_bytes_to_skip) { + + (frag_alloc_space (1 << alignment))[0] = 0x90; + frag_set_as_variant (RELAX_TYPE_ALIGN_CODE, max_bytes_to_skip, 0, alignment, 0); + +} + +void frag_append_1_char (unsigned char ch) { + + if (current_frag->fixed_size == current_frag->size) { + + current_frag->size += FRAG_BUF_REALLOC_STEP; + current_frag->buf = xrealloc (current_frag->buf, current_frag->size); + + } + + current_frag->buf[current_frag->fixed_size++] = ch; + +} + +void frag_new (void) { + + struct frag *prev_frag = current_frag; + + current_frag = frag_alloc (); + current_frag->relax_type = RELAX_TYPE_NONE_NEEDED; + + prev_frag->next = current_frag; + current_frag_chain->last_frag = current_frag; + +} + +void frag_set_as_variant (int relax_type, int relax_subtype, struct symbol *symbol, signed long offset, unsigned long opcode_offset_in_buf) { + + current_frag->relax_type = relax_type; + current_frag->relax_subtype = relax_subtype; + current_frag->symbol = symbol; + current_frag->offset = offset; + current_frag->opcode_offset_in_buf = opcode_offset_in_buf; + + get_filename_and_line_number (&(current_frag->filename), &(current_frag->line_number)); + frag_new (); + +} diff --git a/frag.h b/frag.h new file mode 100644 index 0000000..737b8a2 --- /dev/null +++ b/frag.h @@ -0,0 +1,54 @@ 
/******************************************************************************
 * @file            frag.h
 *****************************************************************************/
#ifndef     _FRAG_H
#define     _FRAG_H

/* Values of frag::relax_type. */
#define     RELAX_TYPE_NONE_NEEDED              0
#define     RELAX_TYPE_ALIGN                    1
#define     RELAX_TYPE_ALIGN_CODE               2
#define     RELAX_TYPE_ORG                      3
#define     RELAX_TYPE_SPACE                    4
#define     RELAX_TYPE_MACHINE_DEPENDENT        5

/* One piece of a section's contents; frags form a singly linked chain. */
struct frag {

    /* fixed_size: bytes of buf in use; address: address given to the frag; size: bytes allocated for buf. */
    unsigned long fixed_size, address, size;

    /* relax_type is one of RELAX_TYPE_*; both are stored by frag_set_as_variant (). */
    unsigned int relax_type, relax_subtype;

    struct symbol *symbol;
    unsigned char *buf;

    /*
     * frag_align () stores the alignment here; when frags are finished it is
     * overwritten with the number of bytes the frag grows by.
     */
    signed long offset;

    /* NOTE(review): only stored by frag_set_as_variant () in frag.c; presumably used by the machine dependent code. */
    unsigned long opcode_offset_in_buf;

    /* Source position recorded by frag_set_as_variant (), used in diagnostics. */
    const char *filename;
    unsigned long line_number;

    /* NOTE(review): not referenced by frag.c; presumably relaxation bookkeeping, confirm against relax_section (). */
    int relax_marker;

    /* Next frag of the chain, 0 at the end. */
    struct frag *next;

};

extern struct frag zero_address_frag;
extern struct frag *current_frag;

/* Minimum number of bytes a frag buffer grows by. */
#define     FRAG_BUF_REALLOC_STEP               16

struct frag *frag_alloc (void);

int frags_is_greater_than_offset (unsigned long offset2, struct frag *frag2, unsigned long offset1, struct frag *frag1, signed long *offset_p);
int frags_offset_is_fixed (struct frag *frag1, struct frag *frag2, signed long *offset_p);

unsigned char *finished_frag_increase_fixed_size_by_frag_offset (struct frag *frag);
unsigned char *frag_alloc_space (unsigned long space);
unsigned char *frag_increase_fixed_size (unsigned long increase);

void frag_align (signed long alignment, int fill_char, signed long max_bytes_to_skip);
void frag_align_code (signed long alignment, signed long max_bytes_to_skip);

void frag_append_1_char (unsigned char ch);
void frag_new (void);
void frag_set_as_variant (int relax_type, int relax_subtype, struct symbol *symbol, signed long offset, unsigned long opcode_offset_in_buf);

#endif      /* _FRAG_H */
*****************************************************************************/ +#include +#include +#include + +#include "hashtab.h" + +static struct hashtab_entry *find_entry (struct hashtab_entry *entries, unsigned int capacity, struct hashtab_name *key); + +static int adjust_capacity (struct hashtab *table, unsigned int new_capacity) { + + struct hashtab_entry *new_entries, *old_entries; + unsigned int i, new_count, old_capacity; + + if ((new_entries = malloc (sizeof (*new_entries) * new_capacity)) == NULL) { + return -2; + } + + for (i = 0; i < new_capacity; i++) { + + struct hashtab_entry *entry = &new_entries[i]; + + entry->key = NULL; + entry->value = NULL; + + } + + old_entries = table->entries; + old_capacity = table->capacity; + + new_count = 0; + + for (i = 0; i < old_capacity; i++) { + + struct hashtab_entry *entry = &old_entries[i], *dest; + + if (entry->key == NULL) { + continue; + } + + dest = find_entry (new_entries, new_capacity, entry->key); + + dest->key = entry->key; + dest->value = entry->value; + + new_count++; + + } + + free (old_entries); + + table->capacity = new_capacity; + table->count = new_count; + table->entries = new_entries; + table->used = new_count; + + return 0; + +} + +static struct hashtab_entry *find_entry (struct hashtab_entry *entries, unsigned int capacity, struct hashtab_name *key) { + + struct hashtab_entry *tombstone = NULL; + unsigned int index; + + for (index = key->hash % capacity; ; index = (index + 1) % capacity) { + + struct hashtab_entry *entry = &entries[index]; + + if (entry->key == NULL) { + + if (entry->value == NULL) { + + if (tombstone == NULL) { + return entry; + } + + return tombstone; + + } else if (tombstone == NULL) { + tombstone = entry; + } + + } else if (entry->key->bytes == key->bytes) { + + if (memcmp (entry->key->chars, key->chars, key->bytes) == 0 && entry->key->hash == key->hash) { + return entry; + } + + } + + } + +} + +static unsigned int hash_string (const void *p, unsigned int length) { + + 
unsigned char *str = (unsigned char *) p; + unsigned int i, result = 0; + + for (i = 0; i < length; i++) { + result = (((unsigned int) str[i]) << 12) + (result >> 6) + result + (result >> 3) + (((unsigned int) str[i]) << 8) - result; + } + + return result; + +} + +struct hashtab_name *hashtab_alloc_name (const char *str) { + + struct hashtab_name *name; + unsigned int bytes = strlen (str), hash = hash_string (str, bytes); + + if ((name = malloc (sizeof (*name))) == NULL) { + return NULL; + } + + name->bytes = bytes; + name->chars = str; + name->hash = hash; + + return name; + +} + +struct hashtab_name *hashtab_get_key (struct hashtab *table, const char *name) { + + struct hashtab_name *key; + struct hashtab_entry *entry; + + if (table == NULL || table->count == 0 || !(key = hashtab_alloc_name (name))) { + return 0; + } + + entry = find_entry (table->entries, table->capacity, key); + free (key); + + return entry->key; + +} + +void *hashtab_get (struct hashtab *table, struct hashtab_name *key) { + + struct hashtab_entry *entry; + + if (table == NULL || table->count == 0) { + return NULL; + } + + entry = find_entry (table->entries, table->capacity, key); + + if (entry->key == NULL) { + return NULL; + } + + return entry->value; + +} + +int hashtab_put (struct hashtab *table, struct hashtab_name *key, void *value) { + + const int MIN_CAPACITY = 15; + + struct hashtab_entry *entry; + int ret = 0; + + if (table->used >= table->capacity / 2) { + + int capacity = table->capacity * 2 - 1; + + if (capacity < MIN_CAPACITY) { + capacity = MIN_CAPACITY; + } + + if ((ret = adjust_capacity (table, capacity))) { + return ret; + } + + } + + entry = find_entry (table->entries, table->capacity, key); + + if (entry->key == NULL) { + + table->count++; + + if (entry->value == NULL) { + table->used++; + } + + } + + entry->key = key; + entry->value = value; + + return 0; + +} + +void hashtab_remove (struct hashtab *table, struct hashtab_name *key) { + + struct hashtab_entry *entry; + + if 
((entry = find_entry (table->entries, table->capacity, key)) != NULL) { + + entry->key = NULL; + entry->value = NULL; + + --table->count; + + } + +} diff --git a/hashtab.h b/hashtab.h new file mode 100644 index 0000000..47aab3b --- /dev/null +++ b/hashtab.h @@ -0,0 +1,36 @@ +/****************************************************************************** + * @file hashtab.h + *****************************************************************************/ +#ifndef _HASHTAB_H +#define _HASHTAB_H + +struct hashtab_name { + + const char *chars; + int bytes, hash; + +}; + +struct hashtab_entry { + + struct hashtab_name *key; + void *value; + +}; + +struct hashtab { + + struct hashtab_entry *entries; + int capacity, count, used; + +}; + +struct hashtab_name *hashtab_alloc_name (const char *str); +struct hashtab_name *hashtab_get_key (struct hashtab *table, const char *name); + +void *hashtab_get (struct hashtab *table, struct hashtab_name *key); + +int hashtab_put (struct hashtab *table, struct hashtab_name *key, void *value); +void hashtab_remove (struct hashtab *table, struct hashtab_name *key); + +#endif /* _HASHTAB_H */ diff --git a/intel.c b/intel.c new file mode 100644 index 0000000..c7d153e --- /dev/null +++ b/intel.c @@ -0,0 +1,4515 @@ +/****************************************************************************** + * @file intel.c + *****************************************************************************/ +#include +#include +#include +#include + +#include "as.h" +#include "expr.h" +#include "fixup.h" +#include "frag.h" +#include "hashtab.h" +#include "intel.h" +#include "kwd.h" +#include "lex.h" +#include "lib.h" +#include "report.h" +#include "section.h" +#include "symbol.h" + +struct templates { + + const char *name; + struct template *start, *end; + +}; + +#define RELAX_SUBTYPE_SHORT_JUMP 0x00 +#define RELAX_SUBTYPE_CODE16_JUMP 0x01 +#define RELAX_SUBTYPE_LONG_JUMP 0x02 + +#define RELAX_SUBTYPE_SHORT16_JUMP (RELAX_SUBTYPE_SHORT_JUMP | 
#define     RELAX_SUBTYPE_SHORT16_JUMP          (RELAX_SUBTYPE_SHORT_JUMP | RELAX_SUBTYPE_CODE16_JUMP)
#define     RELAX_SUBTYPE_LONG16_JUMP           (RELAX_SUBTYPE_LONG_JUMP | RELAX_SUBTYPE_CODE16_JUMP)

/* Jump kind part of a relax subtype. */
#define     RELAX_SUBTYPE_UNCONDITIONAL_JUMP    0x00
#define     RELAX_SUBTYPE_CONDITIONAL_JUMP      0x01
#define     RELAX_SUBTYPE_CONDITIONAL_JUMP86    0x02
#define     RELAX_SUBTYPE_FORCED_SHORT_JUMP     0x03

/* A relax subtype keeps the jump kind in the bits above the two size bits. */
#define     ENCODE_RELAX_SUBTYPE(type, size)    (((type) << 2) | (size))
#define     TYPE_FROM_RELAX_SUBTYPE(subtype)    ((subtype) >> 2)

/* Displacement size in bytes for the size bits of `s`: 4 for long, 2 for long16, 1 otherwise. */
#define     DISPLACEMENT_SIZE_FROM_RELAX_SUBSTATE(s)    \
    (((s) & 3) == RELAX_SUBTYPE_LONG_JUMP ? 4 : (((s) & 3) == RELAX_SUBTYPE_LONG16_JUMP ? 2 : 1))

struct relax_table_entry {

    long forward_reach;
    long backward_reach;
    long size_of_variable_part;

    unsigned int next_subtype;

};

/*
 * Four entries per jump kind, in the order short, short16, long, long16, so
 * that an entry's position equals ENCODE_RELAX_SUBTYPE (kind, size).
 * NOTE(review): presumably indexed by frag::relax_subtype in the relaxation
 * code, which is outside this part of the file.
 */
struct relax_table_entry relax_table[] = {

    /* Unconditional jumps. */
    { 127 + 1, -128 + 1, 1, ENCODE_RELAX_SUBTYPE (RELAX_SUBTYPE_UNCONDITIONAL_JUMP, RELAX_SUBTYPE_LONG_JUMP) },
    { 127 + 1, -128 + 1, 1, ENCODE_RELAX_SUBTYPE (RELAX_SUBTYPE_UNCONDITIONAL_JUMP, RELAX_SUBTYPE_LONG16_JUMP) },
    { 0, 0, 4, 0 },
    { 0, 0, 2, 0 },

    /* Conditional jumps. */
    { 127 + 1, -128 + 1, 1, ENCODE_RELAX_SUBTYPE (RELAX_SUBTYPE_CONDITIONAL_JUMP, RELAX_SUBTYPE_LONG_JUMP) },
    { 127 + 1, -128 + 1, 1, ENCODE_RELAX_SUBTYPE (RELAX_SUBTYPE_CONDITIONAL_JUMP, RELAX_SUBTYPE_LONG16_JUMP) },
    { 0, 0, 5, 0 },
    { 0, 0, 3, 0 },

    /* Conditional jumps 86. */
    { 127 + 1, -128 + 1, 1, ENCODE_RELAX_SUBTYPE (RELAX_SUBTYPE_CONDITIONAL_JUMP86, RELAX_SUBTYPE_LONG_JUMP) },
    { 127 + 1, -128 + 1, 1, ENCODE_RELAX_SUBTYPE (RELAX_SUBTYPE_CONDITIONAL_JUMP86, RELAX_SUBTYPE_LONG16_JUMP) },
    { 0, 0, 5, 0 },
    { 0, 0, 4, 0 },

    /* Forced short jump that cannot be relaxed. */
    { 127 + 1, -128 + 1, 1, 0 },

};

#define     TWOBYTE_OPCODE                      0x0F

static struct hashtab hashtab_templates = { 0 };
static struct hashtab hashtab_regs = { 0 };

/* By default every CPU flag bit is set. */
#define     DEFAULT_CPU_ARCH_FLAGS              (~0LU)
#define     DEFAULT_CPU_ARCH_NAME               "ALL"

static unsigned long cpu_arch_flags = DEFAULT_CPU_ARCH_FLAGS;

static char *cpu_arch_name = 0;
static char *cpu_extensions_name = 0;

/* Associates a name with a set of CPU_* flag bits. */
struct cpu_arch_entry {

    const char *name;
    unsigned long cpu_flags;

};

/* Each architecture includes the flags of the one before it. */
#define     CPU_I8086_FLAGS                     (CPU_8086)
#define     CPU_I186_FLAGS                      (CPU_I8086_FLAGS | CPU_186)
#define     CPU_I286_FLAGS                      (CPU_I186_FLAGS | CPU_286)
#define     CPU_I386_FLAGS                      (CPU_I286_FLAGS | CPU_386)

/* i486 is the first CPU with a FPU integrated. */
#define     CPU_I486_FLAGS                      (CPU_I386_FLAGS | CPU_486 | CPU_387)
#define     CPU_I686_FLAGS                      (CPU_I486_FLAGS | CPU_686 | CPU_687 | CPU_CMOV)

static const struct cpu_arch_entry cpu_archs[] = {

    { "i8086",  CPU_I8086_FLAGS },
    { "i186",   CPU_I186_FLAGS  },
    { "i286",   CPU_I286_FLAGS  },
    { "i386",   CPU_I386_FLAGS  },
    { "i486",   CPU_I486_FLAGS  },
    { "i686",   CPU_I686_FLAGS  }

};

static const struct cpu_arch_entry cpu_extensions[] = {

    { "8087",   CPU_8087            },
    { "287",    CPU_287             },
    { "387",    CPU_387             },
    { "687",    CPU_387 | CPU_687   },
    { "cmov",   CPU_CMOV            }

};

/* NOTE(review): the flags listed here are presumably the ones cleared by the matching "no..." name. */
static const struct cpu_arch_entry cpu_no_extensions[] = {

    { "no87",   CPU_8087 | CPU_287 | CPU_387 | CPU_687  },
    { "no8087", CPU_8087                                },
    { "no287",  CPU_287                                 },
    { "no387",  CPU_387                                 },
    { "no687",  CPU_687                                 },
    { "nocmov", CPU_CMOV                                }

};

static struct reg_entry *reg_esp = 0;
static struct reg_entry *reg_ss = 0;
static struct reg_entry *reg_ds = 0;

/* Each x_SUF mask is the set of all NO_*SUF bits except the one(s) named by x. */
#define     NO_SUF                              (NO_BSUF | NO_WSUF | NO_SSUF | NO_LSUF | NO_QSUF | NO_INTELSUF)
#define     B_SUF                               (NO_WSUF | NO_SSUF | NO_LSUF | NO_QSUF | NO_INTELSUF)
#define     W_SUF                               (NO_BSUF | NO_SSUF | NO_LSUF | NO_QSUF | NO_INTELSUF)
#define     L_SUF                               (NO_BSUF | NO_WSUF | NO_SSUF | NO_QSUF | NO_INTELSUF)
#define     Q_SUF                               (NO_BSUF | NO_WSUF | NO_SSUF | NO_LSUF | NO_INTELSUF)
NO_INTELSUF) +#define INTEL_SUF (NO_BSUF | NO_WSUF | NO_SSUF | NO_LSUF | NO_QSUF) + +#define BW_SUF (NO_SSUF | NO_LSUF | NO_QSUF | NO_INTELSUF) +#define WL_SUF (NO_BSUF | NO_SSUF | NO_QSUF | NO_INTELSUF) +#define BWL_SUF (NO_SSUF | NO_QSUF | NO_INTELSUF) +#define SL_SUF (NO_BSUF | NO_WSUF | NO_QSUF | NO_INTELSUF) + +/*static int allow_no_prefix_reg = 0;*/ +static int intel_syntax = 1, bits = 16; + +/** Table for lexical analysis. */ +static char register_chars_table[256] = { 0 }; + +static struct template template_table[] = { + + /* Move instructions. */ + { "mov", 2, 0xA0, NONE, BWL_SUF | D | W, { DISP16 | DISP32, ACC, 0 }, CPU_8086 }, + { "mov", 2, 0x88, NONE, BWL_SUF | D | W | MODRM, { REG, REG | ANY_MEM, 0 }, CPU_8086 }, + { "mov", 2, 0xB0, NONE, BWL_SUF | W | SHORT_FORM, { ENCODABLEIMM, REG8 | REG16 | REG32, 0 }, CPU_8086 }, + { "mov", 2, 0xC6, NONE, BWL_SUF | D | W | MODRM, { ENCODABLEIMM, REG | ANY_MEM, 0 }, CPU_8086 }, + + /* Move instructions for segment registers. */ + { "mov", 2, 0x8C, NONE, WL_SUF | MODRM, { SEGMENT1, WORD_REG | INV_MEM, 0 }, CPU_8086 }, + { "mov", 2, 0x8C, NONE, W_SUF | MODRM | IGNORE_SIZE, { SEGMENT1, ANY_MEM, 0 }, CPU_8086 }, + { "mov", 2, 0x8C, NONE, WL_SUF | MODRM, { SEGMENT2, WORD_REG | INV_MEM, 0 }, CPU_386 }, + { "mov", 2, 0x8C, NONE, W_SUF | MODRM | IGNORE_SIZE, { SEGMENT2, ANY_MEM, 0 }, CPU_386 }, + { "mov", 2, 0x8E, NONE, WL_SUF | MODRM | IGNORE_SIZE, { WORD_REG | INV_MEM, SEGMENT1, 0 }, CPU_8086 }, + { "mov", 2, 0x8E, NONE, W_SUF | MODRM | IGNORE_SIZE, { ANY_MEM, SEGMENT1, 0 }, CPU_8086 }, + { "mov", 2, 0x8E, NONE, WL_SUF | MODRM | IGNORE_SIZE, { WORD_REG | INV_MEM, SEGMENT2, 0 }, CPU_386 }, + { "mov", 2, 0x8E, NONE, W_SUF | MODRM | IGNORE_SIZE, { ANY_MEM, SEGMENT2, 0 }, CPU_386 }, + + /* Move instructions for control, debug and test registers. 
*/ + { "mov", 2, 0x0F20, NONE, L_SUF | D | MODRM | IGNORE_SIZE, { CONTROL, REG32 | INV_MEM, 0 }, CPU_386 }, + { "mov", 2, 0x0F21, NONE, L_SUF | D | MODRM | IGNORE_SIZE, { DEBUG, REG32 | INV_MEM, 0 }, CPU_386 }, + { "mov", 2, 0x0F24, NONE, L_SUF | D | MODRM | IGNORE_SIZE, { TEST, REG32 | INV_MEM, 0 }, CPU_386 }, + + /* Move with sign extend. */ + /* "movsbl" and "movsbw" are not unified into "movsb" to prevent conflict with "movs". */ + { "movsbl", 2, 0x0FBE, NONE, NO_SUF | MODRM, { REG8 | ANY_MEM, REG32, 0 }, CPU_386 }, + { "movsbw", 2, 0x0FBE, NONE, NO_SUF | MODRM, { REG8 | ANY_MEM, REG16, 0 }, CPU_386 }, + { "movswl", 2, 0x0FBF, NONE, NO_SUF | MODRM, { REG16 | ANY_MEM, REG32, 0 }, CPU_386 }, + + /* Alternative syntax. */ + { "movsx", 2, 0x0FBE, NONE, BW_SUF | W | MODRM, { REG8 | REG16 | ANY_MEM, WORD_REG, 0 }, CPU_386 }, + + /* Move with zero extend. */ + { "movzb", 2, 0x0FB6, NONE, WL_SUF | MODRM, { REG8 | ANY_MEM, WORD_REG, 0 }, CPU_386 }, + { "movzwl", 2, 0x0FB7, NONE, NO_SUF | MODRM, { REG16 | ANY_MEM, REG32, 0 }, CPU_386 }, + + /* Alternative syntax. */ + { "movzx", 2, 0x0FB6, NONE, BW_SUF | W | MODRM, { REG8 | REG16 | ANY_MEM, WORD_REG, 0 }, CPU_386 }, + + /* Push instructions. */ + { "push", 1, 0x50, NONE, WL_SUF | SHORT_FORM, { WORD_REG, 0, 0 }, CPU_8086 }, + { "push", 1, 0xFF, 6, WL_SUF | DEFAULT_SIZE | MODRM, { WORD_REG | ANY_MEM, 0, 0 }, CPU_8086 }, + { "push", 1, 0x6A, NONE, WL_SUF | DEFAULT_SIZE, { IMM8S, 0, 0 }, CPU_186 }, + { "push", 1, 0x68, NONE, WL_SUF | DEFAULT_SIZE, { IMM16 | IMM32, 0, 0 }, CPU_186 }, + + { "push", 1, 0x06, NONE, WL_SUF | DEFAULT_SIZE | SEGSHORTFORM, { SEGMENT1, 0, 0 }, 0}, + { "push", 1, 0x0FA0, NONE, WL_SUF | DEFAULT_SIZE | SEGSHORTFORM, { SEGMENT2, 0, 0 }, 3}, + + { "pusha", 0, 0x60, NONE, WL_SUF | DEFAULT_SIZE, { 0, 0, 0 }, CPU_186 }, + + /* Pop instructions. 
*/ + { "pop", 1, 0x58, NONE, WL_SUF | SHORT_FORM, { WORD_REG, 0, 0 }, CPU_8086 }, + { "pop", 1, 0x8F, NONE, WL_SUF | DEFAULT_SIZE | MODRM, { WORD_REG | ANY_MEM, 0, 0 }, CPU_8086 }, + +#define POP_SEGMENT_SHORT 0x07 + + { "pop", 1, 0x07, NONE, WL_SUF | DEFAULT_SIZE | SEGSHORTFORM, { SEGMENT1, 0, 0 }, CPU_8086 }, + { "pop", 1, 0x0FA1, NONE, WL_SUF | DEFAULT_SIZE | SEGSHORTFORM, { SEGMENT2, 0, 0 }, CPU_386 }, + + { "popa", 0, 0x61, NONE, WL_SUF | DEFAULT_SIZE, { 0, 0, 0 }, CPU_186 }, + + /* Exchange instructions. */ + { "xchg", 2, 0x90, NONE, WL_SUF | SHORT_FORM, { WORD_REG, ACC, 0 }, CPU_8086 }, + { "xchg", 2, 0x90, NONE, WL_SUF | SHORT_FORM, { ACC, WORD_REG, 0 }, CPU_8086 }, + { "xchg", 2, 0x86, NONE, BWL_SUF | W | MODRM, { REG, REG | ANY_MEM, 0 }, CPU_8086 }, + { "xchg", 2, 0x86, NONE, BWL_SUF | W | MODRM, { REG | ANY_MEM, REG, 0 }, CPU_8086 }, + + /* In/out for ports. */ + { "in", 2, 0xE4, NONE, BWL_SUF | W, { IMM8, ACC, 0 }, CPU_8086 }, + { "in", 2, 0xEC, NONE, BWL_SUF | W, { PORT, ACC, 0 }, CPU_8086 }, + { "in", 1, 0xE4, NONE, BWL_SUF | W, { IMM8, 0, 0 }, CPU_8086 }, + { "in", 1, 0xEC, NONE, BWL_SUF | W, { PORT, 0, 0 }, CPU_8086 }, + + { "out", 2, 0xE6, NONE, BWL_SUF | W, { ACC, IMM8, 0 }, CPU_8086 }, + { "out", 2, 0xEE, NONE, BWL_SUF | W, { ACC, PORT, 0 }, CPU_8086 }, + { "out", 2, 0xE6, NONE, BWL_SUF | W, { IMM8, 0, 0 }, CPU_8086 }, + { "out", 2, 0xEE, NONE, BWL_SUF | W, { PORT, 0, 0 }, CPU_8086 }, + + /* Load effective address. */ + { "lea", 2, 0x8D, NONE, WL_SUF | MODRM, { ANY_MEM, WORD_REG, 0 }, CPU_8086 }, + + /* Load far pointer from memory. 
*/ + { "lds", 2, 0xC5, NONE, WL_SUF | MODRM, { ANY_MEM, WORD_REG, 0 }, CPU_8086 }, + { "les", 2, 0xC4, NONE, WL_SUF | MODRM, { ANY_MEM, WORD_REG, 0 }, CPU_8086 }, + { "lfs", 2, 0x0FB4, NONE, WL_SUF | MODRM, { ANY_MEM, WORD_REG, 0 }, CPU_386 }, + { "lgs", 2, 0x0FB5, NONE, WL_SUF | MODRM, { ANY_MEM, WORD_REG, 0 }, CPU_386 }, + { "lss", 2, 0x0FB2, NONE, WL_SUF | MODRM, { ANY_MEM, WORD_REG, 0 }, CPU_386 }, + + /* Flags register instructions. */ + { "cmc", 0, 0xF5, NONE, NO_SUF, { 0, 0, 0 }, CPU_8086 }, + { "clc", 0, 0xF8, NONE, NO_SUF, { 0, 0, 0 }, CPU_8086 }, + { "stc", 0, 0xF9, NONE, NO_SUF, { 0, 0, 0 }, CPU_8086 }, + { "cli", 0, 0xFA, NONE, NO_SUF, { 0, 0, 0 }, CPU_8086 }, + { "sti", 0, 0xFB, NONE, NO_SUF, { 0, 0, 0 }, CPU_8086 }, + { "cld", 0, 0xFC, NONE, NO_SUF, { 0, 0, 0 }, CPU_8086 }, + { "std", 0, 0xFD, NONE, NO_SUF, { 0, 0, 0 }, CPU_8086 }, + { "clts", 0, 0x0F06, NONE, NO_SUF, { 0, 0, 0 }, CPU_286 }, + { "lahf", 0, 0x9F, NONE, NO_SUF, { 0, 0, 0 }, CPU_8086 }, + { "sahf", 0, 0x9E, NONE, NO_SUF, { 0, 0, 0 }, CPU_8086 }, + { "pushf", 0, 0x9C, NONE, WL_SUF | DEFAULT_SIZE, { 0, 0, 0 }, CPU_8086 }, + { "popf", 0, 0x9D, NONE, WL_SUF | DEFAULT_SIZE, { 0, 0, 0 }, CPU_8086 }, + + /* Arithmetic instructions. 
*/ + { "add", 2, 0x00, NONE, BWL_SUF | D | W | MODRM, { REG, REG | ANY_MEM, 0 }, CPU_8086 }, + { "add", 2, 0x83, 0, WL_SUF | MODRM, { IMM8S, WORD_REG | ANY_MEM, 0 }, CPU_8086 }, + { "add", 2, 0x04, NONE, BWL_SUF | W, { ENCODABLEIMM, ACC, 0 }, CPU_8086 }, + { "add", 2, 0x80, 0, BWL_SUF | W | MODRM, { ENCODABLEIMM, REG | ANY_MEM, 0 }, CPU_8086 }, + + { "inc", 1, 0x40, NONE, WL_SUF | SHORT_FORM, { WORD_REG, 0, 0 }, CPU_8086 }, + { "inc", 1, 0xFE, 0, BWL_SUF | W | MODRM, { REG | ANY_MEM, 0, 0 }, CPU_8086 }, + + { "sub", 2, 0x28, NONE, BWL_SUF | D | W | MODRM, { REG, REG | ANY_MEM, 0 }, CPU_8086 }, + { "sub", 2, 0x83, 5, WL_SUF | MODRM, { IMM8S, WORD_REG | ANY_MEM, 0 }, CPU_8086 }, + { "sub", 2, 0x2C, NONE, BWL_SUF | W, { ENCODABLEIMM, ACC, 0 }, CPU_8086 }, + { "sub", 2, 0x80, 5, BWL_SUF | W | MODRM, { ENCODABLEIMM, REG | ANY_MEM, 0 }, CPU_8086 }, + + { "dec", 1, 0x48, NONE, WL_SUF | SHORT_FORM, { WORD_REG, 0, 0 }, CPU_8086 }, + { "dec", 1, 0xFE, 1, BWL_SUF | W | MODRM, { REG | ANY_MEM, 0, 0 }, CPU_8086 }, + + { "sbb", 2, 0x18, NONE, BWL_SUF | D | W | MODRM, { REG, REG | ANY_MEM, 0 }, CPU_8086 }, + { "sbb", 2, 0x83, 3, WL_SUF | MODRM, { IMM8S, WORD_REG | ANY_MEM, 0 }, CPU_8086 }, + { "sbb", 2, 0x1C, NONE, BWL_SUF | W, { ENCODABLEIMM, ACC, 0 }, CPU_8086 }, + { "sbb", 2, 0x80, 3, BWL_SUF | W | MODRM, { ENCODABLEIMM, REG | ANY_MEM, 0 }, CPU_8086 }, + + { "cmp", 2, 0x38, NONE, BWL_SUF | D | W | MODRM, { REG, REG | ANY_MEM, 0 }, CPU_8086 }, + { "cmp", 2, 0x83, 7, WL_SUF | MODRM, { IMM8S, WORD_REG | ANY_MEM, 0 }, CPU_8086 }, + { "cmp", 2, 0x3C, NONE, BWL_SUF | W, { ENCODABLEIMM, ACC, 0 }, CPU_8086 }, + { "cmp", 2, 0x80, 7, BWL_SUF | W | MODRM, { ENCODABLEIMM, REG | ANY_MEM, 0 }, CPU_8086 }, + + { "test", 2, 0x84, NONE, BWL_SUF | W | MODRM, { REG | ANY_MEM, REG, 0 }, CPU_8086 }, + { "test", 2, 0x84, NONE, BWL_SUF | W | MODRM, { REG, REG | ANY_MEM, 0 }, CPU_8086 }, + { "test", 2, 0xA8, NONE, BWL_SUF | W, { ENCODABLEIMM, ACC, 0 }, CPU_8086 }, + { "test", 2, 0xF6, 0, BWL_SUF | W 
| MODRM, { ENCODABLEIMM, REG | ANY_MEM, 0 }, CPU_8086 }, + + { "and", 2, 0x20, NONE, BWL_SUF | D | W | MODRM, { REG, REG | ANY_MEM, 0 }, CPU_8086 }, + { "and", 2, 0x83, 4, WL_SUF | MODRM, { IMM8S, WORD_REG | ANY_MEM, 0 }, CPU_8086 }, + { "and", 2, 0x24, NONE, BWL_SUF | W, { ENCODABLEIMM, ACC, 0 }, CPU_8086 }, + { "and", 2, 0x80, 4, BWL_SUF | W | MODRM, { ENCODABLEIMM, REG | ANY_MEM, 0 }, CPU_8086 }, + + { "or", 2, 0x08, NONE, BWL_SUF | D | W | MODRM, { REG, REG | ANY_MEM, 0 }, CPU_8086 }, + { "or", 2, 0x83, 1, WL_SUF | MODRM, { IMM8S, WORD_REG | ANY_MEM, 0 }, CPU_8086 }, + { "or", 2, 0x0C, NONE, BWL_SUF | W, { ENCODABLEIMM, ACC, 0 }, CPU_8086 }, + { "or", 2, 0x80, 1, BWL_SUF | W | MODRM, { ENCODABLEIMM, REG | ANY_MEM, 0 }, CPU_8086 }, + + { "xor", 2, 0x30, NONE, BWL_SUF | D | W | MODRM, { REG, REG | ANY_MEM, 0 }, CPU_8086 }, + { "xor", 2, 0x83, 6, WL_SUF | MODRM, { IMM8S, WORD_REG | ANY_MEM, 0 }, CPU_8086 }, + { "xor", 2, 0x34, NONE, BWL_SUF | W, { ENCODABLEIMM, ACC, 0 }, CPU_8086 }, + { "xor", 2, 0x80, 6, BWL_SUF | W | MODRM, { ENCODABLEIMM, REG | ANY_MEM, 0 }, CPU_8086 }, + + { "clr", 1, 0x30, NONE, BWL_SUF | W | MODRM | REG_DUPLICATION, { REG, 0, 0 }, CPU_8086 }, + + { "adc", 2, 0x10, NONE, BWL_SUF | D | W | MODRM, { REG, REG | ANY_MEM, 0 }, CPU_8086 }, + { "adc", 2, 0x83, 2, WL_SUF | MODRM, { IMM8S, WORD_REG | ANY_MEM, 0 }, CPU_8086 }, + { "adc", 2, 0x14, NONE, BWL_SUF | W, { ENCODABLEIMM, ACC, 0 }, CPU_8086 }, + { "adc", 2, 0x80, 2, BWL_SUF | W | MODRM, { ENCODABLEIMM, REG | ANY_MEM, 0 }, CPU_8086 }, + + { "neg", 1, 0xF6, 3, BWL_SUF | W | MODRM, { REG | ANY_MEM, 0, 0 }, CPU_8086 }, + { "not", 1, 0xF6, 2, BWL_SUF | W | MODRM, { REG | ANY_MEM, 0, 0 }, CPU_8086 }, + + { "aaa", 0, 0x37, NONE, NO_SUF, { 0, 0, 0 }, CPU_8086 }, + { "aas", 0, 0x3F, NONE, NO_SUF, { 0, 0, 0 }, CPU_8086 }, + + { "daa", 0, 0x27, NONE, NO_SUF, { 0, 0, 0 }, CPU_8086 }, + { "das", 0, 0x2F, NONE, NO_SUF, { 0, 0, 0 }, CPU_8086 }, + + { "aad", 0, 0xD50A, NONE, NO_SUF, { 0, 0, 0 }, CPU_8086 }, + 
{ "aad", 1, 0xD5, NONE, NO_SUF, { IMM8, 0, 0 }, CPU_8086 }, + + { "aam", 0, 0xD40A, NONE, NO_SUF, { 0, 0, 0 }, CPU_8086 }, + { "aam", 1, 0xD4, NONE, NO_SUF, { IMM8, 0, 0 }, CPU_8086 }, + + /* Conversion instructions. */ + { "cbw", 0, 0x98, NONE, NO_SUF | SIZE16, { 0, 0, 0 }, CPU_8086 }, + { "cwde", 0, 0x98, NONE, NO_SUF | SIZE32, { 0, 0, 0 }, CPU_8086 }, + { "cwd", 0, 0x99, NONE, NO_SUF | SIZE16, { 0, 0, 0 }, CPU_8086 }, + { "cdq", 0, 0x99, NONE, NO_SUF | SIZE32, { 0, 0, 0 }, CPU_386 }, + + /* Other naming. */ + { "cbtw", 0, 0x98, NONE, NO_SUF | SIZE16, { 0, 0, 0 }, CPU_8086 }, + { "cwtl", 0, 0x98, NONE, NO_SUF | SIZE32, { 0, 0, 0 }, CPU_8086 }, + { "cwtd", 0, 0x99, NONE, NO_SUF | SIZE16, { 0, 0, 0 }, CPU_8086 }, + { "cltd", 0, 0x99, NONE, NO_SUF | SIZE32, { 0, 0, 0 }, CPU_386 }, + + { "mul", 1, 0xF6, 4, BWL_SUF | W | MODRM, { REG | ANY_MEM, 0, 0 }, CPU_8086 }, + + { "imul", 1, 0xF6, 5, BWL_SUF | W | MODRM, { REG | ANY_MEM, 0, 0 }, CPU_8086 }, + { "imul", 2, 0x0FAF, NONE, WL_SUF | MODRM, { WORD_REG | ANY_MEM, WORD_REG, 0 }, CPU_386 }, + { "imul", 3, 0x6B, NONE, WL_SUF | MODRM, { IMM8S, WORD_REG | ANY_MEM, WORD_REG }, CPU_186 }, + { "imul", 3, 0x69, NONE, WL_SUF | MODRM, { IMM16 | IMM32, WORD_REG | ANY_MEM, WORD_REG }, CPU_186 }, + { "imul", 2, 0x6B, NONE, WL_SUF | MODRM | REG_DUPLICATION, { IMM8S, WORD_REG, 0 }, CPU_186 }, + { "imul", 2, 0x69, NONE, WL_SUF | MODRM | REG_DUPLICATION, { IMM16 | IMM32, WORD_REG, 0 }, CPU_186 }, + + { "div", 1, 0xF6, 6, BWL_SUF | W | MODRM, { REG | ANY_MEM, 0, 0 }, CPU_8086 }, + { "div", 2, 0xF6, 6, BWL_SUF | W | MODRM, { REG | ANY_MEM, ACC, 0 }, CPU_8086 }, + + { "idiv", 1, 0xF6, 7, BWL_SUF | W | MODRM, { REG | ANY_MEM, 0, 0 }, CPU_8086 }, + { "idiv", 2, 0xF6, 7, BWL_SUF | W | MODRM, { REG | ANY_MEM, ACC, 0 }, CPU_8086 }, + + { "rol", 2, 0xC0, 0, BWL_SUF | W | MODRM, { IMM8, REG | ANY_MEM, 0 }, CPU_186 }, + { "rol", 2, 0xD2, 0, BWL_SUF | W | MODRM, { SHIFT_COUNT, REG | ANY_MEM, 0 }, CPU_8086 }, + { "rol", 1, 0xD0, 0, BWL_SUF | W | 
MODRM, { REG | ANY_MEM, 0, 0 }, CPU_8086 }, + + { "ror", 2, 0xC0, 1, BWL_SUF | W | MODRM, { IMM8, REG | ANY_MEM, 0 }, CPU_186 }, + { "ror", 2, 0xD2, 1, BWL_SUF | W | MODRM, { SHIFT_COUNT, REG | ANY_MEM, 0 }, CPU_8086 }, + { "ror", 1, 0xD0, 1, BWL_SUF | W | MODRM, { REG | ANY_MEM, 0, 0 }, CPU_8086 }, + + { "rcl", 2, 0xC0, 2, BWL_SUF | W | MODRM, { IMM8, REG | ANY_MEM, 0 }, CPU_186 }, + { "rcl", 2, 0xD2, 2, BWL_SUF | W | MODRM, { SHIFT_COUNT, REG | ANY_MEM, 0 }, CPU_8086 }, + { "rcl", 1, 0xD0, 2, BWL_SUF | W | MODRM, { REG | ANY_MEM, 0, 0 }, CPU_8086 }, + + { "rcr", 2, 0xC0, 3, BWL_SUF | W | MODRM, { IMM8, REG | ANY_MEM, 0 }, CPU_186 }, + { "rcr", 2, 0xD2, 3, BWL_SUF | W | MODRM, { SHIFT_COUNT, REG | ANY_MEM, 0 }, CPU_8086 }, + { "rcr", 1, 0xD0, 3, BWL_SUF | W | MODRM, { REG | ANY_MEM, 0, 0 }, CPU_8086 }, + + { "sal", 2, 0xC0, 4, BWL_SUF | W | MODRM, { IMM8, REG | ANY_MEM, 0 }, CPU_186 }, + { "sal", 2, 0xD2, 4, BWL_SUF | W | MODRM, { SHIFT_COUNT, REG | ANY_MEM, 0 }, CPU_8086 }, + { "sal", 1, 0xD0, 4, BWL_SUF | W | MODRM, { REG | ANY_MEM, 0, 0 }, CPU_8086 }, + + { "shl", 2, 0xC0, 4, BWL_SUF | W | MODRM, { IMM8, REG | ANY_MEM, 0 }, CPU_186 }, + { "shl", 2, 0xD2, 4, BWL_SUF | W | MODRM, { SHIFT_COUNT, REG | ANY_MEM, 0 }, CPU_8086 }, + { "shl", 1, 0xD0, 4, BWL_SUF | W | MODRM, { REG | ANY_MEM, 0, 0 }, CPU_8086 }, + + { "shr", 2, 0xC0, 5, BWL_SUF | W | MODRM, { IMM8, REG | ANY_MEM, 0 }, CPU_186 }, + { "shr", 2, 0xD2, 5, BWL_SUF | W | MODRM, { SHIFT_COUNT, REG | ANY_MEM, 0 }, CPU_8086 }, + { "shr", 1, 0xD0, 5, BWL_SUF | W | MODRM, { REG | ANY_MEM, 0, 0 }, CPU_8086 }, + + { "sar", 2, 0xC0, 7, BWL_SUF | W | MODRM, { IMM8, REG | ANY_MEM, 0 }, CPU_186 }, + { "sar", 2, 0xD2, 7, BWL_SUF | W | MODRM, { SHIFT_COUNT, REG | ANY_MEM, 0 }, CPU_8086 }, + { "sar", 1, 0xD0, 7, BWL_SUF | W | MODRM, { REG | ANY_MEM, 0, 0 }, CPU_8086 }, + + { "shld", 3, 0x0FA4, NONE, WL_SUF | MODRM, { IMM8, WORD_REG, WORD_REG | ANY_MEM }, CPU_386 }, + { "shld", 3, 0x0FA5, NONE, WL_SUF | MODRM, { 
SHIFT_COUNT, WORD_REG, WORD_REG | ANY_MEM }, CPU_386 }, + { "shld", 2, 0x0FA5, NONE, WL_SUF | MODRM, { WORD_REG, WORD_REG | ANY_MEM, 0 }, CPU_386 }, + + { "shrd", 3, 0x0FAC, NONE, WL_SUF | MODRM, { IMM8, WORD_REG, WORD_REG | ANY_MEM }, CPU_386 }, + { "shrd", 3, 0x0FAD, NONE, WL_SUF | MODRM, { SHIFT_COUNT, WORD_REG, WORD_REG | ANY_MEM }, CPU_386 }, + { "shrd", 2, 0x0FAD, NONE, WL_SUF | MODRM, { WORD_REG, WORD_REG | ANY_MEM, 0 }, CPU_386 }, + + /* Program control transfer instructions. */ + { "call", 1, 0xE8, NONE, WL_SUF | DEFAULT_SIZE | CALL, { DISP16 | DISP32, 0, 0 }, CPU_8086 }, + { "call", 1, 0xFF, 2, WL_SUF | DEFAULT_SIZE | MODRM, { WORD_REG | ANY_MEM | JUMP_ABSOLUTE, 0, 0 }, CPU_8086 }, + { "call", 2, 0x9A, NONE, WL_SUF | DEFAULT_SIZE | JUMPINTERSEGMENT, { IMM16, IMM16 | IMM32, 0 }, CPU_8086 }, + { "call", 1, 0xFF, 3, INTEL_SUF | DEFAULT_SIZE | MODRM, { ANY_MEM | JUMP_ABSOLUTE, 0, 0 }, CPU_8086 }, + + /* Alternative syntax. */ + { "lcall", 2, 0x9A, NONE, WL_SUF | DEFAULT_SIZE | JUMPINTERSEGMENT, { IMM16, IMM16 | IMM32, 0 }, CPU_8086 }, + { "lcall", 1, 0xFF, 3, WL_SUF | DEFAULT_SIZE | MODRM, { ANY_MEM | JUMP_ABSOLUTE, 0, 0 }, CPU_8086 }, + +#define PC_RELATIVE_JUMP 0xEB + + { "jmp", 1, 0xEB, NONE, NO_SUF | JUMP, { DISP, 0, 0 }, CPU_8086 }, + { "jmp", 1, 0xFF, 4, WL_SUF | MODRM, { WORD_REG | ANY_MEM | JUMP_ABSOLUTE, 0, 0 }, CPU_8086 }, + { "jmp", 2, 0xEA, NONE, WL_SUF | JUMPINTERSEGMENT, { IMM16, IMM16 | IMM32, 0 }, CPU_8086 }, + { "jmp", 1, 0xFF, 5, INTEL_SUF | MODRM, { ANY_MEM | JUMP_ABSOLUTE, 0, 0 }, CPU_8086 }, + + /* Alternative syntax. 
*/ + { "ljmp", 2, 0xEA, NONE, WL_SUF | JUMPINTERSEGMENT, { IMM16, IMM16 | IMM32, 0 }, CPU_8086 }, + { "ljmp", 1, 0xFF, 5, WL_SUF | MODRM, { ANY_MEM | JUMP_ABSOLUTE, 0, 0 }, CPU_8086 }, + + { "ret", 0, 0xC3, NONE, WL_SUF | DEFAULT_SIZE, { 0, 0, 0 }, CPU_8086 }, + { "ret", 1, 0xC2, NONE, WL_SUF | DEFAULT_SIZE, { IMM16, 0, 0 }, CPU_8086 }, + { "retf", 0, 0xCB, NONE, WL_SUF | DEFAULT_SIZE, { 0, 0, 0 }, CPU_8086 }, + { "retf", 1, 0xCA, NONE, WL_SUF | DEFAULT_SIZE, { IMM16, 0, 0 }, CPU_8086 }, + { "lret", 0, 0xCB, NONE, WL_SUF | DEFAULT_SIZE, { 0, 0, 0 }, CPU_8086 }, + { "lret", 1, 0xCA, NONE, WL_SUF | DEFAULT_SIZE, { IMM16, 0, 0 }, CPU_8086 }, + { "enter", 2, 0xC8, NONE, WL_SUF | DEFAULT_SIZE, { IMM16, IMM8, 0 }, CPU_186 }, + { "leave", 0, 0xC9, NONE, WL_SUF | DEFAULT_SIZE, { 0, 0, 0 }, CPU_186 }, + + /* Conditional jumps. */ + { "jo", 1, 0x70, NONE, NO_SUF | JUMP, { DISP, 0, 0 }, CPU_8086 }, + { "jno", 1, 0x71, NONE, NO_SUF | JUMP, { DISP, 0, 0 }, CPU_8086 }, + { "jb", 1, 0x72, NONE, NO_SUF | JUMP, { DISP, 0, 0 }, CPU_8086 }, + { "jc", 1, 0x72, NONE, NO_SUF | JUMP, { DISP, 0, 0 }, CPU_8086 }, + { "jnae", 1, 0x72, NONE, NO_SUF | JUMP, { DISP, 0, 0 }, CPU_8086 }, + { "jnb", 1, 0x73, NONE, NO_SUF | JUMP, { DISP, 0, 0 }, CPU_8086 }, + { "jnc", 1, 0x73, NONE, NO_SUF | JUMP, { DISP, 0, 0 }, CPU_8086 }, + { "jae", 1, 0x73, NONE, NO_SUF | JUMP, { DISP, 0, 0 }, CPU_8086 }, + { "je", 1, 0x74, NONE, NO_SUF | JUMP, { DISP, 0, 0 }, CPU_8086 }, + { "jz", 1, 0x74, NONE, NO_SUF | JUMP, { DISP, 0, 0 }, CPU_8086 }, + { "jne", 1, 0x75, NONE, NO_SUF | JUMP, { DISP, 0, 0 }, CPU_8086 }, + { "jnz", 1, 0x75, NONE, NO_SUF | JUMP, { DISP, 0, 0 }, CPU_8086 }, + { "jbe", 1, 0x76, NONE, NO_SUF | JUMP, { DISP, 0, 0 }, CPU_8086 }, + { "jna", 1, 0x76, NONE, NO_SUF | JUMP, { DISP, 0, 0 }, CPU_8086 }, + { "ja", 1, 0x77, NONE, NO_SUF | JUMP, { DISP, 0, 0 }, CPU_8086 }, + { "jnbe", 1, 0x77, NONE, NO_SUF | JUMP, { DISP, 0, 0 }, CPU_8086 }, + { "js", 1, 0x78, NONE, NO_SUF | JUMP, { DISP, 0, 0 }, CPU_8086 
}, + { "jns", 1, 0x79, NONE, NO_SUF | JUMP, { DISP, 0, 0 }, CPU_8086 }, + { "jp", 1, 0x7A, NONE, NO_SUF | JUMP, { DISP, 0, 0 }, CPU_8086 }, + { "jpe", 1, 0x7A, NONE, NO_SUF | JUMP, { DISP, 0, 0 }, CPU_8086 }, + { "jnp", 1, 0x7B, NONE, NO_SUF | JUMP, { DISP, 0, 0 }, CPU_8086 }, + { "jpo", 1, 0x7B, NONE, NO_SUF | JUMP, { DISP, 0, 0 }, CPU_8086 }, + { "jl", 1, 0x7C, NONE, NO_SUF | JUMP, { DISP, 0, 0 }, CPU_8086 }, + { "jnge", 1, 0x7C, NONE, NO_SUF | JUMP, { DISP, 0, 0 }, CPU_8086 }, + { "jge", 1, 0x7D, NONE, NO_SUF | JUMP, { DISP, 0, 0 }, CPU_8086 }, + { "jnl", 1, 0x7D, NONE, NO_SUF | JUMP, { DISP, 0, 0 }, CPU_8086 }, + { "jle", 1, 0x7E, NONE, NO_SUF | JUMP, { DISP, 0, 0 }, CPU_8086 }, + { "jng", 1, 0x7E, NONE, NO_SUF | JUMP, { DISP, 0, 0 }, CPU_8086 }, + { "jg", 1, 0x7F, NONE, NO_SUF | JUMP, { DISP, 0, 0 }, CPU_8086 }, + { "jnle", 1, 0x7F, NONE, NO_SUF | JUMP, { DISP, 0, 0 }, CPU_8086 }, + + { "jcxz", 1, 0xE3, NONE, NO_SUF | JUMPBYTE | SIZE16, { DISP, 0, 0 }, CPU_8086 }, + { "jecxz", 1, 0xE3, NONE, NO_SUF | JUMPBYTE | SIZE32, { DISP, 0, 0 }, CPU_8086 }, + + /* Loop instructions. */ + { "loop", 1, 0xE2, NONE, WL_SUF | JUMPBYTE, { DISP, 0, 0 }, CPU_8086 }, + { "loopz", 1, 0xE1, NONE, WL_SUF | JUMPBYTE, { DISP, 0, 0 }, CPU_8086 }, + { "loope", 1, 0xE1, NONE, WL_SUF | JUMPBYTE, { DISP, 0, 0 }, CPU_8086 }, + { "loopnz", 1, 0xE0, NONE, WL_SUF | JUMPBYTE, { DISP, 0, 0 }, CPU_8086 }, + { "loopne", 1, 0xE0, NONE, WL_SUF | JUMPBYTE, { DISP, 0, 0 }, CPU_8086 }, + + /* Set byte on flag instructions. 
*/ + { "seto", 1, 0x0F90, 0, B_SUF | MODRM, { REG8 | ANY_MEM, 0, 0 }, CPU_386 }, + { "setno", 1, 0x0F91, 0, B_SUF | MODRM, { REG8 | ANY_MEM, 0, 0 }, CPU_386 }, + { "setb", 1, 0x0F92, 0, B_SUF | MODRM, { REG8 | ANY_MEM, 0, 0 }, CPU_386 }, + { "setc", 1, 0x0F92, 0, B_SUF | MODRM, { REG8 | ANY_MEM, 0, 0 }, CPU_386 }, + { "setnae", 1, 0x0F92, 0, B_SUF | MODRM, { REG8 | ANY_MEM, 0, 0 }, CPU_386 }, + { "setnb", 1, 0x0F93, 0, B_SUF | MODRM, { REG8 | ANY_MEM, 0, 0 }, CPU_386 }, + { "setnc", 1, 0x0F93, 0, B_SUF | MODRM, { REG8 | ANY_MEM, 0, 0 }, CPU_386 }, + { "setae", 1, 0x0F93, 0, B_SUF | MODRM, { REG8 | ANY_MEM, 0, 0 }, CPU_386 }, + { "sete", 1, 0x0F94, 0, B_SUF | MODRM, { REG8 | ANY_MEM, 0, 0 }, CPU_386 }, + { "setz", 1, 0x0F94, 0, B_SUF | MODRM, { REG8 | ANY_MEM, 0, 0 }, CPU_386 }, + { "setne", 1, 0x0F95, 0, B_SUF | MODRM, { REG8 | ANY_MEM, 0, 0 }, CPU_386 }, + { "setnz", 1, 0x0F95, 0, B_SUF | MODRM, { REG8 | ANY_MEM, 0, 0 }, CPU_386 }, + { "setbe", 1, 0x0F96, 0, B_SUF | MODRM, { REG8 | ANY_MEM, 0, 0 }, CPU_386 }, + { "setna", 1, 0x0F96, 0, B_SUF | MODRM, { REG8 | ANY_MEM, 0, 0 }, CPU_386 }, + { "setnbe", 1, 0x0F97, 0, B_SUF | MODRM, { REG8 | ANY_MEM, 0, 0 }, CPU_386 }, + { "seta", 1, 0x0F97, 0, B_SUF | MODRM, { REG8 | ANY_MEM, 0, 0 }, CPU_386 }, + { "sets", 1, 0x0F98, 0, B_SUF | MODRM, { REG8 | ANY_MEM, 0, 0 }, CPU_386 }, + { "setns", 1, 0x0F99, 0, B_SUF | MODRM, { REG8 | ANY_MEM, 0, 0 }, CPU_386 }, + { "setp", 1, 0x0F9A, 0, B_SUF | MODRM, { REG8 | ANY_MEM, 0, 0 }, CPU_386 }, + { "setpe", 1, 0x0F9A, 0, B_SUF | MODRM, { REG8 | ANY_MEM, 0, 0 }, CPU_386 }, + { "setnp", 1, 0x0F9B, 0, B_SUF | MODRM, { REG8 | ANY_MEM, 0, 0 }, CPU_386 }, + { "setpo", 1, 0x0F9B, 0, B_SUF | MODRM, { REG8 | ANY_MEM, 0, 0 }, CPU_386 }, + { "setl", 1, 0x0F9C, 0, B_SUF | MODRM, { REG8 | ANY_MEM, 0, 0 }, CPU_386 }, + { "setnge", 1, 0x0F9C, 0, B_SUF | MODRM, { REG8 | ANY_MEM, 0, 0 }, CPU_386 }, + { "setnl", 1, 0x0F9D, 0, B_SUF | MODRM, { REG8 | ANY_MEM, 0, 0 }, CPU_386 }, + { "setge", 1, 0x0F9D, 0, 
B_SUF | MODRM, { REG8 | ANY_MEM, 0, 0 }, CPU_386 }, + { "setle", 1, 0x0F9E, 0, B_SUF | MODRM, { REG8 | ANY_MEM, 0, 0 }, CPU_386 }, + { "setng", 1, 0x0F9E, 0, B_SUF | MODRM, { REG8 | ANY_MEM, 0, 0 }, CPU_386 }, + { "setnle", 1, 0x0F9F, 0, B_SUF | MODRM, { REG8 | ANY_MEM, 0, 0 }, CPU_386 }, + { "setg", 1, 0x0F9F, 0, B_SUF | MODRM, { REG8 | ANY_MEM, 0, 0 }, CPU_386 }, + + /* String manipulation instructions. */ + { "cmps", 0, 0xA6, NONE, BWL_SUF | W | IS_STRING, { 0, 0, 0 }, CPU_8086 }, + { "scmp", 0, 0xA6, NONE, BWL_SUF | W | IS_STRING, { 0, 0, 0 }, CPU_8086 }, + { "ins", 0, 0x6C, NONE, BWL_SUF | W | IS_STRING, { 0, 0, 0 }, CPU_186 }, + { "outs", 0, 0x6E, NONE, BWL_SUF | W | IS_STRING, { 0, 0, 0 }, CPU_186 }, + { "lods", 0, 0xAC, NONE, BWL_SUF | W | IS_STRING, { 0, 0, 0 }, CPU_8086 }, + { "slod", 0, 0xAC, NONE, BWL_SUF | W | IS_STRING, { 0, 0, 0 }, CPU_8086 }, + { "movs", 0, 0xA4, NONE, BWL_SUF | W | IS_STRING, { 0, 0, 0 }, CPU_8086 }, + { "smov", 0, 0xA4, NONE, BWL_SUF | W | IS_STRING, { 0, 0, 0 }, CPU_8086 }, + { "scas", 0, 0xAE, NONE, BWL_SUF | W | IS_STRING, { 0, 0, 0 }, CPU_8086 }, + { "ssca", 0, 0xAE, NONE, BWL_SUF | W | IS_STRING, { 0, 0, 0 }, CPU_8086 }, + { "stos", 0, 0xAA, NONE, BWL_SUF | W | IS_STRING, { 0, 0, 0 }, CPU_8086 }, + { "ssto", 0, 0xAA, NONE, BWL_SUF | W | IS_STRING, { 0, 0, 0 }, CPU_8086 }, + { "xlat", 0, 0xD7, NONE, B_SUF | IS_STRING, { 0, 0, 0 }, CPU_8086 }, + + /* Bit manipulation instructions. 
*/ + { "bsf", 2, 0x0FBC, NONE, WL_SUF | MODRM, { WORD_REG | ANY_MEM, WORD_REG, 0 }, CPU_386 }, + { "bsr", 2, 0x0FBD, NONE, WL_SUF | MODRM, { WORD_REG | ANY_MEM, WORD_REG, 0 }, CPU_386 }, + { "bt", 2, 0x0FA3, NONE, WL_SUF | MODRM, { WORD_REG, WORD_REG | ANY_MEM, 0 }, CPU_386 }, + { "bt", 2, 0x0FBA, 4, WL_SUF | MODRM, { IMM8, WORD_REG | ANY_MEM, 0 }, CPU_386 }, + { "btc", 2, 0x0FBB, NONE, WL_SUF | MODRM, { WORD_REG, WORD_REG | ANY_MEM, 0 }, CPU_386 }, + { "btc", 2, 0x0FBA, 7, WL_SUF | MODRM, { IMM8, WORD_REG | ANY_MEM, 0 }, CPU_386 }, + { "btr", 2, 0x0FB3, NONE, WL_SUF | MODRM, { WORD_REG, WORD_REG | ANY_MEM, 0 }, CPU_386 }, + { "btr", 2, 0x0FBA, 6, WL_SUF | MODRM, { IMM8, WORD_REG | ANY_MEM, 0 }, CPU_386 }, + { "bts", 2, 0x0FAB, NONE, WL_SUF | MODRM, { WORD_REG, WORD_REG | ANY_MEM, 0 }, CPU_386 }, + { "bts", 2, 0x0FBA, 5, WL_SUF | MODRM, { IMM8, WORD_REG | ANY_MEM, 0 }, CPU_386 }, + + /* Interrupts. */ +#define INT_OPCODE 0xCD +#define INT3_OPCODE 0xCC + + { "int", 1, 0xCD, NONE, 0, { IMM8, 0, 0 }, CPU_8086 }, + { "int3", 0, 0xCC, NONE, 0, { 0, 0, 0 }, CPU_8086 }, + { "into", 0, 0xCE, NONE, 0, { 0, 0, 0 }, CPU_8086 }, + { "iret", 0, 0xCF, NONE, WL_SUF | DEFAULT_SIZE, { 0, 0, 0 }, CPU_8086 }, + + { "rsm", 0, 0x0FAA, NONE, 0, { 0, 0, 0 }, CPU_386 }, + { "bound", 2, 0x62, NONE, 0, { WORD_REG, ANY_MEM, 0 }, CPU_186 }, + + { "hlt", 0, 0xF4, NONE, NO_SUF, { 0, 0, 0 }, CPU_8086 }, + { "nop", 0, 0x90, NONE, NO_SUF, { 0, 0, 0 }, CPU_8086 }, + + /* Protection control. 
*/ + { "arpl", 2, 0x63, NONE, W_SUF | MODRM | IGNORE_SIZE, { REG16, REG16 | ANY_MEM, 0 }, CPU_286 }, + { "lar", 2, 0x0F02, NONE, WL_SUF | MODRM, { WORD_REG | ANY_MEM, WORD_REG, 0 }, CPU_286 }, + { "lgdt", 1, 0x0F01, 2, WL_SUF | MODRM, { ANY_MEM, 0, 0 }, CPU_286 }, + { "lidt", 1, 0x0F01, 3, WL_SUF | MODRM, { ANY_MEM, 0, 0 }, CPU_286 }, + { "lldt", 1, 0x0F00, 2, W_SUF | MODRM | IGNORE_SIZE, { REG16 | ANY_MEM, 0, 0 }, CPU_286 }, + { "lmsw", 1, 0x0F01, 6, W_SUF | MODRM | IGNORE_SIZE, { REG16 | ANY_MEM, 0, 0 }, CPU_286 }, + { "lsl", 2, 0x0F03, NONE, WL_SUF | MODRM, { WORD_REG | ANY_MEM, WORD_REG, 0 }, CPU_286 }, + { "ltr", 1, 0x0F00, 3, W_SUF | MODRM | IGNORE_SIZE, { REG16 | ANY_MEM, 0, 0 }, CPU_286 }, + + { "sgdt", 1, 0x0F01, 0, WL_SUF | MODRM, { ANY_MEM, 0, 0 }, CPU_286 }, + { "sidt", 1, 0x0F01, 1, WL_SUF | MODRM, { ANY_MEM, 0, 0 }, CPU_286 }, + { "sldt", 1, 0x0F00, 0, WL_SUF | MODRM, { WORD_REG | INV_MEM, 0, 0 }, CPU_286 }, + { "sldt", 1, 0x0F00, 0, W_SUF | MODRM | IGNORE_SIZE, { ANY_MEM, 0, 0 }, CPU_286 }, + { "smsw", 1, 0x0F01, 4, WL_SUF | MODRM, { WORD_REG | INV_MEM, 0, 0 }, CPU_286 }, + { "smsw", 1, 0x0F01, 4, W_SUF | MODRM | IGNORE_SIZE, { ANY_MEM, 0, 0 }, CPU_286 }, + { "str", 1, 0x0F00, 1, WL_SUF | MODRM, { WORD_REG | INV_MEM, 0, 0 }, CPU_286 }, + { "str", 1, 0x0F00, 1, W_SUF | MODRM | IGNORE_SIZE, { ANY_MEM, 0, 0 }, CPU_286 }, + + { "verr", 1, 0x0F00, 4, W_SUF | MODRM | IGNORE_SIZE, { REG16 | ANY_MEM, 0, 0 }, CPU_286 }, + { "verw", 1, 0x0F00, 5, W_SUF | MODRM | IGNORE_SIZE, { REG16 | ANY_MEM, 0, 0 }, CPU_286 }, + + /* Opcode prefixes. They are allowed as separate instructions too. 
*/ +#define ADDR_PREFIX_OPCODE 0x67 + + { "addr16", 0, 0x67, NONE, NO_SUF | IS_PREFIX | SIZE16 | IGNORE_SIZE, { 0, 0, 0 }, CPU_386 }, + { "addr32", 0, 0x67, NONE, NO_SUF | IS_PREFIX | SIZE32 | IGNORE_SIZE, { 0, 0, 0 }, CPU_386 }, + { "aword", 0, 0x67, NONE, NO_SUF | IS_PREFIX | SIZE16 | IGNORE_SIZE, { 0, 0, 0 }, CPU_386 }, + { "adword", 0, 0x67, NONE, NO_SUF | IS_PREFIX | SIZE32 | IGNORE_SIZE, { 0, 0, 0 }, CPU_386 }, + +#define DATA_PREFIX_OPCODE 0x66 + + { "data16", 0, 0x66, NONE, NO_SUF | IS_PREFIX | SIZE16 | IGNORE_SIZE, { 0, 0, 0 }, CPU_386 }, + { "data32", 0, 0x66, NONE, NO_SUF | IS_PREFIX | SIZE32 | IGNORE_SIZE, { 0, 0, 0 }, CPU_386 }, + { "word", 0, 0x66, NONE, NO_SUF | IS_PREFIX | SIZE16 | IGNORE_SIZE, { 0, 0, 0 }, CPU_386 }, + { "dword", 0, 0x66, NONE, NO_SUF | IS_PREFIX | SIZE32 | IGNORE_SIZE, { 0, 0, 0 }, CPU_386 }, + +#define CS_PREFIX_OPCODE 0x2E + { "cs", 0, 0x2E, NONE, NO_SUF | IS_PREFIX, { 0, 0, 0 }, CPU_8086 }, +#define DS_PREFIX_OPCODE 0x3E + { "ds", 0, 0x3E, NONE, NO_SUF | IS_PREFIX, { 0, 0, 0 }, CPU_8086 }, +#define ES_PREFIX_OPCODE 0x26 + { "es", 0, 0x26, NONE, NO_SUF | IS_PREFIX, { 0, 0, 0 }, CPU_8086 }, +#define FS_PREFIX_OPCODE 0x64 + { "fs", 0, 0x64, NONE, NO_SUF | IS_PREFIX, { 0, 0, 0 }, CPU_8086 }, +#define GS_PREFIX_OPCODE 0x65 + { "gs", 0, 0x65, NONE, NO_SUF | IS_PREFIX, { 0, 0, 0 }, CPU_8086 }, +#define SS_PREFIX_OPCODE 0x36 + { "ss", 0, 0x36, NONE, NO_SUF | IS_PREFIX, { 0, 0, 0 }, CPU_8086 }, + +#define REPNE_PREFIX_OPCODE 0xF2 +#define REPE_PREFIX_OPCODE 0xF3 + + { "repne", 0, 0xF2, NONE, NO_SUF | IS_PREFIX, { 0, 0, 0 }, CPU_8086 }, + { "repnz", 0, 0xF2, NONE, NO_SUF | IS_PREFIX, { 0, 0, 0 }, CPU_8086 }, + { "rep", 0, 0xF3, NONE, NO_SUF | IS_PREFIX, { 0, 0, 0 }, CPU_8086 }, + { "repe", 0, 0xF3, NONE, NO_SUF | IS_PREFIX, { 0, 0, 0 }, CPU_8086 }, + { "repz", 0, 0xF3, NONE, NO_SUF | IS_PREFIX, { 0, 0, 0 }, CPU_8086 }, + + /* i486 extensions. 
*/ + { "bswap", 1, 0x0FC8, NONE, L_SUF | SHORT_FORM, { REG32, 0, 0 }, CPU_486 }, + + /* End of instructions. */ + { 0, 0, 0, 0, 0, { 0, 0, 0 }, 0 } + +}; + +/* Prefixes are emitted in the following order. */ +#define SEGMENT_PREFIX 0x00 +#define ADDR_PREFIX 0x01 +#define DATA_PREFIX 0x02 +#define REP_PREFIX 0x03 + +#define MAX_PREFIXES 0x04 + +static struct reg_entry reg_table[] = { + + /* 8 bit registers. */ + { "al", REG8 | ACC, 0 }, + { "cl", REG8 | SHIFT_COUNT, 1 }, + { "dl", REG8, 2 }, + { "bl", REG8, 3 }, + { "ah", REG8, 4 }, + { "ch", REG8, 5 }, + { "dh", REG8, 6 }, + { "bh", REG8, 7 }, + + /* 16 bit registers. */ + { "ax", REG16 | ACC, 0 }, + { "cx", REG16, 1 }, + { "dx", REG16 | PORT, 2 }, + { "bx", REG16 | BASE_INDEX, 3 }, + { "sp", REG16, 4 }, + { "bp", REG16 | BASE_INDEX, 5 }, + { "si", REG16 | BASE_INDEX, 6 }, + { "di", REG16 | BASE_INDEX, 7 }, + + /* 32 bit registers. */ + { "eax", REG32 | BASE_INDEX | ACC, 0 }, + { "ecx", REG32 | BASE_INDEX, 1 }, + { "edx", REG32 | BASE_INDEX, 2 }, + { "ebx", REG32 | BASE_INDEX, 3 }, + { "esp", REG32, 4 }, + { "ebp", REG32 | BASE_INDEX, 5 }, + { "esi", REG32 | BASE_INDEX, 6 }, + { "edi", REG32 | BASE_INDEX, 7 }, + + /* Segment registers. */ + { "es", SEGMENT1, 0 }, + { "cs", SEGMENT1, 1 }, + { "ss", SEGMENT1, 2 }, + { "ds", SEGMENT1, 3 }, + { "fs", SEGMENT2, 4 }, + { "gs", SEGMENT2, 5 }, + + /* Segment pseudo-register. */ + { "flat", SEGMENT1, REG_FLAT_NUMBER }, + + /* Control registers. */ + { "cr0", CONTROL, 0 }, + { "cr1", CONTROL, 1 }, + { "cr2", CONTROL, 2 }, + { "cr3", CONTROL, 3 }, + { "cr4", CONTROL, 4 }, + { "cr5", CONTROL, 5 }, + { "cr6", CONTROL, 6 }, + { "cr7", CONTROL, 7 }, + + /* Debug registers. */ + { "db0", DEBUG, 0 }, + { "db1", DEBUG, 1 }, + { "db2", DEBUG, 2 }, + { "db3", DEBUG, 3 }, + { "db4", DEBUG, 4 }, + { "db5", DEBUG, 5 }, + { "db6", DEBUG, 6 }, + { "db7", DEBUG, 7 }, + + /* Other naming. 
*/ + { "dr0", DEBUG, 0 }, + { "dr1", DEBUG, 1 }, + { "dr2", DEBUG, 2 }, + { "dr3", DEBUG, 3 }, + { "dr4", DEBUG, 4 }, + { "dr5", DEBUG, 5 }, + { "dr6", DEBUG, 6 }, + { "dr7", DEBUG, 7 }, + + /* Test registers. */ + { "tr0", TEST, 0 }, + { "tr1", TEST, 1 }, + { "tr2", TEST, 2 }, + { "tr3", TEST, 3 }, + { "tr4", TEST, 4 }, + { "tr5", TEST, 5 }, + { "tr6", TEST, 6 }, + { "tr7", TEST, 7 }, + + /* End of registers. */ + { 0, 0, 0 } + +}; + +#define BYTE_SUFFIX 'b' +#define WORD_SUFFIX 'w' +#define SHORT_SUFFIX 's' +#define DWORD_SUFFIX 'l' +#define QWORD_SUFFIX 'q' + +/* Internal suffix for .intel_syntax. It cannot be directly used by the user. */ +#define INTEL_SUFFIX '\1' + +struct templates *machine_dependent_find_templates (char *name, int check_suffix) { + + char *lname; + + struct hashtab_name *key; + struct templates *entry; + + lname = to_lower (name); + + if ((key = hashtab_get_key (&hashtab_templates, lname))) { + + if ((entry = hashtab_get (&hashtab_templates, key))) { + + free (lname); + return entry; + + } + + } + + if (check_suffix) { + + char *p2 = lname + strlen (lname); + + switch (p2[-1]) { + + case WORD_SUFFIX: + case BYTE_SUFFIX: + case QWORD_SUFFIX: + + p2[-1] = '\0'; + break; + + case SHORT_SUFFIX: + case DWORD_SUFFIX: + + if (!intel_syntax) { + p2[-1] = '\0'; + } + + break; + + /* Intel syntax only. 
*/ + case 'd': + + if (intel_syntax) { + p2[-1] = '\0'; + } + + break; + + } + + if ((key = hashtab_get_key (&hashtab_templates, lname))) { + + if ((entry = hashtab_get (&hashtab_templates, key))) { + + free (lname); + return entry; + + } + + } + + } + + free (lname); + return 0; + +} + +struct reg_entry *machine_dependent_find_reg_entry (char *name) { + + char *lname; + + struct hashtab_name *key; + struct reg_entry *entry; + + lname = to_lower (name); + + if ((key = hashtab_get_key (&hashtab_regs, lname))) { + + if ((entry = hashtab_get (&hashtab_regs, key))) { + + free (lname); + return entry; + + } + + } + + free (lname); + return 0; + +} + +#define EXPR_TYPE_SHORT EXPR_TYPE_MACHINE_DEPENDENT_0 +#define EXPR_TYPE_OFFSET EXPR_TYPE_MACHINE_DEPENDENT_1 + +#define EXPR_TYPE_FULL_PTR EXPR_TYPE_MACHINE_DEPENDENT_2 +#define EXPR_TYPE_NEAR_PTR EXPR_TYPE_MACHINE_DEPENDENT_3 +#define EXPR_TYPE_FAR_PTR EXPR_TYPE_MACHINE_DEPENDENT_4 + +#define EXPR_TYPE_BYTE_PTR EXPR_TYPE_MACHINE_DEPENDENT_5 +#define EXPR_TYPE_WORD_PTR EXPR_TYPE_MACHINE_DEPENDENT_6 +#define EXPR_TYPE_DWORD_PTR EXPR_TYPE_MACHINE_DEPENDENT_7 +#define EXPR_TYPE_FWORD_PTR EXPR_TYPE_MACHINE_DEPENDENT_8 +#define EXPR_TYPE_QWORD_PTR EXPR_TYPE_MACHINE_DEPENDENT_9 + +struct modrm_byte { + + unsigned int regmem; + unsigned int reg; + unsigned int mode; + +}; + +struct sib_byte { + + unsigned int base; + unsigned int index; + unsigned int scale; + +}; + +#define MODRM_REGMEM_TWO_BYTE_ADDRESSING 0x04 +#define SIB_BASE_NO_BASE_REGISTER 0x05 +#define SIB_BASE_NO_BASE_REGISTER_16 0x06 +#define SIB_INDEX_NO_INDEX_REGISTER 0x04 + +static struct { + + enum expr_type operand_modifier; + + int is_mem; + int is_indirect; + int has_offset; + + int in_offset; + int in_bracket; + int in_scale; + + struct reg_entry *base_reg; + struct reg_entry *index_reg; + + signed long scale_factor; + struct symbol *segment; + +} intel_state; + +struct instruction { + + struct template template; + char suffix; + + unsigned int 
log2_scale_factor; + int force_short_jump; + + int operands; + int reg_operands; + int disp_operands; + int mem_operands; + + unsigned int prefixes[MAX_PREFIXES]; + int prefix_count; + + struct modrm_byte modrm; + struct sib_byte sib; + + struct reg_entry *base_reg, *index_reg; + unsigned int types[MAX_OPERANDS]; + + struct expr *imms[MAX_OPERANDS]; + struct expr *disps[MAX_OPERANDS]; + + struct reg_entry *regs[MAX_OPERANDS]; + struct reg_entry *segments[MAX_OPERANDS]; + +}; + +static struct expr operand_exprs[MAX_OPERANDS]; +static struct instruction instruction; + +static struct templates *current_templates = 0; +static int operand_exprs_count; + +static void machine_dependent_set_march (const char *optarg) { + + char *arg, *orig_arg; + char *next; + + arg = orig_arg = xstrdup (optarg); + + if (cpu_extensions_name == 0) { + cpu_extensions_name = xstrdup (""); + } + + if (*arg == '+') { + ++arg; + } + + while (1) { + + unsigned long i; + char saved_c; + + next = strchr (arg, '+'); + + if (next == 0) { + next = arg + strlen (arg); + } + + saved_c = *next; + *next = '\0'; + + for (i = 0; i < ARRAY_SIZE (cpu_archs); i++) { + + if (strcmp (arg, cpu_archs[i].name) == 0) { + + cpu_arch_flags = cpu_archs[i].cpu_flags; + + free (cpu_arch_name); + cpu_arch_name = xstrdup (arg); + + free (cpu_extensions_name); + cpu_extensions_name = xstrdup (""); + + break; + + } + + } + + if (i == ARRAY_SIZE (cpu_archs)) { + + for (i = 0; i < ARRAY_SIZE (cpu_extensions); i++) { + + if (strcmp (arg, cpu_extensions[i].name) == 0) { + + cpu_arch_flags |= cpu_extensions[i].cpu_flags; + cpu_extensions_name = xrealloc (cpu_extensions_name, strlen (cpu_extensions_name) + 1 + 1 + strlen (cpu_extensions[i].name)); + + strcat (cpu_extensions_name, "."); + strcat (cpu_extensions_name, cpu_extensions[i].name); + + break; + + } + + } + + if (i == ARRAY_SIZE (cpu_extensions)) { + + for (i = 0; i < ARRAY_SIZE (cpu_no_extensions); i++) { + + if (strcmp (arg, cpu_no_extensions[i].name) == 0) { + + 
cpu_arch_flags &= ~cpu_no_extensions[i].cpu_flags; + cpu_extensions_name = xrealloc (cpu_extensions_name, strlen (cpu_extensions_name) + 1 + 1 + strlen (cpu_no_extensions[i].name)); + + strcat (cpu_extensions_name, "."); + strcat (cpu_extensions_name, cpu_no_extensions[i].name); + + break; + + } + + } + + if (i == ARRAY_SIZE (cpu_no_extensions)) { + report_at (get_filename (), get_line_number (), REPORT_ERROR, "invalid march option: '%s'", optarg); + } + + } + + } + + if (saved_c == '\0') { + break; + } + + *next = saved_c; + arg = next + 1; + + } + + free (orig_arg); + +} + + +static void handler_8086 (char *start, char **pp) { + + (void) start; + (void) pp; + + machine_dependent_set_march ("i8086"); + +} + +static void handler_8087 (char *start, char **pp) { + + (void) start; + (void) pp; + + machine_dependent_set_march ("i8086+8087"); + +} + +static void handler_186 (char *start, char **pp) { + + (void) start; + (void) pp; + + machine_dependent_set_march ("i186"); + +} + +static void handler_286 (char *start, char **pp) { + + (void) start; + (void) pp; + + machine_dependent_set_march ("i286"); + +} + +static void handler_287 (char *start, char **pp) { + + (void) start; + (void) pp; + + machine_dependent_set_march ("i286+287"); + +} + +static void handler_386 (char *start, char **pp) { + + (void) start; + (void) pp; + + machine_dependent_set_march ("i386"); + +} + +static void handler_387 (char *start, char **pp) { + + (void) start; + (void) pp; + + machine_dependent_set_march ("i386+387"); + +} + +static void handler_486 (char *start, char **pp) { + + (void) start; + (void) pp; + + machine_dependent_set_march ("i486"); + +} + +static void handler_586 (char *start, char **pp) { + + (void) start; + (void) pp; + + machine_dependent_set_march ("i586"); + +} + +static void handler_686 (char *start, char **pp) { + + (void) start; + (void) pp; + + machine_dependent_set_march ("i686"); + +} + +static void handler_extern (char *start, char **pp) { + + struct symbol 
*symbol; + char *name, *caret, *qualifier; + + for (;;) { + + caret = (*pp = skip_whitespace (*pp)); + + if (!(name = symname (pp))) { + + report_line_at (get_filename (), get_line_number (), REPORT_ERROR, start, caret, "expected symbol name"); + + ignore_rest_of_line (pp); + return; + + } + + if ((symbol = symbol_find (name))) { + + if (symbol->scope == SYMBOL_SCOPE_LOCAL || symbol->scope == SYMBOL_SCOPE_GLOBAL) { + report_line_at (get_filename (), get_line_number (), REPORT_ERROR, start, caret, "symbol '%s' is already defined", name); + } else { + + symbol->scope = SYMBOL_SCOPE_EXTERN; + symbol_set_external (symbol); + + } + + } else { + + symbol = symbol_make (name); + symbol_add_to_chain (symbol); + + symbol->scope = SYMBOL_SCOPE_EXTERN; + symbol_set_external (symbol); + + } + + *pp = skip_whitespace (*pp); + free (name); + + if (**pp == ':') { + + *pp = skip_whitespace (*pp + 1); + + if (!(qualifier = symname (pp))) { + error: + + report_at (get_filename (), get_line_number (), REPORT_ERROR, "invalid qualifier type"); + + ignore_rest_of_line (pp); + return; + + } + + *pp = skip_whitespace (*pp); + + if (xstrcasecmp (qualifier, "byte") && xstrcasecmp (qualifier, "word") && xstrcasecmp (qualifier, "dword")) { + + free (qualifier); + goto error; + + } + + free (qualifier); + + } + + if (**pp != ',') { + break; + } + + (*pp)++; + + } + +} + +static void handler_model (char *start, char **pp) { + + char *caret = (*pp = skip_whitespace (*pp)); + char *model, *lang; + + if (!(model = symname (pp))) { + + report_at (get_filename (), get_line_number (), REPORT_ERROR, "memory model is not found"); + + ignore_rest_of_line (pp); + return; + + } + + *pp = skip_whitespace (*pp); + + if (xstrcasecmp (model, "tiny") == 0) { + + state->data_size = 0; + state->model = 1; + + } else if (xstrcasecmp (model, "small") == 0) { + + state->data_size = 0; + state->model = 2; + + } else if (xstrcasecmp (model, "compact") == 0) { + + state->data_size = 1; + state->model = 3; + + } else 
if (xstrcasecmp (model, "medium") == 0) { + + state->data_size = 0; + state->model = 4; + + } else if (xstrcasecmp (model, "large") == 0) { + + state->data_size = 1; + state->model = 5; + + } else if (xstrcasecmp (model, "huge") == 0) { + + state->data_size = 2; + state->model = 6; + + } else if (xstrcasecmp (model, "flat") == 0) { + + state->data_size = 0; + state->model = 7; + + if ((cpu_arch_flags & CPU_386)) { + bits = 32; + } + + } else { + + report_line_at (get_filename (), get_line_number (), REPORT_ERROR, start, caret, "invalid memory model '%s' provided", model); + free (model); + + ignore_rest_of_line (pp); + return; + + } + + free (model); + + if (**pp != ',') { + return; + } + + caret = (*pp = skip_whitespace (*pp + 1)); + + if (!(lang = symname (pp))) { + + report_at (get_filename (), get_line_number (), REPORT_ERROR, "no language specified"); + + ignore_rest_of_line (pp); + return; + + } + + *pp = skip_whitespace (*pp); + + if (xstrcasecmp (lang, "c") == 0) { + + state->ext = "_"; + + free (lang); + return; + + } + + + report_line_at (get_filename (), get_line_number (), REPORT_ERROR, start, caret, "invalid language '%s' provided", lang); + free (lang); + + ignore_rest_of_line (pp); + +} + +static struct pseudo_op_entry pseudo_op_table[] = { + + { ".8086", &handler_8086 }, + { ".8087", &handler_8087 }, + + { ".186", &handler_186 }, + + { ".286", &handler_286 }, + { ".286p", &handler_286 }, + { ".287", &handler_287 }, + + { ".386", &handler_386 }, + { ".386p", &handler_386 }, + { ".387", &handler_387 }, + + { ".486", &handler_486 }, + { ".486p", &handler_486 }, + + { ".586", &handler_586 }, + { ".686", &handler_686 }, + + { "extern", &handler_extern }, + { "extrn", &handler_extern }, + + { ".model", &handler_model }, + { 0, 0 } + +}; + +void machine_dependent_init (void) { + + struct hashtab_name *key; + + struct reg_entry *reg_entry; + struct templates *templates; + + struct template *template = template_table; + int ch; + + templates = xmalloc 
(sizeof (*templates)); + templates->name = xstrdup (template->name); + templates->start = template; + + for (;;) { + + template++; + + if (!template->name || strcmp (template->name, (template - 1)->name)) { + + templates->end = template; + + if (hashtab_get_key (&hashtab_templates, templates->name)) { + report_at (program_name, 0, REPORT_ERROR, "duplicate entry '%s'", templates->name); + } else { + + if (!(key = hashtab_alloc_name (templates->name))) { + report_at (program_name, 0, REPORT_ERROR, "failed to allocate memory for '%s'", templates->name); + } else { + hashtab_put (&hashtab_templates, key, templates); + } + + } + + if (!template->name) { + break; + } + + templates = xmalloc (sizeof (*templates)); + templates->name = xstrdup (template->name); + templates->start = template; + + } + + } + + for (reg_entry = reg_table; reg_entry->name; reg_entry++) { + + if ((reg_entry->type & REG32) && reg_entry->number == 4) { + reg_esp = reg_entry; + } + + if (reg_entry->type & SEGMENT1) { + + switch (reg_entry->number) { + + case 2: + + reg_ss = reg_entry; + break; + + case 3: + + reg_ds = reg_entry; + break; + + } + + } + + if (hashtab_get_key (&hashtab_regs, reg_entry->name)) { + + report_at (program_name, 0, REPORT_ERROR, "duplicate entry '%s'", reg_entry->name); + continue; + + } + + if (!(key = hashtab_alloc_name (reg_entry->name))) { + + report_at (program_name, 0, REPORT_ERROR, "failed to allocate memory for '%s'", reg_entry->name); + continue; + + } + + hashtab_put (&hashtab_regs, key, reg_entry); + + } + + for (ch = 0; ch < 255; ch++) { + + if (islower (ch) || isdigit (ch)) { + register_chars_table[ch] = ch; + } else if (isupper (ch)) { + register_chars_table[ch] = tolower (ch); + } + + } + + expr_type_set_rank (EXPR_TYPE_FULL_PTR, intel_syntax ? 
10 : 0); + + machine_dependent_set_march ("i8086"); + install_pseudo_op_table (pseudo_op_table); + +} + +void machine_dependent_handle_proc (char *start, char **pp, char *name) { + + struct symbol *symbol; + struct proc *proc; + + (void) pp; + + proc = xmalloc (sizeof (*proc)); + proc->name = xstrdup (name); + + symbol = symbol_label (start, skip_whitespace (start), name); + symbol->scope = SYMBOL_SCOPE_LOCAL; + + proc->filename = xstrdup (get_filename ()); + proc->line_number = get_line_number (); + + vec_push (&state->procs, (void *) proc); + +} + +void machine_dependent_handle_endp (char *start, char **pp, char *name) { + + struct proc *proc; + int last; + + (void) pp; + + if (state->procs.length == 0) { + + report_at (get_filename (), get_line_number (), REPORT_ERROR, "block nesting error"); + return; + + } + + last = state->procs.length - 1; + proc = state->procs.data[last]; + + if (strcmp (proc->name, name)) { + + report_line_at (get_filename (), get_line_number (), REPORT_ERROR, start, skip_whitespace (start), "procedure name does not match"); + return; + + } + + free (proc->name); + free (proc->filename); + + vec_pop (&state->procs); + +} + + +static struct reg_entry bad_register = { "", 0, 0 }; + +static int check_reg (struct reg_entry *reg) { + + if (!(cpu_arch_flags & CPU_386) && (reg->type & (REG32 | SEGMENT2 | CONTROL | DEBUG))) { + return 0; + } + + if ((reg->type & SEGMENT1) && reg->number == REG_FLAT_NUMBER && !intel_syntax) { + return 0; + } + + return 1; + +} + +/** + * Returns 0 when the new prefix is of the same type as already present prefixes, + * 2 when REPE or REPNE prefix is added and 1 when other prefix is added. 
+ */ +static int add_prefix (unsigned char prefix) { + + unsigned int prefix_type; + int ret = 1; + + switch (prefix) { + + case CS_PREFIX_OPCODE: + case DS_PREFIX_OPCODE: + case ES_PREFIX_OPCODE: + case FS_PREFIX_OPCODE: + case GS_PREFIX_OPCODE: + case SS_PREFIX_OPCODE: + + prefix_type = SEGMENT_PREFIX; + break; + + case REPNE_PREFIX_OPCODE: + case REPE_PREFIX_OPCODE: + + prefix_type = REP_PREFIX; + + ret = 2; + break; + + case ADDR_PREFIX_OPCODE: + + prefix_type = ADDR_PREFIX; + break; + + case DATA_PREFIX_OPCODE: + + prefix_type = DATA_PREFIX; + break; + + default: + + report_at (__FILE__, __LINE__, REPORT_INTERNAL_ERROR, "add_prefix invalid case %i", prefix); + exit (EXIT_FAILURE); + + } + + if (instruction.prefixes[prefix_type]) { + ret = 0; + } + + if (ret) { + + instruction.prefix_count++; + instruction.prefixes[prefix_type] = prefix; + + } else { + report_at (get_filename (), get_line_number (), REPORT_ERROR, "same type of prefix used twice"); + } + + return (ret); + +} + + +static struct reg_entry *parse_register (const char *reg_string, char **end_pp) { + + struct reg_entry *reg; + + char *p, *p_into_reg_name_cleaned; + char reg_name_cleaned[MAX_REG_NAME_SIZE + 1]; + + p = skip_whitespace ((char *) reg_string); + + for (p_into_reg_name_cleaned = reg_name_cleaned; (*(p_into_reg_name_cleaned++) = register_chars_table[(int) *p]) != '\0'; p++) { + + if (p_into_reg_name_cleaned >= reg_name_cleaned + MAX_REG_NAME_SIZE) { + return 0; + } + + } + + if (is_name_part ((int) *p)) { + return 0; + } + + reg = machine_dependent_find_reg_entry (reg_name_cleaned); + *end_pp = p; + + if (!reg) { + return 0; + } else if (check_reg (reg)) { + return reg; + } + + report_at (get_filename (), get_line_number (), REPORT_ERROR, "register %s cannot be used here", reg->name); + return &bad_register; + +} + +static char *parse_instruction (char *line) { + + const char *expecting_string_instruction = 0; + const struct template *template; + + char *p2; + char saved_ch; + + 
current_templates = 0; + + while (1) { + + p2 = line = skip_whitespace (line); + + while ((*p2 != ' ') && (*p2 != '\t') && (*p2 != '\0')) { + + *p2 = tolower ((int) *p2); + p2++; + + } + + saved_ch = *p2; + *p2 = '\0'; + + if (line == p2) { + + report_at (get_filename (), get_line_number (), REPORT_ERROR, "expecting mnemonic; got nothing"); + return (line); + + } + + current_templates = machine_dependent_find_templates (line, 0); + + if (saved_ch && (*skip_whitespace (p2 + 1)) && current_templates && (current_templates->start->opcode_modifier & IS_PREFIX)) { + + if ((current_templates->start->opcode_modifier & (SIZE16 | SIZE32)) && ((current_templates->start->opcode_modifier & SIZE32) && (bits ^= 16))) { + + report_at (get_filename (), get_line_number (), REPORT_ERROR, "redundant %s prefix", current_templates->name); + return 0; + + } + + switch (add_prefix (current_templates->start->base_opcode)) { + + case 0: + + return 0; + + case 2: + + expecting_string_instruction = current_templates->name; + break; + + } + + *p2 = saved_ch; + line = p2 + 1; + + } else { + break; + } + + } + + if (current_templates == 0) { + + switch (p2[-1]) { + + case WORD_SUFFIX: + case BYTE_SUFFIX: + case QWORD_SUFFIX: + + instruction.suffix = p2[-1]; + p2[-1] = '\0'; + + break; + + case SHORT_SUFFIX: + case DWORD_SUFFIX: + + if (!intel_syntax) { + + instruction.suffix = p2[-1]; + p2[-1] = '\0'; + + } + + break; + + /* Intel syntax only. 
*/ + case 'd': + + if (intel_syntax) { + + instruction.suffix = DWORD_SUFFIX; + p2[-1] = '\0'; + + } + + break; + + default: + + report_at (get_filename (), get_line_number (), REPORT_ERROR, "no such instruction '%s'", line); + return 0; + + } + + current_templates = machine_dependent_find_templates (line, 0); + + if (current_templates == 0) { + + report_at (get_filename (), get_line_number (), REPORT_ERROR, "no such instruction '%s'", line); + return 0; + + } + } + + if (expecting_string_instruction) { + + if (!(current_templates->start->opcode_modifier & IS_STRING)) { + + report_at (get_filename (), get_line_number (), REPORT_ERROR, "expecting string instruction after '%s'", expecting_string_instruction); + return 0; + + } + + } + + for (template = current_templates->start; template < current_templates->end; template++) { + + if (template->cpu_flags == 0 || (template->cpu_flags & cpu_arch_flags)) { + goto end; + } + + } + + report_at (get_filename (), get_line_number (), REPORT_ERROR, "'%s' is not supported on '%s%s'", + current_templates->name, cpu_arch_name ? cpu_arch_name : DEFAULT_CPU_ARCH_NAME, cpu_extensions_name ? cpu_extensions_name : ""); + + return 0; + +end: + + *p2 = saved_ch; + line = p2; + + return (line); + +} + + +static int intel_simplify_expr (struct expr *expr); + +static int finalize_immediate (struct expr *expr, const char *imm_start) { + + if (expr->type == EXPR_TYPE_INVALID || expr->type == EXPR_TYPE_ABSENT) { + + if (imm_start) { + report_at (get_filename (), get_line_number (), REPORT_ERROR, "missing or invalid immediate expression '%s'", imm_start); + } + + return 1; + + } else if (expr->type == EXPR_TYPE_CONSTANT) { + + /* Size will be determined later. */ + instruction.types[instruction.operands] |= IMM16; + + } else { + + /* It is an address and size will determined later. 
*/ + instruction.types[instruction.operands] = IMM8 | IMM16; + + } + + return 0; + +} + +static int finalize_displacement (struct expr *expr, const char *disp_start) { + + if (expr->type == EXPR_TYPE_INVALID || expr->type == EXPR_TYPE_ABSENT) { + + if (disp_start) { + report_at (get_filename (), get_line_number (), REPORT_ERROR, "missing or invalid displacement expression '%s'", disp_start); + } + + return 1; + + } + + return 0; + +} + +static int base_index_check (char *operand_string) { + + if (bits == 32) { + + if ((instruction.base_reg && !(instruction.base_reg->type & REG32)) + || (instruction.index_reg && (!(instruction.index_reg->type & BASE_INDEX) || !(instruction.index_reg->type & REG32)))) { + bad: + + report_at (get_filename (), get_line_number (), REPORT_ERROR, "'%s' is not a valid base/index expression", operand_string); + return 1; + + } + + } else { + + if ((instruction.base_reg && (!(instruction.base_reg->type & BASE_INDEX) || !(instruction.base_reg->type & REG16))) + || (instruction.index_reg && (!(instruction.index_reg->type & BASE_INDEX) || !(instruction.index_reg->type & REG16) + || !(instruction.base_reg && instruction.base_reg->number < 6 && instruction.index_reg->number >= 6 + && instruction.log2_scale_factor == 0)))) { + goto bad; + } + + } + + return 0; + +} + +static int intel_parse_operand (char *start, char *operand_string) { + + int ret; + + struct expr expr_buf, *expr; + char *operand_start; + + memset (&intel_state, 0, sizeof (intel_state)); + intel_state.operand_modifier = EXPR_TYPE_ABSENT; + + expr = &expr_buf; + operand_start = operand_string; + + intel_syntax = -1; + expression_read_into (start, &operand_string, expr); + + ret = intel_simplify_expr (expr); + intel_syntax = 1; + + operand_string = skip_whitespace (operand_string); + + if (*operand_string) { + + report_at (get_filename (), get_line_number (), REPORT_ERROR, "junk '%s' after expression", operand_string); + return 1; + + } else if (!intel_state.has_offset && 
operand_string > operand_start && strrchr (operand_start, ']') && skip_whitespace (strrchr (operand_start, ']') + 1) == operand_string) { + + intel_state.is_mem |= 1; + intel_state.is_indirect = 1; + + } + + if (!ret) { + return 1; + } + + ret = 0; + + if (intel_state.operand_modifier != EXPR_TYPE_ABSENT && current_templates->start->base_opcode != 0x8D /* lea */) { + + char suffix = 0; + + switch (intel_state.operand_modifier) { + + case EXPR_TYPE_BYTE_PTR: + + suffix = BYTE_SUFFIX; + break; + + case EXPR_TYPE_WORD_PTR: + + suffix = WORD_SUFFIX; + break; + + case EXPR_TYPE_DWORD_PTR: + + if (bits != 32 && ((current_templates->start->opcode_modifier & JUMP) || (current_templates->start->opcode_modifier & CALL))) { + suffix = INTEL_SUFFIX; + } else { + suffix = DWORD_SUFFIX; + } + + break; + + case EXPR_TYPE_FWORD_PTR: + + /* lgdt, lidt, sgdt, sidt accept fword ptr but ignore it. */ + if ((current_templates->name[0] == 'l' || current_templates->name[0] == 's') && (current_templates->name[1] == 'g' || current_templates->name[1] == 'i') && current_templates->name[2] == 'd' && current_templates->name[3] == 't' && current_templates->name[4] == '\0') { + break; + } + + if (bits == 16) { + add_prefix (DATA_PREFIX_OPCODE); + } + + suffix = INTEL_SUFFIX; + break; + + case EXPR_TYPE_QWORD_PTR: + + suffix = QWORD_SUFFIX; + break; + + case EXPR_TYPE_FAR_PTR: + + suffix = INTEL_SUFFIX; + break; + + default: + + break; + + } + + if (!instruction.suffix) { + instruction.suffix = suffix; + } else if (instruction.suffix != suffix) { + + report_at (get_filename (), get_line_number (), REPORT_ERROR, "conficting operand size modifiers"); + return 1; + + } + + } + + if ((current_templates->start->opcode_modifier & JUMP) || (current_templates->start->opcode_modifier & CALL) || (current_templates->start->opcode_modifier & JUMPINTERSEGMENT)) { + + int is_absolute_jump = 0; + + if (instruction.regs[instruction.operands] || intel_state.base_reg || intel_state.index_reg || intel_state.is_mem 
> 1) { + is_absolute_jump = 1; + } else { + + switch (intel_state.operand_modifier) { + + case EXPR_TYPE_NEAR_PTR: + + if (intel_state.segment) { + is_absolute_jump = 1; + } else { + intel_state.is_mem = 1; + } + + break; + + case EXPR_TYPE_FAR_PTR: + case EXPR_TYPE_ABSENT: + + if (!intel_state.segment) { + + intel_state.is_mem = 1; + + if (intel_state.operand_modifier == EXPR_TYPE_ABSENT) { + + if (intel_state.is_indirect) { + is_absolute_jump = 1; + } + + break; + + } + + report_at (get_filename (), get_line_number (), REPORT_ERROR, "cannot infer the segment part of the operand"); + return 1; + + } else if (symbol_get_section (intel_state.segment) == reg_section) { + is_absolute_jump = 1; + } else { + + /* Something like "jmp 12:34" must be converted into "jmp 12, 34". */ + instruction.imms[instruction.operands] = &operand_exprs[operand_exprs_count++]; + memset (instruction.imms[instruction.operands], 0, sizeof (*instruction.imms[instruction.operands])); + + instruction.imms[instruction.operands]->type = EXPR_TYPE_SYMBOL; + instruction.imms[instruction.operands]->add_symbol = intel_state.segment; + + resolve_expression (instruction.imms[instruction.operands]); + + if (finalize_immediate (instruction.imms[instruction.operands], operand_start)) { + return 1; + } + + instruction.operands++; + + if (instruction.suffix == INTEL_SUFFIX) { + instruction.suffix = 0; + } + + intel_state.segment = 0; + intel_state.is_mem = 0; + + } + + break; + + default: + + is_absolute_jump = 1; + break; + + } + + } + + if (is_absolute_jump) { + + instruction.types[instruction.operands] |= JUMP_ABSOLUTE; + intel_state.is_mem |= 1; + + } + + } + + if (instruction.regs[instruction.operands]) { + + instruction.types[instruction.operands] |= instruction.regs[instruction.operands]->type & ~BASE_INDEX; + instruction.reg_operands++; + + } else if (intel_state.base_reg || intel_state.index_reg || intel_state.segment || intel_state.is_mem) { + + if (instruction.mem_operands >= 1) { + + /** + * 
Handles "call 0x9090, 0x9090", "lcall 0x9090, 0x9090", + * "jmp 0x9090, 0x9090", "ljmp 0x9090, 0x9090". + */ + if (((current_templates->start->opcode_modifier & JUMP) || (current_templates->start->opcode_modifier & CALL) || (current_templates->start->opcode_modifier & JUMPINTERSEGMENT)) && instruction.operands == 1 && instruction.mem_operands == 1 && instruction.disp_operands == 1 && intel_state.segment == 0 && intel_state.operand_modifier == EXPR_TYPE_ABSENT) { + + instruction.operands = 0; + + if (!finalize_immediate (instruction.disps[instruction.operands], 0)) { + + instruction.imms[instruction.operands] = instruction.disps[instruction.operands]; + instruction.operands = 1; + + operand_exprs[operand_exprs_count] = *expr; + instruction.imms[instruction.operands] = &operand_exprs[operand_exprs_count++]; + + resolve_expression (instruction.imms[instruction.operands]); + + if (!finalize_immediate (instruction.imms[instruction.operands], operand_start)) { + + instruction.mem_operands = 0; + instruction.disp_operands = 0; + + instruction.operands = 2; + instruction.types[0] &= ~ANY_MEM; + + return 0; + + } + + } + + } + + report_at (get_filename (), get_line_number (), REPORT_ERROR, "too many memory references for '%s'", current_templates->name); + return 1; + + } + + if (intel_state.base_reg && intel_state.index_reg && (intel_state.base_reg->type & REG16) && (intel_state.index_reg->type & REG16) && intel_state.base_reg->number >= 6 && intel_state.index_reg->number < 6) { + + /* Converts [si + bp] to [bp + si] as addition is commutative but other code accepts only (%bp,%si), not (%si,%bp). 
*/ + instruction.base_reg = intel_state.index_reg; + instruction.index_reg = intel_state.base_reg; + + } else { + + instruction.base_reg = intel_state.base_reg; + instruction.index_reg = intel_state.index_reg; + + } + + if (instruction.base_reg || instruction.index_reg) { + instruction.types[instruction.operands] |= BASE_INDEX; + } + + operand_exprs[operand_exprs_count] = *expr; + expr = &operand_exprs[operand_exprs_count++]; + + resolve_expression (expr); + + if (expr->type != EXPR_TYPE_CONSTANT || expr->add_number || !(instruction.types[instruction.operands] & BASE_INDEX)) { + + instruction.disps[instruction.operands] = expr; + instruction.disp_operands++; + + instruction.types[instruction.operands] |= DISP16; + + if (finalize_displacement (instruction.disps[instruction.operands], operand_start)) { + return 1; + } + + } + + if (intel_state.segment) { + + int more_than_1_segment = 0; + + while (1) { + + expr = symbol_get_value_expression (intel_state.segment); + + if (expr->type != EXPR_TYPE_FULL_PTR || symbol_get_value_expression (expr->op_symbol)->type != EXPR_TYPE_REGISTER) { + break; + } + + intel_state.segment = expr->add_symbol; + more_than_1_segment = 1; + + } + + if (expr->type != EXPR_TYPE_REGISTER) { + + report_at (get_filename (), get_line_number (), REPORT_ERROR, "segment register name expected"); + return 1; + + } + + if ((reg_table[expr->add_number].type & (SEGMENT1 | SEGMENT2)) == 0) { + + report_at (get_filename (), get_line_number (), REPORT_ERROR, "invalid use of register"); + return 1; + + } + + if (more_than_1_segment) { + report_at (get_filename (), get_line_number (), REPORT_WARNING, "redundant segment overrides"); + } + + if (reg_table[expr->add_number].number == REG_FLAT_NUMBER) { + instruction.segments[instruction.operands] = 0; + } else { + instruction.segments[instruction.operands] = ®_table[expr->add_number]; + } + + } + + if (base_index_check (operand_start)) { + return 1; + } + + instruction.mem_operands++; + + } else { + + 
operand_exprs[operand_exprs_count] = *expr; + instruction.imms[instruction.operands] = &operand_exprs[operand_exprs_count++]; + + resolve_expression (instruction.imms[instruction.operands]); + ret = finalize_immediate (instruction.imms[instruction.operands], operand_start); + + } + + instruction.operands++; + return ret; + +} + +static int parse_operands (char *start, char **p_line) { + + char *line = *p_line; + + while (*line != '\0') { + + char *token_start; + int skipped_comma = 0; + + line = skip_whitespace (line); + token_start = line; + + while ((*line != ',')) { + + if (*line == '\0') { + break; + } else if (line[0] == '\'' && line[1] == ',' && !skipped_comma) { + + line += 2; + + skipped_comma = 1; + continue; + + } + + line++; + + } + + if (token_start != line) { + + int ret = 1; + char saved_ch; + + saved_ch = *line; + *line = '\0'; + + ret = intel_parse_operand (start, token_start); + *line = saved_ch; + + if (ret) { + + *p_line = line; + return 1; + + } + + } + + if (line[0] == '#' || (line[0] == '/' && line[1] == '/')) { + break; + } + + if (line[0] == '/' && line[1] == '*') { + + while (*line) { + + if (line[0] == '*' && line[1] == '/') { + + line += 2; + break; + + } + + line++; + + } + + } + + if (*line == ',') { line++; } + + } + + *p_line = line; + return 0; + +} + +static int intel_simplify_symbol (struct symbol *symbol) { + + int ret = intel_simplify_expr (symbol_get_value_expression (symbol)); + + if (ret == 2) { + + symbol_set_section (symbol, absolute_section); + ret = 1; + + } + + return ret; + +} + +static void intel_fold_symbol_into_expr (struct expr *expr, struct symbol *symbol) { + + struct expr *symbol_expr = symbol_get_value_expression (symbol); + + if (symbol_get_section (symbol) == absolute_section) { + + signed int add_number = expr->add_number; + + *expr = *symbol_expr; + expr->add_number += add_number; + + } else { + + expr->type = EXPR_TYPE_SYMBOL; + expr->add_symbol = symbol; + expr->op_symbol = 0; + + } + +} + +static int 
intel_process_register_expr (struct expr *expr) { + + int reg_num = expr->add_number; + + if (intel_state.in_offset || instruction.operands < 0) { + + report_at (get_filename (), get_line_number (), REPORT_ERROR, "invalid use of register"); + return 0; + + } + + if (!intel_state.in_bracket) { + + if (instruction.regs[instruction.operands]) { + + report_at (get_filename (), get_line_number (), REPORT_ERROR, "invalid use of register"); + return 0; + + } + + if ((reg_table[reg_num].type & SEGMENT1) && reg_table[reg_num].number == REG_FLAT_NUMBER) { + + report_at (get_filename (), get_line_number (), REPORT_ERROR, "invalid use of pseudo-register"); + return 0; + + } + + instruction.regs[instruction.operands] = reg_table + reg_num; + + } else if (!intel_state.base_reg && !intel_state.in_scale) { + intel_state.base_reg = reg_table + reg_num; + } else if (!intel_state.index_reg) { + intel_state.index_reg = reg_table + reg_num; + } else { + intel_state.index_reg = 0; + } + + return 2; + +} + +static int intel_simplify_expr (struct expr *expr) { + + int ret; + + switch (expr->type) { + + case EXPR_TYPE_INDEX: + + if (expr->add_symbol) { + + if (!intel_simplify_symbol (expr->add_symbol)) { + return 0; + } + + } + + if (!intel_state.in_offset) { + intel_state.in_bracket++; + } + + ret = intel_simplify_symbol (expr->op_symbol); + + if (!intel_state.in_offset) { + intel_state.in_bracket--; + } + + if (!ret) { + return 0; + } + + if (expr->add_symbol) { + expr->type = EXPR_TYPE_ADD; + } else { + intel_fold_symbol_into_expr (expr, expr->op_symbol); + } + + break; + + case EXPR_TYPE_OFFSET: + + intel_state.has_offset = 1; + + intel_state.in_offset++; + ret = intel_simplify_symbol (expr->add_symbol); + intel_state.in_offset--; + + if (!ret) { + return 0; + } + + intel_fold_symbol_into_expr (expr, expr->add_symbol); + return ret; + + case EXPR_TYPE_MULTIPLY: + + if (intel_state.in_bracket) { + + struct expr *scale_expr = 0; + + if (!intel_state.in_scale++) { + 
intel_state.scale_factor = 1; + } + + ret = intel_simplify_symbol (expr->add_symbol); + + if (ret && intel_state.index_reg) { + scale_expr = symbol_get_value_expression (expr->op_symbol); + } + + if (ret) { + ret = intel_simplify_symbol (expr->op_symbol); + } + + if (ret && !scale_expr && intel_state.index_reg) { + scale_expr = symbol_get_value_expression (expr->add_symbol); + } + + if (ret && scale_expr) { + + resolve_expression (scale_expr); + + if (scale_expr->type != EXPR_TYPE_CONSTANT) { + scale_expr->add_number = 0; + } + + intel_state.scale_factor *= scale_expr->add_number; + + } + + intel_state.in_scale--; + if (!ret ) { return 0; } + + if (!intel_state.in_scale) { + + switch (intel_state.scale_factor) { + + case 1: + + instruction.log2_scale_factor = 0; + break; + + case 2: + + instruction.log2_scale_factor = 1; + break; + + case 4: + + instruction.log2_scale_factor = 2; + break; + + case 8: + + instruction.log2_scale_factor = 3; + break; + + default: + + intel_state.index_reg = 0; + break; + + } + + } + + break; + + } + + goto default_; + + case EXPR_TYPE_SHORT: + + instruction.force_short_jump = 1; + goto ptr_after_setting_operand_modifier; + + case EXPR_TYPE_BYTE_PTR: + case EXPR_TYPE_WORD_PTR: + case EXPR_TYPE_DWORD_PTR: + case EXPR_TYPE_FWORD_PTR: + case EXPR_TYPE_QWORD_PTR: + case EXPR_TYPE_NEAR_PTR: + case EXPR_TYPE_FAR_PTR: + + if (intel_state.operand_modifier == EXPR_TYPE_ABSENT) { + intel_state.operand_modifier = expr->type; + } + + ptr_after_setting_operand_modifier: + + if (symbol_get_value_expression (expr->add_symbol)->type == EXPR_TYPE_REGISTER) { + + report_at (get_filename (), get_line_number (), REPORT_ERROR, "invalid use of register"); + return 0; + + } + + if (!intel_simplify_symbol (expr->add_symbol)) { + return 0; + } + + intel_fold_symbol_into_expr (expr, expr->add_symbol); + break; + + case EXPR_TYPE_FULL_PTR: + + if (symbol_get_value_expression (expr->op_symbol)->type == EXPR_TYPE_REGISTER) { + + report_at (get_filename (), 
get_line_number (), REPORT_ERROR, "invalid use of register"); + return 0; + + } + + if (!intel_simplify_symbol (expr->op_symbol)) { + return 0; + } + + if (!intel_state.in_offset) { + + if (!intel_state.segment) { + intel_state.segment = expr->add_symbol; + } else { + + struct expr temp_expr = { 0 }; + + temp_expr.type = EXPR_TYPE_FULL_PTR; + temp_expr.add_symbol = expr->add_symbol; + temp_expr.op_symbol = intel_state.segment; + + intel_state.segment = make_expr_symbol (&temp_expr); + + } + + } + + intel_fold_symbol_into_expr (expr, expr->op_symbol); + break; + + case EXPR_TYPE_REGISTER: + + if ((ret = intel_process_register_expr (expr)) == 2) { + + expr->type = EXPR_TYPE_CONSTANT; + expr->add_number = 0; + + } + + return ret; + + default: + default_: + + if (expr->add_symbol && !intel_simplify_symbol (expr->add_symbol)) { + return 0; + } + + if (expr->op_symbol && !intel_simplify_symbol (expr->op_symbol)) { + return 0; + } + + break; + + } + + if (expr->type == EXPR_TYPE_SYMBOL && !intel_state.in_offset) { + + struct section *section = symbol_get_section (expr->add_symbol); + + if (section != absolute_section && section != expr_section && section != reg_section) { + intel_state.is_mem |= 2 - !intel_state.in_bracket; + } + + } + + return 1; + +} + + +struct intel_type { + + const char *name; + + enum expr_type expr_type; + unsigned int size[2]; + +}; + +struct intel_operator { + + const char *name; + + enum expr_type expr_type; + unsigned int operands; + +}; + +static const struct intel_operator intel_operators[] = { + + { "and", EXPR_TYPE_BIT_AND, 2 }, + { "eq", EXPR_TYPE_EQUAL, 2 }, + { "ge", EXPR_TYPE_GREATER_EQUAL, 2 }, + { "gt", EXPR_TYPE_GREATER, 2 }, + { "le", EXPR_TYPE_LESSER_EQUAL, 2 }, + { "lt", EXPR_TYPE_LESSER, 2 }, + { "mod", EXPR_TYPE_MODULUS, 2 }, + { "ne", EXPR_TYPE_NOT_EQUAL, 2 }, + { "not", EXPR_TYPE_BIT_NOT, 1 }, + { "offset", EXPR_TYPE_OFFSET, 1 }, + { "or", EXPR_TYPE_BIT_INCLUSIVE_OR, 2 }, + { "shl", EXPR_TYPE_LEFT_SHIFT, 2 }, + { "shr", 
EXPR_TYPE_RIGHT_SHIFT, 2 }, + { "short", EXPR_TYPE_SHORT, 1 }, + { "xor", EXPR_TYPE_BIT_EXCLUSIVE_OR, 2 }, + + { 0, EXPR_TYPE_INVALID, 0 } + +}; + +#define INTEL_TYPE(name, size) { #name, EXPR_TYPE_##name##_PTR, { size, size } } + +static const struct intel_type intel_types[] = { + + INTEL_TYPE (BYTE, 1), + INTEL_TYPE (WORD, 2), + INTEL_TYPE (DWORD, 4), + INTEL_TYPE (FWORD, 6), + + { "near", EXPR_TYPE_NEAR_PTR, { 0xFF02, 0xFF04 } }, + { "far", EXPR_TYPE_FAR_PTR, { 0xFF05, 0xFF06 } }, + + { 0, EXPR_TYPE_INVALID, { 0, 0 } } + +}; + +#undef INTEL_TYPE + +static int intel_parse_name (struct expr *expr, char *name) { + + int i; + + if (strcmp (name, "$") == 0) { + + current_location (expr); + return 1; + + } + + for (i = 0; intel_types[i].name; i++) { + + if (xstrcasecmp (name, intel_types[i].name) == 0) { + + expr->type = EXPR_TYPE_CONSTANT; + expr->add_symbol = 0; + expr->op_symbol = 0; + + expr->add_number = intel_types[i].size[0]; + return 1; + + } + + } + + return 0; + +} + +static void swap_2_operands (unsigned int op1, unsigned int op2) { + + int temp_type; + + struct reg_entry *temp_reg; + struct expr *temp_expr; + + temp_type = instruction.types[op1]; + instruction.types[op1] = instruction.types[op2]; + instruction.types[op2] = temp_type; + + temp_reg = instruction.regs[op1]; + instruction.regs[op1] = instruction.regs[op2]; + instruction.regs[op2] = temp_reg; + + temp_expr = instruction.disps[op1]; + instruction.disps[op1] = instruction.disps[op2]; + instruction.disps[op2] = temp_expr; + + temp_expr = instruction.imms[op1]; + instruction.imms[op1] = instruction.imms[op2]; + instruction.imms[op2] = temp_expr; + +} + +static void swap_operands (void) { + + swap_2_operands (0, instruction.operands - 1); + + if (instruction.mem_operands == 2) { + + struct reg_entry *seg = instruction.segments[0]; + instruction.segments[0] = instruction.segments[1]; + instruction.segments[1] = seg; + + } + +} + +static int fits_in_signed_byte (signed long number) { + return ((number 
>= -128) && (number <= 127)); +} + +static int fits_in_unsigned_byte (signed long number) { + return ((number & 0xff) == number); +} + +static int fits_in_signed_word (signed long number) { + return ((number >= -32768) && (number <= 32767)); +} + +static int fits_in_unsigned_word (signed long number) { + return ((number & 0xffff) == number); +} + +static unsigned int smallest_imm_type (long number) { + + if (fits_in_signed_byte (number)) { + return (IMM8S | IMM8 | IMM16 | IMM32); + } + + if (fits_in_unsigned_byte (number)) { + return (IMM8 | IMM16 | IMM32); + } + + if (fits_in_signed_word (number) || fits_in_unsigned_word (number)) { + return (IMM16 | IMM32); + } + + return IMM32; + +} + +static void optimize_size_of_disps (void) { + + int operand; + + for (operand = 0; operand < instruction.operands; operand++) { + + if (instruction.types[operand] & DISP) { + + if (instruction.disps[operand]->type == EXPR_TYPE_CONSTANT) { + + unsigned long disp = instruction.disps[operand]->add_number; + + if (instruction.types[operand] & DISP32) { + + disp &= 0xffffffff; + disp = (disp ^ (1UL << 31)) - (1UL << 31); + + } + + if ((instruction.types[operand] & (DISP16 | DISP32)) && fits_in_signed_byte (disp)) { + instruction.types[operand] |= DISP8; + } + + } + + } + + } + +} + +static void optimize_size_of_imms (void) { + + char guessed_suffix = 0; + int operand; + + if (instruction.suffix) { + guessed_suffix = instruction.suffix; + } else if (instruction.reg_operands) { + + /** + * Guesses a suffix from the last register operand + * what is good enough for shortening immediates + * but the real suffix cannot be set yet. + * Example: mov $1234, %al + */ + for (operand = instruction.operands; --operand >= 0; ) { + + if (instruction.types[operand] & REG) { + + guessed_suffix = ((instruction.types[operand] & REG8) ? BYTE_SUFFIX : (instruction.types[operand] & REG16) ? 
WORD_SUFFIX : DWORD_SUFFIX); + break; + + } + + } + + } else if ((bits == 16) ^ (instruction.prefixes[DATA_PREFIX] != 0)) { + + /** + * Immediate shortening for 16 bit code. + * Example: .code16\n push $12341234 + */ + guessed_suffix = WORD_SUFFIX; + + } + + for (operand = 0; operand < instruction.operands; operand++) { + + if (instruction.types[operand] & IMM) { + + if (instruction.imms[operand]->type == EXPR_TYPE_CONSTANT) { + + /* If a suffix is given, it is allowed to shorten the immediate. */ + switch (guessed_suffix) { + + case BYTE_SUFFIX: + + instruction.types[operand] |= IMM8 | IMM8S | IMM16 | IMM32; + break; + + case WORD_SUFFIX: + + instruction.types[operand] |= IMM16 | IMM32; + break; + + case DWORD_SUFFIX: + + instruction.types[operand] |= IMM32; + break; + + } + + if (instruction.types[0] & IMM32) { + + instruction.imms[operand]->add_number &= 0xffffffff; + instruction.imms[operand]->add_number = ((instruction.imms[operand]->add_number ^ (1UL << 31)) - (1UL << 31)); + + } + + instruction.types[operand] |= smallest_imm_type (instruction.imms[operand]->add_number); + + } + + } + + } + +} + +#define MATCH(overlap, operand_type) (((overlap) & ~JUMP_ABSOLUTE) && (((operand_type) & (BASE_INDEX | JUMP_ABSOLUTE)) == ((overlap) & (BASE_INDEX | JUMP_ABSOLUTE)))) + +static int match_template (void) { + + unsigned int found_reverse_match = 0, suffix_check = 0; + struct template *template; + + switch (instruction.suffix) { + + case BYTE_SUFFIX: + + suffix_check = NO_BSUF; + break; + + case WORD_SUFFIX: + + suffix_check = NO_WSUF; + break; + + case SHORT_SUFFIX: + + suffix_check = NO_SSUF; + break; + + case DWORD_SUFFIX: + + suffix_check = NO_LSUF; + break; + + case QWORD_SUFFIX: + + suffix_check = NO_QSUF; + break; + + case INTEL_SUFFIX: + + suffix_check = NO_INTELSUF; + break; + + } + + for (template = current_templates->start; template < current_templates->end; template++) { + + unsigned int operand_type_overlap0, operand_type_overlap1, operand_type_overlap2; + 
+ if (instruction.operands != template->operands) { + continue; + } + + if (template->cpu_flags && (template->cpu_flags & cpu_arch_flags) == 0) { + continue; + } + + if (template->opcode_modifier & suffix_check) { + continue; + } + + if (instruction.suffix == DWORD_SUFFIX && !(cpu_arch_flags & CPU_386) && !(template->opcode_modifier & IGNORE_SIZE)) { + continue; + } + + if (instruction.operands == 0) { + break; + } + + operand_type_overlap0 = instruction.types[0] & template->operand_types[0]; + + switch (template->operands) { + + case 1: + + if (!MATCH (operand_type_overlap0, instruction.types[0])) { + continue; + } + + if (operand_type_overlap0 == 0) { + continue; + } + + break; + + case 2: + case 3: + + operand_type_overlap1 = instruction.types[1] & template->operand_types[1]; + + if (!MATCH (operand_type_overlap0, instruction.types[0]) || !MATCH (operand_type_overlap1, instruction.types[1])) { + + if ((template->opcode_modifier & D) == 0) { + continue; + } + + operand_type_overlap0 = instruction.types[0] & template->operand_types[1]; + operand_type_overlap1 = instruction.types[1] & template->operand_types[0]; + + if (!MATCH (operand_type_overlap0, instruction.types[0]) || !MATCH (operand_type_overlap1, instruction.types[1])) { + continue; + } + + found_reverse_match = template->opcode_modifier & D; + + } else if (instruction.operands == 3) { + + operand_type_overlap2 = instruction.types[2] & template->operand_types[2]; + + if (!MATCH (operand_type_overlap2, instruction.types[2])) { + continue; + } + + } + + break; + + } + + break; + + } + + if (template == current_templates->end) { + + /* No match was found. 
*/ + report_at (get_filename (), get_line_number (), REPORT_ERROR, "operands invalid for '%s'", current_templates->name); + return 1; + + } + + instruction.template = *template; + + if (state->model < 7) { + + if (template->base_opcode == 0xC3 && xstrcasecmp (template->name, "retn") && instruction.operands == 0 && state->model >= 4 && state->procs.length > 0) { + instruction.template.base_opcode = 0xCB; + } + + if (template->base_opcode == 0xC2 && instruction.operands == 1 && state->model >= 4 && state->procs.length > 0) { + instruction.template.base_opcode = 0xCA; + } + + } + + if (found_reverse_match) { + + instruction.template.base_opcode |= found_reverse_match; + + instruction.template.operand_types[0] = template->operand_types[1]; + instruction.template.operand_types[1] = template->operand_types[0]; + + } + + return 0; + +} + +static int check_byte_reg (void) { + + int op; + + for (op = instruction.operands; --op >= 0; ) { + + if (instruction.types[op] & REG8) { + continue; + } + + if ((instruction.types[op] & WORD_REG) && (instruction.regs[op]->number < 4)) { + + if (!(instruction.template.operand_types[op] & PORT)) { + report_at (get_filename (), get_line_number (), REPORT_WARNING, "using '%%%s' instead of '%%%s' due to '%c' suffix", (instruction.regs[op] - ((instruction.types[op] & REG16) ? 
8 : 16))->name, instruction.regs[op]->name, instruction.suffix); + } + + continue; + + } + + if (instruction.types[op] & (REG | SEGMENT1 | SEGMENT2 | CONTROL | DEBUG | TEST)) { + + report_at (get_filename (), get_line_number (), REPORT_ERROR, "'%%%s' not allowed with '%s%c'.", instruction.regs[op]->name, instruction.template.name, instruction.suffix); + return 1; + + } + + } + + return 0; + +} + +static int check_word_reg (void) { + + int op; + + for (op = instruction.operands; --op >= 0; ) { + + if ((instruction.types[op] & REG8) && (instruction.template.operand_types[op] & (REG16 | REG32 | ACC))) { + + report_at (get_filename (), get_line_number (), REPORT_ERROR, "'%%%s' not allowed with '%s%c'.", instruction.regs[op]->name, instruction.template.name, instruction.suffix); + return 1; + + } + + if ((instruction.types[op] & REG32) && (instruction.template.operand_types[op] & (REG16 | ACC))) { + report_at (get_filename (), get_line_number (), REPORT_ERROR, "using '%%%s' instead of '%%%s' due to '%c' suffix", (instruction.regs[op]-8)->name, instruction.regs[op]->name, instruction.suffix); + } + + } + + return 0; + +} + +static int check_dword_reg (void) { + + int op; + + for (op = instruction.operands; --op >= 0; ) { + + if ((instruction.types[op] & REG8) && (instruction.template.operand_types[op] & (REG16 | REG32 | ACC))) { + + report_at (get_filename (), get_line_number (), REPORT_ERROR, "'%%%s' not allowed with '%s%c'.", instruction.regs[op]->name, instruction.template.name, instruction.suffix); + return 1; + + } + + if ((instruction.types[op] & REG16) && (instruction.template.operand_types[op] & (REG32 | ACC))) { + report_at (get_filename (), get_line_number (), REPORT_WARNING, "using '%%%s' instead of '%%%s' due to '%c' suffix", (instruction.regs[op]+8)->name, instruction.regs[op]->name, instruction.suffix); + } + + } + + return 0; + +} + +static unsigned int modrm_mode_from_disp_size (unsigned int type) { + return ((type & DISP8) ? 
1 : ((type & (DISP16 | DISP32)) ? 2 : 0)); +} + +static int process_suffix (void) { + + int is_movsx_or_movzx = 0; + + if (instruction.template.opcode_modifier & (SIZE16 | SIZE32)) { + + if (instruction.template.opcode_modifier & SIZE16) { + instruction.suffix = WORD_SUFFIX; + } else { + instruction.suffix = DWORD_SUFFIX; + } + + } else if (instruction.reg_operands && (instruction.operands > 1 || (instruction.types[0] & REG))) { + + int saved_operands = instruction.operands; + + is_movsx_or_movzx = ((instruction.template.base_opcode & 0xFF00) == 0x0F00 + && ((instruction.template.base_opcode & 0xFF) | 8) == 0xBE); + + /* For movsx/movzx only the source operand is considered for the ambiguity checking. + * The suffix is replaced to represent the destination later. */ + if (is_movsx_or_movzx && (instruction.template.opcode_modifier & W)) { + instruction.operands--; + } + + if (!instruction.suffix) { + + int op; + + for (op = instruction.operands; --op >= 0; ) { + + if ((instruction.types[op] & REG) && !(instruction.template.operand_types[op] & SHIFT_COUNT) && !(instruction.template.operand_types[op] & PORT)) { + + instruction.suffix = ((instruction.types[op] & REG8) ? BYTE_SUFFIX : (instruction.types[op] & REG16) ? WORD_SUFFIX : DWORD_SUFFIX); + break; + + } + + } + + /* When .att_syntax is used, movsx and movzx silently default to byte memory source. 
*/ + if (is_movsx_or_movzx && (instruction.template.opcode_modifier & W) && !instruction.suffix && !intel_syntax) { + instruction.suffix = BYTE_SUFFIX; + } + + } else { + + int ret; + + switch (instruction.suffix) { + + case BYTE_SUFFIX: + + ret = check_byte_reg (); + break; + + case WORD_SUFFIX: + + ret = check_word_reg (); + break; + + case DWORD_SUFFIX: + + ret = check_dword_reg (); + break; + + default: + + report_at (__FILE__, __LINE__, REPORT_INTERNAL_ERROR, "process_suffix invalid case %i", instruction.suffix); + exit (EXIT_FAILURE); + + } + + if (ret) { return 1; } + + } + + /* Undoes the movsx/movzx change done above. */ + instruction.operands = saved_operands; + + } else if ((instruction.template.opcode_modifier & DEFAULT_SIZE) && !instruction.suffix) { + instruction.suffix = (bits == 32) ? DWORD_SUFFIX : WORD_SUFFIX; + } else if (!instruction.suffix && ((instruction.template.operand_types[0] & JUMP_ABSOLUTE) || (instruction.template.opcode_modifier & JUMPBYTE) || (instruction.template.opcode_modifier & JUMPINTERSEGMENT) /* lgdt, lidt, sgdt, sidt */ || ((instruction.template.base_opcode == 0x0F01 && instruction.template.extension_opcode <= 3)))) { + + if (bits == 32) { + + if (!(instruction.template.opcode_modifier & NO_LSUF)) { + instruction.suffix = DWORD_SUFFIX; + } + + } else { + + if (!(instruction.template.opcode_modifier & NO_WSUF)) { + instruction.suffix = WORD_SUFFIX; + } + + } + + } + + if (!instruction.suffix + && !(instruction.template.opcode_modifier & IGNORE_SIZE) + && !(instruction.template.opcode_modifier & DEFAULT_SIZE) + /* Explicit data size prefix allows determining the size. */ + && !instruction.prefixes[DATA_PREFIX] + /* fldenv and similar instructions do not require a suffix. 
*/ + && (instruction.template.opcode_modifier & NO_SSUF)) { + + int suffixes = !(instruction.template.opcode_modifier & NO_BSUF); + + if (!(instruction.template.opcode_modifier & NO_WSUF)) { + suffixes |= 1 << 1; + } + + if (!(instruction.template.opcode_modifier & NO_SSUF)) { + suffixes |= 1 << 2; + } + + if (!(instruction.template.opcode_modifier & NO_LSUF)) { + suffixes |= 1 << 3; + } + + if (!(instruction.template.opcode_modifier & NO_INTELSUF)) { + suffixes |= 1 << 4; + } + + if (suffixes & (suffixes - 1)) { + + if (intel_syntax) { + + report_at (get_filename (), get_line_number (), REPORT_ERROR, "ambiguous operand size for '%s'", instruction.template.name); + return 1; + + } + + report_at (get_filename (), get_line_number (), REPORT_WARNING, "%s, using default for '%s'", intel_syntax + ? "ambiguous operand size" : "no instruction mnemonic suffix given and no register operands", + instruction.template.name); + + if (is_movsx_or_movzx) { + /* Handled below. */ + } else if (bits == 16) { + instruction.suffix = WORD_SUFFIX; + } else if (!(instruction.template.opcode_modifier & NO_LSUF)) { + instruction.suffix = DWORD_SUFFIX; + } else { + instruction.suffix = QWORD_SUFFIX; + } + + } + + } + + if (is_movsx_or_movzx) { + + /* The W modifier applies to the source memory or register, not to the destination register. */ + if ((instruction.template.opcode_modifier & W) && instruction.suffix && instruction.suffix != BYTE_SUFFIX) { + instruction.template.base_opcode |= 1; + } + + /* Changes the suffix to represent the destination and turns off the W modifier as it was already used above. 
*/ + if ((instruction.template.opcode_modifier & W) || !instruction.suffix) { + + if (instruction.types[1] & REG16) { + instruction.suffix = WORD_SUFFIX; + } else { + instruction.suffix = DWORD_SUFFIX; + } + + instruction.template.opcode_modifier &= ~W; + + } + + } + + switch (instruction.suffix) { + + case DWORD_SUFFIX: + case WORD_SUFFIX: + case QWORD_SUFFIX: + + /* Selects word/dword operation. */ + if (instruction.template.opcode_modifier & W) { + + if (instruction.template.opcode_modifier & SHORT_FORM) { + instruction.template.base_opcode |= 8; + } else { + instruction.template.base_opcode |= 1; + } + + } + + /* fall through. */ + + case SHORT_SUFFIX: + + if (instruction.suffix != QWORD_SUFFIX + && !(instruction.template.opcode_modifier & IGNORE_SIZE) + && ((instruction.suffix == DWORD_SUFFIX) == (bits == 16))) { + + unsigned int prefix = DATA_PREFIX_OPCODE; + + if (instruction.template.opcode_modifier & JUMPBYTE) { + prefix = ADDR_PREFIX_OPCODE; + } + + if (!add_prefix (prefix)) { + return 1; + } + + } + + break; + + case 0: + + /* Selects word/dword operation based on explicit data size prefix + * if no suitable register are present. 
*/ + if ((instruction.template.opcode_modifier & W) + && instruction.prefixes[DATA_PREFIX] + && (!instruction.reg_operands + || (instruction.reg_operands == 1 + && !(instruction.template.operand_types[0] & SHIFT_COUNT) + && !(instruction.template.operand_types[0] & PORT) + && !(instruction.template.operand_types[1] & PORT)))) { + + instruction.template.base_opcode |= 1; + + } + + break; + + } + + return 0; + +} + +static int finalize_imms (void) { + + int operand; + + for (operand = 0; operand < instruction.operands; operand++) { + + unsigned int overlap = instruction.types[operand] & instruction.template.operand_types[operand]; + + if ((overlap & IMM) && (overlap != IMM8) && (overlap != IMM8S) && (overlap != IMM16) && (overlap != IMM32)) { + + if (instruction.suffix) { + + switch (instruction.suffix) { + + case BYTE_SUFFIX: + + overlap &= IMM8 | IMM8S; + break; + + case WORD_SUFFIX: + + overlap &= IMM16; + break; + + case DWORD_SUFFIX: + + overlap &= IMM32; + break; + + } + + } else if (overlap == (IMM16 | IMM32)) { + + if ((bits == 16) ^ (instruction.prefixes[DATA_PREFIX] != 0)) { + overlap = IMM16; + } else { + overlap = IMM32; + } + + } else if (instruction.prefixes[DATA_PREFIX]) { + overlap &= (bits != 16) ? IMM16 : IMM32; + } + + if ((overlap != IMM8) && (overlap != IMM8S) && (overlap != IMM16) && (overlap != IMM32)) { + + report_at (get_filename (), get_line_number (), REPORT_ERROR, "no instruction suffix given; cannot determine immediate size"); + return 1; + + } + + } + + instruction.types[operand] = overlap; + + } + + return 0; + +} + +static const unsigned char segment_prefixes[] = { + + ES_PREFIX_OPCODE, + CS_PREFIX_OPCODE, + SS_PREFIX_OPCODE, + DS_PREFIX_OPCODE, + FS_PREFIX_OPCODE, + GS_PREFIX_OPCODE + +}; + +static int process_operands (void) { + + if (instruction.template.opcode_modifier & REG_DUPLICATION) { + + unsigned int first_reg_operand = (instruction.types[0] & REG) ? 
0 : 1; + + instruction.regs[first_reg_operand + 1] = instruction.regs[first_reg_operand]; + instruction.types[first_reg_operand + 1] = instruction.types[first_reg_operand]; + instruction.reg_operands = 2; + + } + + if (instruction.template.opcode_modifier & SHORT_FORM) { + + int operand = (instruction.types[0] & REG) ? 0 : 1; + instruction.template.base_opcode |= instruction.regs[operand]->number; + + } + + if (instruction.template.opcode_modifier & MODRM) { + + if (instruction.reg_operands == 2) { + + unsigned int source, dest; + + source = (instruction.types[0] & (REG | SEGMENT1 | SEGMENT2 | CONTROL | DEBUG | TEST)) ? 0 : 1; + dest = source + 1; + + instruction.modrm.mode = 3; + + if ((instruction.template.operand_types[dest] & ANY_MEM) == 0) { + + instruction.modrm.regmem = instruction.regs[source]->number; + instruction.modrm.reg = instruction.regs[dest]->number; + + } else { + + instruction.modrm.regmem = instruction.regs[dest]->number; + instruction.modrm.reg = instruction.regs[source]->number; + + } + + } else { + + if (instruction.mem_operands) { + + int fake_zero_displacement = 0; + int operand = 0; + + if (instruction.types[0] & ANY_MEM) { + ; + } else if (instruction.types[1] & ANY_MEM) { + operand = 1; + } else { + operand = 2; + } + + if (instruction.base_reg == 0) { + + instruction.modrm.mode = 0; + + if (instruction.disp_operands == 0) { + fake_zero_displacement = 1; + } + + if (instruction.index_reg == 0) { + + if ((bits == 16) ^ (instruction.prefixes[ADDR_PREFIX] != 0)) { + + instruction.modrm.regmem = SIB_BASE_NO_BASE_REGISTER_16; + instruction.types[operand] = DISP16; + + } else { + + instruction.modrm.regmem = SIB_BASE_NO_BASE_REGISTER; + instruction.types[operand] = DISP32; + + } + + } else { + + instruction.sib.base = SIB_BASE_NO_BASE_REGISTER; + instruction.sib.index = instruction.index_reg->number; + instruction.sib.scale = instruction.log2_scale_factor; + + instruction.modrm.regmem = MODRM_REGMEM_TWO_BYTE_ADDRESSING; + + 
instruction.types[operand] &= ~DISP; + instruction.types[operand] |= DISP32; + + } + + } else if (instruction.base_reg->type & REG16) { + + switch (instruction.base_reg->number) { + + case 3: + + if (instruction.index_reg == 0) { + instruction.modrm.regmem = 7; + } else { + instruction.modrm.regmem = (instruction.index_reg->number - 6); + } + + break; + + case 5: + + if (instruction.index_reg == 0) { + + instruction.modrm.regmem = 6; + + if ((instruction.types[operand] & DISP) == 0) { + + fake_zero_displacement = 1; + instruction.types[operand] |= DISP8; + + } + + } else { + instruction.modrm.regmem = (instruction.index_reg->number - 6 + 2); + } + + break; + + default: + + instruction.modrm.regmem = (instruction.base_reg->number - 6 + 4); + break; + + } + + instruction.modrm.mode = modrm_mode_from_disp_size (instruction.types[operand]); + + } else { + + if (bits == 16 && (instruction.types[operand] & BASE_INDEX)) { + add_prefix (ADDR_PREFIX_OPCODE); + } + + instruction.modrm.regmem = instruction.base_reg->number; + + instruction.sib.base = instruction.base_reg->number; + instruction.sib.scale = instruction.log2_scale_factor; + + if (instruction.base_reg->number == 5 && instruction.disp_operands == 0) { + + fake_zero_displacement = 1; + instruction.types[operand] |= DISP8; + + } + + if (instruction.index_reg) { + + instruction.sib.index = instruction.index_reg->number; + instruction.modrm.regmem = MODRM_REGMEM_TWO_BYTE_ADDRESSING; + + } else { + instruction.sib.index = SIB_INDEX_NO_INDEX_REGISTER; + } + + instruction.modrm.mode = modrm_mode_from_disp_size (instruction.types[operand]); + + } + + if (fake_zero_displacement) { + + struct expr *expr = &operand_exprs[operand_exprs_count++]; + instruction.disps[operand] = expr; + + expr->type = EXPR_TYPE_CONSTANT; + expr->add_number = 0; + expr->add_symbol = 0; + expr->op_symbol = 0; + + } + + } + + if (instruction.reg_operands) { + + int operand = 0; + + if (instruction.types[0] & (REG | SEGMENT1 | SEGMENT2 | CONTROL | 
DEBUG | TEST)) { + ; + } else if (instruction.types[1] & (REG | SEGMENT1 | SEGMENT2 | CONTROL | DEBUG | TEST)) { + operand = 1; + } else { + operand = 2; + } + + if (instruction.template.extension_opcode != NONE) { + instruction.modrm.regmem = instruction.regs[operand]->number; + } else { + instruction.modrm.reg = instruction.regs[operand]->number; + } + + if (instruction.mem_operands == 0) { + instruction.modrm.mode = 3; + } + + } + + if (instruction.template.extension_opcode != NONE) { + instruction.modrm.reg = instruction.template.extension_opcode; + } + + } + + } + + if (instruction.template.opcode_modifier & SEGSHORTFORM) { + + if ((instruction.template.base_opcode == POP_SEGMENT_SHORT) && (instruction.regs[0]->number == 1)) { + + report_at (get_filename (), get_line_number (), REPORT_ERROR, "'pop %%cs' is not valid"); + return 1; + + } + + instruction.template.base_opcode |= instruction.regs[0]->number << 3; + + } + + { + + int operand; + + for (operand = 0; operand < instruction.operands; operand++) { + + if (instruction.segments[operand]) { + + add_prefix (segment_prefixes[instruction.segments[operand]->number]); + break; + + } + + } + + } + + return 0; + +} + +static void output_jump (void) { + + struct symbol *symbol; + unsigned int relax_subtype; + + unsigned long offset; + unsigned long opcode_offset_in_buf; + + unsigned int code16 = 0; + + if (bits == 16) { + code16 = RELAX_SUBTYPE_CODE16_JUMP; + } + + if (instruction.prefixes[DATA_PREFIX]) { + + frag_append_1_char (instruction.prefixes[DATA_PREFIX]); + instruction.prefix_count--; + + code16 ^= RELAX_SUBTYPE_CODE16_JUMP; + + } + + if ((instruction.prefixes[SEGMENT_PREFIX] == CS_PREFIX_OPCODE) || (instruction.prefixes[SEGMENT_PREFIX] == DS_PREFIX_OPCODE)) { + + frag_append_1_char (instruction.prefixes[SEGMENT_PREFIX]); + instruction.prefix_count--; + + } + + if (instruction.prefix_count) { + report_at (get_filename (), get_line_number (), REPORT_WARNING, "skipping prefixes on this instruction"); + } + + 
frag_alloc_space (2 + 4); + + opcode_offset_in_buf = current_frag->fixed_size; + frag_append_1_char (instruction.template.base_opcode); + + if (instruction.disps[0]->type == EXPR_TYPE_CONSTANT) { + + /* "jmp 5" is converted to "temp_label: jmp 1 + temp_label + 5". + * The "1" is the size of the opcode + * and it is included by calling symbol_temp_new_now () + * after the opcode is written above. + */ + instruction.disps[0]->type = EXPR_TYPE_SYMBOL; + instruction.disps[0]->add_symbol = symbol_temp_new_now (); + + } + + symbol = instruction.disps[0]->add_symbol; + offset = instruction.disps[0]->add_number; + + if (!instruction.force_short_jump) { + + if (instruction.template.base_opcode == PC_RELATIVE_JUMP) { + relax_subtype = ENCODE_RELAX_SUBTYPE (RELAX_SUBTYPE_UNCONDITIONAL_JUMP, RELAX_SUBTYPE_SHORT_JUMP); + } else if (cpu_arch_flags & CPU_386) { + relax_subtype = ENCODE_RELAX_SUBTYPE (RELAX_SUBTYPE_CONDITIONAL_JUMP, RELAX_SUBTYPE_SHORT_JUMP); + } else { + relax_subtype = ENCODE_RELAX_SUBTYPE (RELAX_SUBTYPE_CONDITIONAL_JUMP86, RELAX_SUBTYPE_SHORT_JUMP); + } + + relax_subtype |= code16; + frag_set_as_variant (RELAX_TYPE_MACHINE_DEPENDENT, relax_subtype, symbol, offset, opcode_offset_in_buf); + + } else { + frag_set_as_variant (RELAX_TYPE_MACHINE_DEPENDENT, ENCODE_RELAX_SUBTYPE (RELAX_SUBTYPE_FORCED_SHORT_JUMP, RELAX_SUBTYPE_SHORT_JUMP), symbol, offset, opcode_offset_in_buf); + } + +} + +void machine_dependent_number_to_chars (unsigned char *p, unsigned int number, unsigned int size); + +static void output_call_or_jumpbyte (void) { + + struct fixup *fixup = 0; + int size; + + if (instruction.template.opcode_modifier & JUMPBYTE) { + + size = 1; + + if (instruction.prefixes[ADDR_PREFIX]) { + + frag_append_1_char (instruction.prefixes[ADDR_PREFIX]); + instruction.prefix_count--; + + } + + if ((instruction.prefixes[SEGMENT_PREFIX] == CS_PREFIX_OPCODE) || (instruction.prefixes[SEGMENT_PREFIX] == DS_PREFIX_OPCODE)) { + + frag_append_1_char 
(instruction.prefixes[SEGMENT_PREFIX]); + instruction.prefix_count--; + + } + + } else { + + unsigned int code16 = 0; + + if (bits == 16) { + code16 = RELAX_SUBTYPE_CODE16_JUMP; + } + + if (instruction.prefixes[DATA_PREFIX]) { + + frag_append_1_char (instruction.prefixes[DATA_PREFIX]); + instruction.prefix_count--; + + code16 ^= RELAX_SUBTYPE_CODE16_JUMP; + + } + + size = code16 ? 2 : 4; + + } + + if (instruction.prefix_count) { + report_at (get_filename (), get_line_number (), REPORT_WARNING, "skipping prefixes on this instruction"); + } + + if (state->model < 7 && state->procs.length > 0) { + + if (instruction.template.base_opcode == 0xE8 && size == 2 && state->model >= 4) { + + instruction.template.base_opcode = 0x9A; + size += 2; + + } + + } + + frag_append_1_char (instruction.template.base_opcode); + + if (instruction.template.opcode_modifier & JUMPBYTE || state->model < 4) { + + if (instruction.disps[0]->type == EXPR_TYPE_CONSTANT) { + + /** + *"call 5" is converted to "temp_label: call 1 + temp_label + 5". + * The "1" is the size of the opcode + * and it is included by calling symbol_temp_new_now () + * after the opcode is written above. + */ + instruction.disps[0]->type = EXPR_TYPE_SYMBOL; + instruction.disps[0]->add_symbol = symbol_temp_new_now (); + + } + + fixup = fixup_new_expr (current_frag, current_frag->fixed_size, size, instruction.disps[0], 1, RELOC_TYPE_DEFAULT); + frag_increase_fixed_size (size); + + } else if (state->procs.length == 0 || size == 2 || state->model == 7) { + + if (instruction.disps[0]->type == EXPR_TYPE_CONSTANT) { + + /* "call 5" is converted to "temp_label: call 1 + temp_label + 5". + * The "1" is the size of the opcode + * and it is included by calling symbol_temp_new_now () + * after the opcode is written above. 
+ */ + instruction.disps[0]->type = EXPR_TYPE_SYMBOL; + instruction.disps[0]->add_symbol = symbol_temp_new_now (); + + } + + fixup = fixup_new_expr (current_frag, current_frag->fixed_size, size, instruction.disps[0], 1, RELOC_TYPE_DEFAULT); + frag_increase_fixed_size (size); + + } else { + + unsigned char *p = frag_increase_fixed_size (size); + + if (instruction.disps[0]->type == EXPR_TYPE_CONSTANT) { + machine_dependent_number_to_chars (p, instruction.disps[0]->add_number, size); + } else { + fixup = fixup_new_expr (current_frag, p - current_frag->buf, size, instruction.disps[0], 1, RELOC_TYPE_FAR_CALL); + } + + } + + if (fixup && size == 1) { + fixup->fixup_signed = 1; + } + +} + +static void output_intersegment_jump (void) { + + unsigned int code16 = 0, size; + unsigned char *p; + + if (bits == 16) { + code16 = RELAX_SUBTYPE_CODE16_JUMP; + } + + if (instruction.prefixes[DATA_PREFIX]) { + + frag_append_1_char (instruction.prefixes[DATA_PREFIX]); + instruction.prefix_count--; + + code16 ^= RELAX_SUBTYPE_CODE16_JUMP; + + } + + if (instruction.prefix_count) { + report_at (get_filename (), get_line_number (), REPORT_WARNING, "skipping prefixes on this instruction"); + } + + size = code16 ? 2 : 4; + frag_append_1_char (instruction.template.base_opcode); + + /* size for the offset, 2 for the segment. 
*/ + p = frag_increase_fixed_size (size + 2); + + if (instruction.imms[1]->type == EXPR_TYPE_CONSTANT) { + + if ((size == 2) && !fits_in_unsigned_word (instruction.imms[1]->add_number) && !fits_in_signed_word (instruction.imms[1]->add_number)) { + + report_at (get_filename (), get_line_number (), REPORT_ERROR, "16-bit jump out of range."); + return; + + } + + machine_dependent_number_to_chars (p, instruction.imms[1]->add_number, size); + + } else { + fixup_new_expr (current_frag, p - current_frag->buf, size, instruction.imms[1], 0, RELOC_TYPE_DEFAULT); + } + + if (instruction.imms[0]->type != EXPR_TYPE_CONSTANT) { + report_at (get_filename (), get_line_number (), REPORT_ERROR, "cannot handle non absolute segment in '%s'", instruction.template.name); + } + + machine_dependent_number_to_chars (p + size, instruction.imms[0]->add_number, 2); /* only 2 bytes were reserved for the segment; writing 'size' bytes overran the reservation when size == 4 */ + +} + +static long convert_number_to_size (unsigned long value, int size) { + + unsigned long mask; + + switch (size) { + + case 1: + + mask = 0xff; + break; + + case 2: + + mask = 0xffff; + break; + + case 4: + + mask = 0xffffffff; + break; + + default: + + report_at (__FILE__, __LINE__, REPORT_INTERNAL_ERROR, "convert_number_to_size invalid case %i", size); + exit (EXIT_FAILURE); + + } + + if ((value & ~mask) && ((value & ~mask) != ~mask)) { + report_at (get_filename (), get_line_number (), REPORT_WARNING, "%ld shortened to %ld", value, value & mask); + } + + value &= mask; + return value; + +} + +static int disp_size (unsigned int operand_type) { + + if (operand_type & DISP8) { + return 1; + } + + if (operand_type & DISP16) { + return 2; + } + + return 4; + +} + +static void output_disps (void) { + + int operand; + + for (operand = 0; operand < instruction.operands; operand++) { + + if (instruction.types[operand] & DISP) { + + int size = disp_size (instruction.types[operand]); + + if (instruction.disps[operand]->type == EXPR_TYPE_CONSTANT) { + + unsigned long value = convert_number_to_size 
(instruction.disps[operand]->add_number, size); + machine_dependent_number_to_chars (frag_increase_fixed_size (size), value, size); + + } else { + + fixup_new_expr (current_frag, current_frag->fixed_size, size, instruction.disps[operand], 0, RELOC_TYPE_DEFAULT); + frag_increase_fixed_size (size); + + } + + } + + } + +} + +static int imm_size (unsigned int operand_type) { + + if (operand_type & (IMM8 | IMM8S)) { + return 1; + } + + if (operand_type & IMM16) { + return 2; + } + + return 4; + +} + +static void output_imms (void) { + + int operand; + + for (operand = 0; operand < instruction.operands; operand++) { + + if (instruction.types[operand] & IMM) { + + int size = imm_size (instruction.types[operand]); + + if (instruction.imms[operand]->type == EXPR_TYPE_CONSTANT) { + + unsigned long value = convert_number_to_size (instruction.imms[operand]->add_number, size); + machine_dependent_number_to_chars (frag_increase_fixed_size (size), value, size); + + } else { + + fixup_new_expr (current_frag, current_frag->fixed_size, size, instruction.imms[operand], 0, RELOC_TYPE_DEFAULT); + frag_increase_fixed_size (size); + + } + + } + + } + +} + + +enum expr_type machine_dependent_parse_operator (char **pp, char *name, char *original_saved_c, unsigned int operands) { + + unsigned int i; + + if (!name) { + + if (operands != 2) { + return EXPR_TYPE_INVALID; + } + + switch (**pp) { + + case ':': + + (*pp)++; + return EXPR_TYPE_FULL_PTR; + + case '[': + + (*pp)++; + return EXPR_TYPE_INDEX; + + } + + return EXPR_TYPE_INVALID; + + } + + for (i = 0; intel_types[i].name; i++) { + + if (xstrcasecmp (name, intel_types[i].name) == 0) { + break; + } + + } + + if (intel_types[i].name && *original_saved_c == ' ') { + + char *second_name, ch; + (*pp)++; + + second_name = *pp; + ch = get_symbol_name_end (pp); + + if (xstrcasecmp (second_name, "ptr") == 0) { + + second_name[-1] = *original_saved_c; + *original_saved_c = ch; + + return intel_types[i].expr_type; + + } + + **pp = ch; + *pp = 
second_name - 1; + + return EXPR_TYPE_ABSENT; + + } + + for (i = 0; intel_operators[i].name; i++) { + + if (xstrcasecmp (name, intel_operators[i].name) == 0) { + + if (operands != intel_operators[i].operands) { + return EXPR_TYPE_INVALID; + } + + return intel_operators[i].expr_type; + + } + + } + + return EXPR_TYPE_ABSENT; + +} + +extern struct section *current_section; + +struct section *machine_dependent_simplified_expression_read_into (char *start, char **pp, struct expr *expr) { + + struct section *ret_section; + int ret; + + memset (&intel_state, 0, sizeof (intel_state)); + intel_state.operand_modifier = EXPR_TYPE_ABSENT; + + instruction.operands = -1; + intel_syntax = -1; + + ret_section = expression_read_into (start, pp, expr); + ret = intel_simplify_expr (expr); + + intel_syntax = 1; + + if (!ret) { + + report_at (get_filename (), get_line_number (), REPORT_ERROR, "bad machine-dependent expression"); + expr->type = EXPR_TYPE_INVALID; + + } + + return ret_section; + +} + +int machine_dependent_force_relocation_local (struct fixup *fixup) { + return fixup->pcrel == 0; +} + +int machine_dependent_need_index_operator (void) { + return intel_syntax < 0; +} + +int machine_dependent_parse_name (char **pp, struct expr *expr, char *name, char *original_saved_c) { + + struct reg_entry *reg; + char *orig_end; + + orig_end = *pp; + **pp = *original_saved_c; + + reg = parse_register (name, pp); + + if (reg && orig_end <= *pp) { + + *original_saved_c = **pp; + **pp = '\0'; + + if (reg != &bad_register) { + + expr->type = EXPR_TYPE_REGISTER; + expr->add_number = reg - reg_table; + + } else { + expr->type = EXPR_TYPE_INVALID; + } + + return 1; + + } + + *pp = orig_end; + **pp = '\0'; + + return (intel_syntax ? 
intel_parse_name (expr, name) : 0); + +} + +signed long machine_dependent_estimate_size_before_relax (struct frag *frag, struct section *section) { + + if (symbol_get_section (frag->symbol) != section) { + + int size = (frag->relax_subtype & RELAX_SUBTYPE_CODE16_JUMP) ? 2 : 4; + + unsigned char *opcode_pos = frag->buf + frag->opcode_offset_in_buf; + unsigned long old_frag_fixed_size = frag->fixed_size; + + switch (TYPE_FROM_RELAX_SUBTYPE (frag->relax_subtype)) { + + case RELAX_SUBTYPE_UNCONDITIONAL_JUMP: + + *opcode_pos = 0xE9; + + fixup_new (frag, frag->fixed_size, size, frag->symbol, frag->offset, 1, RELOC_TYPE_DEFAULT); + frag->fixed_size += size; + + break; + + case RELAX_SUBTYPE_CONDITIONAL_JUMP86: + + if (size == 2) { + + /* Negates the condition and jumps past unconditional jump. */ + opcode_pos[0] ^= 1; + opcode_pos[1] = 3; + + /* Inserts the unconditional jump. */ + opcode_pos[2] = 0xE9; + + frag->fixed_size += 4; + fixup_new (frag, old_frag_fixed_size + 2, size, frag->symbol, frag->offset, 1, RELOC_TYPE_DEFAULT); + + break; + + } + + /* fall through. 
*/ + + case RELAX_SUBTYPE_CONDITIONAL_JUMP: + + opcode_pos[1] = opcode_pos[0] + 0x10; + opcode_pos[0] = TWOBYTE_OPCODE; + + fixup_new (frag, frag->fixed_size + 1, size, frag->symbol, frag->offset, 1, RELOC_TYPE_DEFAULT); + frag->fixed_size += size + 1; + + break; + + case RELAX_SUBTYPE_FORCED_SHORT_JUMP: + + size = 1; + + fixup_new (frag, frag->fixed_size, size, frag->symbol, frag->offset, 1, RELOC_TYPE_DEFAULT); + frag->fixed_size += size; + + break; + + default: + + report_at (__FILE__, __LINE__, REPORT_INTERNAL_ERROR, + "%s: %lu: machine_dependent_estimate_size_before_relax invalid case", frag->filename, frag->line_number); + exit (EXIT_FAILURE); + + } + + frag->relax_type = RELAX_TYPE_NONE_NEEDED; + return frag->fixed_size - old_frag_fixed_size; + + } + + return relax_table[frag->relax_subtype].size_of_variable_part; + +} + +signed long machine_dependent_pcrel_from (struct fixup *fixup) { + return (fixup->size + fixup->where + fixup->frag->address); +} + +signed long machine_dependent_relax_frag (struct frag *frag, struct section *section, signed long change) { + + unsigned long target; + + unsigned int new_subtype; + signed long aim, growth; + + target = frag->offset; + + if (frag->symbol) { + + target += symbol_get_value (frag->symbol); + + if ((section == symbol_get_section (frag->symbol)) && (frag->relax_marker != frag->symbol->frag->relax_marker)) { + target += change; + } + + } + + aim = target - frag->address - frag->fixed_size; + + if (aim > 0) { + + for (new_subtype = frag->relax_subtype; relax_table[new_subtype].next_subtype; new_subtype = relax_table[new_subtype].next_subtype) { + + if (aim <= relax_table[new_subtype].forward_reach) { + break; + } + + } + + } else if (aim < 0) { + + for (new_subtype = frag->relax_subtype; relax_table[new_subtype].next_subtype; new_subtype = relax_table[new_subtype].next_subtype) { + + if (aim >= relax_table[new_subtype].backward_reach) { + break; + } + + } + + } else { + return 0; + } + + growth = 
relax_table[new_subtype].size_of_variable_part; + growth -= relax_table[frag->relax_subtype].size_of_variable_part; + + if (growth) { frag->relax_subtype = new_subtype; } + return growth; + +} + +void machine_dependent_apply_fixup (struct fixup *fixup, unsigned long value) { + + unsigned char *p = fixup->where + fixup->frag->buf; + + if (!fixup->add_symbol) { + fixup->done = 1; + } + + if (fixup->reloc_type == RELOC_TYPE_FAR_CALL) { + + if (fixup->add_symbol == 0) { + + if ((long) value >= 65535) { + + value--; + + machine_dependent_number_to_chars (p, value % 16, 2); + machine_dependent_number_to_chars (p + 2, value / 16, 2); + + } else { + + value -= (fixup->where + fixup->frag->address); + value -= fixup->size; + + machine_dependent_number_to_chars (p - 1, 0x0E, 1); + machine_dependent_number_to_chars (p + 1, value + 1, 2); + + machine_dependent_number_to_chars (p, 0xE8, 1); + machine_dependent_number_to_chars (p + 3, 0x90, 1); + + } + + } else { + machine_dependent_number_to_chars (p, 0, fixup->size); + } + + } else { + machine_dependent_number_to_chars (p, value, fixup->size); + } + +} + +void machine_dependent_finish_frag (struct frag *frag) { + + unsigned char *opcode_pos; + + unsigned char *displacement_pos; + long displacement; + + int size; + unsigned long extension = 0; + + opcode_pos = frag->buf + frag->opcode_offset_in_buf; + + displacement_pos = opcode_pos + 1; + displacement = (symbol_get_value (frag->symbol) + frag->offset - frag->address - frag->fixed_size); + + if ((frag->relax_subtype & RELAX_SUBTYPE_LONG_JUMP) == 0) { + + displacement_pos = opcode_pos + 1; + extension = relax_table[frag->relax_subtype].size_of_variable_part; + + if (TYPE_FROM_RELAX_SUBTYPE (frag->relax_subtype) == RELAX_SUBTYPE_FORCED_SHORT_JUMP) { /* test this frag's jump type; the bare constant did not depend on the frag */ + + if (displacement > relax_table[frag->relax_subtype].forward_reach || displacement < relax_table[frag->relax_subtype].backward_reach) { + report_at (frag->filename, frag->line_number, REPORT_ERROR, "forced short jump out of range"); + } + + } + + } else { + + switch 
(frag->relax_subtype) { + + case ENCODE_RELAX_SUBTYPE (RELAX_SUBTYPE_UNCONDITIONAL_JUMP, RELAX_SUBTYPE_LONG_JUMP): + case ENCODE_RELAX_SUBTYPE (RELAX_SUBTYPE_UNCONDITIONAL_JUMP, RELAX_SUBTYPE_LONG16_JUMP): + + extension = relax_table[frag->relax_subtype].size_of_variable_part; + opcode_pos[0] = 0xE9; + + displacement_pos = opcode_pos + 1; + break; + + case ENCODE_RELAX_SUBTYPE (RELAX_SUBTYPE_CONDITIONAL_JUMP, RELAX_SUBTYPE_LONG_JUMP): + case ENCODE_RELAX_SUBTYPE (RELAX_SUBTYPE_CONDITIONAL_JUMP86, RELAX_SUBTYPE_LONG_JUMP): + case ENCODE_RELAX_SUBTYPE (RELAX_SUBTYPE_CONDITIONAL_JUMP, RELAX_SUBTYPE_LONG16_JUMP): + + extension = relax_table[frag->relax_subtype].size_of_variable_part; + + opcode_pos[1] = opcode_pos[0] + 0x10; + opcode_pos[0] = TWOBYTE_OPCODE; + + displacement_pos = opcode_pos + 2; + break; + + case ENCODE_RELAX_SUBTYPE (RELAX_SUBTYPE_CONDITIONAL_JUMP86, RELAX_SUBTYPE_LONG16_JUMP): + + extension = relax_table[frag->relax_subtype].size_of_variable_part; + + /* Negates the condition and jumps past unconditional jump. */ + opcode_pos[0] ^= 1; + opcode_pos[1] = 3; + + /* Inserts the unconditional jump. */ + opcode_pos[2] = 0xE9; + + displacement_pos = opcode_pos + 3; + break; + + } + + } + + size = DISPLACEMENT_SIZE_FROM_RELAX_SUBSTATE (frag->relax_subtype); + displacement -= extension; + + machine_dependent_number_to_chars (displacement_pos, displacement, size); + frag->fixed_size += extension; + +} + + +void machine_dependent_assemble_line (char *start, char *line) { + + memset (&instruction, 0, sizeof (instruction)); + memset (operand_exprs, 0, sizeof (operand_exprs)); + + operand_exprs_count = 0; + line = parse_instruction (line); + + if (!line || parse_operands (start, &line)) { + return; + } + + /** + * All Intel instructions have reversed operands except "bound" and some other. + * "ljmp" and "lcall" with 2 immediate operands also do not have operands reversed. 
+ */ + if (intel_syntax && instruction.operands > 1 && strcmp (current_templates->name, "bound") && !((instruction.types[0] & IMM) && (instruction.types[1] & IMM))) { + swap_operands (); + } + + optimize_size_of_disps (); + optimize_size_of_imms (); + + if (match_template () || process_suffix () || finalize_imms ()) { + return; + } + + if (instruction.template.operand_types[0] & IMPLICIT_REGISTER) { + instruction.reg_operands--; + } + + if (instruction.template.operand_types[1] & IMPLICIT_REGISTER) { + instruction.reg_operands--; + } + + if (instruction.operands) { + + if (process_operands ()) { + return; + } + + } + + /* int $3 should be converted to the one byte INT3. */ + if (instruction.template.base_opcode == INT_OPCODE && instruction.imms[0]->add_number == 3) { + + instruction.template.base_opcode = INT3_OPCODE; + instruction.operands = 0; + + } + + if (instruction.template.opcode_modifier & JUMP) { + output_jump (); + } else if (instruction.template.opcode_modifier & (CALL | JUMPBYTE)) { + output_call_or_jumpbyte (); + } else if (instruction.template.opcode_modifier & JUMPINTERSEGMENT) { + output_intersegment_jump (); + } else { + + unsigned int i; + + for (i = 0; i < ARRAY_SIZE (instruction.prefixes); i++) { + + if (instruction.prefixes[i]) { + frag_append_1_char (instruction.prefixes[i]); + } + + } + + if (instruction.template.base_opcode & 0xff00) { + frag_append_1_char ((instruction.template.base_opcode >> 8) & 0xff); + } + + frag_append_1_char (instruction.template.base_opcode & 0xff); + + if (instruction.template.opcode_modifier & MODRM) { + + frag_append_1_char (((instruction.modrm.regmem << 0) | (instruction.modrm.reg << 3) | (instruction.modrm.mode << 6))); + + if ((instruction.modrm.regmem == MODRM_REGMEM_TWO_BYTE_ADDRESSING) && (instruction.modrm.mode != 3) && !(instruction.base_reg && (instruction.base_reg->type & REG16))) { + frag_append_1_char (((instruction.sib.base << 0) | (instruction.sib.index << 3) | (instruction.sib.scale << 6))); + } + + 
/**
 * Stores the low SIZE bytes of NUMBER into P, least significant byte first
 * (little-endian).
 *
 * @param p       Destination buffer, at least SIZE bytes long.
 * @param number  Value to serialise.
 * @param size    Number of bytes to write.
 *
 * Positions beyond the width of unsigned int are written as zero.  Shifting
 * by the width of the type or more is undefined behaviour in C, so those
 * bytes are filled in explicitly rather than shifted out.
 */
void machine_dependent_number_to_chars (unsigned char *p, unsigned int number, unsigned int size) {

    unsigned int i;
    
    for (i = 0; i < size; i++) {
    
        if (i < sizeof (number)) {
            p[i] = (number >> (8 * i)) & 0xff;
        } else {
            p[i] = 0;
        }
    
    }

}
(1LU << 4) /* 0x00000010 */ +#define CALL (1LU << 5) /* 0x00000020 */ + +#define IGNORE_SIZE (1LU << 9) /* 0x00000200 */ +#define DEFAULT_SIZE (1LU << 24) /* 0x01000000 */ +#define SEGSHORTFORM (1LU << 18) /* 0x00040000 */ + +#define JUMPINTERSEGMENT (1LU << 11) /* 0x00000800 */ +#define JUMPBYTE (1LU << 12) /* 0x00001000 */ + +#define SIZE16 (1LU << 13) /* 0x00002000 */ +#define SIZE32 (1LU << 14) /* 0x00004000 */ + +#define IS_PREFIX (1LU << 15) /* 0x00008000 */ +#define IS_STRING (1LU << 16) /* 0x00010000 */ + +#define REG_DUPLICATION (1LU << 17) /* 0x00020000 */ + + unsigned long operand_types[MAX_OPERANDS]; + +#define REG8 0x00000001 +#define REG16 0x00000002 +#define REG32 0x00000004 + +#define REG (REG8 | REG16 | REG32) +#define WORD_REG (REG16 | REG32) + +#define SEGMENT1 0x00000008 +#define SEGMENT2 0x00020000 +#define CONTROL 0x00000010 +#define DEBUG 0x00100000 +#define TEST 0x00200000 + +#define IMM8 0x00000020 +#define IMM8S 0x00000040 +#define IMM16 0x00000080 +#define IMM32 0x00000100 + +#define IMM (IMM8 | IMM8S | IMM16 | IMM32) +#define ENCODABLEIMM (IMM8 | IMM16 | IMM32) + +#define DISP8 0x00000200 +#define DISP16 0x00000400 +#define DISP32 0x00000800 + +#define DISP (DISP8 | DISP16 | DISP32) +#define BASE_INDEX 0x00001000 + +/** + * INV_MEM is for instruction with modrm where general register + * encoding is allowed only in modrm.regmem (control register move). 
+ */ +#define INV_MEM 0x00040000 +#define ANY_MEM (DISP8 | DISP16 | DISP32 | BASE_INDEX | INV_MEM) + +#define ACC 0x00002000 +#define PORT 0x00004000 +#define SHIFT_COUNT 0x00008000 +#define JUMP_ABSOLUTE 0x00010000 + +#define IMPLICIT_REGISTER (SHIFT_COUNT | ACC) + + unsigned int cpu_flags; + +#define CPU_8086 (1U << 0) +#define CPU_186 (1U << 1) +#define CPU_286 (1U << 2) +#define CPU_386 (1U << 3) +#define CPU_486 (1U << 4) +#define CPU_586 (1U << 5) +#define CPU_686 (1U << 6) + +#define CPU_8087 (1U << 7) +#define CPU_287 (1U << 8) +#define CPU_387 (1U << 9) +#define CPU_687 (1U << 10) + +#define CPU_CMOV (1U << 12) + +}; + +#define REG_FLAT_NUMBER (~0U) + +struct reg_entry { + + const char *name; + unsigned int type, number; + +}; + +#endif /* _INTEL_H */ diff --git a/kwd.c b/kwd.c new file mode 100644 index 0000000..73f1209 --- /dev/null +++ b/kwd.c @@ -0,0 +1,634 @@ +/****************************************************************************** + * @file kwd.c + *****************************************************************************/ +#include +#include +#include + +#include "as.h" +#include "expr.h" +#include "fixup.h" +#include "frag.h" +#include "hashtab.h" +#include "kwd.h" +#include "lex.h" +#include "lib.h" +#include "report.h" +#include "section.h" +#include "symbol.h" + +extern struct section *machine_dependent_simplified_expression_read_into (char *start, char **pp, struct expr *expr); + +static struct hashtab hashtab_pseudo_ops = { 0 }; +static struct hashtab hashtab_data_pseudo_ops = { 0 }; + +static int read_character (const char *start, char **pp, unsigned long *val) { + + int ch, i; + + switch (ch = *((*pp)++)) { + + case '"': + + return 1; + + case '\0': + + report_at (get_filename (), get_line_number (), REPORT_WARNING, "null character in string; '\"' inserted"); + + (*pp)--; /* Might be the end of line buffer. 
*/ + return 1; + + case '\n': + + report_at (get_filename (), get_line_number (), REPORT_WARNING, "unterminated string; newline inserted"); + + set_line_number (get_line_number () + 1); + *val = ch; + + break; + + case '\\': + + switch (ch = *((*pp)++)) { + + case '0': case '1': + case '2': case '3': + case '4': case '5': + case '6': case '7': + + for (i = 0, *val = 0; isdigit (ch) && (i < 3); (ch = *((*pp)++)), i++) { + *val = *val * 8 + (ch - '0'); + } + + (*pp)--; + break; + + case 'x': case 'X': + + ch = *((*pp)++); + + for (i = 0, *val = 0; isxdigit (ch) && (i < 3); (ch = *((*pp)++)), i++) { + + if (isdigit (ch)) { + *val = *val * 16 + (ch - '0'); + } else if (isupper (ch)) { + *val = *val * 16 + ((ch = 'A') + 10); + } else { + *val = *val * 16 + ((ch = 'a') + 10); + } + + } + + (*pp)--; + break; + + case 'r': + + *val = 13; + break; + + case 'n': + + *val = 10; + break; + + case '\\': + case '"': + + *val = ch; + break; + + default: + + report_line_at (get_filename (), get_line_number (), REPORT_ERROR, start, *pp - 1, "unknown escape sequence: '\\%c'", ch); + + *val = ch; + break; + + } + + break; + + default: + + *val = ch; + break; + + } + + return 0; + +} + +static void align_bytes (char *start, char **pp, int first_arg_is_bytes) { + + signed long alignment; + int fill_specified; + + signed long fill_value = 0, max_bytes_to_skip; + signed long i; + + alignment = get_result_of_absolute_expression (start, pp); + + if (first_arg_is_bytes) { + + /* Converts to log2. 
*/ + for (i = 0; (alignment & 1) == 0; alignment >>= 1, i++); + + if (alignment != 1) { + report_at (get_filename (), get_line_number (), REPORT_ERROR, "alignment is not a power of 2"); + } + + alignment = i; + + } + + if (**pp != ',') { + + fill_specified = 0; + max_bytes_to_skip = 0; + + } else { + + *pp = skip_whitespace (*pp + 1); + + if (**pp == ',') { + + fill_specified = 0; + *pp = skip_whitespace (*pp + 1); + + } else { + + fill_value = get_result_of_absolute_expression (start, pp); + fill_specified = 1; + + } + + + if (**pp != ',') { + max_bytes_to_skip = 0; + } else { + + *pp = skip_whitespace (*pp + 1); + max_bytes_to_skip = get_result_of_absolute_expression (start, pp); + + } + + } + + if (fill_specified) { + frag_align (alignment, fill_value, max_bytes_to_skip); + } else { + + if (current_section == text_section) { + frag_align_code (alignment, max_bytes_to_skip); + } else { + frag_align (alignment, 0, max_bytes_to_skip); + } + + } + +} + +static void handle_constant (char *start, char **pp, int size) { + + struct expr expr, val; + + char *temp, *arg; + signed long repeat; + + while (1) { + + *pp = skip_whitespace (*pp); + + if (**pp == '"') { + + unsigned long val; + int i; + + (*pp)++; + + while (!read_character (start, pp, &val)) { + + for (i = 0; i < size; i++) { + frag_append_1_char ((val >> (8 * i)) & 0xff); + } + + } + + } else { + + machine_dependent_simplified_expression_read_into (start, pp, &expr); + + if (!is_end_of_line[(int) **pp]) { + + temp = (*pp = skip_whitespace (*pp)); + + if (is_name_beginner ((int) **pp)) { + + if (!(arg = symname (pp))) { + goto no_repeat; + } + + if (xstrcasecmp (arg, "dup")) { + + free (arg); + goto no_repeat; + + } + + *pp = skip_whitespace (*pp); + machine_dependent_simplified_expression_read_into (start, pp, &val); + + if (val.type != EXPR_TYPE_CONSTANT) { + + report_at (get_filename (), get_line_number (), REPORT_ERROR, "invalid value for dup"); + + ignore_rest_of_line (pp); + return; + + } + + if 
(val.add_number != 0 && current_section == bss_section) { + + report_at (get_filename (), get_line_number (), REPORT_WARNING, "attempt to initialize memory in a nobits section; ignored"); + val.add_number = 0; + + } + + if (val.add_number > 0xff) { + + report_at (get_filename (), get_line_number (), REPORT_WARNING, "dup value %lu truncated to %lu", val.add_number, val.add_number & 0xff); + val.add_number &= 0xff; + + } + + if (expr.type == EXPR_TYPE_CONSTANT) { + + repeat = expr.add_number; + + if (repeat == 0) { + + report_at (get_filename (), get_line_number (), REPORT_WARNING, "dup repeat count is zero; ignored"); + goto next; + + } + + if (repeat < 0) { + + report_at (get_filename (), get_line_number (), REPORT_WARNING, "dup repeate count is negative; ignored"); + goto next; + + } + + memset (frag_increase_fixed_size (repeat), val.add_number, repeat); + + } else { + + struct symbol *expr_symbol = make_expr_symbol (&expr); + + unsigned char *p = frag_alloc_space (symbol_get_value (expr_symbol)); + *p = val.add_number; + + frag_set_as_variant (RELAX_TYPE_SPACE, 0, expr_symbol, 0, 0); + + } + + goto next; + + } + + no_repeat: + + *pp = temp; + + } + + if (expr.type == EXPR_TYPE_CONSTANT) { + + int i; + + for (i = 0; i < size; i++) { + frag_append_1_char ((expr.add_number >> (8 * i)) & 0xff); + } + + } else if (expr.type != EXPR_TYPE_INVALID) { + + fixup_new_expr (current_frag, current_frag->fixed_size, size, &expr, 0, RELOC_TYPE_DEFAULT); + frag_increase_fixed_size (size); + + } else { + + report_at (get_filename (), get_line_number (), REPORT_ERROR, "value is not a constant"); + return; + + } + + } + + next: + + *pp = skip_whitespace (*pp); + + if (**pp != ',') { + break; + } + + (*pp)++; + + } + +} + + +static void handler_align (char *start, char **pp) { + align_bytes (start, pp, 1); +} + +static void handler_bss (char *start, char **pp) { + + (void) start; + (void) pp; + + section_set (bss_section); + +} + +static void handler_byte (char *start, char **pp) { + 
handle_constant (start, pp, 1); +} + +static void handler_data (char *start, char **pp) { + + (void) start; + (void) pp; + + section_set (data_section); + +} + +static void handler_end (char *start, char **pp) { + + char *name, *caret = skip_whitespace (*pp); + + if ((name = symname (pp))) { + + if (!(state->end_symbol = symbol_find (name))) { + report_line_at (get_filename (), get_line_number (), REPORT_ERROR, start, caret, "undefined symbol '%s'", name); + } else if (symbol_is_undefined (state->end_symbol) || state->end_symbol->scope == SYMBOL_SCOPE_EXTERN) { + report_line_at (get_filename (), get_line_number (), REPORT_ERROR, start, caret, "undefined symbol '%s'", name); + } + + free (name); + + } + +} + +static void handler_global (char *start, char **pp) { + + struct symbol *symbol; + char *name, *caret; + + for (;;) { + + caret = (*pp = skip_whitespace (*pp)); + + if (!(name = symname (pp))) { + + report_line_at (get_filename (), get_line_number (), REPORT_ERROR, start, caret, "expected symbol name"); + + ignore_rest_of_line (pp); + return; + + } + + if ((symbol = symbol_find (name))) { + + if (symbol->scope == SYMBOL_SCOPE_EXTERN) { + report_line_at (get_filename (), get_line_number (), REPORT_ERROR, start, caret, "symbol '%s' is already defined", name); + } else { + + symbol->scope = SYMBOL_SCOPE_GLOBAL; + symbol_set_external (symbol); + + } + + } else { + + symbol = symbol_make (name); + symbol_add_to_chain (symbol); + + symbol->scope = SYMBOL_SCOPE_GLOBAL; + symbol_set_external (symbol); + + } + + *pp = skip_whitespace (*pp); + + if (**pp != ',') { + break; + } + + (*pp)++; + + } + +} + +static void handler_long (char *start, char **pp) { + handle_constant (start, pp, 4); +} + +static void handler_stack (char *start, char **pp) { + + struct section *curr_sect; + struct expr expr; + + machine_dependent_simplified_expression_read_into (start, pp, &expr); + + if (expr.type == EXPR_TYPE_CONSTANT) { + + curr_sect = current_section; + section_set (bss_section); 
+ + memset (frag_increase_fixed_size (expr.add_number), 0, expr.add_number); + section_set (curr_sect); + + } else { + + report_at (get_filename (), get_line_number (), REPORT_ERROR, "value is not a constant"); + return; + + } + +} + +static void handler_text (char *start, char **pp) { + + (void) start; + (void) pp; + + section_set (text_section); + +} + +static void handler_word (char *start, char **pp) { + handle_constant (start, pp, 2); +} + +static struct pseudo_op_entry pseudo_op_table[] = { + + { ".code", &handler_text }, + { ".bss", &handler_bss }, + { ".data", &handler_data }, + { ".data?", &handler_bss }, + { ".stack", &handler_stack }, + { ".text", &handler_text }, + + { "align", &handler_align }, + { "end", &handler_end }, + { "global", &handler_global }, + { "public", &handler_global }, + + { 0, 0 } + +}; + +static struct pseudo_op_entry data_pseudo_op_table[] = { + + { "db", &handler_byte }, + { "dd", &handler_long }, + { "dw", &handler_word }, + + { 0, 0 } + +}; + +void install_pseudo_op_table (struct pseudo_op_entry *table) { + + struct pseudo_op_entry *entry; + struct hashtab_name *key; + + for (entry = table; entry->name; entry++) { + + if (hashtab_get_key (&hashtab_pseudo_ops, entry->name)) { + + report_at (program_name, 0, REPORT_ERROR, "duplicate entry '%s'", entry->name); + continue; + + } + + if (!(key = hashtab_alloc_name (entry->name))) { + + report_at (program_name, 0, REPORT_ERROR, "failed to allocate memory for '%s'", entry->name); + continue; + + } + + hashtab_put (&hashtab_pseudo_ops, key, entry); + + } + +} + +void install_data_pseudo_op_table (struct pseudo_op_entry *table) { + + struct pseudo_op_entry *entry; + struct hashtab_name *key; + + for (entry = table; entry->name; entry++) { + + if (hashtab_get_key (&hashtab_pseudo_ops, entry->name)) { + + report_at (program_name, 0, REPORT_ERROR, "duplicate entry '%s'", entry->name); + continue; + + } + + if (!(key = hashtab_alloc_name (entry->name))) { + + report_at (program_name, 0, 
REPORT_ERROR, "failed to allocate memory for '%s'", entry->name); + continue; + + } + + hashtab_put (&hashtab_pseudo_ops, key, entry); + + } + + for (entry = table; entry->name; entry++) { + + if (hashtab_get_key (&hashtab_data_pseudo_ops, entry->name)) { + + report_at (program_name, 0, REPORT_ERROR, "duplicate entry '%s'", entry->name); + continue; + + } + + if (!(key = hashtab_alloc_name (entry->name))) { + + report_at (program_name, 0, REPORT_ERROR, "failed to allocate memory for '%s'", entry->name); + continue; + + } + + hashtab_put (&hashtab_data_pseudo_ops, key, entry); + + } + +} + +struct pseudo_op_entry *find_poe (char *name) { + + struct hashtab_name *key; + struct pseudo_op_entry *entry; + + char *lname = to_lower (name); + + if ((key = hashtab_get_key (&hashtab_pseudo_ops, lname))) { + + if ((entry = hashtab_get (&hashtab_pseudo_ops, key))) { + + free (lname); + return entry; + + } + + } + + free (lname); + return 0; + +} + +struct pseudo_op_entry *find_data_poe (char *name) { + + struct hashtab_name *key; + struct pseudo_op_entry *entry; + + char *lname = to_lower (name); + + if ((key = hashtab_get_key (&hashtab_data_pseudo_ops, lname))) { + + if ((entry = hashtab_get (&hashtab_data_pseudo_ops, key))) { + + free (lname); + return entry; + + } + + } + + free (lname); + return 0; + +} + +void keywords_init (void) { + + install_pseudo_op_table (pseudo_op_table); + install_data_pseudo_op_table (data_pseudo_op_table); + +} diff --git a/kwd.h b/kwd.h new file mode 100644 index 0000000..70821ab --- /dev/null +++ b/kwd.h @@ -0,0 +1,19 @@ +/****************************************************************************** + * @file kwd.h + *****************************************************************************/ +#ifndef _KWD_H +#define _KWD_H + +struct pseudo_op_entry { + + const char *name; + void (*handler) (char *start, char **pp); + +}; + +struct pseudo_op_entry *find_poe (char *name); +struct pseudo_op_entry *find_data_poe (char *name); + +void 
/******************************************************************************
 * @file            lex.h
 *****************************************************************************/
#ifndef     _LEX_H
#define     _LEX_H

/** Character may appear inside a name. */
#define     LEX_NAME_PART               0x0001
/** Character may start a name. */
#define     LEX_NAME_START              0x0002

extern char is_end_of_line[];
extern char lex_table[];

/**
 * Character classification against lex_table.
 *
 * The argument is converted to unsigned char before indexing: callers pass
 * plain char values, which are negative for bytes above 0x7f where char is
 * signed and would otherwise index in front of the table.
 */
#define     is_name_beginner(c)         (lex_table[(unsigned char) (c)] & LEX_NAME_START)
#define     is_name_part(c)             (lex_table[(unsigned char) (c)] & LEX_NAME_PART)

void lex_init (void);

#endif      /* _LEX_H */
+#if defined (_WIN32) +# define PATHSEP ';' +#else +# define PATHSEP ':' +#endif + +struct as_option { + + const char *name; + int idx, flgs; + +}; + +#define AS_OPTION_NO_ARG 0 +#define AS_OPTION_HAS_ARG 1 + +#define AS_OPTION_NONE 0 +#define AS_OPTION_DEFINE 1 +#define AS_OPTION_FORMAT 2 +#define AS_OPTION_HELP 3 +#define AS_OPTION_INCLUDE 4 +#define AS_OPTION_LISTING 5 +#define AS_OPTION_OUTFILE 6 +#define AS_OPTION_UNDEF 7 + +static struct as_option opts[] = { + + { "-D", AS_OPTION_DEFINE, AS_OPTION_HAS_ARG }, + { "-I", AS_OPTION_INCLUDE, AS_OPTION_HAS_ARG }, + { "-U", AS_OPTION_UNDEF, AS_OPTION_HAS_ARG }, + + { "-f", AS_OPTION_FORMAT, AS_OPTION_HAS_ARG }, + { "-l", AS_OPTION_LISTING, AS_OPTION_HAS_ARG }, + { "-o", AS_OPTION_OUTFILE, AS_OPTION_HAS_ARG }, + + { "--help", AS_OPTION_HELP, AS_OPTION_NO_ARG }, + { 0, 0, 0 } + +}; + + +static int _strstart (const char *val, const char **str) { + + const char *p = *str; + const char *q = val; + + while (*q != '\0') { + + if (*p != *q) { + return 0; + } + + ++p; + ++q; + + } + + *str = p; + return 1; + +} + +static void _print_usage (void) { + + if (program_name) { + + fprintf (stderr, "Usage: %s [options] file\n\n", program_name); + fprintf (stderr, "Options:\n\n"); + + fprintf (stderr, " -Dname[=value] Define 'name' with value 'value'.\n"); + fprintf (stderr, " -I DIR Add DIR to include search path.\n"); + fprintf (stderr, " -Uname Undefine 'name'.\n"); + + fprintf (stderr, "\n"); + fprintf (stderr, " -l FILE Print listings to file FILE.\n"); + fprintf (stderr, " -o OBJFILE Name the object-file output OBJFILE (default a.out).\n"); + + fprintf (stderr, "\n"); + fprintf (stderr, " --help Print this help information.\n"); + + fprintf (stderr, "\n"); + + } + +} + + +char get_symbol_name_end (char **pp) { + + char c = **pp; + + if (is_name_beginner ((int) (*pp)[0])) { + + while (is_name_part ((int) (*pp)[0])) { + (*pp)++; + } + + c = **pp; + + } + + **pp = '\0'; + return c; + +} + +char *skip_whitespace (char *__p) { + + 
/**
 * Compares at most __len characters of two strings, ignoring case.
 *
 * @return -1, 0 or 1 when the compared prefix of __s1 is respectively less
 *         than, equal to or greater than that of __s2.
 *
 * Reaching the length limit with no difference is a match: the remainder
 * of either string is not examined.
 */
int xstrncasecmp (const char *__s1, const char *__s2, unsigned long __len) {

    const unsigned char *p1 = (const unsigned char *) __s1;
    const unsigned char *p2 = (const unsigned char *) __s2;
    
    for (; __len > 0; __len--, p1++, p2++) {
    
        int c1 = tolower ((int) *p1);
        int c2 = tolower ((int) *p2);
        
        if (c1 != c2) {
            return (c1 < c2) ? (-1) : (1);
        }
        
        /* Both strings ended together before the limit was reached. */
        if (*p1 == '\0') {
            return (0);
        }
    
    }
    
    return (0);

}
"-I%.*s/", len, temp); + + } else { + + path = xmalloc (2 + (p - temp) + 1); + sprintf (path, "-I%.*s", len, temp); + + } + + list_append (&state->pplist, path); + + } + + temp = (p + 1); + + } while (*p != '\0'); + + free (in); + +} + +void ignore_rest_of_line (char **pp) { + + while (!is_end_of_line[(int) **pp]) { + + if ((*pp)++[0] == '\"') { + + while (**pp && **pp != '\"') { + + if ((*pp)++[0] == '\\' && **pp) { + (*pp)++; + } + + } + + } + + } + +} + +void parse_args (int argc, char **argv, int optind) { + + struct as_option *popt; + const char *optarg, *r; + + if (argc <= optind) { + + _print_usage (); + exit (EXIT_SUCCESS); + + } + + while (optind < argc) { + + r = argv[optind++]; + + if (r[0] != '-' || r[1] == '\0') { + + if (state->ifile) { + + report_at (program_name, 0, REPORT_ERROR, "more than one file passed as input"); + exit (EXIT_FAILURE); + + } + + state->ifile = xstrdup (r); + continue; + + } + + for (popt = opts; ; popt++) { + + const char *p1 = popt->name; + const char *r1 = r; + + if (!p1) { + + report_at (program_name, 0, REPORT_ERROR, "invalid option -- '%s'", r); + exit (EXIT_FAILURE); + + } + + if (!_strstart (p1, &r1)) { + continue; + } + + optarg = r1; + + if (popt->flgs & AS_OPTION_HAS_ARG) { + + if (*r1 == '\0') { + + if (optind >= argc) { + + report_at (program_name, 0, REPORT_ERROR, "argument to '%s' is missing", r); + exit (EXIT_FAILURE); + + } + + optarg = argv[optind++]; + + } + + } else if (*r1 != '\0') { + continue; + } + + break; + + } + + switch (popt->idx) { + + case AS_OPTION_DEFINE : { + + char *arg; + + if (!strchr (optarg, '=')) { + + arg = xmalloc (2 + strlen (optarg) + 3); + sprintf (arg, "-D%s=1", optarg); + + } else { + + arg = xmalloc (2 + strlen (optarg) + 1); + sprintf (arg, "-D%s", optarg); + + } + + list_append (&state->pplist, arg); + break; + + } + + case AS_OPTION_FORMAT: { + + if (xstrcasecmp (optarg, "bin") == 0) { + + state->format = AS_OUTPUT_BIN; + break; + + } + + if (xstrcasecmp (optarg, "obj") == 0) { 
+ + state->format = AS_OUTPUT_OBJ; + break; + + } + + report_at (program_name, 0, REPORT_ERROR, "unrecognised output format '%s'", optarg); + exit (EXIT_FAILURE); + + } + + case AS_OPTION_HELP: { + + _print_usage (); + exit (EXIT_SUCCESS); + + } + + case AS_OPTION_INCLUDE: { + + add_include_path (optarg); + break; + + } + + case AS_OPTION_LISTING: { + + if (state->lfile) { + + report_at (program_name, 0, REPORT_ERROR, "multiple listing files provided"); + exit (EXIT_FAILURE); + + } + + state->lfile = xstrdup (optarg); + break; + + } + + case AS_OPTION_OUTFILE: { + + if (state->ofile) { + + report_at (program_name, 0, REPORT_ERROR, "multiple output files provided"); + exit (EXIT_FAILURE); + + } + + state->ofile = xstrdup (optarg); + break; + + } + + case AS_OPTION_UNDEF: { + + char *arg = xmalloc (2 + strlen (optarg) + 1); + sprintf (arg, "-U%s", optarg); + + list_append (&state->pplist, arg); + break; + + } + + default: { + + report_at (program_name, 0, REPORT_ERROR, "unsupported option '%s'", r); + exit (EXIT_FAILURE); + + } + + } + + } + + if (!state->ofile) { state->ofile = "a.out"; } + +} + +void *xmalloc (unsigned long __size) { + + void *ptr = malloc (__size); + + if (!ptr && __size) { + + report_at (program_name, 0, REPORT_ERROR, "memory full (malloc)"); + exit (EXIT_FAILURE); + + } + + memset (ptr, 0, __size); + return ptr; + +} + +void *xrealloc (void *__ptr, unsigned long __size) { + + void *ptr = realloc (__ptr, __size); + + if (!ptr && __size) { + + report_at (program_name, 0, REPORT_ERROR, "memory full (realloc)"); + exit (EXIT_FAILURE); + + } + + return ptr; + +} + + +static const char *filename = 0; +static unsigned long line_number = 0; + +const char *get_filename (void) { + return filename; +} + +unsigned long get_line_number (void) { + return line_number; +} + +void get_filename_and_line_number (const char **__filename_p, unsigned long *__line_number_p) { + + *__filename_p = filename; + *__line_number_p = line_number; + +} + +void 
set_filename_and_line_number (const char *__filename, unsigned long __line_number) { + + filename = __filename; + line_number = __line_number; + +} + +void set_filename (const char *__filename) { + filename = __filename; +} + +void set_line_number (unsigned long __line_number) { + line_number = __line_number; +} diff --git a/lib.h b/lib.h new file mode 100644 index 0000000..72e9461 --- /dev/null +++ b/lib.h @@ -0,0 +1,40 @@ +/****************************************************************************** + * @file lib.h + *****************************************************************************/ +#ifndef _LIB_H +#define _LIB_H + +#define ARRAY_SIZE(arr) (sizeof (arr) / sizeof (arr[0])) + +char get_symbol_name_end (char **pp); + +char *skip_whitespace (char *__p); +char *symname (char **pp); + +char *xstrdup (const char *__p); +char *xstrndup (const char *__p, unsigned long __len); + +int xstrcasecmp (const char *__s1, const char *__s2); +int xstrncasecmp (const char *__s1, const char *__s2, unsigned long __len); + +char *to_lower (const char *__p); +void ignore_rest_of_line (char **pp); + + +void add_include_path (const char *__p); +void parse_args (int argc, char **argv, int optind); + +void *xmalloc (unsigned long __size); +void *xrealloc (void *__ptr, unsigned long __size); + + +const char *get_filename (void); +unsigned long get_line_number (void); + +void set_filename (const char *__filename); +void set_line_number (unsigned long __line_number); + +void get_filename_and_line_number (const char **__filename_p, unsigned long *__line_number_p); +void set_filename_and_line_number (const char *__filename, unsigned long __line_number); + +#endif /* _LIB_H */ diff --git a/list.c b/list.c new file mode 100644 index 0000000..a7e475b --- /dev/null +++ b/list.c @@ -0,0 +1,39 @@ +/****************************************************************************** + * @file list.c + *****************************************************************************/ +#include 
"lib.h" +#include "list.h" + +void list_append (struct list **list, void *data) { + + struct list *new = xmalloc (sizeof (*new)); + struct list *old = (*list); + + if (old) { + + new->next = old->next; + old->next = new; + + } else { + new->next = new; + } + + new->data = data; + *list = new; + +} + +unsigned int nlist (struct list *list) { + + unsigned int n = 0; + + if (list) { + + struct list *p = list; + do { n++; } while ((p = p->next) != list); + + } + + return n; + +} diff --git a/list.h b/list.h new file mode 100644 index 0000000..4825c64 --- /dev/null +++ b/list.h @@ -0,0 +1,16 @@ +/****************************************************************************** + * @file list.h + *****************************************************************************/ +#ifndef _LIST_H +#define _LIST_H + +struct list { + + struct list *next; + void *data; + +}; + +void list_append (struct list **list, void *data); + +#endif /* _LIST_H */ diff --git a/listing.c b/listing.c new file mode 100644 index 0000000..3102eb4 --- /dev/null +++ b/listing.c @@ -0,0 +1,304 @@ +/****************************************************************************** + * @file listing.c + *****************************************************************************/ +#include +#include + +#include "as.h" +#include "frag.h" +#include "lib.h" +#include "report.h" +#include "symbol.h" + +struct listing_message { + + char *message; + struct listing_message *next; + +}; + +struct ll { + + char *line; + + const char *filename; + unsigned long line_number; + + struct frag *frag; + + unsigned long where; + unsigned long size; + + int variant_frag; + struct ll *next; + + struct listing_message *messages, *last_message; + +}; + +static struct ll *first_line = NULL; +static struct ll *last_line = NULL; + +static void internal_add_line (char *line, const char *filename, unsigned long line_number) { + + struct ll *ll = xmalloc (sizeof (*ll)); + + ll->line = line; + ll->filename = filename; + ll->line_number 
/**
 * Records source text for the listing, one listing entry per physical line.
 *
 * @param real_line      Text to record; may contain several '\n'.
 * @param real_line_len  Number of bytes of real_line to examine.
 * @param filename       File the text came from.
 * @param line_number    Line number of the first line in real_line;
 *                       incremented for each further line.
 *
 * Every entry receives its own NUL-terminated copy, without the newline.
 * Text after the last newline, including an empty input, is recorded as a
 * final entry; nothing further is recorded when the text ends in a newline.
 */
void add_listing_line (char *real_line, unsigned long real_line_len, const char *filename, unsigned long line_number) {

    unsigned long start, i;
    char *line;
    
    for (start = 0, i = 0; i < real_line_len; i++) {
    
        if (real_line[i] != '\n') {
            continue;
        }
        
        line = xmalloc (i - start + 1);
        
        memcpy (line, real_line + start, i - start);
        line[i - start] = '\0';
        
        internal_add_line (line, filename, line_number);
        line_number++;
        
        if (i == real_line_len - 1) {
            return;
        }
        
        start = i + 1;
    
    }
    
    line = xmalloc (i - start + 1);
    
    memcpy (line, real_line + start, i - start);
    
    /* The terminator belongs at index (i - start), the last allocated byte. */
    line[i - start] = '\0';
    
    internal_add_line (line, filename, line_number);

}
size = ll->size; + } + + fprintf (f, "%05lu ", ll->line_number); + + for (i = 0; i < size; i++) { + + if ((i > 0) && ((i % 16) == 0)) { + fprintf (f, "\n%05lu ", ll->line_number); + } + + fprintf (f, "%02X", ll->frag->buf[ll->where + i]); + + } + + if ((i > 0) && ((i % 16) == 0)) { + fprintf (f, "\n%05lu ", ll->line_number); + } + + i %= 16; + + for ( ; i < 20; i++) { + fprintf (f, " "); + } + + if (ll->frag) { + fprintf (f, "%04lX ", ll->frag->address + ll->where); + } else { + fprintf (f, " "); + } + + fprintf (f, " %s\n", ll->line); + + for (lm = ll->messages; lm; lm = lm->next) { + fprintf (f, "***** %s\n", lm->message); + } + + } + + if (symbols != NULL) { + + unsigned long local_symbols = 0; + unsigned long global_symbols = 0; + unsigned long undefined_symbols = 0; + + for (symbol = symbols; symbol; symbol = symbol->next) { + + if (symbol_is_section_symbol (symbol)) { + continue; + } + + if (symbol_is_undefined (symbol)) { + undefined_symbols++; + } else if (symbol_is_external (symbol)) { + global_symbols++; + } else { + local_symbols++; + } + + } + + if (local_symbols > 0) { + + fprintf (f, "\nLOCAL SYMBOLS:\n\n"); + + for (symbol = symbols; symbol; symbol = symbol->next) { + + if (symbol_is_section_symbol (symbol)) { + continue; + } + + if (symbol_is_undefined (symbol) || symbol_is_external (symbol)) { + continue; + } + + fprintf (f, " %08lx %s\n", symbol_get_value (symbol), symbol_get_name (symbol)); + + } + + } else { + fprintf (f, "\nNO LOCAL SYMBOLS\n"); + } + + if (global_symbols > 0) { + + fprintf (f, "\nGLOBAL SYMBOLS:\n\n"); + + for (symbol = symbols; symbol; symbol = symbol->next) { + + if (symbol_is_section_symbol (symbol)) { + continue; + } + + if (symbol_is_undefined (symbol) || !symbol_is_external (symbol)) { + continue; + } + + fprintf (f, " %08lx %s\n", symbol_get_value (symbol), symbol_get_name (symbol)); + + } + + } else { + fprintf (f, "\nNO GLOBAL SYMBOLS\n"); + } + + if (undefined_symbols > 0) { + + fprintf (f, "\nEXTERNAL 
SYMBOLS:\n\n"); + + for (symbol = symbols; symbol; symbol = symbol->next) { + + if (symbol_is_section_symbol (symbol)) { + continue; + } + + if (symbol_is_undefined (symbol)) { + fprintf (f, " %08lx %s\n", symbol_get_value (symbol), symbol_get_name (symbol)); + } + + } + + } else { + fprintf (f, "\nNO EXTERNAL SYMBOLS\n"); + } + + } + + if (state->lfile) { fclose (f); } + +} + +void update_listing_line (struct frag *frag) { + + if (last_line == NULL || last_line->frag == NULL) { return; } + + if (last_line->frag->next == frag) { + last_line->variant_frag = 1; + } else { + last_line->size = last_line->frag->fixed_size - last_line->where; + } + +} diff --git a/listing.h b/listing.h new file mode 100644 index 0000000..9e965db --- /dev/null +++ b/listing.h @@ -0,0 +1,15 @@ +/****************************************************************************** + * @file listing.h + *****************************************************************************/ +#ifndef _LISTING_H +#define _LISTING_H + +#include "frag.h" + +void add_listing_line (char *real_line, unsigned long real_line_len, const char *filename, unsigned long line_number); +void add_listing_message (char *message, const char *filename, unsigned long line_number); + +void generate_listing (void); +void update_listing_line (struct frag *frag); + +#endif /* _LISTING_H */ diff --git a/ll.c b/ll.c new file mode 100644 index 0000000..cb0833f --- /dev/null +++ b/ll.c @@ -0,0 +1,316 @@ +/****************************************************************************** + * @file ll.c + *****************************************************************************/ +#include +#include +#include +#include + +#include "ll.h" +#include "report.h" + +struct load_line_data { + + char *line, *real_line; + + unsigned long capacity, read_size; + unsigned long end_of_prev_real_line; + + unsigned long *new_line_number_p; + +}; + +#define CAPACITY_INCREMENT 256 +extern void get_filename_and_line_number (const char **__filename_p, 
unsigned long *__line_number_p); + +extern void *xmalloc (unsigned long __size); +extern void *xrealloc (void *__ptr, unsigned long __size); + +int load_line (char **line_p, char **line_end_p, char **real_line_p, unsigned long *real_line_len_p, unsigned long *newlines_p, FILE *ifp, void **load_line_internal_data_p) { + + struct load_line_data *ll_data = *load_line_internal_data_p; + unsigned long pos_in_line = 0, pos_in_real_line = 0, newlines = 0; + + int in_escape = 0, in_double_quote = 0, in_single_quote = 0; + int in_line_comment = 0, skipping_spaces = 0; + + if (ll_data->end_of_prev_real_line) { + + memmove (ll_data->real_line, ll_data->real_line + ll_data->end_of_prev_real_line, ll_data->read_size - ll_data->end_of_prev_real_line); + ll_data->read_size -= ll_data->end_of_prev_real_line; + + } + + while (1) { + + if (pos_in_line >= ll_data->capacity || pos_in_real_line >= ll_data->capacity) { + + ll_data->capacity += CAPACITY_INCREMENT; + + ll_data->line = xrealloc (ll_data->line, ll_data->capacity + 2); + ll_data->real_line = xrealloc (ll_data->real_line, ll_data->capacity + 1); + + } + + if (pos_in_real_line >= ll_data->read_size) { + + ll_data->read_size = fread (ll_data->real_line + pos_in_real_line, 1, ll_data->capacity - pos_in_real_line, ifp) + pos_in_real_line; + + if (ferror (ifp)) { + return 4; + } + + ll_data->real_line[ll_data->read_size] = '\0'; + + } + + copying: + + if (in_line_comment) { + + while (pos_in_real_line < ll_data->read_size) { + + if (ll_data->real_line[pos_in_real_line] == '\n') { + + in_line_comment = 0; + break; + + } + + pos_in_real_line++; + + } + + } + + if (skipping_spaces) { + + while (pos_in_real_line < ll_data->read_size) { + + if (ll_data->real_line[pos_in_real_line] != ' ' && ll_data->real_line[pos_in_real_line] != '\t') { + + skipping_spaces = 0; + break; + + } + + pos_in_real_line++; + + } + + } + + while (pos_in_real_line < ll_data->read_size && pos_in_line < ll_data->capacity) { + + ll_data->line[pos_in_line] = 
ll_data->real_line[pos_in_real_line++]; + + if (in_double_quote || in_single_quote) { + + if (in_escape) { + in_escape = 0; + } else if (in_double_quote && ll_data->line[pos_in_line] == '"') { + in_double_quote = 0; + } else if (in_single_quote && ll_data->line[pos_in_line] == '\'') { + in_single_quote = 0; + } else if (ll_data->line[pos_in_line] == '\\') { + in_escape = 1; + } + + if (ll_data->line[pos_in_line] == '\n') { + + int pos = pos_in_line; + + if (pos > 0 && ll_data->line[pos - 1] == '\r') { + ll_data->line[--pos] = '\n'; + } + + if (pos > 0) { + + if (ll_data->line[pos - 1] != '\\') { + + ll_data->line[pos + 1] = '\0'; + ll_data->end_of_prev_real_line = pos_in_real_line; + + *line_p = ll_data->line; + *line_end_p = ll_data->line + pos; + + *real_line_p = ll_data->real_line; + *real_line_len_p = pos_in_real_line; + + *newlines_p = newlines; + return 0; + + } else { + + pos_in_line = pos - 1; + + newlines++; + goto copying; + + } + + } + + } + + } else { + + if (ll_data->line[pos_in_line] == ' ' || ll_data->line[pos_in_line] == '\t') { + + /*ll_data->line[pos_in_line++] = ' ';*/ + + /*skipping_spaces = 1; + goto copying;*/ + + if (ll_data->line[pos_in_line] == '\t') { + + int cnt = 4 - (pos_in_line % 4); + int i = 0; + + ll_data->capacity += CAPACITY_INCREMENT; + + ll_data->line = xrealloc (ll_data->line, ll_data->capacity + 2); + ll_data->real_line = xrealloc (ll_data->real_line, ll_data->capacity + 1); + + for (; i < cnt; i++) { + ll_data->line[pos_in_line++] = ' '; + } + + continue; + + } + + } else if (ll_data->line[pos_in_line] == '\n') { + + if (pos_in_line > 0 && ll_data->line[pos_in_line - 1] == '\r') { + ll_data->line[--pos_in_line] = '\n'; + } + + ll_data->line[pos_in_line + 1] = '\0'; + ll_data->end_of_prev_real_line = pos_in_real_line; + + *line_p = ll_data->line; + *line_end_p = ll_data->line + pos_in_line; + + *real_line_p = ll_data->real_line; + *real_line_len_p = pos_in_real_line; + + *newlines_p = newlines; + return 0; + + } else if 
(ll_data->line[pos_in_line] == '\\') { + + /*ll_data->line[pos_in_line] = ' ';*/ + pos_in_line--; + + while (pos_in_real_line < ll_data->read_size) { + + if (ll_data->real_line[pos_in_real_line] == '\r' || ll_data->real_line[pos_in_real_line] == '\n') { + + if (ll_data->real_line[pos_in_real_line] == '\r') { + pos_in_real_line++; + } + + if (ll_data->real_line[pos_in_real_line] == '\n') { + pos_in_real_line++; + } + + break; + + } + + pos_in_real_line++; + + } + + newlines++; + continue; + + } else if (ll_data->line[pos_in_line] == '"') { + in_double_quote = 1; + } else if (ll_data->line[pos_in_line] == '\'') { + in_single_quote = 1; + } else if (ll_data->line[pos_in_line] == ';') { + + in_line_comment = 1; + goto copying; + + } + + } + + pos_in_line++; + + } + + if (feof (ifp)) { + + const char *filename; + unsigned long line_number; + + if (ll_data->read_size == 0) { + return 1; + } + + ll_data->line[pos_in_line] = '\n'; + ll_data->line[pos_in_line + 1] = '\0'; + + get_filename_and_line_number (&filename, &line_number); + + if (ll_data->new_line_number_p) { + line_number = *(ll_data->new_line_number_p); + } else { + line_number = 0; + } + + report_at (filename, line_number, REPORT_WARNING, "end of file not at end of line; newline inserted"); + + ll_data->end_of_prev_real_line = 0; + ll_data->read_size = 0; + + *line_p = ll_data->line; + *line_end_p = ll_data->line + pos_in_line; + + *real_line_p = ll_data->real_line; + *real_line_len_p = pos_in_real_line; + + *newlines_p = newlines; + return 0; + + } + + } + +} + +void load_line_destroy_internal_data (void *load_line_internal_data) { + + struct load_line_data *ll_data; + + if (load_line_internal_data) { + + ll_data = load_line_internal_data; + + free (ll_data->line); + free (ll_data->real_line); + free (ll_data); + + } + +} + +void *load_line_create_internal_data (unsigned long *new_line_number_p) { + + struct load_line_data *ll_data = xmalloc (sizeof (*ll_data));; + + ll_data->capacity = 0; + ll_data->line = 
NULL; + ll_data->real_line = NULL; + + ll_data->read_size = 0; + ll_data->end_of_prev_real_line = 0; + + ll_data->new_line_number_p = new_line_number_p; + return ll_data; + +} diff --git a/ll.h b/ll.h new file mode 100644 index 0000000..b6754ef --- /dev/null +++ b/ll.h @@ -0,0 +1,13 @@ +/****************************************************************************** + * @file ll.h + *****************************************************************************/ +#ifndef _LL_H +#define _LL_H + +#include +int load_line (char **line_p, char **line_end_p, char **real_line_p, unsigned long *real_line_len_p, unsigned long *newlines_p, FILE *ifp, void **load_line_internal_data_p); + +void load_line_destroy_internal_data (void *load_line_internal_data); +void *load_line_create_internal_data (unsigned long *new_line_number_p); + +#endif /* _LL_H */ diff --git a/macro.c b/macro.c new file mode 100644 index 0000000..f196af6 --- /dev/null +++ b/macro.c @@ -0,0 +1,555 @@ +/****************************************************************************** + * @file macro.c + *****************************************************************************/ +#include +#include +#include + +#include "cstr.h" +#include "hashtab.h" +#include "lex.h" +#include "lib.h" +#include "macro.h" +#include "report.h" +#include "vector.h" + +static struct hashtab hashtab_macros = { 0 }; + +void remove_all_macros (void) { + + struct macro *m; + int i; + + for (i = 0; i < hashtab_macros.capacity; i++) { + + if (!(m = (struct macro *) &hashtab_macros.entries[i])) { + continue; + } + + free (m->name); + free (m->value); + + } + + memset (&hashtab_macros, 0, sizeof (hashtab_macros)); + +} + +struct hashtab_name *find_macro (char *sname) { + + struct hashtab_name *key; + + if ((key = hashtab_get_key (&hashtab_macros, sname))) { + return key; + } + + return 0; + +} + +struct macro *get_macro (struct hashtab_name *key) { + return hashtab_get (&hashtab_macros, key); +} + +void add_macro (char *start, char **pp, 
int report_line) { + + char *sname, *caret = *pp, *arg; + unsigned int len; + + struct hashtab_name *key; + struct macro *m; + + if (is_end_of_line[(int) **pp]) { + + if (report_line) { + report_line_at (get_filename (), get_line_number (), REPORT_ERROR, start, caret - 1, "no macro name give in %%define directive"); + } else { + report_at (get_filename (), get_line_number (), REPORT_ERROR, "no macro name give in %%define directive"); + } + + return; + + } + + if (!(sname = symname (pp))) { + + if (report_line) { + report_line_at (get_filename (), get_line_number (), REPORT_ERROR, start, caret, "macro names must be identifiers"); + } else { + report_at (get_filename (), get_line_number (), REPORT_ERROR, "macro names must be identifiers"); + } + + return; + + } + + if (strcmp (sname, "defined") == 0) { + + if (report_line) { + report_line_at (get_filename (), get_line_number (), REPORT_ERROR, start, caret, "\"%s\" cannout be used as a macro name", sname); + } else { + report_at (get_filename (), get_line_number (), REPORT_ERROR, "\"%s\" cannout be used as a macro name", sname); + } + + return; + + } + + if (**pp != '(' && !isspace ((int) **pp)) { + + if (report_line) { + report_line_at (get_filename (), get_line_number (), REPORT_ERROR, start, *pp, "whitespace is required after macro name"); + } else { + report_at (get_filename (), get_line_number (), REPORT_ERROR, "whitespace is required after macro name"); + } + + return; + + } + + if ((key = find_macro (sname))) { + + if (report_line) { + report_line_at (get_filename (), get_line_number (), REPORT_WARNING, start, caret, "\"%s\" redefined", sname); + } else { + report_at (get_filename (), get_line_number (), REPORT_WARNING, "\"%s\" redefined", sname); + } + + if ((m = hashtab_get (&hashtab_macros, key))) { + + while ((arg = vec_pop (&m->args))) { + free (arg); + } + + free (m); + + } + + hashtab_remove (&hashtab_macros, key); + + } else { + + if (!(key = hashtab_alloc_name (sname))) { + + free (sname); + return; + 
+ } + + } + + m = xmalloc (sizeof (*m)); + m->nargs = -1; + + m->type = MACRO_USER; + m->name = sname; + + if (**pp == '(') { + + m->nargs = 0; + (*pp)++; + + while (!is_end_of_line[(int) **pp]) { + + *pp = skip_whitespace (*pp); + + if (**pp == ')') { + break; + } + + if (m->is_variadic) { + + if (report_line) { + report_line_at (get_filename (), get_line_number (), REPORT_ERROR, start, *pp, "expected ')' after '...'"); + } else { + report_at (get_filename (), get_line_number (), REPORT_ERROR, "expected ')' after '...'"); + } + + while ((arg = vec_pop (&m->args))) { + free (arg); + } + + free (m); + return; + + } + + arg = *pp; + + while (!is_end_of_line[(int) *arg] && !isspace ((int) *arg)) { + + if (*arg == ',' || *arg == ')') { + break; + } + + arg++; + + } + + if (arg - *pp == 3) { + + if (memcmp (*pp, "...", 3) == 0) { + + m->is_variadic = 1; + + *pp = arg; + continue; + + } + + } + + if ((sname = symname (pp))) { + + vec_push (&m->args, sname); + m->nargs++; + + *pp = skip_whitespace (*pp); + + if (**pp != ',' && **pp != ')') { + + if (report_line) { + report_line_at (get_filename (), get_line_number (), REPORT_ERROR, start, *pp, "expected ',' or, ')' after parameter"); + } else { + report_at (get_filename (), get_line_number (), REPORT_ERROR, "expected ',' or, ')' after parameter"); + } + + goto err; + + } + + if (**pp == ')') { + break; + } + + (*pp)++; + continue; + + } + + if (report_line) { + report_line_at (get_filename (), get_line_number (), REPORT_ERROR, start, *pp, "expected parameter name"); + } else { + report_at (get_filename (), get_line_number (), REPORT_ERROR, "expected parameter name"); + } + + goto err; + + } + + if (**pp != ')') { + + if (report_line) { + report_line_at (get_filename (), get_line_number (), REPORT_ERROR, start, *pp, "expected ')' before end of line"); + } else { + report_at (get_filename (), get_line_number (), REPORT_ERROR, "expected ')' before end of line"); + } + + goto err; + + } + + (*pp)++; + + } + + *pp = 
skip_whitespace (*pp);
+
+    m->value = xstrdup (*pp);
+    len = strlen (m->value);
+
+    /* An empty value has no trailing end-of-line character to strip; len is
+     * unsigned, so without the guard len - 1 would wrap and index far outside
+     * the buffer. */
+    if (len > 0 && is_end_of_line[(int) m->value[len - 1]]) {
+        m->value[len - 1] = '\0';
+    }
+
+    hashtab_put (&hashtab_macros, key, m);
+
+    if (!m->is_variadic) {
+
+        char *haystack = m->value, *needle = "__VA_ARGS__";
+        char *p;
+
+        /* Warn once for every occurrence of __VA_ARGS__ in a non-variadic body. */
+        while ((p = strstr (haystack, needle))) {
+
+            haystack = (p + strlen (needle));
+
+            if (report_line) {
+                report_line_at (get_filename (), get_line_number (), REPORT_WARNING, m->value, p, "%s can only appear in the expansion of a variadic macro", needle);
+            } else {
+                report_at (get_filename (), get_line_number (), REPORT_WARNING, "%s can only appear in the expansion of a variadic macro", needle);
+            }
+
+        }
+
+    }
+
+    return;
+
+err:
+
+    while ((arg = vec_pop (&m->args))) {
+        free (arg);
+    }
+
+    free (m);
+    return;
+
+}
+
+/**
+ * Handles an undefine request: parses the macro name at *pp and removes the
+ * macro from the table if it exists.
+ *
+ * start is the beginning of the line (used for caret diagnostics), and
+ * report_line selects between report_line_at and report_at.  A missing
+ * identifier is reported as an error; anything left on the line after the
+ * name produces a warning.
+ */
+void remove_macro (char *start, char **pp, int report_line) {
+
+    char *sname, *caret = *pp;
+
+    struct hashtab_name *key;
+    struct macro *mp;
+
+    if (!(sname = symname (pp))) {
+
+        if (report_line) {
+            report_line_at (get_filename (), get_line_number (), REPORT_ERROR, start, caret, "macro names must be identifiers");
+        } else {
+            report_at (get_filename (), get_line_number (), REPORT_ERROR, "macro names must be identifiers");
+        }
+
+        return;
+
+    }
+
+    if ((key = find_macro (sname))) {
+
+        if ((mp = hashtab_get (&hashtab_macros, key))) {
+            free (mp);
+        }
+
+        hashtab_remove (&hashtab_macros, key);
+
+    }
+
+    /* symname returns a heap string (add_macro frees or keeps it); here it is
+     * only needed for the lookup above, so release it. */
+    free (sname);
+
+    *pp = skip_whitespace (*pp);
+
+    if (!is_end_of_line[(int) **pp]) {
+
+        if (report_line) {
+            report_line_at (get_filename (), get_line_number (), REPORT_WARNING, start, *pp, "extra tokens at end of %%undef directive");
+        } else {
+            report_at (get_filename (), get_line_number (), REPORT_WARNING, "extra tokens at end of %%undef directive");
+        }
+
+    }
+
+}
+
+/**
+ * Collects the arguments of a macro invocation.
+ *
+ * When *pp points at '(', every argument up to the closing ')' is duplicated
+ * into a static vector (reset on each call) and *pp is moved past the ')'.
+ * A missing ')' is reported and 0 is returned.  Without a '(' the empty
+ * vector is returned.
+ */
+static struct vector *get_macro_args (char *start, char *macro_name, char **pp) {
+
+    static struct vector args_list = { 0 };
+    char *arg,
saved_ch, ch;
+
+    memset (&args_list, 0, sizeof (args_list));
+
+    if (**pp == '(') {
+
+        (*pp)++;
+
+        for (;;) {
+
+            *pp = skip_whitespace (*pp);
+
+            if (is_end_of_line[(int) **pp] || **pp == ')') {
+                break;
+            }
+
+            arg = *pp;
+            ch = *arg;
+
+            if (ch == '"' || ch == '\'') {
+
+                /* Quoted argument: scan to the matching quote, stepping over
+                 * backslash-escaped quote characters. */
+                (*pp)++;
+
+                while (!is_end_of_line[(int) **pp]) {
+
+                    if (**pp == '\\') {
+
+                        (*pp)++;
+
+                        if (**pp == ch) {
+                            (*pp)++;
+                        }
+
+                        continue;
+
+                    }
+
+                    if (**pp == ch) { break; }
+                    (*pp)++;
+
+                }
+
+                if (**pp == ch) {
+                    (*pp)++;
+                } else {
+                    report_line_at (get_filename (), get_line_number (), REPORT_WARNING, start, arg, "missing terminating %c character", ch);
+                }
+
+            } else {
+
+                /* Bare argument: ends at whitespace, ',' or ')'. */
+                while (!is_end_of_line[(int) **pp] && **pp != ' ' && **pp != '\t') {
+
+                    if (**pp == ',' || **pp == ')') {
+                        break;
+                    }
+
+                    (*pp)++;
+
+                }
+
+            }
+
+            /* Temporarily terminate the argument in place so it can be duplicated. */
+            saved_ch = **pp;
+            **pp = '\0';
+
+            vec_push (&args_list, xstrdup (arg));
+
+            **pp = saved_ch;
+
+            if (*(*pp = skip_whitespace (*pp)) == ',') {
+                (*pp)++;
+            }
+
+        }
+
+        if (**pp != ')') {
+
+            report_line_at (get_filename (), get_line_number (), REPORT_ERROR, start, *pp, "unterminated argument list invoking macro \"%s\"", macro_name);
+            return 0;
+
+        }
+
+        (*pp)++;
+
+    }
+
+    return &args_list;
+
+}
+
+/**
+ * Builds the expansion of m->value with the invocation arguments substituted.
+ *
+ * Parameter names are replaced by the argument at the same position,
+ * __VA_ARGS__ by the arguments from position m->nargs onwards joined with
+ * ", ", and "##" is dropped together with one space preceding it.  args_list
+ * may be NULL or shorter than m->nargs (the caller has already reported
+ * that); missing arguments expand to nothing.
+ *
+ * Returns a newly allocated string.
+ */
+static char *process_value (struct macro *m, struct vector *args_list) {
+
+    char *line = m->value, *name, *arg;
+    int i;
+
+    struct cstring str;
+    cstr_new (&str);
+
+    while (!is_end_of_line[(int) *line]) {
+
+        if (line[0] == '#' && line[1] == '#') {
+
+            if (str.size && ((char *) str.data)[str.size - 1] == ' ') { str.size--; }
+
+            line += 2;
+            continue;
+
+        }
+
+        if (!is_name_beginner ((int) *line)) {
+
+            cstr_ccat (&str, *line++);
+            continue;
+
+        }
+
+        name = symname (&line);
+
+        if (strcmp (name, "__VA_ARGS__") == 0) {
+
+            for (i = m->nargs; args_list && i < args_list->length; i++) {
+
+                arg = args_list->data[i];
+                cstr_cat (&str, arg, strlen (arg));
+
+                if (i < args_list->length - 1) {
+
+                    cstr_ccat (&str, ',');
+                    cstr_ccat (&str, ' ');
+
+                }
+
+            }
+
+            free (name);
+            continue;
+
+        }
+
+        for (i = 0; i < m->nargs; i++) {
+
+            if (strcmp (m->args.data[i], name) == 0) {
+                break;
+            }
+
+        }
+
+        if (i < m->nargs) {
+
+            /* Parameter: substitute the matching argument when one was given. */
+            if (args_list && i < args_list->length) {
+
+                arg = args_list->data[i];
+                cstr_cat (&str, arg, strlen (arg));
+
+            }
+
+        } else {
+            cstr_cat (&str, name, strlen (name));
+        }
+
+        /* Going back through the loop condition (rather than jumping into the
+         * body) keeps the scan from stepping over the end of the value. */
+        free (name);
+
+    }
+
+    /* NOTE(review): str's own buffer is not released here; confirm whether
+     * cstr.h provides a routine for that. */
+    cstr_ccat (&str, '\0');
+    return xstrdup (str.data);
+
+}
+
+/**
+ * Expands macro m at *pp.  Macros with parameters (or variadic ones) have
+ * their argument list parsed and substituted into a newly allocated string;
+ * all other macros yield m->value itself.
+ */
+char *process_macro (char *start, char **pp, struct macro *m) {
+
+    struct vector *args_list;
+    char *caret;
+
+    if (m->nargs > 0 || m->is_variadic) {
+
+        args_list = 0;
+        caret = *pp;
+
+        if ((args_list = get_macro_args (start, m->name, pp))) {
+
+            if (args_list->length < m->nargs) {
+
+                char *tmp = (m->nargs == 1 ? " argument" : " arguments");
+                char *tmp2 = (args_list->length == 1 ? "only " : "");
+
+                report_line_at (get_filename (), get_line_number (), REPORT_ERROR, start, caret, "macro \"%s\" requires %d%s, but %s%d given", m->name, m->nargs, tmp, tmp2, args_list->length);
+
+            } else if (args_list->length > m->nargs) {
+
+                if (!m->is_variadic) {
+
+                    char *tmp = (args_list->length == 1 ? " argument" : " arguments");
+                    char *tmp2 = (m->nargs == 1 ?
"just " : ""); + + report_line_at (get_filename (), get_line_number (), REPORT_ERROR, start, caret, "macro \"%s\" passed %d%s, but takes %s%d", m->name, args_list->length, tmp, tmp2, m->nargs); + + } + + } + + } + + return process_value (m, args_list); + + } + + return m->value; + +} + +void push_macro (struct hashtab_name *key, struct macro *m) { + hashtab_put (&hashtab_macros, key, m); +} diff --git a/macro.h b/macro.h new file mode 100644 index 0000000..220bef0 --- /dev/null +++ b/macro.h @@ -0,0 +1,34 @@ +/****************************************************************************** + * @file macro.h + *****************************************************************************/ +#ifndef _MACRO_H +#define _MACRO_H + +#include "vector.h" + +#define MACRO_BUILTIN 0 +#define MACRO_USER 1 + +struct macro { + + char *name, *value; + int is_variadic, type; + + struct vector args; + int nargs; + +}; + +#include "hashtab.h" +struct hashtab_name *find_macro (char *sname); +struct macro *get_macro (struct hashtab_name *key); + +void remove_all_macros (void); + +void add_macro (char *start, char **pp, int report_line); +void remove_macro (char *start, char **pp, int report_line); + +char *process_macro (char *start, char **pp, struct macro *m); +void push_macro (struct hashtab_name *key, struct macro *m); + +#endif /* _MACRO_H */ diff --git a/obj.c b/obj.c new file mode 100644 index 0000000..9eda912 --- /dev/null +++ b/obj.c @@ -0,0 +1,302 @@ +/****************************************************************************** + * @file obj.c + *****************************************************************************/ +#include +#include +#include + +#include "as.h" +#include "fixup.h" +#include "frag.h" +#include "obj.h" +#include "report.h" +#include "section.h" +#include "symbol.h" + +static void write_to_byte_array (unsigned char *arr, unsigned long value, int size) { + + int i; + + for (i = 0; i < size; i++) { + arr[i] = (value >> (8 * i)) & 0xff; + } + +} + +static 
int output_relocation (struct fixup *fixup, unsigned long start_address_of_section, FILE *fp) { + + struct relocation_info reloc; + + long log2_of_size, size; + unsigned long r_symbolnum; + + write_to_byte_array (reloc.r_address, fixup->frag->address + fixup->where - start_address_of_section, 4); + + if (symbol_is_section_symbol (fixup->add_symbol)) { + + if (symbol_get_section (fixup->add_symbol) == text_section) { + r_symbolnum = N_TEXT; + } else if (symbol_get_section (fixup->add_symbol) == data_section) { + r_symbolnum = N_DATA; + } else if (symbol_get_section (fixup->add_symbol) == bss_section) { + r_symbolnum = N_BSS; + } else { + + report_at (__FILE__, __LINE__, REPORT_INTERNAL_ERROR, "invalid section %s", section_get_name (symbol_get_section (fixup->add_symbol))); + exit (EXIT_FAILURE); + + } + + } else { + + struct symbol *symbol; + long symbol_number; + + for (symbol = symbols, symbol_number = 0; symbol && (symbol != fixup->add_symbol); symbol = symbol->next) { + + if (symbol_is_external (symbol) || symbol_is_undefined (symbol)) { + symbol_number++; + } + + } + + r_symbolnum = symbol_number; + r_symbolnum |= (1LU << 31); + + } + + if (fixup->pcrel) { + r_symbolnum |= (1LU << 28); + } + + for (log2_of_size = -1, size = fixup->size; size; size >>= 1, log2_of_size++); + r_symbolnum |= ((unsigned long) log2_of_size << 29); + + if (fixup->reloc_type == RELOC_TYPE_FAR_CALL) { + r_symbolnum |= (1LU << 27); + } + + write_to_byte_array (reloc.r_symbolnum, r_symbolnum, 4); + + if (fwrite (&reloc, sizeof (reloc), 1, fp) != 1) { + + report_at (program_name, 0, REPORT_ERROR, "error writing text relocations"); + return 1; + + } + + return 0; + +} + +void output_obj (FILE *fp) { + + unsigned long start_address_of_data; + struct fixup *fixup; + + unsigned long symbol_table_size; + struct symbol *symbol; + + unsigned long string_table_pos; + struct frag *frag; + + unsigned long text_size, data_size, bss_size; + unsigned long tr_size, dr_size; + + struct exec header; + 
memset (&header, 0, sizeof (header)); + + write_to_byte_array (header.a_info, 0x00640000 | MAGIC, 4); + + if ((symbol = state->end_symbol)) { + write_to_byte_array (header.a_entry, symbol_get_value (symbol), 4); + } + + if (fseek (fp, sizeof (header), SEEK_SET)) { + + report_at (program_name, 0, REPORT_ERROR, "failed whilst seeking passed header"); + return; + + } + + section_set (text_section); + text_size = 0; + + for (frag = current_frag_chain->first_frag; frag; frag = frag->next) { + + if (frag->fixed_size == 0) { + continue; + } + + if (fwrite (frag->buf, frag->fixed_size, 1, fp) != 1) { + + report_at (program_name, 0, REPORT_ERROR, "failed whilst writing text"); + return; + + } + + text_size += frag->fixed_size; + + } + + write_to_byte_array (header.a_text, text_size, 4); + + section_set (data_section); + data_size = 0; + + for (frag = current_frag_chain->first_frag; frag; frag = frag->next) { + + if (frag->fixed_size == 0) { + continue; + } + + if (fwrite (frag->buf, frag->fixed_size, 1, fp) != 1) { + + report_at (program_name, 0, REPORT_ERROR, "failed whilst writing data"); + return; + + } + + data_size += frag->fixed_size; + + } + + write_to_byte_array (header.a_data, data_size, 4); + + section_set (bss_section); + bss_size = 0; + + for (frag = current_frag_chain->first_frag; frag; frag = frag->next) { + + if (frag->fixed_size == 0) { + continue; + } + + bss_size += frag->fixed_size; + + } + + write_to_byte_array (header.a_bss, bss_size, 4); + + section_set (text_section); + tr_size = 0; + + start_address_of_data = 0; + + for (fixup = current_frag_chain->first_fixup; fixup; fixup = fixup->next) { + + if (fixup->done) { + continue; + } + + if (output_relocation (fixup, start_address_of_data, fp)) { + return; + } + + tr_size += sizeof (struct relocation_info); + + } + + write_to_byte_array (header.a_trsize, tr_size, 4); + + section_set (data_section); + dr_size = 0; + + start_address_of_data = current_frag_chain->first_frag->address; + + for (fixup = 
current_frag_chain->first_fixup; fixup; fixup = fixup->next) { + + if (fixup->done) { + continue; + } + + if (output_relocation (fixup, start_address_of_data, fp)) { + return; + } + + dr_size += sizeof (struct relocation_info); + + } + + write_to_byte_array (header.a_drsize, dr_size, 4); + + symbol_table_size = 0; + string_table_pos = 4; + + for (symbol = symbols; symbol; symbol = symbol->next) { + + if (symbol_is_external (symbol) || symbol_is_undefined (symbol)) { + + struct nlist symbol_entry; + memset (&symbol_entry, 0, sizeof (symbol_entry)); + + write_to_byte_array (symbol_entry.n_strx, string_table_pos, 4); + string_table_pos += strlen (symbol->name) + 1; + + if (symbol->section == undefined_section) { + symbol_entry.n_type = N_UNDF; + } else if (symbol->section == text_section) { + symbol_entry.n_type = N_TEXT; + } else if (symbol->section == data_section) { + symbol_entry.n_type = N_DATA; + } else if (symbol->section == bss_section) { + symbol_entry.n_type = N_BSS; + } else if (symbol->section == absolute_section) { + symbol_entry.n_type = N_ABS; + } else { + + report_at (__FILE__, __LINE__, REPORT_INTERNAL_ERROR, "invalid section %s", section_get_name (symbol->section)); + exit (EXIT_FAILURE); + + } + + write_to_byte_array (symbol_entry.n_value, symbol_get_value (symbol), 4); + symbol_entry.n_type |= N_EXT; + + if (fwrite (&symbol_entry, sizeof (symbol_entry), 1, fp) != 1) { + + report_at (program_name, 0, REPORT_ERROR, "error writing symbol table"); + return; + + } + + symbol_table_size += sizeof (symbol_entry); + + } + + } + + write_to_byte_array (header.a_syms, symbol_table_size, 4); + + if (fwrite (&string_table_pos, 4, 1, fp) != 1) { + + report_at (program_name, 0, REPORT_ERROR, "failed to write string table"); + return; + + } + + for (symbol = symbols; symbol; symbol = symbol->next) { + + if (symbol_is_external (symbol) || symbol_is_undefined (symbol)) { + + if (fwrite (symbol->name, strlen (symbol->name) + 1, 1, fp) != 1) { + + report_at 
(program_name, 0, REPORT_ERROR, "failed to write string table"); + return; + + } + + } + + } + + rewind (fp); + + if (fwrite (&header, sizeof (header), 1, fp) != 1) { + + report_at (program_name, 0, REPORT_ERROR, "failed to write header"); + return; + + } + +} diff --git a/obj.h b/obj.h new file mode 100644 index 0000000..a92812e --- /dev/null +++ b/obj.h @@ -0,0 +1,46 @@ +/****************************************************************************** + * @file obj.h + *****************************************************************************/ +#ifndef _OBJ_H +#define _OBJ_H + +struct exec { + + unsigned char a_info[4]; + unsigned char a_text[4]; + unsigned char a_data[4]; + unsigned char a_bss[4]; + unsigned char a_syms[4]; + unsigned char a_entry[4]; + unsigned char a_trsize[4]; + unsigned char a_drsize[4]; + +}; + +#define MAGIC 0471 + +struct relocation_info { + + unsigned char r_address[4]; + unsigned char r_symbolnum[4]; + +}; + +#define N_UNDF 0x00 +#define N_ABS 0x02 +#define N_TEXT 0x04 +#define N_DATA 0x06 +#define N_BSS 0x08 + +struct nlist { + + unsigned char n_strx[4]; + unsigned char n_type; + + unsigned char n_value[4]; + +}; + +#define N_EXT 0x01 + +#endif /* _OBJ_H */ diff --git a/process.c b/process.c new file mode 100644 index 0000000..c0a38c8 --- /dev/null +++ b/process.c @@ -0,0 +1,1494 @@ +/****************************************************************************** + * @file process.c + *****************************************************************************/ +#include +#include +#include +#include +#include + +#include "as.h" +#include "cstr.h" +#include "eval.h" +#include "expr.h" +#include "frag.h" +#include "hashtab.h" +#include "kwd.h" +#include "lex.h" +#include "lib.h" +#include "listing.h" +#include "ll.h" +#include "macro.h" +#include "process.h" +#include "report.h" +#include "section.h" +#include "symbol.h" +#include "vector.h" + +struct pp_pseudo_op_entry { + + const char *name; + void (*handler) (char *start, char 
**pp);
+
+};
+
+static struct vector vec_include_paths = { 0 };
+
+/* State of one open conditional block on vec_ifstack. */
+struct cond {
+
+    char *directive;
+    int ignore_line;            /* value of ignore_line when the block was opened */
+
+    char *filename;
+    unsigned long line_number;
+
+    int has_else;               /* non-zero once %else has been seen */
+    int taken;                  /* non-zero once one branch of the chain was selected */
+
+};
+
+static struct vector vec_ifstack = { 0 };
+
+/* ignore_line: lines are currently being skipped.  iflevel: number of
+ * conditionals opened while skipping (they are counted, not pushed). */
+static int ignore_line = 0, iflevel = 0;
+
+/**
+ * %if: opens a conditional block whose first branch is selected when the
+ * expression at *pp evaluates to non-zero.
+ */
+static void handler_if (char *start, char **pp) {
+
+    struct cond *cond;
+
+    if (!ignore_line) {
+
+        cond = xmalloc (sizeof (*cond));
+
+        cond->ignore_line = ignore_line;
+        cond->directive = xstrdup ("if");
+
+        cond->filename = xstrdup (get_filename ());
+        cond->line_number = get_line_number ();
+
+        cond->has_else = 0;
+
+        vec_push (&vec_ifstack, cond);
+        ignore_line = !eval (start, pp);
+
+        cond->taken = !ignore_line;
+
+    } else {
+        iflevel++;
+    }
+
+}
+
+/**
+ * %ifdef: opens a conditional block whose first branch is selected when the
+ * named macro exists.
+ */
+static void handler_ifdef (char *start, char **pp) {
+
+    struct cond *cond;
+    char *sname, *caret;
+
+    if (!ignore_line) {
+
+        cond = xmalloc (sizeof (*cond));
+
+        cond->ignore_line = ignore_line;
+        cond->directive = xstrdup ("ifdef");
+
+        cond->filename = xstrdup (get_filename ());
+        cond->line_number = get_line_number ();
+
+        cond->has_else = 0;
+
+        vec_push (&vec_ifstack, cond);
+        *pp = skip_whitespace (*pp);
+
+        if (is_name_beginner ((int) **pp)) {
+
+            caret = (*pp);
+
+            while (is_name_part ((int) **pp)) {
+                (*pp)++;
+            }
+
+            sname = xstrndup (caret, *pp - caret);
+            ignore_line = (find_macro (sname) == NULL);
+
+            free (sname);
+
+        }
+
+        cond->taken = !ignore_line;
+
+        *pp = skip_whitespace (*pp);
+
+        if (!is_end_of_line[(int) **pp]) {
+            report_line_at (get_filename (), get_line_number (), REPORT_WARNING, start, *pp, "extra tokens at end of %%ifdef directive");
+        }
+
+    } else {
+        iflevel++;
+    }
+
+}
+
+/**
+ * %ifndef: opens a conditional block whose first branch is selected when the
+ * named macro does not exist.
+ */
+static void handler_ifndef (char *start, char **pp) {
+
+    struct cond *cond;
+    char *sname, *caret;
+
+    if (!ignore_line) {
+
+        cond = xmalloc (sizeof (*cond));
+
+        cond->ignore_line = ignore_line;
+        cond->directive = xstrdup ("ifndef");
+
+        cond->filename = xstrdup (get_filename ());
+        cond->line_number = get_line_number ();
+
+        cond->has_else = 0;
+
+        vec_push (&vec_ifstack, cond);
+        *pp = skip_whitespace (*pp);
+
+        if (is_name_beginner ((int) **pp)) {
+
+            caret = (*pp);
+
+            while (is_name_part ((int) **pp)) {
+                (*pp)++;
+            }
+
+            sname = xstrndup (caret, *pp - caret);
+            ignore_line = (find_macro (sname) != NULL);
+
+            free (sname);
+
+        }
+
+        cond->taken = !ignore_line;
+
+        *pp = skip_whitespace (*pp);
+
+        if (!is_end_of_line[(int) **pp]) {
+            report_line_at (get_filename (), get_line_number (), REPORT_WARNING, start, *pp, "extra tokens at end of %%ifndef directive");
+        }
+
+    } else {
+        iflevel++;
+    }
+
+}
+
+/**
+ * %elif: selects this branch only when no earlier branch of the innermost
+ * open conditional was selected and the expression at *pp is non-zero.
+ * Ignored while inside a skipped nested conditional (iflevel != 0).
+ */
+static void handler_elif (char *start, char **pp) {
+
+    struct cond *cond;
+
+    if (!iflevel) {
+
+        if (vec_ifstack.length == 0) {
+
+            report_line_at (get_filename (), get_line_number (), REPORT_ERROR, start, skip_whitespace (start + 1), "%%elif without %%if");
+            return;
+
+        } else {
+
+            cond = vec_ifstack.data[vec_ifstack.length - 1];
+
+            if (cond->has_else > 0) {
+
+                report_line_at (get_filename (), get_line_number (), REPORT_ERROR, start, skip_whitespace (start + 1), "%%elif after %%else");
+                return;
+
+            }
+
+        }
+
+        if (cond->taken) {
+
+            /* An earlier branch already ran: skip this one without
+             * evaluating its expression. */
+            ignore_line = 1;
+
+        } else {
+
+            ignore_line = !eval (start, pp);
+            cond->taken = !ignore_line;
+
+        }
+
+    }
+
+}
+
+/**
+ * %else: selects this branch only when no earlier branch of the innermost
+ * open conditional was selected.  Ignored while inside a skipped nested
+ * conditional (iflevel != 0).
+ */
+static void handler_else (char *start, char **pp) {
+
+    struct cond *cond;
+
+    if (!iflevel) {
+
+        if (vec_ifstack.length == 0) {
+
+            report_line_at (get_filename (), get_line_number (), REPORT_ERROR, start, skip_whitespace (start + 1), "%%else without %%if");
+            return;
+
+        } else {
+
+            cond = vec_ifstack.data[vec_ifstack.length - 1];
+
+            if (cond->has_else > 0) {
+
+                report_line_at (get_filename (), get_line_number (), REPORT_ERROR, start, skip_whitespace (start + 1), "%%else after %%else");
+                return;
+
+            }
+
+            cond->has_else++;
+
+        }
+
+        *pp = skip_whitespace (*pp);
+
+        if (!is_end_of_line[(int) **pp]) {
+            report_line_at (get_filename (), get_line_number (), REPORT_WARNING, start, *pp, "extra tokens at end of %%else directive");
+        }
+
+        ignore_line = cond->taken;
+        cond->taken = 1;
+
+    }
+
+}
+
+static void handler_endif (char *start, char **pp) {
+
+    struct cond *cond;
+
+    if (!iflevel) {
+
+        if ((cond = vec_pop (&vec_ifstack))) {
+
+            ignore_line = cond->ignore_line;
+
+            free (cond->filename);
+            free (cond->directive);
+
+            free (cond);
+
+        } else {
+            report_line_at (get_filename (), get_line_number (), REPORT_ERROR, start, skip_whitespace (start + 1), "%%endif without %%if");
+        }
+
+        *pp = skip_whitespace (*pp);
+
+        if (!is_end_of_line[(int) **pp]) {
+            report_line_at (get_filename (), get_line_number (), REPORT_WARNING, start, *pp, "extra tokens at end of %%endif directive");
+        }
+
+    } else {
+        iflevel--;
+    }
+
+}
+
+/* Conditional directives; these are dispatched even while lines are being ignored. */
+static struct pp_pseudo_op_entry cond_pseudo_op_table[] = {
+
+    { "if", &handler_if, },
+    { "ifdef", &handler_ifdef },
+    { "ifndef", &handler_ifndef },
+    { "elif", &handler_elif },
+    { "else", &handler_else },
+    { "endif", &handler_endif },
+
+    { 0, 0 }
+
+};
+
+static struct hashtab hashtab_cond_pseudo_ops = { 0 };
+static int includes = 0;
+
+/* Register every entry of a name-terminated table in hashtab_cond_pseudo_ops. */
+static void install_cond_pseudo_op_table (struct pp_pseudo_op_entry *table) {
+
+    struct pp_pseudo_op_entry *entry;
+    struct hashtab_name *key;
+
+    for (entry = table; entry->name; entry++) {
+
+        if (hashtab_get_key (&hashtab_cond_pseudo_ops, entry->name)) {
+
+            report_at (program_name, 0, REPORT_ERROR, "duplicate entry '%s'", entry->name);
+            continue;
+
+        }
+
+        if (!(key = hashtab_alloc_name (entry->name))) {
+
+            report_at (program_name, 0, REPORT_ERROR, "failed to allocate memory for '%s'", entry->name);
+            continue;
+
+        }
+
+        hashtab_put (&hashtab_cond_pseudo_ops, key, entry);
+
+    }
+
+}
+
+/* Look up a conditional directive by name; returns 0 when it is not registered. */
+static struct pp_pseudo_op_entry *find_cond_directive (char *name) {
+
+    struct hashtab_name *key;
+    struct pp_pseudo_op_entry *entry;
+
+    if ((key = hashtab_get_key (&hashtab_cond_pseudo_ops, name))) {
+
+        if ((entry = hashtab_get (&hashtab_cond_pseudo_ops, key))) {
+            return entry;
+        }
+
+    }
+
+    return 0;
+
+}
+
+
+static struct hashtab hashtab_pseudo_ops = { 0 };
+
+static void handler_define (char *start, char **pp) {
+    add_macro (start, pp, 1);
+}
+
+/*
+ * %error handler: reports the rest of the line (*pp) as an error, prefixed
+ * with "#error ".  A trailing newline, if present, is stripped in place.
+ */
+static void handler_error (char *start, char **pp) {
+
+    unsigned long len = strlen (*pp);
+    char *temp, *type = "error";
+
+    /* len may be 0 (nothing after the directive); [len - 1] would then read out of bounds. */
+    if (len > 0 && (*pp)[len - 1] == '\n') {
+        (*pp)[len - 1] = '\0';
+    }
+
+    /* '#' + type + ' ' + message + NUL. */
+    temp = xmalloc (1 + strlen (type) + 1 + strlen (*pp) + 1);
+    sprintf (temp, "#%s %s", type, *pp);
+
+    report_line_at (get_filename (), get_line_number (), REPORT_ERROR, start, skip_whitespace (start + 1), "%s", temp);
+    free (temp);
+
+}
+
+/*
+ * %include handler.  Accepts "FILENAME" or <FILENAME>.  A quoted name is
+ * first tried as given; otherwise (or on failure) each include path is
+ * tried from the most recently added to the oldest, with the path prepended
+ * to the name as-is (no separator is inserted here).  After the included
+ * file has been processed the current filename/line number and the builtin
+ * __FILE__ / __LINE__ macros are restored to the including file's values.
+ */
+static void handler_include (char *start, char **pp) {
+
+    const char *orig_fn = get_filename ();
+    unsigned long orig_ln = get_line_number ();
+
+    char *caret, *sname, ch;
+    int i;
+
+    char *inc_path, *tmp;
+    FILE *fp;
+
+    struct hashtab_name *key;
+    struct macro *m;
+
+    if (**pp != '"' && **pp != '<') {
+
+        report_line_at (orig_fn, orig_ln, REPORT_ERROR, start, *pp, "%%include expects \"FILENAME\" or <FILENAME>");
+        return;
+
+    }
+
+    /* Closing delimiter that matches the opening one. */
+    ch = (**pp == '"' ? '"' : '>');
+    caret = (*pp)++;
+
+    while (!is_end_of_line[(int) **pp]) {
+
+        if (**pp == ch) { break; }
+        (*pp)++;
+
+    }
+
+    if (**pp != ch) {
+
+        report_line_at (orig_fn, orig_ln, REPORT_ERROR, start, caret, "%%include expects \"FILENAME\" or <FILENAME>");
+        return;
+
+    } else {
+        (*pp)++;
+    }
+
+    /* Text between the delimiters: caret is on the opener, *pp is just past the closer. */
+    sname = xstrndup (caret + 1, *pp - caret - 2);
+
+    if (ch == '"' && (fp = fopen (sname, "r"))) {
+
+        /* Only an existence probe; process_file opens the file itself. */
+        fclose (fp);
+
+        includes++;
+        process_file (sname);
+
+        includes--;
+        goto end;
+
+    }
+
+    for (i = vec_include_paths.length - 1; i >= 0; i--) {
+
+        inc_path = vec_include_paths.data[i];
+
+        tmp = xmalloc (strlen (inc_path) + strlen (sname) + 1);
+        sprintf (tmp, "%s%s", inc_path, sname);
+
+        if ((fp = fopen (tmp, "r"))) {
+
+            fclose (fp);
+            includes++;
+
+            process_file (tmp);
+            includes--;
+
+            free (tmp);
+            goto end;
+
+        }
+
+        free (tmp);
+
+    }
+
+    report_line_at (orig_fn, orig_ln, REPORT_ERROR, start, caret, "failed to open '%s' for reading", sname);
+
+end:
+
+    set_filename_and_line_number (orig_fn, orig_ln);
+
+    if ((key = find_macro ("__FILE__"))) {
+
+        if ((m = get_macro (key)) && m->type == MACRO_BUILTIN) {
+
+            free (m->value);
+
+            /* Two quotes + name + NUL. */
+            m->value = xmalloc (1 + strlen (orig_fn) + 2);
+            sprintf (m->value, "\"%s\"", orig_fn);
+
+        }
+
+    }
+
+    if ((key = find_macro ("__LINE__"))) {
+
+        if ((m = get_macro (key)) && m->type == MACRO_BUILTIN) {
+
+            free (m->value);
+
+            m->value = xmalloc (23);
+            sprintf (m->value, "%lu", orig_ln);
+
+        }
+
+    }
+
+    free (sname);
+
+}
+
+static void handler_undef (char *start, char **pp) {
+    remove_macro (start, pp, 1);
+}
+
+/*
+ * %warning handler: same as handler_error but reported as a warning and
+ * prefixed with "#warning ".
+ */
+static void handler_warning (char *start, char **pp) {
+
+    unsigned long len = strlen (*pp);
+    char *temp, *type = "warning";
+
+    /* len may be 0 (nothing after the directive); [len - 1] would then read out of bounds. */
+    if (len > 0 && (*pp)[len - 1] == '\n') {
+        (*pp)[len - 1] = '\0';
+    }
+
+    temp = xmalloc (1 + strlen (type) + 1 + strlen (*pp) + 1);
+    sprintf (temp, "#%s %s", type, *pp);
+
+    report_line_at (get_filename (), get_line_number (), REPORT_WARNING, start, skip_whitespace (start + 1), "%s", temp);
+    free (temp);
+
+}
+
+/* Non-conditional directives; only dispatched while lines are not being ignored. */
+static struct pp_pseudo_op_entry pseudo_op_table[] = {
+
+    { "define", &handler_define },
+    { "error", &handler_error },
+    { "include", &handler_include },
+    { "undef", &handler_undef },
+    { "warning", &handler_warning },
+
+    { 0, 0 }
+
+};
+
+/* Register every entry of a name-terminated table in hashtab_pseudo_ops. */
+static void install_pp_pseudo_op_table (struct pp_pseudo_op_entry *table) {
+
+    struct pp_pseudo_op_entry *entry;
+    struct hashtab_name *key;
+
+    for (entry = table; entry->name; entry++) {
+
+        if (hashtab_get_key (&hashtab_pseudo_ops, entry->name)) {
+
+            report_at (program_name, 0, REPORT_ERROR, "duplicate entry '%s'", entry->name);
+            continue;
+
+        }
+
+        if (!(key = hashtab_alloc_name (entry->name))) {
+
+            report_at (program_name, 0, REPORT_ERROR, "failed to allocate memory for '%s'", entry->name);
+            continue;
+
+        }
+
+        hashtab_put (&hashtab_pseudo_ops, key, entry);
+
+    }
+
+}
+
+/* Look up a non-conditional directive by name; returns 0 when it is not registered. */
+struct pp_pseudo_op_entry *find_directive (char *name) {
+
+    struct hashtab_name *key;
+    struct pp_pseudo_op_entry *entry;
+
+    if ((key = hashtab_get_key (&hashtab_pseudo_ops, name))) {
+
+        if ((entry = hashtab_get (&hashtab_pseudo_ops, key))) {
+            return entry;
+        }
+
+    }
+
+    return 0;
+
+}
+
+
+static void init_builtin_macros (void) {
+
+    static char *builtins[] = { "__FILE__", "__LINE__" };
+    char *name;
+
+    struct hashtab_name
*key; + struct macro *m; + + unsigned long cnt = (sizeof (builtins) / sizeof (*builtins)); + unsigned i; + + for (i = 0; i < cnt; i++) { + + name = xstrdup (builtins[i]); + + if ((key = hashtab_alloc_name (name))) { + + m = xmalloc (sizeof (*m)); + m->type = MACRO_BUILTIN; + + m->name = name; + m->value = xstrdup (""); + + push_macro (key, m); + + } + + } + + name = xstrdup ("__SASM__"); + + if ((key = hashtab_alloc_name (name))) { + + m = xmalloc (sizeof (*m)); + m->type = MACRO_BUILTIN; + + m->name = name; + m->value = "1"; + + push_macro (key, m); + + } + +} + +static void init_date_time_macros (void) { + + char *timep, *buf, *name, temp[3] = { 0 }; + time_t now; + + struct hashtab_name *key; + struct macro *m; + + time (&now); + timep = ctime (&now); + + name = xstrdup ("__TIME__"); + + if ((key = hashtab_alloc_name (name))) { + + buf = xmalloc (11); + sprintf (buf, "\"%.8s\"", timep + 11);; + + m = xmalloc (sizeof (*m)); + m->type = MACRO_BUILTIN; + + m->name = name; + m->value = buf; + + push_macro (key, m); + + } + + name = xstrdup ("__DATE__"); + + if ((key = hashtab_alloc_name (name))) { + + sprintf (temp, "%.2s", timep + 8); + buf = xmalloc (14); + + if (atoi (temp) < 10) { + sprintf (buf, "\"%.3s %.1s %.4s\"", timep + 4, timep + 9, timep + 20); + } else { + sprintf (buf, "\"%.3s %.2s %.4s\"", timep + 4, timep + 8, timep + 20); + } + + m = xmalloc (sizeof (*m)); + m->type = MACRO_BUILTIN; + + m->name = name; + m->value = buf; + + push_macro (key, m); + + } + +} + +int preprocess_init (void) { + + struct list *item; + char *opt, *nopt, *p; + + set_filename (xstrdup ("")); + set_line_number (1); + + remove_all_macros (); + + while ((opt = vec_pop (&vec_include_paths))) { + free (opt); + } + + init_builtin_macros (); + init_date_time_macros (); + + if (state->pplist) { + + item = state->pplist; + + do { + + item = item->next; + + if (!(opt = item->data)) { + continue; + } + + if (opt[0] != '-') { + + report_at (program_name, 0, REPORT_ERROR, "unrecognised 
option '%s'", opt);
+                    continue;
+
+                }
+
+                switch (opt[1]) {
+
+                    case 'D':
+
+                        opt = nopt = xstrdup (opt + 2);
+
+                        /*
+                         * Split NAME=VALUE at the FIRST '=' so the value itself
+                         * may contain '=' (e.g. -DA=B=C defines A as "B=C").
+                         */
+                        if ((p = strchr (nopt, '='))) {
+                            *p++ = ' ';
+                        }
+
+                        add_macro (nopt, &nopt, 0);
+                        free (opt);
+
+                        break;
+
+                    case 'I':
+
+                        vec_push (&vec_include_paths, xstrdup (opt + 2));
+                        break;
+
+                    case 'U':
+
+                        opt = nopt = xstrdup (opt + 2);
+
+                        remove_macro (nopt, &nopt, 0);
+                        free (opt);
+
+                        break;
+
+                    default:
+
+                        report_at (program_name, 0, REPORT_ERROR, "unrecognised option '%s'", opt);
+                        break;
+
+                }
+
+        } while (item != state->pplist);
+
+    }
+
+    install_cond_pseudo_op_table (cond_pseudo_op_table);
+    install_pp_pseudo_op_table (pseudo_op_table);
+
+    return get_error_count () > 0;
+
+}
+
+
+/*
+ * Expand macros in one source line and return the result as a newly
+ * allocated string which the caller frees.
+ *
+ * The line is scanned token by token: runs of blanks are copied as spaces,
+ * quoted literals are copied verbatim (a missing closing quote is supplied
+ * and warned about), identifiers that name a macro are replaced by their
+ * recursively expanded body, "@DataSize" / "@Model" are replaced by a digit
+ * taken from state, and numbers and punctuation are copied through.
+ *
+ * src       NUL/end-of-line terminated text to expand.
+ * in_macro  non-zero when called recursively on a macro body; a trailing
+ *           newline is appended only when this is zero and state->ofp is set.
+ */
+static char *preprocess_line (char *src, int in_macro) {
+
+    struct cstring cstr;
+    char *line;
+
+    char *caret = src, *start;
+    char *sname, ch;
+
+    struct hashtab_name *key;
+    struct macro *m;
+
+    cstr_new (&cstr);
+
+    while (!is_end_of_line[(int) *caret]) {
+
+        start = caret;
+
+        if (*caret == ' ' || *caret == '\t') {
+
+            while (*caret == ' ' || *caret == '\t') {
+
+                cstr_ccat (&cstr, ' ');
+                caret++;
+
+            }
+
+            continue;
+
+        }
+
+        if (*caret == '"' || *caret == '\'') {
+
+            ch = *caret++;
+
+            while (!is_end_of_line[(int) *caret]) {
+
+                if (*caret == '\\') {
+
+                    /* Skip the escaped character so an escaped quote does not end the literal. */
+                    caret++;
+
+                    if (!is_end_of_line[(int) *caret]) {
+                        caret++;
+                    }
+
+                    continue;
+
+                }
+
+                if (*caret == ch) { break; }
+                caret++;
+
+            }
+
+            if (*caret != ch) {
+
+                /* Literal text + the missing closing quote + NUL. */
+                char *temp = xmalloc ((caret - start) + 2);
+                sprintf (temp, "%.*s%c", (int) (caret - start), start, ch);
+
+                report_line_at (get_filename (), get_line_number (), REPORT_WARNING, src, start, "missing terminating %c character", ch);
+                cstr_cat (&cstr, temp, strlen (temp));
+
+                /* The text has been copied into cstr; the scratch buffer was previously leaked. */
+                free (temp);
+                continue;
+
+            }
+
+            caret++;
+
+            cstr_cat (&cstr, start, caret - start);
+            continue;
+
+        }
+
+        if (is_name_beginner ((int) *caret)) {
+
+            sname = symname (&caret);
+
+            if ((key = find_macro (sname))) {
+
+                if ((m = get_macro (key))) {
+
+                    char *pm;
+                    int spaces = 0;
+
+                    if (*caret == ' ' || *caret == '\t') {
+
+                        cstr_ccat (&cstr, ' ');
+
+                        while (*caret == ' ' || *caret == '\t') {
+
+                            spaces++;
+                            caret++;
+
+                        }
+
+                    }
+
+                    /* NOTE(review): ownership of pm is not visible here, so it is left untouched - confirm against process_macro. */
+                    if ((pm = process_macro (start, &caret, m))) {
+
+                        char *temp = preprocess_line (pm, 1);
+                        cstr_cat (&cstr, temp, strlen (temp));
+
+                        /* The recursive result is a fresh allocation owned by us. */
+                        free (temp);
+
+                        if (!is_end_of_line[(int) *pm]) {
+
+                            while (spaces--) {
+                                cstr_ccat (&cstr, ' ');
+                            }
+
+                        }
+
+                    }
+
+                }
+
+                /* symname returns an allocated copy (other callers free it); release it once the lookup is done. */
+                free (sname);
+                continue;
+
+            }
+
+            free (sname);
+
+            cstr_cat (&cstr, start, caret - start);
+            continue;
+
+        }
+
+        if (*caret == '@') {
+
+            char *arg, *temp = (caret + 1);
+
+            if ((arg = symname (&temp))) {
+
+                if (xstrcasecmp (arg, "DataSize") == 0) {
+
+                    caret = temp;
+                    free (arg);
+
+                    /* | 0x30 turns a small integer into its ASCII digit. */
+                    cstr_ccat (&cstr, state->data_size | 0x30);
+                    continue;
+
+                }
+
+                if (xstrcasecmp (arg, "Model") == 0) {
+
+                    caret = temp;
+                    free (arg);
+
+                    cstr_ccat (&cstr, (state->model > 0 ? state->model | 0x30 : '1'));
+                    continue;
+
+                }
+
+                free (arg);
+
+            }
+
+        }
+
+        if (isdigit ((int) *caret)) {
+
+            if (caret[0] == '0' && tolower ((int) caret[1]) == 'x') {
+
+                caret += 2;
+
+                /*
+                 * Consume hexadecimal digits after the prefix.  Stopping at the
+                 * first non-decimal digit left e.g. the "F" of 0x1F to be
+                 * scanned as an identifier and possibly macro-expanded.
+                 */
+                while (isxdigit ((int) *caret)) {
+                    caret++;
+                }
+
+            } else {
+
+                while (isdigit ((int) *caret)) {
+                    caret++;
+                }
+
+            }
+
+            cstr_cat (&cstr, start, caret - start);
+            continue;
+
+        }
+
+        if (ispunct ((int) *caret)) {
+
+            cstr_ccat (&cstr, *caret);
+
+            caret++;
+            continue;
+
+        }
+
+        report_line_at (get_filename (), get_line_number (), REPORT_INTERNAL_ERROR, src, caret, "Do we see this???");
+        caret++;
+
+    }
+
+    if (!in_macro && state->ofp) {
+        cstr_ccat (&cstr, '\n');
+    }
+
+    cstr_ccat (&cstr, '\0');
+
+    line = xstrdup (cstr.data);
+    cstr_free (&cstr);
+
+    return line;
+
+}
+
+struct section *machine_dependent_simplified_expression_read_into (char *start, char **pp, struct expr *expr);
+
+static struct section *get_known_section_expression (char *start, char **pp, struct expr *expr) {
+
+    struct section *section = machine_dependent_simplified_expression_read_into (start, pp, expr);
+
+    if (expr->type == EXPR_TYPE_INVALID || expr->type == EXPR_TYPE_ABSENT) {
+
+        report_at (get_filename (), get_line_number (), REPORT_ERROR, "expected
address expression"); + + expr->type = EXPR_TYPE_CONSTANT; + expr->add_number = 0; + + section = absolute_section; + + } + + if (section == undefined_section) { + + if (expr->add_symbol && symbol_get_section (expr->add_symbol) != expr_section) { + report_at (get_filename (), get_line_number (), REPORT_WARNING, "symbol \"%s\" undefined; zero assumed", symbol_get_name (expr->add_symbol)); + } else { + report_at (get_filename (), get_line_number (), REPORT_WARNING, "some symbol undefined; zero assumed"); + } + + expr->type = EXPR_TYPE_CONSTANT; + expr->add_number = 0; + + section = absolute_section; + + } + + return section; + +} + +static void do_org (struct section *section, struct expr *expr, unsigned long fill_value) { + + struct symbol *symbol; + + unsigned char *p_in_frag; + unsigned long offset; + + if (section != current_section && section != absolute_section && section != expr_section) { + report_at (get_filename (), get_line_number (), REPORT_ERROR, "invalid section \"%s\"", section_get_name (section)); + } + + symbol = expr->add_symbol; + offset = expr->add_number; + + if (fill_value && current_section == bss_section) { + report_at (get_filename (), get_line_number (), REPORT_WARNING, "ignoring fill value in section \"%s\"", section_get_name (current_section)); + } + + if (expr->type != EXPR_TYPE_CONSTANT && expr->type != EXPR_TYPE_SYMBOL) { + + symbol = make_expr_symbol (expr); + offset = 0; + + } + + *(p_in_frag = frag_alloc_space (1)) = (unsigned char) fill_value; + frag_set_as_variant (RELAX_TYPE_ORG, 0, symbol, offset, 0); + +} + +static void internal_set (char *start, char **pp, struct symbol *symbol) { + + struct expr expr; + machine_dependent_simplified_expression_read_into (start, pp, &expr); + + if (expr.type == EXPR_TYPE_INVALID) { + report_at (get_filename (), get_line_number (), REPORT_ERROR, "invalid expression"); + } else if (expr.type == EXPR_TYPE_ABSENT) { + report_at (get_filename (), get_line_number (), REPORT_ERROR, "missing 
expression"); + } + + if (symbol_is_section_symbol (symbol)) { + + report_at (get_filename (), get_line_number (), REPORT_ERROR, "attempt to set value of section symbol"); + return; + + } + + switch (expr.type) { + + case EXPR_TYPE_INVALID: + case EXPR_TYPE_ABSENT: + + expr.add_number = 0; + /* fall through */ + + case EXPR_TYPE_CONSTANT: + + symbol_set_frag (symbol, &zero_address_frag); + symbol_set_section (symbol, absolute_section); + symbol_set_value (symbol, expr.add_number); + + break; + + default: + + symbol_set_frag (symbol, &zero_address_frag); + symbol_set_section (symbol, expr_section); + symbol_set_value_expression (symbol, &expr); + + break; + + } + +} + +static void assign_symbol (char *start, char **pp, char *name) { + + struct symbol *symbol; + + if (name[0] == '.' && name[1] == '\0') { + + struct section *section; + struct expr expr; + + section = get_known_section_expression (start, pp, &expr); + do_org (section, &expr, 0); + + return; + + } + + symbol = symbol_find_or_make (name, SYMBOL_SCOPE_LOCAL); + internal_set (start, pp, symbol); + +} + +static void handle_org (char *start, char **pp) { + + struct expr expr; + + struct section *section = get_known_section_expression (start, pp, &expr); + unsigned long fill_value = 0; + + if (**pp == ',') { + + report_at (__FILE__, __LINE__, REPORT_INTERNAL_ERROR, "+++handle_org"); + (*pp)++; + + } + + do_org (section, &expr, fill_value); + +} + +extern void machine_dependent_assemble_line (char *start, char *line); +extern void machine_dependent_handle_proc (char *start, char **pp, char *name); +extern void machine_dependent_handle_endp (char *start, char **pp, char *name); + +static char *find_end_of_line (char *line) { + + while (!is_end_of_line[(int) *line]) { + + if (line++[0] == '\"') { + + while (*line && *line != '\"') { + + if (line++[0] == '\\' && *line) { + line++; + } + + } + + } + + } + + return line; + +} + +extern void *machine_dependent_find_templates (char *name, int check_suffix); +extern 
void *machine_dependent_find_reg_entry (char *name); + +static void process_line (char *line, char *line_end) { + + char *start = line, *caret; + char *arg = 0, saved_ch; + + struct pseudo_op_entry *poe; + int has_colon; + + caret = (line = skip_whitespace (line)); + + if (caret >= line_end) { + return; + } + + while (line < line_end) { + + has_colon = 0; + + if (is_name_beginner ((int) *line)) { + + caret = line; + + if (!(arg = symname (&line))) { + goto check; + } + + if (xstrcasecmp (arg, "equ") == 0) { + + report_line_at (get_filename (), get_line_number (), REPORT_ERROR, start, caret, "equ not preceded by label"); + + ignore_rest_of_line (&line); + goto check; + + } + + if (xstrcasecmp (arg, "org") == 0) { + + line = skip_whitespace (line); + + handle_org (start, &line); + goto check; + + } + + if (xstrcasecmp (arg, "proc") == 0 || xstrcasecmp (arg, "endp") == 0) { + + report_line_at (get_filename (), get_line_number (), REPORT_ERROR, start, caret, "procedure must have a name"); + + ignore_rest_of_line (&line); + goto check; + + } + + line = skip_whitespace (line); + + if ((poe = find_poe (arg))) { + + poe->handler (start, &line); + goto check; + + } + + if (machine_dependent_find_templates (arg, 1)) { + + saved_ch = *(line = find_end_of_line (line)); + *line = '\0'; + + machine_dependent_assemble_line (start, skip_whitespace (start)); + *line = saved_ch; + + goto check; + + } + + if (!machine_dependent_find_reg_entry (arg)) { + + char *temp, *directive; + struct symbol *symbol; + + if (*line == ':' || is_end_of_line[(int) *line]) { + + if ((has_colon = (*line == ':'))) { + + temp = (line = skip_whitespace (line + 1)); + + if ((directive = symname (&line))) { + + if (xstrcasecmp (directive, "equ") == 0) { + + assign_symbol (start, &line, arg); + + free (directive); + goto check; + + } + + free (directive); + + } + + line = temp; + + } + + symbol = symbol_label (start, caret, arg); + symbol->scope = SYMBOL_SCOPE_LOCAL; + + if (!has_colon) { + report_line_at 
(get_filename (), get_line_number (), REPORT_WARNING, start, caret, "label alone without colon");
+                    }
+
+                    free (arg);
+
+                    /*
+                     * Clear the pointer: the next iteration can reach the
+                     * "check" label without reassigning arg, which would free
+                     * it a second time.
+                     */
+                    arg = 0;
+                    continue;
+
+                }
+
+                temp = line;
+
+                if ((directive = symname (&line))) {
+
+                    if ((poe = find_data_poe (directive))) {
+
+                        free (directive);
+
+                        symbol = symbol_label (start, caret, arg);
+                        symbol->scope = SYMBOL_SCOPE_LOCAL;
+
+                        poe->handler (start, &line);
+                        goto check;
+
+                    }
+
+                    if (xstrcasecmp (directive, "equ") == 0) {
+
+                        assign_symbol (start, &line, arg);
+
+                        free (directive);
+                        goto check;
+
+                    }
+
+                    if (xstrcasecmp (directive, "proc") == 0) {
+
+                        machine_dependent_handle_proc (start, &line, arg);
+
+                        free (directive);
+                        goto check;
+
+                    }
+
+                    if (xstrcasecmp (directive, "endp") == 0) {
+
+                        machine_dependent_handle_endp (start, &line, arg);
+
+                        free (directive);
+                        goto check;
+
+                    }
+
+                    free (directive);
+
+                }
+
+                line = temp;
+
+            }
+
+            line = caret;
+
+        }
+
+    check:
+
+        if (arg) { free (arg); }
+        arg = 0;
+
+        line = skip_whitespace (line);
+
+        if (is_end_of_line[(int) *line]) {
+            break;
+        }
+
+        report_line_at (get_filename (), get_line_number (), REPORT_ERROR, start, line, "junk '%c' at the end of line", *line);
+        ignore_rest_of_line (&line);
+
+    }
+
+}
+
+/*
+ * Read and assemble one input file.
+ *
+ * ifile  path to read; NULL or "-" selects stdin.
+ *
+ * Each loaded line updates the current line number and the builtin
+ * __LINE__ macro, is checked for preprocessor directives (with or without
+ * a leading '%' for the conditional ones) and, unless a conditional is
+ * suppressing output, is macro-expanded and handed to process_line.
+ *
+ * When this is the outermost file (not reached through %include), any
+ * conditionals still open and any procedures still open are reported as
+ * errors, and the conditional state is reset so a following input file
+ * starts clean.
+ */
+void process_file (const char *ifile) {
+
+    char *start, *arg, *caret;
+    FILE *fp;
+
+    char *line, *line_end, *real_line;
+    unsigned long real_line_len;
+
+    unsigned long newlines;
+    unsigned long new_line_number;
+
+    struct pp_pseudo_op_entry *poe;
+    void *load_line_internal_data = NULL;
+
+    struct cond *cond;
+    int cond_idx, proc_idx;
+
+    struct hashtab_name *key;
+    struct macro *m;
+
+    if (!ifile || strcmp (ifile, "-") == 0) {
+
+        /* NOTE(review): this literal may have lost an angle-bracketed name (e.g. "<stdin>") when the patch was extracted - confirm against upstream. */
+        set_filename (xstrdup (""));
+        fp = stdin;
+
+    } else {
+
+        set_filename (xstrdup (ifile));
+
+        if (!(fp = fopen (ifile, "r"))) {
+
+            report_at (program_name, 0, REPORT_FATAL_ERROR, "Failed to open '%s' for reading", ifile);
+            return;
+
+        }
+
+    }
+
+    set_line_number (0);
+    new_line_number = 1;
+
+    if ((key = find_macro ("__FILE__"))) {
+
+        const char *filename = get_filename ();
+
+        if (filename && (m = get_macro (key)) && m->type == MACRO_BUILTIN) {
+
+            free (m->value);
+
+            /* Two quotes + name + NUL. */
+            m->value = xmalloc (1 + strlen (filename) + 2);
+            sprintf (m->value, "\"%s\"", filename);
+
+        }
+
+    }
+
+    load_line_internal_data = load_line_create_internal_data (&new_line_number);
+
+    while (!load_line (&line, &line_end, &real_line, &real_line_len, &newlines, fp, &load_line_internal_data)) {
+
+        set_line_number (new_line_number);
+        new_line_number += newlines + 1;
+
+        if (state->lfile) {
+
+            update_listing_line (current_frag);
+            add_listing_line (real_line, real_line_len, get_filename (), get_line_number ());
+
+        }
+
+        if ((key = find_macro ("__LINE__"))) {
+
+            if ((m = get_macro (key)) && m->type == MACRO_BUILTIN) {
+
+                free (m->value);
+
+                m->value = xmalloc (23);
+                sprintf (m->value, "%lu", get_line_number ());
+
+            }
+
+        }
+
+        start = line;
+        caret = (line = skip_whitespace (line));
+
+        if (!ignore_line && line >= line_end) {
+            continue;
+        }
+
+        if (*line == '%') {
+
+            caret = (line = skip_whitespace (line + 1));
+
+            if (is_name_beginner ((int) *line)) {
+
+                while (is_name_part ((int) *line)) {
+                    line++;
+                }
+
+                arg = xstrndup (caret, line - caret);
+                line = skip_whitespace (line);
+
+                /* Conditionals are handled even inside an ignored region so nesting stays balanced. */
+                if ((poe = find_cond_directive (arg))) {
+
+                    poe->handler (start, &line);
+                    free (arg);
+
+                    continue;
+
+                }
+
+                if (!ignore_line) {
+
+                    if ((poe = find_directive (arg))) {
+
+                        poe->handler (start, &line);
+                        free (arg);
+
+                        continue;
+
+                    }
+
+                    report_line_at (get_filename (), get_line_number (), REPORT_ERROR, start, caret, "unknown preprocessor directive '%%%s'", arg);
+
+                }
+
+                free (arg);
+                continue;
+
+            }
+
+            if (!is_end_of_line[(int) *caret] && !ignore_line) {
+                report_line_at (get_filename (), get_line_number (), REPORT_ERROR, start, caret, "unknown preprocessor directive '%%%c'", *caret);
+            }
+
+            continue;
+
+        }
+
+        if (is_name_beginner ((int) *line)) {
+
+            if ((arg = symname (&line))) {
+
+                line = skip_whitespace (line);
+
+                if ((poe = find_cond_directive (arg))) {
+
+                    poe->handler (start, &line);
+                    free (arg);
+
+                    continue;
+
+                }
+
+                /* Not a conditional: the name was only needed for the lookup (previously leaked). */
+                free (arg);
+
+            }
+
+            line = caret;
+
+        }
+
+        if (!ignore_line) {
+
+            if (line < line_end) {
+
+                char *tokenized_line = preprocess_line (line, 0);
+
+                process_line (tokenized_line, tokenized_line + strlen (tokenized_line));
+                free (tokenized_line);
+
+            }
+
+        }
+
+    }
+
+    if (state->lfile) {
+        update_listing_line (current_frag);
+    }
+
+    load_line_destroy_internal_data (load_line_internal_data);
+
+    if (!includes) {
+
+        for (cond_idx = 0; cond_idx < vec_ifstack.length; cond_idx++) {
+
+            cond = vec_ifstack.data[cond_idx];
+            report_at (cond->filename, cond->line_number, REPORT_ERROR, "unterminated %%%s statement", cond->directive);
+
+            free (cond->filename);
+            free (cond->directive);
+
+            free (cond);
+
+        }
+
+        /*
+         * The entries above have been freed: drop them from the stack and
+         * reset the conditional state so a later input file neither pops
+         * dangling pointers nor starts out ignoring its lines.
+         */
+        vec_ifstack.length = 0;
+
+        ignore_line = 0;
+        iflevel = 0;
+
+        for (proc_idx = 0; proc_idx < state->procs.length; proc_idx++) {
+
+            struct proc *proc = (struct proc *) state->procs.data[proc_idx];
+            report_at (proc->filename, proc->line_number, REPORT_ERROR, "procedure %s is not closed", proc->name);
+
+        }
+
+    }
+
+    if (fp != stdin) { fclose (fp); }
+
+}
diff --git a/process.h b/process.h
new file mode 100644
index 0000000..a75313a
--- /dev/null
+++ b/process.h
@@ -0,0 +1,10 @@
+/******************************************************************************
+ * @file process.h
+ *****************************************************************************/
+#ifndef _PROCESS_H
+#define _PROCESS_H
+
+int preprocess_init (void);
+void process_file (const char *ifile);
+
+#endif /* _PROCESS_H */
diff --git a/report.c b/report.c
new file mode 100644
index 0000000..e78065b
--- /dev/null
+++ b/report.c
@@ -0,0 +1,181 @@
+/******************************************************************************
+ * @file report.c
+ *****************************************************************************/
+#include <stdarg.h>
+#include <stdio.h>
+#include <string.h>
+
+#include "report.h"
+unsigned int errors = 0;
+
+#ifndef __PDOS__
+#if defined (_WIN32)
+# include <windows.h>
+static int OriginalConsoleColor = -1;
+#endif
+
+/* Restore the console colour that was active before set_console_color changed it. */
+static void reset_console_color (void) {
+
+#if defined (_WIN32)
+
+    HANDLE hStdError = GetStdHandle (STD_ERROR_HANDLE);
+
+    /* Nothing was saved, so there is nothing to restore. */
+    if (OriginalConsoleColor == -1) { return; }
+
+    SetConsoleTextAttribute (hStdError, OriginalConsoleColor);
+    OriginalConsoleColor = -1;
+
+#else
+
+    fprintf (stderr, "\033[0m");
+
+#endif
+
+}
+
+/* Switch the stderr text colour; on Windows the original attributes are saved first. */
+static void set_console_color (int color) {
+
+#if defined (_WIN32)
+
+    HANDLE hStdError = GetStdHandle (STD_ERROR_HANDLE);
+    WORD wColor;
+
+    if (OriginalConsoleColor == -1) {
+
+        CONSOLE_SCREEN_BUFFER_INFO csbi;
+
+        if (!GetConsoleScreenBufferInfo (hStdError, &csbi)) {
+            return;
+        }
+
+        OriginalConsoleColor = csbi.wAttributes;
+
+    }
+
+    /* Keep the high nibble of the saved attributes and replace the low nibble. */
+    wColor = (OriginalConsoleColor & 0xF0) + (color & 0xF);
+    SetConsoleTextAttribute (hStdError, wColor);
+
+#else
+
+    fprintf (stderr, "\033[%dm", color);
+
+#endif
+
+}
+#endif
+
+/*
+ * Print "filename:lineno:idx: <severity>: message" to stderr.  lineno and
+ * idx are omitted when zero.  Every type except REPORT_WARNING increments
+ * the global error counter.
+ */
+static void output_message (const char *filename, unsigned int lineno, unsigned int idx, int type, const char *fmt, va_list ap) {
+
+    if (filename) {
+
+        if (lineno == 0) {
+            fprintf (stderr, "%s: ", filename);
+        } else {
+            fprintf (stderr, "%s:", filename);
+        }
+
+    }
+
+    if (lineno > 0) {
+
+        if (idx == 0) {
+            fprintf (stderr, "%u: ", lineno);
+        } else {
+            fprintf (stderr, "%u:", lineno);
+        }
+
+    }
+
+    if (idx > 0) {
+        fprintf (stderr, "%u: ", idx);
+    }
+
+    if (type == REPORT_ERROR || type == REPORT_FATAL_ERROR) {
+
+#ifndef __PDOS__
+        set_console_color (COLOR_ERROR);
+#endif
+
+        if (type == REPORT_ERROR) {
+            fprintf (stderr, "error:");
+        } else {
+            fprintf (stderr, "fatal error:");
+        }
+
+    } else if (type == REPORT_INTERNAL_ERROR) {
+
+#ifndef __PDOS__
+        set_console_color (COLOR_INTERNAL_ERROR);
+#endif
+
+        fprintf (stderr, "internal error:");
+
+    } else if (type == REPORT_WARNING) {
+
+#ifndef __PDOS__
+        set_console_color (COLOR_WARNING);
+#endif
+
+        fprintf (stderr, "warning:");
+
+    }
+
+#ifndef __PDOS__
+    reset_console_color ();
+#endif
+
+    fprintf (stderr, " ");
+    vfprintf (stderr, fmt, ap);
+    fprintf (stderr, "\n");
+
+    if (type != REPORT_WARNING) {
+        ++errors;
+    }
+
+}
+
+/* Number of non-warning messages reported so far. */
+unsigned int get_error_count (void) {
+    return errors;
+}
+
+/* Report a printf-style message against a file and line, without a source excerpt. */
+void report_at (const char *filename, unsigned int lineno, int type, const char *fmt, ...) {
+
+    va_list ap;
+
+    va_start (ap, fmt);
+    output_message (filename, lineno, 0, type, fmt, ap);
+    va_end (ap);
+
+}
+
+/*
+ * Report a printf-style message and, when both str and caret are given,
+ * echo the source line followed by a '^' marker under the offending column.
+ *
+ * str    the source line being reported (may be NULL).
+ * caret  pointer into str at the offending position (may be NULL); the
+ *        reported column is its 1-based offset from str.
+ */
+void report_line_at (const char *filename, unsigned int lineno, int type, const char *str, const char *caret, const char *fmt, ...) {
+
+    int ident = 1;
+    va_list ap;
+
+    unsigned int idx = 0;
+    if (str && caret) { idx = (caret - str) + 1; }
+
+    va_start (ap, fmt);
+    output_message (filename, lineno, idx, type, fmt, ap);
+    va_end (ap);
+
+    if (str && caret) {
+
+        unsigned long len = strlen (str);
+
+        /* fprintf returns the gutter width, which is used to position the marker. */
+        if (lineno > 0) {
+            ident = fprintf (stderr, " %8u | ", lineno);
+        } else {
+            ident = fprintf (stderr, "%*s", 12, "");
+        }
+
+        fprintf (stderr, "%s", str);
+
+        /* An empty line has no last character; str[len - 1] would read out of bounds. */
+        if (len == 0 || str[len - 1] != '\n') {
+            fprintf (stderr, "\n");
+        }
+
+        fprintf (stderr, "%*s^\n", (int) (caret - str) + ident, "");
+
+    }
+
+}
diff --git a/report.h b/report.h
new file mode 100644
index 0000000..b7e6e6f
--- /dev/null
+++ b/report.h
@@ -0,0 +1,27 @@
+/******************************************************************************
+ * @file report.h
+ *****************************************************************************/
+#ifndef _REPORT_H
+#define _REPORT_H
+
+#if defined (_WIN32)
+# define COLOR_ERROR 12
+# define COLOR_WARNING 13
+# define COLOR_INTERNAL_ERROR 19
+#else
+# define COLOR_ERROR 91
+# define COLOR_INTERNAL_ERROR 94
+# define COLOR_WARNING 95
+#endif
+
+#define REPORT_WARNING 0
+#define REPORT_ERROR 1
+#define REPORT_FATAL_ERROR 3
+#define REPORT_INTERNAL_ERROR 4
+
+unsigned int get_error_count (void);
+
+void report_at (const char *filename, unsigned int lineno, int type, const char *fmt, ...);
+void report_line_at (const char *filename, unsigned int lineno, int type, const char *str, const char *caret, const char *fmt, ...);
+
+#endif /* _REPORT_H */
diff --git a/section.c b/section.c
new file
mode 100644 index 0000000..8922fa4 --- /dev/null +++ b/section.c @@ -0,0 +1,139 @@ +/****************************************************************************** + * @file section.c + *****************************************************************************/ +#include + +#include "frag.h" +#include "lib.h" +#include "section.h" +#include "symbol.h" + +struct section { + + const char *name; + + struct frag_chain frag_chain; + struct symbol *symbol; + + int alignment_power; + struct section *next; + +}; + +static struct section internal_sections[4]; +static struct symbol section_symbols[4]; + +struct section *undefined_section; +struct section *absolute_section; +struct section *expr_section; +struct section *reg_section; + +struct section *text_section; +struct section *data_section; +struct section *bss_section; + +struct section *current_section; + +struct frag_chain *current_frag_chain = 0; +struct section *sections = 0; + +static struct section *find_or_make_section_by_name (const char *name) { + + struct section *section, **p_next; + + for (p_next = §ions, section = sections; section; p_next = &(section->next), section = *p_next) { + + if (strcmp (name, section->name) == 0) { + break; + } + + } + + if (!section) { + + section = xmalloc (sizeof (*section)); + section->name = xstrdup (name); + + section->symbol = symbol_create (name, section, 0, &zero_address_frag); + section->symbol->flags |= SYMBOL_FLAG_SECTION_SYMBOL; + + symbol_add_to_chain (section->symbol); + *p_next = section; + + } + + return section; + +} + +struct section *section_get_next_section (struct section *section) { + return section->next; +} + +struct section *section_set (struct section *section) { + + current_section = section; + + current_frag_chain = ¤t_section->frag_chain; + current_frag = current_frag_chain->last_frag; + + return section; + +} + +struct section *section_set_by_name (const char *name) { + return section_set (find_or_make_section_by_name (name)); +} + +struct symbol 
*section_symbol (struct section *section) { + return section->symbol; +} + +const char *section_get_name (struct section *section) { + return section->name; +} + +void section_record_alignment_power (struct section *section, int alignment_power) { + + if (alignment_power > section->alignment_power) { + section->alignment_power = alignment_power; + } + +} + + +#define CREATE_INTERNAL_SECTION(section_var, section_name, section_index) \ + (section_var) = &internal_sections[(section_index)]; \ + (section_var)->name = (section_name); \ + (section_var)->symbol = §ion_symbols[(section_index)]; \ + (section_var)->symbol->name = (section_name); \ + (section_var)->symbol->section = (section_var); \ + (section_var)->symbol->frag = &zero_address_frag; \ + symbol_set_value ((section_var)->symbol, 0); \ + (section_var)->symbol->flags |= SYMBOL_FLAG_SECTION_SYMBOL + +void sections_init (void) { + + CREATE_INTERNAL_SECTION (undefined_section, "*UND*", 0); + CREATE_INTERNAL_SECTION (absolute_section, "*ABS*", 1); + CREATE_INTERNAL_SECTION (expr_section, "*EXPR*", 2); + CREATE_INTERNAL_SECTION (reg_section, "*REG*", 3); + + text_section = section_set_by_name (".text"); + text_section->frag_chain.last_frag = text_section->frag_chain.first_frag = frag_alloc (); + text_section->frag_chain.last_fixup = text_section->frag_chain.first_fixup = 0; + + data_section = section_set_by_name (".data"); + data_section->frag_chain.last_frag = data_section->frag_chain.first_frag = frag_alloc (); + data_section->frag_chain.last_fixup = data_section->frag_chain.first_fixup = 0; + + bss_section = section_set_by_name (".bss"); + bss_section->frag_chain.last_frag = bss_section->frag_chain.first_frag = frag_alloc (); + bss_section->frag_chain.last_fixup = bss_section->frag_chain.first_fixup = 0; + + /* .text section is the default section. 
*/ + section_set (text_section); /* Re-selecting .text also refreshes current_frag now that the section has its first frag. */ + +} + +#undef CREATE_INTERNAL_SECTION diff --git a/section.h b/section.h new file mode 100644 index 0000000..d4faeaa --- /dev/null +++ b/section.h @@ -0,0 +1,42 @@ +/****************************************************************************** + * @file section.h + *****************************************************************************/ +#ifndef _SECTION_H +#define _SECTION_H + +struct frag_chain { /* First/last pointers for a fixup list and a frag list, plus a link to another chain. */ + + struct fixup *first_fixup, *last_fixup; + struct frag *first_frag, *last_frag; + + struct frag_chain *next; + +}; + +#define SECTION_IS_NORMAL(section) /* True unless section is one of the four internal sections; the argument is evaluated more than once. */ \ + (((section) != undefined_section) && ((section) != absolute_section) && ((section) != expr_section) && ((section) != reg_section)) + +extern struct section *undefined_section; +extern struct section *absolute_section; +extern struct section *expr_section; +extern struct section *reg_section; + +extern struct section *text_section; +extern struct section *data_section; +extern struct section *bss_section; + +extern struct section *current_section; + +extern struct frag_chain *current_frag_chain; +extern struct section *sections; + +struct section *section_get_next_section (struct section *section); +struct section *section_set (struct section *section); +struct section *section_set_by_name (const char *name); + +struct symbol *section_symbol (struct section *section); +const char *section_get_name (struct section *section); + +void section_record_alignment_power (struct section *section, int alignment_power); + +#endif /* _SECTION_H */ diff --git a/symbol.c b/symbol.c new file mode 100644 index 0000000..9f069d2 --- /dev/null +++ b/symbol.c @@ -0,0 +1,788 @@ +/****************************************************************************** + * @file symbol.c + *****************************************************************************/ +#include <stdlib.h> +#include <string.h> + +#include "as.h" +#include "expr.h" +#include "frag.h" +#include "lib.h" +#include "report.h" +#include "section.h" +#include
"symbol.h" + +static struct symbol **pointer_to_pointer_to_next_symbol = &symbols; /* Tail link of the symbol chain; symbol_add_to_chain () stores through it and then advances it. */ + +struct symbol *symbols = 0; +int finalize_symbols = 0; + +static void report_op_error (struct symbol *symbol, struct symbol *left, enum expr_type op, struct symbol *right) { /* Report that operator op was applied to operands from unsuitable sections; left may be null, which selects the single-operand message. */ + + const char *op_name = 0; + + struct section *left_section = left ? symbol_get_section (left) : 0; + struct section *right_section = symbol_get_section (right); + + const char *filename; + unsigned long line_number; + + switch (op) { /* Translate the operator into its source spelling for the message. */ + + case EXPR_TYPE_LOGICAL_OR: + + op_name = "||"; + break; + + case EXPR_TYPE_LOGICAL_AND: + + op_name = "&&"; + break; + + case EXPR_TYPE_EQUAL: + + op_name = "=="; + break; + + case EXPR_TYPE_NOT_EQUAL: + + op_name = "!="; + break; + + case EXPR_TYPE_LESSER: + + op_name = "<"; + break; + + case EXPR_TYPE_LESSER_EQUAL: + + op_name = "<="; + break; + + case EXPR_TYPE_GREATER: + + op_name = ">"; + break; + + case EXPR_TYPE_GREATER_EQUAL: + + op_name = ">="; + break; + + case EXPR_TYPE_ADD: + + op_name = "+"; + break; + + case EXPR_TYPE_SUBTRACT: + + op_name = "-"; + break; + + case EXPR_TYPE_BIT_INCLUSIVE_OR: + + op_name = "|"; + break; + + case EXPR_TYPE_BIT_EXCLUSIVE_OR: + + op_name = "^"; + break; + + case EXPR_TYPE_BIT_AND: + + op_name = "&"; + break; + + case EXPR_TYPE_MULTIPLY: + + op_name = "*"; + break; + + case EXPR_TYPE_DIVIDE: + + op_name = "/"; + break; + + case EXPR_TYPE_MODULUS: + + op_name = "%"; + break; + + case EXPR_TYPE_LEFT_SHIFT: + + op_name = "<<"; + break; + + case EXPR_TYPE_RIGHT_SHIFT: + + op_name = ">>"; + break; + + case EXPR_TYPE_LOGICAL_NOT: + + op_name = "!"; + break; + + case EXPR_TYPE_BIT_NOT: + + op_name = "~"; + break; + + case EXPR_TYPE_UNARY_MINUS: + + op_name = "-"; + break; + + default: + + report_at (__FILE__, __LINE__, REPORT_INTERNAL_ERROR, "report_op_error invalid case %i", op); /* Any other operator is an internal error and terminates the program. */ + exit (EXIT_FAILURE); + + } + + if (expr_symbol_get_filename_and_line_number (symbol, &filename, &line_number) == 0) { /* A zero return means filename and line_number were filled in for this symbol. */ + + if (left) { + report_at (filename,
line_number, REPORT_ERROR, "invalid operands (%s and %s sections) for `%s'", section_get_name (left_section), section_get_name (right_section), op_name); + } else { + report_at (filename, line_number, REPORT_ERROR, "invalid operand (%s section) for `%s'", section_get_name (right_section), op_name); + } + + } else { /* Location lookup returned non-zero: report against program_name and name the symbol being set. */ + + if (left) { + report_at (program_name, 0, REPORT_ERROR, "invalid operands (%s and %s sections) for `%s' when setting `%s'", section_get_name (left_section), section_get_name (right_section), op_name, symbol_get_name (symbol)); + } else { + report_at (program_name, 0, REPORT_ERROR, "invalid operand (%s section) for `%s' when setting `%s'", section_get_name (right_section), op_name, symbol_get_name (symbol)); + } + + } + +} + +struct expr *symbol_get_value_expression (struct symbol *symbol) { /* Return a pointer to the value expression stored inside the symbol. */ + return &(symbol->value); +} + +struct frag *symbol_get_frag (struct symbol *symbol) { /* Return the frag the symbol is attached to. */ + return symbol->frag; +} + +struct section *symbol_get_section (struct symbol *symbol) { /* Return the section the symbol currently belongs to. */ + return symbol->section; +} + +struct symbol *symbol_create (const char *name, struct section *section, unsigned long value, struct frag *frag) { /* Allocate a symbol holding a copy of name, the given section and frag, and a constant value; it is not added to the chain here. */ + + struct symbol *symbol = xmalloc (sizeof (*symbol)); /* NOTE(review): flags, scope, resolved, resolving and next are not assigned; presumably xmalloc returns zeroed memory - confirm. */ + + symbol->name = xstrdup (name); + symbol->section = section; + symbol->frag = frag; + + symbol_set_value (symbol, value); + return symbol; + +} + +struct symbol *symbol_find (const char *name) { /* Walk the symbols chain comparing names with strcmp; returns the match or null. */ + + struct symbol *symbol; + + for (symbol = symbols; symbol; symbol = symbol->next) { + + if (strcmp (symbol->name, name) == 0) { + break; + } + + } + + return symbol; + +} + +struct symbol *symbol_find_or_make (char *name, int scope) { /* Return the chained symbol called name, or create one via symbol_make (), give it scope and chain it. */ + + struct symbol *symbol = symbol_find (name); + + if (!symbol) { + + symbol = symbol_make (name); + symbol->scope = scope; /* scope is applied to newly made symbols only. */ + + symbol_add_to_chain (symbol); + + } + + return symbol; + +} + +struct symbol *symbol_label (char *start, char *caret, char *name) { /* Define label name at the current position: current_section, current_frag and offset current_frag->fixed_size. */ + + struct symbol *symbol = 0; + + if ((symbol = symbol_find (name))) { + + if (symbol->section ==
undefined_section) { /* Known but still undefined: this label supplies the definition. */ + + symbol->section = current_section; + symbol->frag = current_frag; + + symbol_set_value (symbol, current_frag->fixed_size); + + } else { + report_line_at (get_filename (), get_line_number (), REPORT_ERROR, start, caret, "symbol '%s' is already defined", name); + } + + } else { + + if (xstrcasecmp (name, "DGROUP") == 0 || strcmp (name, "_end") == 0 || strcmp (name, "_edata") == 0) { /* These three names are rejected as labels; symbol stays null on this path. */ + report_line_at (get_filename (), get_line_number (), REPORT_ERROR, start, caret, "symbol '%s' is already defined", name); + } else { + + symbol = symbol_create (name, current_section, current_frag->fixed_size, current_frag); + symbol_add_to_chain (symbol); + + } + + } + + return symbol; + +} + +struct symbol *symbol_make (const char *name) { /* Create an unchained symbol in undefined_section with value 0 on zero_address_frag. */ + return symbol_create (name, undefined_section, 0, &zero_address_frag); +} + +struct symbol *symbol_temp_new_now (void) { /* Create an unchained FAKE_LABEL_NAME symbol marking the current section, frag and offset. */ + return symbol_create (FAKE_LABEL_NAME, current_section, current_frag->fixed_size, current_frag); +} + +char *symbol_get_name (struct symbol *symbol) { /* Return the symbol's stored name. */ + return symbol->name; +} + +int get_symbol_snapshot (struct symbol **symbol_p, unsigned long *value_p, struct section **section_p, struct frag **frag_p) { /* Resolve the value expression of *symbol_p if needed and hand back symbol, value, section and frag through the pointers; returns 0 on success and 1 on failure. */ + + struct symbol *symbol = *symbol_p; + struct expr *expr = symbol_get_value_expression (symbol); + + if (!symbol_is_resolved (symbol) && expr->type != EXPR_TYPE_INVALID) { + + int resolved; + + if (symbol->resolving) { /* Already being resolved further up the call chain: give up instead of recursing. */ + return 1; + } + + symbol->resolving = 1; + resolved = resolve_expression (expr); + symbol->resolving = 0; + + if (resolved == 0) { + return 1; + } + + switch (expr->type) { + + case EXPR_TYPE_CONSTANT: + case EXPR_TYPE_REGISTER: + + if (!symbol_uses_other_symbol (symbol)) { + break; + } + + /* fall through.
*/ + + case EXPR_TYPE_SYMBOL: + + symbol = expr->add_symbol; /* Report the referenced symbol in place of the one passed in. */ + break; + + default: + + return 1; + + } + + } + + *value_p = expr->add_number; + *symbol_p = symbol; + + *section_p = symbol_get_section (symbol); + *frag_p = symbol_get_frag (symbol); + + if (*section_p == expr_section) { /* Map expr_section to a concrete section according to the expression type. */ + + switch (expr->type) { + + case EXPR_TYPE_CONSTANT: + + *section_p = absolute_section; + break; + + case EXPR_TYPE_REGISTER: + + *section_p = reg_section; + break; + + default: + + break; + + } + + } + + return 0; + +} + +int symbol_force_reloc (struct symbol *symbol) { /* Non-zero when the symbol is in undefined_section. */ + return symbol->section == undefined_section; +} + +int symbol_is_external (struct symbol *symbol) { /* Non-zero when SYMBOL_FLAG_EXTERNAL is set. */ + return symbol->flags & SYMBOL_FLAG_EXTERNAL; +} + +int symbol_is_resolved (struct symbol *symbol) { /* Return the symbol's resolved flag. */ + return symbol->resolved; +} + +int symbol_is_section_symbol (struct symbol *symbol) { /* Non-zero when SYMBOL_FLAG_SECTION_SYMBOL is set. */ + return symbol->flags & SYMBOL_FLAG_SECTION_SYMBOL; +} + +int symbol_is_undefined (struct symbol *symbol) { /* Non-zero when the symbol is in undefined_section. */ + return symbol->section == undefined_section; +} + +int symbol_uses_other_symbol (struct symbol *symbol) { /* Non-zero when the value expression has type EXPR_TYPE_SYMBOL. */ + return (symbol->value.type == EXPR_TYPE_SYMBOL); +} + +int symbol_uses_reloc_symbol (struct symbol *symbol) { /* Non-zero for an EXPR_TYPE_SYMBOL value that is either resolved with op_symbol set, or belongs to an undefined symbol. */ + return (symbol->value.type == EXPR_TYPE_SYMBOL && ((symbol_is_resolved (symbol) && symbol->value.op_symbol) || symbol_is_undefined (symbol))); +} + +unsigned long symbol_get_value (struct symbol *symbol) { /* Thin wrapper around symbol_resolve_value (). */ + return symbol_resolve_value (symbol); +} + +unsigned long symbol_resolve_value (struct symbol *symbol) { /* Evaluate the symbol's value expression, recursing into operand symbols; updates the symbol's section and, when finalize_symbols is set, its stored value and resolved flag. */ + + struct section *final_section = symbol_get_section (symbol); + int resolved = 0; + + unsigned long final_value = 0; + + if (symbol->resolved) { + + if (symbol->value.type == EXPR_TYPE_CONSTANT) { /* A resolved symbol whose value is not a constant yields 0 here. */ + final_value = symbol->value.add_number; + } + + return final_value; + + } + + if (symbol->resolving) { /* Re-entered while already resolving this symbol: the definition is circular. */ + + report_at (get_filename (), get_line_number (), REPORT_ERROR, "symbol definition loop encountered at '%s'", symbol_get_name (symbol)); + + final_value = 0; + resolved = 1; + + } else { + +
struct section *left_section, *right_section; + unsigned long left_value, right_value; + + int can_move_into_absolute_section; + + final_value = symbol->value.add_number; + symbol->resolving = 1; /* Guards against recursion back into this symbol; cleared again before leaving this branch. */ + + switch (symbol->value.type) { + + case EXPR_TYPE_ABSENT: + + final_value = 0; + /* fall through */ + + case EXPR_TYPE_CONSTANT: + + final_value += symbol->frag->address; /* The stored number is added to the address of the owning frag. */ + + if (final_section == expr_section) { + final_section = absolute_section; + } + + /* fall through */ + + case EXPR_TYPE_REGISTER: + + resolved = 1; + break; + + case EXPR_TYPE_SYMBOL: + + left_value = symbol_resolve_value (symbol->value.add_symbol); + left_section = symbol_get_section (symbol->value.add_symbol); + + do_symbol: /* Also reached by goto from the ADD and SUBTRACT cases once a constant operand has been folded into final_value. */ + + if (left_section == undefined_section || (finalize_symbols && final_section == expr_section && left_section != expr_section && left_section != absolute_section)) { + + if (finalize_symbols) { + + symbol->value.type = EXPR_TYPE_SYMBOL; + + symbol->value.op_symbol = symbol->value.add_symbol; + symbol->value.add_number = final_value; + + } + + final_value += symbol->frag->address + left_value; + final_section = left_section; + + resolved = symbol_is_resolved (symbol->value.add_symbol); + symbol->resolving = 0; + + goto exit_do_not_set_value; /* Bypasses symbol_set_value () so the symbol-typed expression written above is kept. */ + + } else { + + final_value += symbol->frag->address + left_value; + + if (final_section == expr_section || final_section == undefined_section) { + final_section = left_section; + } + + } + + resolved = symbol_is_resolved (symbol->value.add_symbol); + break; + + case EXPR_TYPE_LOGICAL_NOT: + case EXPR_TYPE_BIT_NOT: + case EXPR_TYPE_UNARY_MINUS: + + left_value = symbol_resolve_value (symbol->value.add_symbol); + left_section = symbol_get_section (symbol->value.add_symbol); + + if (symbol->value.type != EXPR_TYPE_LOGICAL_NOT && left_section != absolute_section && finalize_symbols) { /* Bitwise NOT and unary minus need an absolute operand; logical NOT is exempt from this check. */ + report_op_error (symbol, 0, symbol->value.type, symbol->value.add_symbol); + } + + if (final_section == expr_section || final_section == undefined_section) {
+ final_section = absolute_section; + } + + switch (symbol->value.type) { /* Apply the unary operator to the operand value. */ + + case EXPR_TYPE_LOGICAL_NOT: + + left_value = !left_value; + break; + + case EXPR_TYPE_BIT_NOT: + + left_value = ~left_value; + break; + + case EXPR_TYPE_UNARY_MINUS: + + left_value = -left_value; + break; + + default: + + break; + + } + + resolved = symbol_is_resolved (symbol->value.add_symbol); + final_value += left_value + symbol->frag->address; + + break; + + case EXPR_TYPE_LOGICAL_OR: + case EXPR_TYPE_LOGICAL_AND: + case EXPR_TYPE_EQUAL: + case EXPR_TYPE_NOT_EQUAL: + case EXPR_TYPE_LESSER: + case EXPR_TYPE_LESSER_EQUAL: + case EXPR_TYPE_GREATER: + case EXPR_TYPE_GREATER_EQUAL: + case EXPR_TYPE_ADD: + case EXPR_TYPE_SUBTRACT: + case EXPR_TYPE_BIT_INCLUSIVE_OR: + case EXPR_TYPE_BIT_EXCLUSIVE_OR: + case EXPR_TYPE_BIT_AND: + case EXPR_TYPE_MULTIPLY: + case EXPR_TYPE_DIVIDE: + case EXPR_TYPE_MODULUS: + case EXPR_TYPE_LEFT_SHIFT: + case EXPR_TYPE_RIGHT_SHIFT: + + left_value = symbol_resolve_value (symbol->value.add_symbol); + left_section = symbol_get_section (symbol->value.add_symbol); + + right_value = symbol_resolve_value (symbol->value.op_symbol); + right_section = symbol_get_section (symbol->value.op_symbol); + + if (symbol->value.type == EXPR_TYPE_ADD) { /* An absolute operand is folded into final_value and the rest is handled as a plain symbol reference at do_symbol. */ + + if (right_section == absolute_section) { + + final_value += right_value; + goto do_symbol; + + } else if (left_section == absolute_section) { + + symbol->value.add_symbol = symbol->value.op_symbol; /* The left side was the constant, so the right operand becomes the referenced symbol. */ + final_value += left_value; + + left_value = right_value; + left_section = right_section; + + goto do_symbol; + + } + + } else if (symbol->value.type == EXPR_TYPE_SUBTRACT) { /* Only an absolute right operand can be folded for subtraction. */ + + if (right_section == absolute_section) { + + final_value -= right_value; + goto do_symbol; + + } + + } + + can_move_into_absolute_section = 1; + + /** + * Equality and non-equality operations are allowed on everything. + * Subtraction and other comparison operators are allowed if both operands are in the same section.
+ * For everything else, both operands must be absolute. + * Addition and subtraction of constants is handled above. + */ + if (!(left_section == absolute_section && right_section == absolute_section) + && !(symbol->value.type == EXPR_TYPE_EQUAL || symbol->value.type == EXPR_TYPE_NOT_EQUAL) + && !((symbol->value.type == EXPR_TYPE_SUBTRACT + || symbol->value.type == EXPR_TYPE_LESSER || symbol->value.type == EXPR_TYPE_LESSER_EQUAL + || symbol->value.type == EXPR_TYPE_GREATER || symbol->value.type == EXPR_TYPE_GREATER_EQUAL) + && left_section == right_section + && (left_section != undefined_section || symbol->value.add_symbol == symbol->value.op_symbol))) + { + + if (finalize_symbols) { /* The illegal combination is an error only when finalizing; before that it just blocks the move to absolute_section. */ + report_op_error (symbol, symbol->value.add_symbol, symbol->value.type, symbol->value.op_symbol); + } else { + can_move_into_absolute_section = 0; + } + + } + + if (can_move_into_absolute_section && (final_section == expr_section || final_section == undefined_section)) { + final_section = absolute_section; + } + + if ((symbol->value.type == EXPR_TYPE_DIVIDE || symbol->value.type == EXPR_TYPE_MODULUS) && right_value == 0) { + + const char *filename; + unsigned long line_number; + + if (expr_symbol_get_filename_and_line_number (symbol, &filename, &line_number) == 0) { + report_at (filename, line_number, REPORT_ERROR, "division by zero"); + } else { + report_at (0, 0, REPORT_ERROR, "division by zero when setting '%s'", symbol_get_name (symbol)); /* NOTE(review): report_op_error passes program_name rather than 0 as the file argument in its equivalent branch - confirm which is intended. */ + } + + right_value = 1; /* Replace the zero divisor so the division or modulus below cannot trap after the error was reported. */ + + } + + switch (symbol->value.type) { /* Evaluate the binary operator; the result is left in left_value. */ + + case EXPR_TYPE_LOGICAL_OR: + + left_value = left_value || right_value; + break; + + case EXPR_TYPE_LOGICAL_AND: + + left_value = left_value && right_value; + break; + + case EXPR_TYPE_EQUAL: + case EXPR_TYPE_NOT_EQUAL: + + left_value = ((left_value == right_value && left_section == right_section && (left_section != undefined_section || symbol->value.add_symbol == symbol->value.op_symbol)) ?
~(signed long) 0 : 0); /* Comparisons yield all bits set for true and 0 for false. */ + + if (symbol->value.type == EXPR_TYPE_NOT_EQUAL) { + left_value = ~left_value; + } + + break; + + case EXPR_TYPE_LESSER: + + left_value = left_value < right_value ? ~(signed long) 0 : 0; + break; + + case EXPR_TYPE_LESSER_EQUAL: + + left_value = left_value <= right_value ? ~(signed long) 0 : 0; + break; + + case EXPR_TYPE_GREATER: + + left_value = left_value > right_value ? ~(signed long) 0 : 0; + break; + + case EXPR_TYPE_GREATER_EQUAL: + + left_value = left_value >= right_value ? ~(signed long) 0 : 0; + break; + + case EXPR_TYPE_ADD: + + left_value += right_value; + break; + + case EXPR_TYPE_SUBTRACT: + + left_value -= right_value; + break; + + case EXPR_TYPE_BIT_INCLUSIVE_OR: + + left_value |= right_value; + break; + + case EXPR_TYPE_BIT_EXCLUSIVE_OR: + + left_value ^= right_value; + break; + + case EXPR_TYPE_BIT_AND: + + left_value &= right_value; + break; + + case EXPR_TYPE_MULTIPLY: + + left_value *= right_value; + break; + + case EXPR_TYPE_DIVIDE: + + left_value /= right_value; /* right_value was forced to 1 earlier when it was zero. */ + break; + + case EXPR_TYPE_MODULUS: + + left_value %= right_value; + break; + + case EXPR_TYPE_LEFT_SHIFT: + + left_value = (unsigned long) left_value << (unsigned long) right_value; /* Shift the operand values themselves; the former bitwise NOT on both operands gave wrong results and out-of-range shift counts. NOTE(review): a count not smaller than the width of unsigned long is still undefined. */ + break; + + case EXPR_TYPE_RIGHT_SHIFT: + + left_value = (unsigned long) left_value >> (unsigned long) right_value; /* Logical right shift of the unsigned operand value. */ + break; + + default: + + break; + + } + + final_value += symbol->frag->address + left_value; + + if (final_section == expr_section || final_section == undefined_section) { /* Derive the result section from the operands when none was settled earlier. */ + + if (left_section == undefined_section || right_section == undefined_section) { + final_section = undefined_section; + } else if (left_section == absolute_section) { + final_section = right_section; + } else { + final_section = left_section; + } + + } + + resolved = (symbol_is_resolved (symbol->value.add_symbol) && symbol_is_resolved (symbol->value.op_symbol)); + break; + + default: + + report_at (__FILE__, __LINE__, REPORT_INTERNAL_ERROR, "symbol_resolve_value invalid case %i",
symbol->value.type); + exit (EXIT_FAILURE); + + } + + symbol->resolving = 0; + + } + + if (finalize_symbols) { /* When finalizing, collapse the expression into the computed constant. */ + symbol_set_value (symbol, final_value); + } + +exit_do_not_set_value: /* Entry point for the do_symbol path that has to keep its symbol-typed expression. */ + + if (finalize_symbols) { + + if (resolved) { + symbol->resolved = resolved; + } + + } + + symbol_set_section (symbol, final_section); + return final_value; + +} + +void symbol_add_to_chain (struct symbol *symbol) { /* Store symbol through the tail link of the chain, then advance the tail link to symbol's next field. */ + + *pointer_to_pointer_to_next_symbol = symbol; + pointer_to_pointer_to_next_symbol = &symbol->next; + +} + +void symbol_set_frag (struct symbol *symbol, struct frag *frag) { /* Attach the symbol to frag. */ + symbol->frag = frag; +} + +void symbol_set_external (struct symbol *symbol) { /* Set SYMBOL_FLAG_EXTERNAL on the symbol. */ + symbol->flags |= SYMBOL_FLAG_EXTERNAL; +} + +void symbol_set_section (struct symbol *symbol, struct section *section) { /* Move the symbol into section. */ + symbol->section = section; +} + +void symbol_set_value (struct symbol *symbol, unsigned long value) { /* Make the value expression a constant holding value; other expression fields are left as they were. */ + + symbol->value.type = EXPR_TYPE_CONSTANT; + symbol->value.add_number = value; + +} + +void symbol_set_value_expression (struct symbol *symbol, struct expr *expr) { /* Copy *expr into the symbol's value expression. */ + symbol->value = *expr; +} diff --git a/symbol.h b/symbol.h new file mode 100644 index 0000000..3c5760e --- /dev/null +++ b/symbol.h @@ -0,0 +1,69 @@ +/****************************************************************************** + * @file symbol.h + *****************************************************************************/ +#ifndef _SYMBOL_H +#define _SYMBOL_H + +#include "expr.h" + +#define SYMBOL_FLAG_EXTERNAL 0x01 +#define SYMBOL_FLAG_SECTION_SYMBOL 0x02 + +struct symbol { /* A named value: owning section and frag, value expression, flag bits, resolution state and chain link. */ + + char *name; + int scope; + + struct section *section; + struct frag *frag; + + struct expr value; + int flags; + + int resolved, resolving; + struct symbol *next; + +}; + +#define FAKE_LABEL_NAME "FAKE_SASM_SYMBOL" +extern struct symbol *symbols; +extern int finalize_symbols; + +#define SYMBOL_SCOPE_LOCAL 0x01 +#define SYMBOL_SCOPE_GLOBAL 0x02 +#define SYMBOL_SCOPE_EXTERN 0x03 + +struct expr *symbol_get_value_expression (struct symbol *symbol); + +char
*symbol_get_name (struct symbol *symbol); +int get_symbol_snapshot (struct symbol **symbol_p, unsigned long *value_p, struct section **section_p, struct frag **frag_p); + +struct frag *symbol_get_frag (struct symbol *symbol); +struct section *symbol_get_section (struct symbol *symbol); + +struct symbol *symbol_create (const char *name, struct section *section, unsigned long value, struct frag *frag); +struct symbol *symbol_find (const char *name); +struct symbol *symbol_find_or_make (char *name, int scope); +struct symbol *symbol_label (char *start, char *caret, char *name); +struct symbol *symbol_make (const char *name); +struct symbol *symbol_temp_new_now (void); + +int symbol_force_reloc (struct symbol *symbol); +int symbol_is_external (struct symbol *symbol); +int symbol_is_resolved (struct symbol *symbol); +int symbol_is_section_symbol (struct symbol *symbol); +int symbol_is_undefined (struct symbol *symbol); +int symbol_uses_other_symbol (struct symbol *symbol); +int symbol_uses_reloc_symbol (struct symbol *symbol); + +unsigned long symbol_get_value (struct symbol *symbol); +unsigned long symbol_resolve_value (struct symbol *symbol); + +void symbol_add_to_chain (struct symbol *symbol); +void symbol_set_frag (struct symbol *symbol, struct frag *frag); +void symbol_set_external (struct symbol *symbol); +void symbol_set_section (struct symbol *symbol, struct section *section); +void symbol_set_value (struct symbol *symbol, unsigned long value); +void symbol_set_value_expression (struct symbol *symbol, struct expr *expr); + +#endif /* _SYMBOL_H */ diff --git a/vector.c b/vector.c new file mode 100644 index 0000000..3984039 --- /dev/null +++ b/vector.c @@ -0,0 +1,54 @@ +/****************************************************************************** + * @file vector.c + *****************************************************************************/ +#include <stddef.h> +#include <stdlib.h> + +#include "vector.h" + +extern void *xrealloc (void *ptr, unsigned int size); /* Declared locally; parameter names no longer use identifiers reserved for the implementation. */ + +int
vec_adjust (struct vector *vec, int length) { /* Grow vec->data until capacity exceeds length; always returns 0. */ + + while (vec->capacity <= length) { /* Loop so that a length far beyond the current capacity is still covered; one pass suffices for vec_push (). */ + + if (vec->capacity == 0) { + vec->capacity = 16; + } else { + vec->capacity <<= 1; + } + + vec->data = xrealloc (vec->data, sizeof (*(vec->data)) * vec->capacity); + + } + + return 0; + +} + +void *vec_pop (struct vector *vec) { /* Remove and return the last element, or NULL when vec is null or empty. */ + + if (vec == NULL) { + return NULL; + } + + if (vec->length == 0) { + return NULL; + } + + return vec->data[--vec->length]; + +} + +int vec_push (struct vector *vec, void *elem) { /* Append elem, growing the storage first when needed; returns 0, or the non-zero result of vec_adjust (). */ + + int ret; + + if ((ret = vec_adjust (vec, vec->length)) != 0) { + return ret; + } + + vec->data[vec->length++] = elem; + return 0; + +} diff --git a/vector.h b/vector.h new file mode 100644 index 0000000..29d957a --- /dev/null +++ b/vector.h @@ -0,0 +1,19 @@ +/****************************************************************************** + * @file vector.h + *****************************************************************************/ +#ifndef _VECTOR_H +#define _VECTOR_H + +struct vector { /* Growable array of void pointers: length elements in use out of capacity allocated slots. */ + + void **data; + int capacity, length; + +}; + +int vec_adjust (struct vector *vec, int length); +int vec_push (struct vector *vec, void *elem); + +void *vec_pop (struct vector *vec); + +#endif /* _VECTOR_H */